This page lists ready-to-run Shogun examples for the C++ libshogun interface.
To run the examples, you need to compile them manually via
g++ name_of_example.cpp -lshogun
If you installed libshogun to a nonstandard directory, you will need to specify the appropriate include and library paths, e.g.
g++ -I/path/to/libshogun/includes name_of_example.cpp -L/path/to/libshogun/sofile -lshogun
The compiled examples are standard binary executables and can be started via
./name_of_example
or, if the libraries are in nonstandard locations (such that they cannot be found by the dynamic linker), via
LD_LIBRARY_PATH=path/to/libshogun ./name_of_example
/*
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 3 of the License, or
 * (at your option) any later version.
 *
 * Written (W) 2011 Heiko Strathmann
 * Copyright (C) 2011 Berlin Institute of Technology and Max-Planck-Society
 */

#include <shogun/base/init.h>
#include <shogun/base/ParameterMap.h>

using namespace shogun;

/* Writes a message verbatim to the given stream; registered in main() as
 * shogun's message, warning and error printer */
void print_message(FILE* target, const char* str)
{
	fprintf(target, "%s", str);
}

/* Prints the key, followed by the element the map returns for that key, or
 * "no element" if the lookup returns NULL */
void print_value(SGParamInfo* key, ParameterMap* map)
{
	SGParamInfo* mapped=map->get(key);

	key->print_param_info();
	SG_SPRINT("value: ");

	if (mapped)
	{
		mapped->print_param_info();
	}
	else
	{
		SG_SPRINT("no element\n");
	}

	SG_SPRINT("\n");
}

/* Builds a temporary key from the given description, prints what the map
 * holds for it and deletes the key again */
static void lookup_and_print(ParameterMap* map, const char* name,
		EContainerType ctype, EStructType stype, EPrimitiveType ptype)
{
	SGParamInfo* key=new SGParamInfo(name, ctype, stype, ptype);
	print_value(key, map);
	delete key;
}

int main(int argc, char **argv)
{
	init_shogun(&print_message, &print_message, &print_message);

	ParameterMap* map=new ParameterMap();

	/* type description shared by all keys ("from") and all values ("to") */
	EContainerType cfrom=CT_SCALAR;
	EContainerType cto=CT_MATRIX;

	EStructType sfrom=ST_NONE;
	EStructType sto=ST_STRING;

	EPrimitiveType pfrom=PT_BOOL;
	EPrimitiveType pto=PT_SGOBJECT;

	/* insert the pairs in unsorted key order on purpose */
	const char* key_names[]={"2", "1", "4", "3"};
	const char* value_names[]={"zwei", "eins", "vier", "drei"};

	for (int32_t i=0; i<4; ++i)
	{
		map->put(new SGParamInfo(key_names[i], cfrom, sfrom, pfrom),
				new SGParamInfo(value_names[i], cto, sto, pto));
	}

	SG_SPRINT("before finalization:\n");
	map->print_map();

	map->finalize_map();

	SG_SPRINT("\n\nafter finalization:\n");
	map->print_map();

	SG_SPRINT("\n\ntesting map\n");

	/* keys that were inserted */
	lookup_and_print(map, "1", cfrom, sfrom, pfrom);
	lookup_and_print(map, "2", cfrom, sfrom, pfrom);

	/* same name as an inserted key, but one type field differs each time */
	lookup_and_print(map, "2", cto, sfrom, pfrom);
	lookup_and_print(map, "2", cfrom, sto, pfrom);
	lookup_and_print(map, "2", cfrom, sfrom, pto);

	/* name that was never inserted */
	lookup_and_print(map, "5", cfrom, sfrom, pfrom);

	delete map;

	exit_shogun();

	return 0;
}
#include <shogun/base/init.h>

using namespace shogun;

/* Smallest possible libshogun program: initialise the library and shut it
 * down again without doing any work in between */
int main(int argc, char** argv)
{
	init_shogun();

	exit_shogun();

	return 0;
}
/*
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 3 of the License, or
 * (at your option) any later version.
 *
 * Written (W) 2008-2009 Soeren Sonnenburg
 * Copyright (C) 2008-2009 Fraunhofer Institute FIRST and Max Planck Society
 */

#include <shogun/kernel/GaussianKernel.h>
#include <shogun/features/SimpleFeatures.h>
#include <shogun/classifier/svm/LibSVM.h>
#include <shogun/mathematics/Math.h>
#include <shogun/lib/common.h>
#include <shogun/base/init.h>

#include <stdlib.h>
#include <stdio.h>

using namespace shogun;

/* number of examples, dimension of each example, per-class offset */
#define NUM 100
#define DIMS 2
#define DIST 0.5

/* labels (NUM entries) and feature matrix (DIMS*NUM entries, one example
 * after the other); both are filled by gen_rand_data() */
float64_t* lab;
float64_t* feat;

/* Allocates and fills the global lab/feat arrays: the first half of the
 * examples gets label -1 and random values shifted by +DIST, the second half
 * gets label +1 and random values shifted by -DIST. Displays both arrays. */
void gen_rand_data()
{
	lab=SG_MALLOC(float64_t, NUM);
	feat=SG_MALLOC(float64_t, NUM*DIMS);

	for (int32_t i=0; i<NUM; i++)
	{
		const bool first_half=(i<NUM/2);
		const float64_t shift=first_half ? DIST : -DIST;

		lab[i]=first_half ? -1.0 : 1.0;

		for (int32_t j=0; j<DIMS; j++)
			feat[i*DIMS+j]=CMath::random(0.0,1.0)+shift;
	}

	CMath::display_vector(lab,NUM);
	CMath::display_matrix(feat,DIMS, NUM);
}

/* Trains a LibSVM with a Gaussian kernel on the random data and prints the
 * outputs for the training examples */
int main()
{
	const int32_t feature_cache=0;
	const int32_t kernel_cache=0;
	const float64_t rbf_width=10;
	const float64_t svm_C=10;
	const float64_t svm_eps=0.001;

	init_shogun();

	gen_rand_data();

	// training labels, wrapped around the global label array
	CLabels* train_labels=new CLabels(SGVector<float64_t>(lab, NUM));
	SG_REF(train_labels);

	// training features, wrapped around the global feature array
	CSimpleFeatures<float64_t>* train_features=
		new CSimpleFeatures<float64_t>(feature_cache);
	SG_REF(train_features);
	train_features->set_feature_matrix(feat, DIMS, NUM);

	// gaussian kernel on the training features
	CGaussianKernel* gauss_kernel=new CGaussianKernel(kernel_cache, rbf_width);
	SG_REF(gauss_kernel);
	gauss_kernel->init(train_features, train_features);

	// svm via libsvm, then train
	CLibSVM* machine=new CLibSVM(svm_C, gauss_kernel, train_labels);
	SG_REF(machine);
	machine->set_epsilon(svm_eps);
	machine->train();

	printf("num_sv:%d b:%f\n", machine->get_num_support_vectors(),
			machine->get_bias());

	// classify + display output
	CLabels* out_labels=machine->apply();

	for (int32_t idx=0; idx<NUM; idx++)
		printf("out[%d]=%f\n", idx, out_labels->get_label(idx));

	SG_UNREF(train_labels);
	SG_UNREF(out_labels);
	SG_UNREF(gauss_kernel);
	SG_UNREF(train_features);
	SG_UNREF(machine);

	exit_shogun();

	return 0;
}
#include <shogun/features/Labels.h>
#include <shogun/features/SimpleFeatures.h>
#include <shogun/kernel/GaussianKernel.h>
#include <shogun/classifier/svm/LibSVM.h>
#include <shogun/base/init.h>
#include <shogun/lib/common.h>
#include <shogun/io/SGIO.h>

using namespace shogun;

/* Writes a message verbatim to the given stream; registered in main() as
 * shogun's message printer */
void print_message(FILE* target, const char* str)
{
	fprintf(target, "%s", str);
}

/* Trains a LibSVM on three 2-dimensional vectors and prints the output for
 * each of them */
int main(int argc, char** argv)
{
	init_shogun(&print_message);

	// raw data: the values 0..5
	const int32_t num_values=6;
	float64_t* matrix=SG_MALLOC(float64_t, num_values);

	for (int32_t idx=0; idx<num_values; idx++)
		matrix[idx]=idx;

	// three 2-dimensional vectors
	// shogun will now own the matrix created
	CSimpleFeatures<float64_t>* features=new CSimpleFeatures<float64_t>();
	features->set_feature_matrix(matrix, 2, 3);

	// three labels
	CLabels* labels=new CLabels(3);
	labels->set_label(0, -1);
	labels->set_label(1, +1);
	labels->set_label(2, -1);

	// gaussian kernel with cache 10MB, width 0.5
	CGaussianKernel* kernel=new CGaussianKernel(10, 0.5);
	kernel->init(features, features);

	// libsvm with C=10, then train
	CLibSVM* svm=new CLibSVM(10, kernel, labels);
	svm->train();

	// classify on training examples
	for (int32_t idx=0; idx<3; idx++)
	{
		SG_SPRINT("output[%d]=%f\n", idx, svm->apply(idx));
	}

	// free up memory
	SG_UNREF(svm);

	exit_shogun();

	return 0;
}
/* * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 3 of the License, or * (at your option) any later version. * * Written (W) 2009 Alexander Binder * Copyright (C) 2009 Fraunhofer Institute FIRST and Max-Planck-Society */ #include <iostream> #include <shogun/io/SGIO.h> #include <shogun/lib/ShogunException.h> #include <shogun/kernel/CustomKernel.h> #include <shogun/kernel/CombinedKernel.h> #include <shogun/classifier/mkl/MKLMultiClass.h> // g++ -Wall -O3 classifier_mklmulticlass.cpp -I /home/theseus/private/alx/shoguntrunk/compiledtmp/include -L/home/theseus/private/alx/shoguntrunk/compiledtmp/lib -lshogun using namespace shogun; void print_message(FILE* target, const char* str) { fprintf(target, "%s", str); } void print_warning(FILE* target, const char* str) { fprintf(target, "%s", str); } void print_error(FILE* target, const char* str) { fprintf(target, "%s", str); } void getgauss(float64_t & y1, float64_t & y2) { float x1, x2, w; do { x1 = 2.0 * rand()/(float64_t)RAND_MAX - 1.0; x2 = 2.0 * rand()/(float64_t)RAND_MAX - 1.0; w = x1 * x1 + x2 * x2; } while ( (w >= 1.0)|| (w<1e-9) ); w = sqrt( (-2.0 * log( w ) ) / w ); y1 = x1 * w; y2 = x2 * w; } void gendata(std::vector<float64_t> & x,std::vector<float64_t> & y, CLabels*& lab) { int32_t totalsize=240; int32_t class1size=80; int32_t class2size=70; //generating three class data set x.resize(totalsize); y.resize(totalsize); for(size_t i=0; i< x.size();++i) getgauss(x[i], y[i]); for(size_t i=0; i< x.size();++i) { if((int32_t)i < class1size) { x[i]+=0; y[i]+=0; } else if( (int32_t)i< class1size+class2size) { x[i]+=+1; y[i]+=-1; } else { x[i]+=-1; y[i]+=+1; } } //set labels lab=new CLabels(x.size()); for(size_t i=0; i< x.size();++i) { if((int32_t)i < class1size) lab->set_int_label(i,0); else if( (int32_t)i< class1size+class2size) lab->set_int_label(i,1); else lab->set_int_label(i,2); 
} } void gentrainkernel(float64_t * & ker1 ,float64_t * & ker2, float64_t * & ker3 ,float64_t & autosigma,float64_t & n1,float64_t & n2, float64_t & n3, const std::vector<float64_t> & x, const std::vector<float64_t> & y) { autosigma=0; for(size_t l=0; l< x.size();++l) { for(size_t r=0; r<= l;++r) { float64_t dist=((x[l]-x[r])*(x[l]-x[r]) + (y[l]-y[r])*(y[l]-y[r])); autosigma+=dist*2.0/(float64_t)x.size()/((float64_t)x.size()+1); } } float64_t fm1=0, mean1=0,fm2=0, mean2=0,fm3=0, mean3=0; ker1=SG_MALLOC(float64_t, x.size()*x.size()); ker2=SG_MALLOC(float64_t, x.size()*x.size()); ker3=SG_MALLOC(float64_t, x.size()*x.size()); for(size_t l=0; l< x.size();++l) { for(size_t r=0; r< x.size();++r) { float64_t dist=((x[l]-x[r])*(x[l]-x[r]) + (y[l]-y[r])*(y[l]-y[r])); ker1[l +r*x.size()]= exp( -dist/autosigma/autosigma) ; //ker2[l +r*x.size()]= exp( -dist/sigma2/sigma2) ; ker2[l +r*x.size()]= x[l]*x[r] + y[l]*y[r]; ker3[l +r*x.size()]= (x[l]*x[r] + y[l]*y[r]+1)*(x[l]*x[r] + y[l]*y[r]+1); fm1+=ker1[l +r*x.size()]/(float64_t)x.size()/((float64_t)x.size()); fm2+=ker2[l +r*x.size()]/(float64_t)x.size()/((float64_t)x.size()); fm3+=ker3[l +r*x.size()]/(float64_t)x.size()/((float64_t)x.size()); if(l==r) { mean1+=ker1[l +r*x.size()]/(float64_t)x.size(); mean2+=ker2[l +r*x.size()]/(float64_t)x.size(); mean3+=ker3[l +r*x.size()]/(float64_t)x.size(); } } } n1=(mean1-fm1); n2=(mean2-fm2); n3=(mean3-fm3); for(size_t l=0; l< x.size();++l) { for(size_t r=0; r< x.size();++r) { ker1[l +r*x.size()]=ker1[l +r*x.size()]/n1; ker2[l +r*x.size()]=ker2[l +r*x.size()]/n2; ker3[l +r*x.size()]=ker3[l +r*x.size()]/n3; } } } void gentestkernel(float64_t * & ker1 ,float64_t * & ker2,float64_t * & ker3, const float64_t autosigma,const float64_t n1,const float64_t n2, const float64_t n3, const std::vector<float64_t> & x,const std::vector<float64_t> & y, const std::vector<float64_t> & tx,const std::vector<float64_t> & ty) { ker1=SG_MALLOC(float64_t, x.size()*tx.size()); ker2=SG_MALLOC(float64_t, 
x.size()*tx.size()); ker3=SG_MALLOC(float64_t, x.size()*tx.size()); for(size_t l=0; l< x.size();++l) { for(size_t r=0; r< tx.size();++r) { float64_t dist=((x[l]-tx[r])*(x[l]-tx[r]) + (y[l]-ty[r])*(y[l]-ty[r])); ker1[l +r*x.size()]= exp( -dist/autosigma/autosigma) ; ker2[l +r*x.size()]= x[l]*tx[r] + y[l]*ty[r]; ker3[l +r*x.size()]= (x[l]*tx[r] + y[l]*ty[r]+1)*(x[l]*tx[r] + y[l]*ty[r]+1); } } for(size_t l=0; l< x.size();++l) { for(size_t r=0; r< tx.size();++r) { ker1[l +r*x.size()]=ker1[l +r*x.size()]/n1; ker2[l +r*x.size()]=ker2[l +r*x.size()]/n2; ker3[l +r*x.size()]=ker3[l +r*x.size()]/n2; } } } void tester() { CLabels* lab=NULL; std::vector<float64_t> x,y; gendata(x,y, lab); SG_REF(lab); float64_t* ker1=NULL; float64_t* ker2=NULL; float64_t* ker3=NULL; float64_t autosigma=1; float64_t n1=0; float64_t n2=0; float64_t n3=0; int32_t numdata=0; gentrainkernel( ker1 , ker2, ker3 , autosigma, n1, n2, n3,x,y); numdata=x.size(); CCombinedKernel* ker=new CCombinedKernel(); CCustomKernel* kernel1=new CCustomKernel(); CCustomKernel* kernel2=new CCustomKernel(); CCustomKernel* kernel3=new CCustomKernel(); kernel1->set_full_kernel_matrix_from_full(SGMatrix<float64_t>(ker1, numdata,numdata)); kernel2->set_full_kernel_matrix_from_full(SGMatrix<float64_t>(ker2, numdata,numdata)); kernel3->set_full_kernel_matrix_from_full(SGMatrix<float64_t>(ker3, numdata,numdata)); ker->append_kernel(kernel1); ker->append_kernel(kernel2); ker->append_kernel(kernel3); //here comes the core stuff float64_t regconst=1.0; CMKLMultiClass* tsvm =new CMKLMultiClass(regconst, ker, lab); tsvm->set_epsilon(0.0001); // SVM epsilon // MKL parameters tsvm->set_mkl_epsilon(0.01); // subkernel weight L2 norm termination criterion tsvm->set_max_num_mkliters(120); // well it will be just three iterations tsvm->set_mkl_norm(1.5); // mkl norm //starting svm training tsvm->train(); SG_SPRINT("finished svm training\n"); //starting svm testing on training data CLabels* res=tsvm->apply(); ASSERT(res); float64_t err=0; 
for(int32_t i=0; i<numdata;++i) { ASSERT(i< res->get_num_labels()); if (lab->get_int_label(i)!=res->get_int_label(i)) err+=1; } err/=(float64_t)res->get_num_labels(); SG_SPRINT("prediction error on training data (3 classes): %f ",err); SG_SPRINT("random guess error would be: %f \n",2/3.0); SG_FREE(ker1); SG_FREE(ker2); SG_FREE(ker3); //generate test data CLabels* tlab=NULL; std::vector<float64_t> tx,ty; gendata( tx,ty,tlab); SG_REF(tlab); float64_t* tker1=NULL; float64_t* tker2=NULL; float64_t* tker3=NULL; gentestkernel(tker1,tker2,tker3, autosigma, n1,n2,n3, x,y, tx,ty); int32_t numdatatest=tx.size(); CCombinedKernel* tker=new CCombinedKernel(); SG_REF(tker); CCustomKernel* tkernel1=new CCustomKernel(); CCustomKernel* tkernel2=new CCustomKernel(); CCustomKernel* tkernel3=new CCustomKernel(); tkernel1->set_full_kernel_matrix_from_full(SGMatrix<float64_t>(tker1,numdata, numdatatest)); tkernel2->set_full_kernel_matrix_from_full(SGMatrix<float64_t>(tker2,numdata, numdatatest)); tkernel3->set_full_kernel_matrix_from_full(SGMatrix<float64_t>(tker2,numdata, numdatatest)); tker->append_kernel(tkernel1); tker->append_kernel(tkernel2); tker->append_kernel(tkernel3); int32_t numweights; float64_t* weights=tsvm->getsubkernelweights(numweights); SG_SPRINT("test kernel weights\n"); for(int32_t i=0; i< numweights;++i) SG_SPRINT("%f ", weights[i]); SG_SPRINT("\n"); //set kernel tker->set_subkernel_weights(weights, numweights); tsvm->set_kernel(tker); //compute classification error, check mem CLabels* tres=tsvm->apply(); float64_t terr=0; for(int32_t i=0; i<numdatatest;++i) { ASSERT(i< tres->get_num_labels()); if(tlab->get_int_label(i)!=tres->get_int_label(i)) terr+=1; } terr/=(float64_t) tres->get_num_labels(); SG_SPRINT("prediction error on test data (3 classes): %f ",terr); SG_SPRINT("random guess error would be: %f \n",2/3.0); SG_FREE(tker1); SG_FREE(tker2); SG_FREE(tker3); SG_UNREF(tsvm); SG_UNREF(res); SG_UNREF(tres); SG_UNREF(lab); SG_UNREF(tlab); SG_UNREF(tker); 
SG_FREE(weights); weights=NULL; SG_SPRINT( "finished \n"); } namespace shogun { extern Version* sg_version; extern SGIO* sg_io; } int main() { init_shogun(&print_message, &print_warning, &print_error); try { sg_version->print_version(); sg_io->set_loglevel(MSG_INFO); tester(); } catch(ShogunException & sh) { printf("%s",sh.get_exception_string()); } exit_shogun(); return 0; }
/*
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 3 of the License, or
 * (at your option) any later version.
 *
 * Written (W) 2011 Heiko Strathmann
 * Copyright (C) 2011 Berlin Institute of Technology and Max-Planck-Society
 */

#include <shogun/base/init.h>
#include <shogun/evaluation/CrossValidation.h>
#include <shogun/evaluation/ContingencyTableEvaluation.h>
#include <shogun/evaluation/StratifiedCrossValidationSplitting.h>
#include <shogun/modelselection/GridSearchModelSelection.h>
#include <shogun/modelselection/ModelSelectionParameters.h>
#include <shogun/modelselection/ParameterCombination.h>
#include <shogun/features/Labels.h>
#include <shogun/features/SimpleFeatures.h>
#include <shogun/clustering/KMeans.h>
#include <shogun/distance/EuclidianDistance.h>
#include <shogun/distance/MinkowskiMetric.h>

using namespace shogun;

/* Writes a message verbatim to the given stream; registered in main() as
 * shogun's message, warning and error printer */
void print_message(FILE* target, const char* str)
{
	fprintf(target, "%s", str);
}

/* Samples points around random cluster centers, runs KMeans on them and
 * prints the assigned cluster indices next to learned and real centers */
int main(int argc, char **argv)
{
	init_shogun(&print_message, &print_message, &print_message);

	int32_t num_clusters=4;
	int32_t num_features=11;
	int32_t dim_features=3;
	int32_t num_vectors_per_cluster=5;
	float64_t cluster_std_dev=2.0;

	/* build random cluster centers */
	SGMatrix<float64_t> cluster_centers(dim_features, num_clusters);
	CMath::random_vector(cluster_centers.matrix, dim_features*num_clusters,
			-10.0, 10.0);
	CMath::display_matrix(cluster_centers.matrix, cluster_centers.num_rows,
			cluster_centers.num_cols, "cluster centers");

	/* create data around clusters: the vectors of cluster c occupy one
	 * contiguous range of columns, vector v of that range starts at
	 * (c*num_vectors_per_cluster+v)*dim_features */
	SGMatrix<float64_t> data(dim_features,
			num_clusters*num_vectors_per_cluster);

	for (index_t c=0; c<num_clusters; ++c)
	{
		const index_t cluster_start=c*dim_features*num_vectors_per_cluster;

		for (index_t d=0; d<dim_features; ++d)
		{
			for (index_t v=0; v<num_vectors_per_cluster; ++v)
			{
				const index_t idx=cluster_start+d+v*dim_features;
				const float64_t center=
					cluster_centers.matrix[c*dim_features+d];

				data.matrix[idx]=CMath::normal_random(center,
						cluster_std_dev);
			}
		}
	}

	/* create features, SG_REF to avoid deletion */
	CSimpleFeatures<float64_t>* features=new CSimpleFeatures<float64_t> ();
	features->set_feature_matrix(data);
	SG_REF(features);

	/* create labels with alternating sign; they are not handed to the
	 * clustering below (note: num_features entries, not one per vector) */
	CLabels* labels=new CLabels(num_features);
	for (index_t i=0; i<num_features; ++i)
	{
		const float64_t sign=(i%2==0) ? 1 : -1;
		labels->set_label(i, sign);
	}

	/* create distance */
	CEuclidianDistance* distance=new CEuclidianDistance(features, features);

	/* create distance machine */
	CKMeans* clustering=new CKMeans(num_clusters, distance);
	clustering->train(features);

	/* build clusters */
	CLabels* result=clustering->apply();
	const index_t num_results=result->get_num_labels();
	for (index_t i=0; i<num_results; ++i)
	{
		SG_SPRINT("cluster index of vector %i: %f\n", i,
				result->get_label(i));
	}

	/* print cluster centers */
	CSimpleFeatures<float64_t>* centers=
		(CSimpleFeatures<float64_t>*)distance->get_lhs();

	SGMatrix<float64_t> centers_matrix=centers->get_feature_matrix();

	CMath::display_matrix(centers_matrix.matrix, centers_matrix.num_rows,
			centers_matrix.num_cols, "learned centers");

	CMath::display_matrix(cluster_centers.matrix, cluster_centers.num_rows,
			cluster_centers.num_cols, "real centers");

	/* clean up */
	SG_UNREF(result);
	SG_UNREF(centers);
	SG_UNREF(clustering);
	SG_UNREF(labels);
	SG_UNREF(features);

	cluster_centers.destroy_matrix();

	exit_shogun();

	return 0;
}
/*
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 3 of the License, or
 * (at your option) any later version.
 *
 * Written (W) 2011 Heiko Strathmann
 * Copyright (C) 2011 Berlin Institute of Technology and Max-Planck-Society
 */

#include <shogun/base/init.h>
#include <shogun/features/SimpleFeatures.h>
#include <shogun/features/Subset.h>

using namespace shogun;

/* Writes a message verbatim to the given stream; registered in main() as
 * shogun's message, warning and error printer */
void print_message(FILE* target, const char* str)
{
	fprintf(target, "%s", str);
}

/* Sets a subset on simple features, copies a subset of that subset and
 * asserts that the copy matches the corresponding original columns */
int main(int argc, char **argv)
{
	init_shogun(&print_message, &print_message, &print_message);

	/* 3x10 matrix, filled with 1, 2, 3, ... after the features were built
	 * on top of it */
	SGMatrix<float64_t> data(3, 10);
	CSimpleFeatures<float64_t>* f=new CSimpleFeatures<float64_t>(data);
	CMath::range_fill_vector(data.matrix, data.num_cols*data.num_rows, 1.0);
	CMath::display_matrix(data.matrix, data.num_rows, data.num_cols,
			"original feature data");

	/* subset indices offset_subset, offset_subset+1, ... (8 of them) */
	index_t offset_subset=1;
	SGVector<index_t> feature_subset(8);
	CMath::range_fill_vector(feature_subset.vector, feature_subset.vlen,
			offset_subset);
	CMath::display_vector(feature_subset.vector, feature_subset.vlen,
			"feature subset");

	f->set_subset(new CSubset(feature_subset));
	SG_SPRINT("feature vectors after setting subset on original data:\n");
	for (index_t v=0; v<f->get_num_vectors(); ++v)
	{
		SGVector<float64_t> vec=f->get_feature_vector(v);
		SG_SPRINT("%i: ", v);
		CMath::display_vector(vec.vector, vec.vlen);
		f->free_feature_vector(vec, v);
	}

	/* indices to copy, relative to the active subset */
	index_t offset_copy=2;
	SGVector<index_t> feature_copy_subset(4);
	CMath::range_fill_vector(feature_copy_subset.vector,
			feature_copy_subset.vlen, offset_copy);
	CMath::display_vector(feature_copy_subset.vector,
			feature_copy_subset.vlen, "indices that are to be copied");

	CSimpleFeatures<float64_t>* subset_copy=
		(CSimpleFeatures<float64_t>*)f->copy_subset(feature_copy_subset);

	SGMatrix<float64_t> subset_copy_matrix=subset_copy->get_feature_matrix();
	CMath::display_matrix(subset_copy_matrix.matrix,
			subset_copy_matrix.num_rows, subset_copy_matrix.num_cols,
			"copy matrix");

	/* the copy has to equal the original data, shifted by both offsets
	 * (counted in whole columns) */
	const index_t num_its=
		subset_copy_matrix.num_rows*subset_copy_matrix.num_cols;
	const index_t shift=
		(offset_copy+offset_subset)*subset_copy_matrix.num_rows;

	for (index_t pos=0; pos<num_its; ++pos)
	{
		ASSERT(subset_copy_matrix.matrix[pos]==data.matrix[pos+shift]);
	}

	SG_UNREF(f);
	SG_UNREF(subset_copy);
	SG_FREE(feature_copy_subset.vector);

	SG_SPRINT("\nEND\n");

	exit_shogun();

	return 0;
}
/*
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 3 of the License, or
 * (at your option) any later version.
 *
 * Written (W) 2011 Heiko Strathmann
 * Copyright (C) 2011 Berlin Institute of Technology and Max-Planck-Society
 */

#include <shogun/base/init.h>
#include <shogun/features/SparseFeatures.h>
#include <shogun/features/Subset.h>

using namespace shogun;

/* Writes a message verbatim to the given stream; registered in main() as
 * shogun's message, warning and error printer */
void print_message(FILE* target, const char* str)
{
	fprintf(target, "%s", str);
}

/* Prints vec_index and the first num_entries entries of every sparse vector
 * of the given features object */
static void print_sparse_vectors(CSparseFeatures<float64_t>* feats,
		index_t num_entries)
{
	for (index_t v=0; v<feats->get_num_vectors(); ++v)
	{
		SGSparseVector<float64_t> vec=feats->get_sparse_feature_vector(v);

		SG_SPRINT("sparse vector at %i: ", vec.vec_index);
		for (index_t e=0; e<num_entries; ++e)
		{
			SG_SPRINT("%f, ", vec.features[e].entry);
		}
		SG_SPRINT("]\n");

		feats->free_sparse_feature_vector(vec, v);
	}
}

/* Sets a subset on sparse features, copies a subset of that subset and
 * asserts that entries and feature indices of the copy match the original */
int main(int argc, char **argv)
{
	init_shogun(&print_message, &print_message, &print_message);

	index_t num_vectors=10;
	index_t num_features=3;

	/* create some sparse data */
	SGSparseMatrix<float64_t> data=
		SGSparseMatrix<float64_t>(num_vectors, num_features);

	for (index_t v=0; v<num_vectors; ++v)
	{
		/* put elements only at even indices */
		data.sparse_matrix[v]=SGSparseVector<float64_t>(num_features, 2*v);

		/* fill */
		for (index_t e=0; e<num_features; ++e)
		{
			data.sparse_matrix[v].features[e].entry=v+e;
			data.sparse_matrix[v].features[e].feat_index=3*e;
		}
	}

	CSparseFeatures<float64_t>* f=new CSparseFeatures<float64_t>(data);

	/* display sparse matrix */
	SG_SPRINT("original data\n");
	for (index_t v=0; v<num_vectors; ++v)
	{
		SG_SPRINT("sparse vector at %i: [", data.sparse_matrix[v].vec_index);
		for (index_t e=0; e<num_features; ++e)
		{
			SG_SPRINT("%f, ", data.sparse_matrix[v].features[e].entry);
		}
		SG_SPRINT("]\n");
	}

	/* indices for a subset */
	index_t offset_subset=1;
	SGVector<index_t> feature_subset(8);
	CMath::range_fill_vector(feature_subset.vector, feature_subset.vlen,
			offset_subset);
	CMath::display_vector(feature_subset.vector, feature_subset.vlen,
			"feature subset");

	/* set subset and print data */
	f->set_subset(new CSubset(feature_subset));
	SG_SPRINT("feature vectors after setting subset on original data:\n");
	print_sparse_vectors(f, num_features);

	/* indices that are to copy */
	index_t offset_copy=2;
	SGVector<index_t> feature_copy_subset(4);
	CMath::range_fill_vector(feature_copy_subset.vector,
			feature_copy_subset.vlen, offset_copy);
	CMath::display_vector(feature_copy_subset.vector,
			feature_copy_subset.vlen, "indices that are to be copied");

	/* copy a subset of features */
	CSparseFeatures<float64_t>* subset_copy=
		(CSparseFeatures<float64_t>*)f->copy_subset(feature_copy_subset);

	/* print copied subset */
	SG_SPRINT("copied features:\n");
	print_sparse_vectors(subset_copy, num_features);

	/* test if all elements are copied correctly */
	for (index_t v=0; v<subset_copy->get_num_vectors(); ++v)
	{
		SGSparseVector<float64_t> vec=
			subset_copy->get_sparse_feature_vector(v);

		/* position of the same vector in the original data */
		const index_t orig=v+offset_copy+offset_subset;

		for (index_t e=0; e<vec.num_feat_entries; ++e)
		{
			float64_t copied_entry=vec.features[e].entry;
			float64_t orig_entry=data.sparse_matrix[orig].features[e].entry;
			index_t copied_idx=vec.features[e].feat_index;
			index_t orig_idx=data.sparse_matrix[orig].features[e].feat_index;

			ASSERT(copied_entry==orig_entry);
			ASSERT(copied_idx==orig_idx);
		}

		subset_copy->free_sparse_feature_vector(vec, v);
	}

	SG_UNREF(f);
	SG_UNREF(subset_copy);
	feature_copy_subset.destroy_vector();

	exit_shogun();

	return 0;
}
/*
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 3 of the License, or
 * (at your option) any later version.
 *
 * Written (W) 2011 Heiko Strathmann
 * Copyright (C) 2011 Berlin Institute of Technology and Max-Planck-Society
 */

#include <shogun/base/init.h>
#include <shogun/features/SimpleFeatures.h>
#include <shogun/features/StringFeatures.h>
#include <shogun/features/Subset.h>

using namespace shogun;

/* Writes a message verbatim to the given stream; registered in main() as
 * shogun's message, warning and error printer */
void print_message(FILE* target, const char* str)
{
	fprintf(target, "%s", str);
}

/* Builds random uppercase strings, sets a subset on string features built
 * from them, copies a subset of that subset and asserts that the copied
 * strings match the corresponding original ones */
int main(int argc, char **argv)
{
	init_shogun(&print_message, &print_message, &print_message);

	index_t num_strings=10;
	index_t max_string_length=20;
	index_t min_string_length=max_string_length/2;

	SGStringList<char> strings(num_strings, max_string_length);

	SG_SPRINT("original string data:\n");
	for (index_t i=0; i<num_strings; ++i)
	{
		index_t len=CMath::random(min_string_length, max_string_length);
		SGString<char> current(len);

		SG_SPRINT("[%i]: \"", i);

		/* fill with random uppercase letters (ASCII); the strings are not
		 * 0-terminated, so each letter is printed on its own via %c
		 * (no temporary buffer needed) */
		for (index_t j=0; j<len; ++j)
		{
			current.string[j]=(char)CMath::random('A', 'Z');
			SG_SPRINT("%c", current.string[j]);
		}

		SG_SPRINT("\"\n");

		strings.strings[i]=current;
	}

	/* create string features from the random strings */
	CStringFeatures<char>* f=new CStringFeatures<char>(strings, ALPHANUM);

	/* subset indices offset_subset, offset_subset+1, ... (8 of them) */
	index_t offset_subset=1;
	SGVector<index_t> feature_subset(8);
	CMath::range_fill_vector(feature_subset.vector, feature_subset.vlen,
			offset_subset);
	CMath::display_vector(feature_subset.vector, feature_subset.vlen,
			"feature subset");

	f->set_subset(new CSubset(feature_subset));
	SG_SPRINT("feature vectors after setting subset on original data:\n");
	for (index_t i=0; i<f->get_num_vectors(); ++i)
	{
		SGVector<char> vec=f->get_feature_vector(i);

		SG_SPRINT("%i: ", i);
		for (index_t j=0; j<vec.vlen; ++j)
			SG_SPRINT("%c", vec.vector[j]);
		SG_SPRINT("\n");

		f->free_feature_vector(vec.vector, i, vec.do_free);
	}

	/* indices to copy, relative to the active subset */
	index_t offset_copy=2;
	SGVector<index_t> feature_copy_subset(4);
	CMath::range_fill_vector(feature_copy_subset.vector,
			feature_copy_subset.vlen, offset_copy);
	CMath::display_vector(feature_copy_subset.vector,
			feature_copy_subset.vlen, "indices that are to be copied");

	CStringFeatures<char>* subset_copy=
		(CStringFeatures<char>*)f->copy_subset(feature_copy_subset);

	/* print copied strings */
	for (index_t i=0; i<subset_copy->get_num_vectors(); ++i)
	{
		SGVector<char> vec=subset_copy->get_feature_vector(i);

		SG_SPRINT("%i: ", i);
		for (index_t j=0; j<vec.vlen; ++j)
			SG_SPRINT("%c", vec.vector[j]);
		SG_SPRINT("\n");

		subset_copy->free_feature_vector(vec.vector, i, vec.do_free);
	}

	/* every copied string has to equal the original string whose index is
	 * shifted by both offsets */
	for (index_t i=0; i<subset_copy->get_num_vectors(); ++i)
	{
		SGVector<char> vec=subset_copy->get_feature_vector(i);
		const index_t offset_idx=i+(offset_copy+offset_subset);

		for (index_t j=0; j<vec.vlen; ++j)
			ASSERT(vec.vector[j]==strings.strings[offset_idx].string[j]);

		subset_copy->free_feature_vector(vec.vector, i, vec.do_free);
	}

	SG_UNREF(f);
	SG_UNREF(subset_copy);
	SG_FREE(feature_copy_subset.vector);

	exit_shogun();

	return 0;
}
/*
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 3 of the License, or
 * (at your option) any later version.
 *
 * Written (W) 2011 Heiko Strathmann
 * Copyright (C) 2011 Berlin Institute of Technology and Max-Planck-Society
 */

#include <shogun/base/init.h>
#include <shogun/features/Labels.h>
#include <shogun/mathematics/Math.h>

using namespace shogun;

/* Writes a message verbatim to the given stream; registered in main() as
 * shogun's message, warning and error printer */
void print_message(FILE* target, const char* str)
{
	fprintf(target, "%s", str);
}

const int32_t num_labels=10;
const int32_t num_classes=3;

/* Applies a random subset to a labels object, checks size and contents
 * against the full label vector, then removes the subset and checks again */
int main(int argc, char **argv)
{
	init_shogun(&print_message, &print_message, &print_message);

	/* random subset size between 1 and num_labels */
	const int32_t num_subset_idx=CMath::random(1, num_labels);

	/* create labels */
	CLabels* labels=new CLabels(num_labels);
	for (index_t pos=0; pos<num_labels; ++pos)
	{
		labels->set_label(pos, pos%num_classes);
	}
	SG_REF(labels);

	/* print labels */
	SGVector<float64_t> labels_data=labels->get_labels();
	CMath::display_vector(labels_data.vector, labels_data.vlen, "labels");

	/* create subset indices */
	SGVector<index_t> subset_idx(CMath::randperm(num_subset_idx),
			num_subset_idx);

	/* print subset indices */
	CMath::display_vector(subset_idx.vector, subset_idx.vlen,
			"subset indices");

	/* apply subset to features */
	SG_SPRINT("\n\n-------------------\n"
			"applying subset to features\n"
			"-------------------\n");
	labels->set_subset(new CSubset(subset_idx));

	/* do some stuff do check and output */
	ASSERT(labels->get_num_labels()==num_subset_idx);
	SG_SPRINT("labels->get_num_labels(): %d\n", labels->get_num_labels());

	for (index_t pos=0; pos<labels->get_num_labels(); ++pos)
	{
		float64_t label=labels->get_label(pos);
		SG_SPRINT("label %f:\n", label);

		/* compare against the full data at the converted index */
		const index_t full_pos=labels->subset_idx_conversion(pos);
		ASSERT(label==labels_data.vector[full_pos]);
	}

	/* remove features subset */
	SG_SPRINT("\n\n-------------------\n"
			"removing subset from features\n"
			"-------------------\n");
	labels->remove_subset();

	ASSERT(labels->get_num_labels()==num_labels);
	SG_SPRINT("labels->get_num_labels(): %d\n", labels->get_num_labels());

	for (index_t pos=0; pos<labels->get_num_labels(); ++pos)
	{
		float64_t label=labels->get_label(pos);
		SG_SPRINT("label %f:\n", label);
		ASSERT(label==labels_data.vector[pos]);
	}

	SG_UNREF(labels);

	SG_SPRINT("\nEND\n");

	exit_shogun();

	return 0;
}
/*
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 3 of the License, or
 * (at your option) any later version.
 *
 * Written (W) 2011 Heiko Strathmann
 * Copyright (C) 2011 Berlin Institute of Technology and Max-Planck-Society
 */

#include <shogun/base/init.h>
#include <shogun/features/SimpleFeatures.h>
#include <shogun/features/Subset.h>

using namespace shogun;

/* Writes a message verbatim to the given stream; registered in main() as
 * shogun's message, warning and error printer */
void print_message(FILE* target, const char* str)
{
	fprintf(target, "%s", str);
}

/* Transposes the given features twice and asserts that every vector of the
 * result equals the corresponding vector of the input */
void check_transposed(CSimpleFeatures<int32_t>* features)
{
	CSimpleFeatures<int32_t>* transposed=features->get_transposed();
	CSimpleFeatures<int32_t>* double_transposed=transposed->get_transposed();

	for (index_t i=0; i<features->get_num_vectors(); ++i)
	{
		SGVector<int32_t> orig_vec=features->get_feature_vector(i);
		SGVector<int32_t> new_vec=double_transposed->get_feature_vector(i);

		ASSERT(orig_vec.vlen==new_vec.vlen);

		for (index_t j=0; j<orig_vec.vlen; j++)
			ASSERT(orig_vec.vector[j]==new_vec.vector[j]);

		/* not necessary since feature matrix is in memory. for documentation */
		features->free_feature_vector(orig_vec,i);
		double_transposed->free_feature_vector(new_vec, i);
	}

	SG_UNREF(transposed);
	SG_UNREF(double_transposed);
}

const int32_t num_vectors=6;
const int32_t dim_features=6;

/* Applies a random subset to simple features, checks size, transposition and
 * contents against the raw matrix, then removes the subset and checks again */
int main(int argc, char **argv)
{
	init_shogun(&print_message, &print_message, &print_message);

	/* random subset size between 1 and num_vectors */
	const int32_t num_subset_idx=CMath::random(1, num_vectors);

	/* create feature data matrix */
	SGMatrix<int32_t> data(dim_features, num_vectors);

	/* fill matrix with random data; element j of vector i is stored at
	 * i*dim_features+j */
	for (index_t i=0; i<num_vectors; ++i)
	{
		for (index_t j=0; j<dim_features; ++j)
			data.matrix[i*dim_features+j]=CMath::random(-5, 5);
	}

	/* create simple features */
	CSimpleFeatures<int32_t>* features=new CSimpleFeatures<int32_t> (data);
	SG_REF(features);

	/* print feature matrix */
	CMath::display_matrix(data.matrix, data.num_rows, data.num_cols,
			"feature matrix");

	/* create subset indices */
	SGVector<index_t> subset_idx(CMath::randperm(num_subset_idx),
			num_subset_idx);

	/* print subset indices */
	CMath::display_vector(subset_idx.vector, subset_idx.vlen, "subset indices");

	/* apply subset to features */
	SG_SPRINT("\n\n-------------------\n"
			"applying subset to features\n"
			"-------------------\n");
	features->set_subset(new CSubset(subset_idx));

	/* do some stuff do check and output */
	ASSERT(features->get_num_vectors()==num_subset_idx);

	/* check get_Transposed method */
	SG_SPRINT("checking transpose...");
	check_transposed(features);
	SG_SPRINT("does work\n");

	SG_SPRINT("features->get_num_vectors(): %d\n", features->get_num_vectors());

	for (index_t i=0; i<features->get_num_vectors(); ++i)
	{
		SGVector<int32_t> vec=features->get_feature_vector(i);

		SG_SPRINT("vector %d: ", i);
		CMath::display_vector(vec.vector, vec.vlen);

		/* a vector starts at (its index)*dim_features in the raw matrix,
		 * matching the fill loop above; the stride used to be num_vectors,
		 * which only worked because both constants are 6 */
		const index_t start=features->subset_idx_conversion(i)*dim_features;

		for (index_t j=0; j<dim_features; ++j)
			ASSERT(vec.vector[j]==data.matrix[start+j]);

		/* not necessary since feature matrix is in memory. for documentation */
		features->free_feature_vector(vec, i);
	}

	/* remove features subset */
	SG_SPRINT("\n\n-------------------\n"
			"removing subset from features\n"
			"-------------------\n");
	features->remove_subset();

	/* do some stuff do check and output */
	ASSERT(features->get_num_vectors()==num_vectors);
	SG_SPRINT("features->get_num_vectors(): %d\n", features->get_num_vectors());

	/* check get_Transposed method */
	SG_SPRINT("checking transpose...");
	check_transposed(features);
	SG_SPRINT("does work\n");

	for (index_t i=0; i<features->get_num_vectors(); ++i)
	{
		SGVector<int32_t> vec=features->get_feature_vector(i);

		SG_SPRINT("vector %d: ", i);
		CMath::display_vector(vec.vector, vec.vlen);

		/* same column-major addressing as above */
		const index_t start=features->subset_idx_conversion(i)*dim_features;

		for (index_t j=0; j<dim_features; ++j)
			ASSERT(vec.vector[j]==data.matrix[start+j]);

		/* not necessary since feature matrix is in memory. for documentation */
		features->free_feature_vector(vec, i);
	}

	SG_UNREF(features);

	SG_SPRINT("\nEND\n");

	exit_shogun();

	return 0;
}
/*
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 3 of the License, or
 * (at your option) any later version.
 *
 * Written (W) 2011 Heiko Strathmann
 * Copyright (C) 2011 Berlin Institute of Technology and Max-Planck-Society
 */

#include <shogun/base/init.h>
#include <shogun/features/SparseFeatures.h>
#include <shogun/features/Subset.h>

using namespace shogun;

/** message callback handed to init_shogun(); writes str to target verbatim
 *
 * @param target stream to write to
 * @param str message text
 */
void print_message(FILE* target, const char* str)
{
    fprintf(target, "%s", str);
}

/* number of feature vectors (matrix columns) and their dimension (rows) */
const int32_t num_vectors=6;
const int32_t dim_features=6;

/** transposes the features twice and asserts that the result equals the
 * input, entry by entry
 *
 * @param features features to check (with whatever subset is active)
 */
void check_transposed(CSparseFeatures<int32_t>* features)
{
    CSparseFeatures<int32_t>* transposed=features->get_transposed();
    CSparseFeatures<int32_t>* double_transposed=transposed->get_transposed();

    for (index_t i=0; i<features->get_num_vectors(); ++i)
    {
        SGSparseVector<int32_t> orig_vec=features->get_sparse_feature_vector(i);
        SGSparseVector<int32_t> new_vec=
                double_transposed->get_sparse_feature_vector(i);

        /* iterate over the entries the vectors actually store rather than
         * assuming each of them holds dim_features entries */
        ASSERT(orig_vec.num_feat_entries==new_vec.num_feat_entries);

        for (index_t j=0; j<orig_vec.num_feat_entries; j++)
            ASSERT(orig_vec.features[j].entry==new_vec.features[j].entry);

        /* not necessary since feature matrix is in memory.
         * for documentation */
        features->free_sparse_feature_vector(orig_vec, i);
        double_transposed->free_sparse_feature_vector(new_vec, i);
    }

    SG_UNREF(transposed);
    SG_UNREF(double_transposed);
}

/** prints every currently visible sparse vector and asserts that its entries
 * match the corresponding column of the dense data matrix
 *
 * @param features features to print and verify
 * @param data dense matrix the features were created from
 */
static void print_and_verify_vectors(CSparseFeatures<int32_t>* features,
        const SGMatrix<int32_t>& data)
{
    for (index_t i=0; i<features->get_num_vectors(); ++i)
    {
        SGSparseVector<int32_t> vec=features->get_sparse_feature_vector(i);

        SG_SPRINT("sparse_vector[%d]=", i);
        for (index_t j=0; j<vec.num_feat_entries; ++j)
        {
            SG_SPRINT("%d", vec.features[j].entry);
            if (j<vec.num_feat_entries-1)
                SG_SPRINT(",");
        }
        SG_SPRINT("\n");

        /* one dense vector spans dim_features consecutive matrix entries, so
         * the stride is dim_features, not num_vectors (equal only by
         * coincidence here). Comparing by position presumes no entry was
         * dropped as zero; the data is drawn from [1,9] */
        const index_t column_start=
                features->subset_idx_conversion(i)*dim_features;

        for (index_t j=0; j<vec.num_feat_entries; ++j)
            ASSERT(vec.features[j].entry==data.matrix[column_start+j]);

        features->free_sparse_feature_vector(vec, i);
    }
}

int main(int argc, char **argv)
{
    init_shogun(&print_message, &print_message, &print_message);

    const int32_t num_subset_idx=CMath::random(1, num_vectors);

    /* create feature data matrix */
    SGMatrix<int32_t> data(dim_features, num_vectors);

    /* fill matrix with random, strictly positive data */
    for (index_t i=0; i<num_vectors*dim_features; ++i)
        data.matrix[i]=CMath::random(1, 9);

    /* create sparse features */
    CSparseFeatures<int32_t>* features=new CSparseFeatures<int32_t>(data);

    /* print dense feature matrix */
    CMath::display_matrix(data.matrix, data.num_rows, data.num_cols,
            "dense feature matrix");

    /* create subset indices */
    SGVector<index_t> subset_idx(CMath::randperm(num_subset_idx),
            num_subset_idx);

    /* print subset indices */
    CMath::display_vector(subset_idx.vector, subset_idx.vlen, "subset indices");

    /* apply subset to features */
    SG_SPRINT("\n-------------------\n"
            "applying subset to features\n"
            "-------------------\n");
    features->set_subset(new CSubset(subset_idx));

    /* only the subset must be visible now */
    ASSERT(features->get_num_vectors()==num_subset_idx);
    SG_SPRINT("features->get_num_vectors(): %d\n", features->get_num_vectors());

    /* check get_transposed method */
    SG_SPRINT("checking transpose...");
    check_transposed(features);
    SG_SPRINT("does work\n");

    print_and_verify_vectors(features, data);

    /* remove features subset */
    SG_SPRINT("\n-------------------\n"
            "removing subset from features\n"
            "-------------------\n");
    features->remove_subset();

    /* all vectors must be visible again */
    ASSERT(features->get_num_vectors()==num_vectors);
    SG_SPRINT("features->get_num_vectors(): %d\n", features->get_num_vectors());

    /* check get_transposed method */
    SG_SPRINT("checking transpose...");
    check_transposed(features);
    SG_SPRINT("does work\n");

    print_and_verify_vectors(features, data);

    SG_UNREF(features);
    SG_FREE(data.matrix);

    exit_shogun();

    return 0;
}
#include <shogun/features/SimpleFeatures.h>
#include <shogun/kernel/GaussianKernel.h>
#include <shogun/base/init.h>
#include <shogun/lib/common.h>
#include <shogun/io/SGIO.h>

#include <stdio.h>

using namespace shogun;

/** message callback handed to init_shogun(); writes str to target verbatim */
void print_message(FILE* target, const char* str)
{
    fprintf(target, "%s", str);
}

/** minimal example: build three 2-dimensional vectors and print their
 * Gaussian kernel matrix */
int main(int argc, char** argv)
{
    init_shogun(&print_message);

    const int32_t num_dims=2;
    const int32_t num_vecs=3;

    /* raw values 0..5, laid out as three 2-dimensional vectors */
    float64_t* values=SG_MALLOC(float64_t, 6);
    for (int32_t k=0; k<6; k++)
        values[k]=k;

    /* the feature object takes over the buffer from here on */
    CSimpleFeatures<float64_t>* features=new CSimpleFeatures<float64_t>();
    features->set_feature_matrix(values, num_dims, num_vecs);

    /* Gaussian kernel on the features against themselves:
     * cache size 10 (MB), width 0.5 */
    CGaussianKernel* gauss=new CGaussianKernel(features, features, 10, 0.5);

    /* dump the 3x3 kernel matrix, one row per line */
    for (int32_t row=0; row<num_vecs; row++)
    {
        for (int32_t col=0; col<num_vecs; col++)
            SG_SPRINT("%f ", gauss->kernel(row,col));

        SG_SPRINT("\n");
    }

    /* release the kernel */
    SG_UNREF(gauss);

    exit_shogun();
    return 0;
}
#include <shogun/features/SimpleFeatures.h>
#include <shogun/kernel/DotKernel.h>
#include <shogun/base/init.h>
#include <shogun/lib/common.h>
#include <shogun/io/SGIO.h>

#include <stdio.h>

using namespace shogun;

/** Example of a user-defined kernel: a dot product in which the right-hand
 * vector is traversed back to front, i.e. sum_i a[i]*b[len-1-i]. */
class CReverseLinearKernel : public CDotKernel
{
public:
    /** default constructor, passes 0 on to the CDotKernel constructor */
    CReverseLinearKernel() : CDotKernel(0)
    {
    }

    /** destructor */
    virtual ~CReverseLinearKernel()
    {
    }

    /** attach features to the kernel
     *
     * @param l left-hand side features
     * @param r right-hand side features
     * @return result of init_normalizer()
     */
    virtual bool init(CFeatures* l, CFeatures* r)
    {
        CDotKernel::init(l, r);
        return init_normalizer();
    }

    /** loading of init data is not supported
     *
     * @param src file to load from (unused)
     * @return always false
     */
    virtual bool load_init(FILE* src)
    {
        return false;
    }

    /** saving of init data is not supported
     *
     * @param dest file to save to (unused)
     * @return always false
     */
    virtual bool save_init(FILE* dest)
    {
        return false;
    }

    /** kernel type identifier
     *
     * @return K_UNKNOWN, since this kernel is not officially part of shogun
     */
    virtual EKernelType get_kernel_type()
    {
        return K_UNKNOWN;
    }

    /** kernel name
     *
     * @return the string "ReverseLinear"
     */
    inline virtual const char* get_name() const
    {
        return "ReverseLinear";
    }

protected:
    /** evaluate the kernel for one pair of feature vectors
     *
     * @param idx_a index of the vector in the left-hand side features
     * @param idx_b index of the vector in the right-hand side features
     * @return sum over i of a[i]*b[len-1-i]
     */
    virtual float64_t compute(int32_t idx_a, int32_t idx_b)
    {
        CSimpleFeatures<float64_t>* left=(CSimpleFeatures<float64_t>*) lhs;
        CSimpleFeatures<float64_t>* right=(CSimpleFeatures<float64_t>*) rhs;

        int32_t len_a;
        int32_t len_b;
        bool free_a;
        bool free_b;

        float64_t* vec_a=left->get_feature_vector(idx_a, len_a, free_a);
        float64_t* vec_b=right->get_feature_vector(idx_b, len_b, free_b);

        ASSERT(len_a==len_b);

        /* walk vec_a forwards and vec_b backwards at the same time */
        float64_t sum=0;
        for (int32_t fwd=0, rev=len_a-1; fwd<len_a; fwd++, rev--)
            sum+=vec_a[fwd]*vec_b[rev];

        left->free_feature_vector(vec_a, idx_a, free_a);
        right->free_feature_vector(vec_b, idx_b, free_b);

        return sum;
    }
};

/** message callback handed to init_shogun(); writes str to target verbatim */
void print_message(FILE* target, const char* str)
{
    fprintf(target, "%s", str);
}

/** builds three 2-dimensional vectors and prints their reverse-linear
 * kernel matrix */
int main(int argc, char** argv)
{
    init_shogun(&print_message);

    /* raw values 0..5, laid out as three 2-dimensional vectors */
    float64_t* values=SG_MALLOC(float64_t, 6);
    for (int32_t k=0; k<6; k++)
        values[k]=k;

    /* the feature object takes over the buffer from here on */
    CSimpleFeatures<float64_t>* features=new CSimpleFeatures<float64_t>();
    features->set_feature_matrix(values, 2, 3);

    /* the custom kernel, evaluated on the features against themselves */
    CReverseLinearKernel* reverse_kernel=new CReverseLinearKernel();
    reverse_kernel->init(features,features);

    /* dump the 3x3 kernel matrix, one row per line */
    for (int32_t row=0; row<3; row++)
    {
        for (int32_t col=0; col<3; col++)
            SG_SPRINT("%f ", reverse_kernel->kernel(row,col));

        SG_SPRINT("\n");
    }

    /* release the kernel */
    SG_UNREF(reverse_kernel);

    exit_shogun();
    return 0;
}
/*
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 3 of the License, or
 * (at your option) any later version.
 *
 * Written (W) 2009 Soeren Sonnenburg
 * Copyright (C) 2009 Fraunhofer Institute FIRST and Max-Planck-Society
 */

#include <shogun/base/init.h>
#include <shogun/io/SGIO.h>
#include <shogun/lib/Time.h>
#include <shogun/lib/ShogunException.h>
#include <shogun/mathematics/Math.h>
#include <shogun/lib/DynInt.h>

#include <stdio.h>

using namespace shogun;

/** message callback handed to init_shogun(); writes str to target verbatim */
void print_message(FILE* target, const char* str)
{
    fprintf(target, "%s", str);
}

/** warning callback handed to init_shogun(); writes str to target verbatim */
void print_warning(FILE* target, const char* str)
{
    fprintf(target, "%s", str);
}

/** error callback handed to init_shogun(); writes str to target verbatim */
void print_error(FILE* target, const char* str)
{
    fprintf(target, "%s", str);
}

/** combines two CMath::random() draws into one 64 bit value
 *
 * The draws are made in separate statements so that it is well defined which
 * of them ends up in the upper half.
 *
 * @return first draw shifted left by 32 bits, or'ed with the second draw
 */
static uint64_t random_uint64()
{
    const uint64_t high=CMath::random();
    const uint64_t low=CMath::random();

    return (high << 32) | low;
}

/** allocates and fills the key and index arrays used for sorting
 *
 * The random generator is seeded with 17. Both arrays are filled from the
 * last element towards the first.
 *
 * @param a receives a newly allocated array of len random 256 bit integers;
 *        to be released with SG_FREE by the caller
 * @param b receives a newly allocated array of len indices;
 *        to be released with SG_FREE by the caller
 * @param len number of elements to generate
 */
void gen_ints(uint256_t* &a, uint32_t* &b, uint32_t len)
{
    a=SG_MALLOC(uint256_t, len);
    b=SG_MALLOC(uint32_t, len);

    CMath::init_random(17);

    for (uint32_t i=0; i<len; i++)
    {
        uint64_t r[4];
        for (int32_t k=0; k<4; k++)
            r[k]=random_uint64();

        a[len-i-1]=r;
        b[len-i-1]=i;
    }
}

const int LEN = 5*1024*1024;

/** times generating and index-sorting LEN 256 bit integers, then exercises
 * assignment, swap and the comparison operators of uint256_t */
int main()
{
    init_shogun(&print_message, &print_warning, &print_error);

    try
    {
        uint256_t* a=NULL;
        uint32_t* b=NULL;

        /* t lives inside this scope, so it is gone before exit_shogun() */
        CTime t;
        t.io->set_loglevel(MSG_DEBUG);

        SG_SPRINT("gen data..");
        t.start();
        gen_ints(a,b, LEN);
        t.cur_time_diff(true);

        SG_SPRINT("qsort..");
        t.start();
        CMath::qsort_index(a, b, LEN);
        t.cur_time_diff(true);

        SG_SPRINT("\n\n");
        for (uint32_t i=0; i<10; i++)
        {
            SG_SPRINT("a[%u]=", i);
            a[i].print_hex();
            SG_SPRINT("\n");
        }

        SG_SPRINT("\n\n");

        /* named arrays instead of a compound literal: compound literals are
         * a GNU extension in C++ and yield a temporary array there */
        uint64_t first[4]={1,2,3,4};
        uint64_t val[4]={5,6,7,8};
        a[0]=first;
        a[1]=val;
        a[2]=a[0];
        CMath::swap(a[0],a[1]);

        printf("a[0]==a[1] %d\n", (int) (a[0] == a[1]));
        printf("a[0]<a[1] %d\n", (int) (a[0] < a[1]));
        printf("a[0]<=a[1] %d\n", (int) (a[0] <= a[1]));
        printf("a[0]>a[1] %d\n", (int) (a[0] > a[1]));
        printf("a[0]>=a[1] %d\n", (int) (a[0] >= a[1]));

        printf("a[0]==a[0] %d\n", (int) (a[0] == a[0]));
        printf("a[0]<a[0] %d\n", (int) (a[0] < a[0]));
        printf("a[0]<=a[0] %d\n", (int) (a[0] <= a[0]));
        printf("a[0]>a[0] %d\n", (int) (a[0] > a[0]));
        printf("a[0]>=a[0] %d\n", (int) (a[0] >= a[0]));

        SG_SPRINT("\n\n");
        for (uint32_t i=0; i<10 ; i++)
        {
            SG_SPRINT("a[%u]=", i);
            a[i].print_hex();
            printf("\n");
        }

        SG_FREE(a);
        SG_FREE(b);
    }
    catch(ShogunException & sh)
    {
        SG_SPRINT("%s",sh.get_exception_string());
    }

    exit_shogun();

    return 0;
}
#include <shogun/lib/FibonacciHeap.h>

#include <stdio.h>

using namespace shogun;

/** inserts eight (index, key) pairs into a Fibonacci heap, extracts them all
 * again and reports any extracted key that does not match the inserted one */
int main(int argc, char** argv)
{
    init_shogun();

    double v[8] = {0.0,0.1,0.2,0.3,0.4,0.5,0.6,0.7};
    int k[8] = {0,1,2,3,4,5,6,7};

    CFibonacciHeap* heap = new CFibonacciHeap(8);

    /* fill the heap */
    for (int pos=0; pos<8; pos++)
        heap->insert(k[pos],v[pos]);

    /* empty the heap again; extract_min() returns the index and writes the
     * key into its argument */
    int min_index;
    double min_key;
    for (int round=0; round<8; round++)
    {
        min_index = heap->extract_min(min_key);

        /* the key must be the one that was inserted under this index */
        if (v[min_index]!=min_key)
            printf("Fibonacci heap goes wrong.\n");
    }

    delete heap;

    exit_shogun();
    return 0;
}
#include <shogun/base/init.h>
#include <shogun/lib/common.h>
#include <shogun/lib/GCArray.h>
#include <shogun/kernel/Kernel.h>
#include <shogun/kernel/GaussianKernel.h>

#include <stdio.h>

using namespace shogun;

/* number of kernels stored in the array */
const int l=10;

/** stores l Gaussian kernels in a garbage collecting array and prints their
 * addresses */
int main(int argc, char** argv)
{
    init_shogun();

    /* Own scope: the array (and with it the kernels it holds) is destroyed
     * before exit_shogun() runs instead of at the end of main().
     * NOTE(review): assumes shogun objects must not outlive exit_shogun() --
     * confirm against init.h */
    {
        // create array of kernels
        CGCArray<CKernel*> kernels(l);

        for (int i=0; i<l; i++)
            kernels.set(new CGaussianKernel(10, 1.0), i);

        for (int i=0; i<l; i++)
        {
            /* CGCArray is documented to increase the reference count on
             * read, so the reference obtained here is handed back once the
             * address is printed -- verify against GCArray.h */
            CKernel* kernel=kernels.get(i);

            /* %p expects a void pointer */
            printf("kernels[%d]=%p\n", i, (void*) kernel);

            SG_UNREF(kernel);
        }
    }

    exit_shogun();
    return 0;
}
#include <shogun/lib/Hash.h>

#include <stdio.h>

using namespace shogun;

/** prints MurmurHash2 values of several byte ranges of a four byte array,
 * followed by the incremental hash after each of the four bytes */
int main(int argc, char** argv)
{
    init_shogun();

    uint8_t array[4]={0,1,2,3};

    /* one-shot hashes; every call uses the seed 0xDEADBEAF */
    const uint32_t hash_first_byte=CHash::MurmurHash2(&array[0], 1, 0xDEADBEAF);
    printf("hash(0)=%0x\n", hash_first_byte);

    const uint32_t hash_second_byte=CHash::MurmurHash2(&array[1], 1, 0xDEADBEAF);
    printf("hash(1)=%0x\n", hash_second_byte);

    const uint32_t hash_two_bytes=CHash::MurmurHash2(&array[0], 2, 0xDEADBEAF);
    printf("hash(2)=%0x\n", hash_two_bytes);

    const uint32_t hash_all_bytes=CHash::MurmurHash2(&array[0], 4, 0xDEADBEAF);
    printf("hash(3)=%0x\n", hash_all_bytes);

    /* incremental hash: each step feeds the previous result back in */
    uint32_t running=CHash::IncrementalMurmurHash2(array[0], 0xDEADBEAF);
    printf("inc_hash(0)=%0x\n", running);

    running=CHash::IncrementalMurmurHash2(array[1], running);
    printf("inc_hash(1)=%0x\n", running);

    running=CHash::IncrementalMurmurHash2(array[2], running);
    printf("inc_hash(2)=%0x\n", running);

    running=CHash::IncrementalMurmurHash2(array[3], running);
    printf("inc_hash(3)=%0x\n", running);

    exit_shogun();
    return 0;
}
#include <shogun/lib/HashSet.h>

#include <stdio.h>

using namespace shogun;

/** creates a hash set and inserts eight (index, value) pairs, every value
 * occurring twice */
int main(int argc, char** argv)
{
    init_shogun();

    double values[8] = {0.0,0.0,0.1,0.1,0.2,0.2,0.3,0.3};

    CHashSet* hash_set = new CHashSet(8);

    /* insert each value under its array position */
    int pos=0;
    while (pos<8)
    {
        hash_set->insert_key(pos,values[pos]);
        pos++;
    }

    delete hash_set;

    exit_shogun();
    return 0;
}
#include <shogun/base/init.h>
#include <shogun/lib/common.h>
#include <shogun/lib/memory.h>
#include <shogun/lib/IndirectObject.h>
#include <shogun/mathematics/Math.h>
#include <shogun/base/SGObject.h>

#include <stdio.h>

using namespace shogun;

/* number of array elements */
const int l=10;

/** sorts an array of indirect objects that refer to the integer array a,
 * printing both arrays before and after the sort */
int main(int argc, char** argv)
{
    init_shogun();

    // create array a, filled with l, l-1, ..., 1
    int32_t* a=SG_MALLOC(int32_t, l);
    for (int i=0; i<l; i++)
        a[i]=l-i;

    typedef CIndirectObject<int32_t, int32_t**> INDIRECT;

    // create array of indirect objects pointing to array a
    INDIRECT::set_array(&a);
    INDIRECT* x = SG_MALLOC(INDIRECT, l);
    INDIRECT::init_slice(x, l);

    printf("created array a and indirect object array x pointing to a.\n\n");
    for (int i=0; i<l; i++)
        printf("a[%d]=%d x[%d]=%d\n", i, a[i], i, int32_t(x[i]));

    // sort the indirect objects only
    CMath::qsort(x, l);

    printf("\n\nvoila! sorted indirect object array x, keeping a const.\n\n");
    for (int i=0; i<l; i++)
        printf("a[%d]=%d x[%d]=%d\n", i, a[i], i, int32_t(x[i]));

    // both arrays came from SG_MALLOC and are not handed to anyone else,
    // so they have to be released here
    SG_FREE(x);
    SG_FREE(a);

    exit_shogun();
    return 0;
}
/*
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 3 of the License, or
 * (at your option) any later version.
 *
 * Written (W) 2011 Sergey Lisitsyn
 * Copyright (C) 2011 Berlin Institute of Technology and Max-Planck-Society
 */

#include <shogun/base/init.h>
#include <shogun/lib/config.h>
#include <shogun/mathematics/arpack.h>

using namespace shogun;

/** runs the ARPACK wrapper twice on a symmetric 100x100 matrix
 *
 * Without HAVE_ARPACK the program only initialises and shuts down shogun.
 *
 * @return 0 on success, -1 if one of the wrapper calls reports a non-zero
 *         status
 */
int main(int argc, char** argv)
{
    init_shogun();

    /* exit code; every path below ends in the common cleanup, so buffers are
     * released and exit_shogun() is called on failure as well */
    int result = 0;

#ifdef HAVE_ARPACK
    int N = 100;
    int nev = 2;

    double* matrix = new double[N*N];
    double* eigenvalues = new double[nev];
    double* eigenvectors = new double[nev*N];

    /* symmetric test matrix with entries i*i+j*j */
    for (int i=0; i<N; i++)
    {
        for (int j=0; j<N; j++)
            matrix[i*N+j] = i*i+j*j;
    }

    int status = 0;

    /* first run: which="LM", mode 1 */
    arpack_dsaeupd_wrap(matrix, NULL, N, nev, "LM", 1, false, 0.0, 0.0,
                        eigenvalues, eigenvectors, status);

    /* second run: which="BE", mode 3; skipped if the first run failed */
    if (status==0)
    {
        arpack_dsaeupd_wrap(matrix, NULL, N, nev, "BE", 3, false, 1.0, 0.0,
                            eigenvalues, eigenvectors, status);
    }

    if (status!=0)
        result = -1;

    delete[] eigenvalues;
    delete[] eigenvectors;
    delete[] matrix;
#endif // HAVE_ARPACK

    exit_shogun();
    return result;
}
/*
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 3 of the License, or
 * (at your option) any later version.
 *
 * Written (W) 2011 Heiko Strathmann
 * Copyright (C) 2011 Berlin Institute of Technology and Max-Planck-Society
 */

#include <shogun/base/init.h>
#include <shogun/mathematics/Statistics.h>
#include <shogun/mathematics/Math.h>

using namespace shogun;

/** message callback handed to init_shogun(); writes str to target verbatim */
void print_message(FILE* target, const char* str)
{
    fprintf(target, "%s", str);
}

/** computes and prints a confidence interval for the mean of a ten element
 * sample */
int main(int argc, char **argv)
{
    init_shogun(&print_message, &print_message, &print_message);

    /* sample of ten values, filled by range_fill_vector with start 1.0 */
    SGVector<float64_t> samples(10, true);
    CMath::range_fill_vector(samples.vector, samples.vlen, 1.0);

    /* error probability handed to the interval computation */
    float64_t alpha=0.05;

    /* interval borders are written by confidence_intervals_mean() */
    float64_t lower;
    float64_t upper;
    float64_t sample_mean=CStatistics::confidence_intervals_mean(samples, alpha,
            lower, upper);

    SG_SPRINT("sample mean: %f. True mean lies in [%f,%f] with %f%%\n",
            sample_mean, lower, upper, 100*(1-alpha));

    samples.free_vector();

    SG_SPRINT("\nEND\n");

    exit_shogun();

    return 0;
}
/*
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 3 of the License, or
 * (at your option) any later version.
 *
 * Written (W) 2011 Sergey Lisitsyn
 * Copyright (C) 2011 Berlin Institute of Technology and Max-Planck-Society
 */

#include <shogun/base/init.h>
#include <shogun/lib/config.h>
#include <shogun/mathematics/lapack.h>

#include <stdio.h>
#include <vector>

using namespace shogun;

#ifdef HAVE_LAPACK
/** fills an N x N buffer with the symmetric entries i*i+j*j
 *
 * @param matrix buffer of at least N*N elements
 * @param N matrix dimension
 */
static void fill_symmetric(double* matrix, int N)
{
    for (int i=0; i<N; i++)
    {
        for (int j=0; j<N; j++)
            matrix[i*N+j] = i*i+j*j;
    }
}

/** calls the DSYGVX, DGEQRF+DORGQR, DGESVD and DSYEV wrappers in turn
 *
 * All buffers are std::vectors, so they are released on every return path.
 *
 * @return 0 if every routine reported status 0, -1 after the first failure
 */
static int run_lapack_routines()
{
    // size of square matrix
    const int N = 100;

    // square matrix
    std::vector<double> matrix(N*N);

    // for storing eigenpairs
    std::vector<double> eigenvalues(N);
    std::vector<double> eigenvectors(N*N);

    // for SVD (buffer sizes as in the original example)
    std::vector<double> U(N*N);
    std::vector<double> s(N*N);
    std::vector<double> Vt(N*N);

    // for QR
    std::vector<double> tau(N);

    // status (should be zero)
    int status;

    // DSYGVX
    for (int i=0; i<N; i++)
    {
        // integer division: |i-j| < i+j+1 always holds, so every quotient
        // truncates to 0 and the matrix ends up as 10 on the diagonal only
        for (int j=0; j<N; j++)
            matrix[i*N+j] = (i-j)/(i+j+1);

        matrix[i*N+i] += 10;
    }
    status = 0;
    wrap_dsygvx(1,'V','U',N,&matrix[0],N,&matrix[0],N,1,3,
                &eigenvalues[0],&eigenvectors[0],&status);
    if (status!=0)
    {
        printf("DSYGVX failed with code %d\n",status);
        return -1;
    }

    // DGEQRF+DORGQR
    status = 0;
    wrap_dgeqrf(N,N,&matrix[0],N,&tau[0],&status);
    // run DORGQR only on a successful factorisation, so that a DGEQRF
    // failure code is not overwritten
    if (status==0)
        wrap_dorgqr(N,N,N,&matrix[0],N,&tau[0],&status);
    if (status!=0)
    {
        printf("DGEQRF/DORGQR failed with code %d\n",status);
        return -1;
    }

    // DGESVD
    fill_symmetric(&matrix[0], N);
    status = 0;
    wrap_dgesvd('A','A',N,N,&matrix[0],N,&s[0],&U[0],N,&Vt[0],N,&status);
    if (status!=0)
    {
        printf("DGESVD failed with code %d\n",status);
        return -1;
    }

    // DSYEV
    fill_symmetric(&matrix[0], N);
    status = 0;
    wrap_dsyev('V','U',N,&matrix[0],N,&eigenvalues[0],&status);
    if (status!=0)
    {
        printf("DSYEV failed with code %d\n",status);
        return -1;
    }

    return 0;
}
#endif // HAVE_LAPACK

/** exercises the LAPACK wrappers
 *
 * Without HAVE_LAPACK the program only initialises and shuts down shogun.
 *
 * @return 0 on success, -1 if a LAPACK routine reported a failure
 */
int main(int argc, char** argv)
{
    init_shogun();

    int result = 0;

#ifdef HAVE_LAPACK
    result = run_lapack_routines();
#endif // HAVE_LAPACK

    // reached on failure as well
    exit_shogun();
    return result;
}
/*
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 3 of the License, or
 * (at your option) any later version.
 *
 * Written (W) 2011 Heiko Strathmann
 * Copyright (C) 2011 Berlin Institute of Technology and Max-Planck-Society
 */

#include <shogun/base/init.h>
#include <shogun/modelselection/ModelSelectionParameters.h>
#include <shogun/modelselection/ParameterCombination.h>
#include <shogun/kernel/GaussianKernel.h>
#include <shogun/features/Labels.h>
#include <shogun/features/SimpleFeatures.h>
#include <shogun/classifier/svm/LibSVM.h>

using namespace shogun;

/** message callback handed to init_shogun(); writes str to target verbatim */
void print_message(FILE* target, const char* str)
{
    fprintf(target, "%s", str);
}

/** builds the example parameter tree
 *
 * root
 *  +- "C1", values built from (1.0, 2.0, R_EXP)
 *  +- "kernel" (a CGaussianKernel)
 *      +- "width", values built from (1.0, 2.0, R_EXP)
 *
 * @return root node of the tree
 */
CModelSelectionParameters* create_param_tree()
{
    CModelSelectionParameters* root=new CModelSelectionParameters();

    /* regularisation parameter */
    CModelSelectionParameters* node_c1=new CModelSelectionParameters("C1");
    root->append_child(node_c1);
    node_c1->build_values(1.0, 2.0, R_EXP);

    /* kernel node wrapping a Gaussian kernel instance */
    CGaussianKernel* gauss=new CGaussianKernel();
    CModelSelectionParameters* node_kernel=
            new CModelSelectionParameters("kernel", gauss);
    root->append_child(node_kernel);

    /* width of the Gaussian kernel, child of the kernel node */
    CModelSelectionParameters* node_width=
            new CModelSelectionParameters("width");
    node_width->build_values(1.0, 2.0, R_EXP);
    node_kernel->append_child(node_width);

    return root;
}

/** applies every parameter combination to a LibSVM, trains it on a tiny data
 * set and prints the outputs on the training vectors
 *
 * @param combinations parameter combinations to try one after another
 */
void apply_parameter_tree(CDynamicObjectArray<CParameterCombination>* combinations)
{
    /* raw values 0..5 */
    float64_t* values=SG_MALLOC(float64_t, 6);
    for (index_t k=0; k<6; k++)
        values[k]=k;

    /* three 2-dimensional vectors; referenced here and released at the end
     * of this function */
    CSimpleFeatures<float64_t>* features=new CSimpleFeatures<float64_t> ();
    SG_REF(features);
    features->set_feature_matrix(values, 2, 3);

    /* three labels: -1, +1, -1 */
    CLabels* labels=new CLabels(3);
    SG_REF(labels);
    labels->set_label(0, -1);
    labels->set_label(1, +1);
    labels->set_label(2, -1);

    /* the SVM itself; its parameters come from the combinations below */
    CLibSVM* svm=new CLibSVM();
    SG_REF(svm);
    svm->set_labels(labels);

    for (index_t i=0; i<combinations->get_num_elements(); ++i)
    {
        SG_SPRINT("applying:\n");
        CParameterCombination* combination=combinations->get_element(i);
        combination->print_tree();

        /* write the combination's values into the SVM's parameters */
        Parameter* svm_parameters=svm->m_parameters;
        combination->apply_to_modsel_parameter(svm_parameters);
        SG_UNREF(combination);

        /* hand the features to the kernel that is now set in the SVM;
         * the kernel obtained from get_kernel() is released further down */
        CKernel* kernel=svm->get_kernel();
        kernel->init(features, features);

        svm->train();

        /* classify the three training examples */
        for (index_t example=0; example<3; example++)
            SG_SPRINT("output[%d]=%f\n", example, svm->apply(example));

        /* detach the features and release the kernel */
        kernel->cleanup();
        SG_UNREF(kernel);

        SG_SPRINT("----------------\n\n");
    }

    /* release what was referenced above */
    SG_UNREF(features);
    SG_UNREF(labels);
    SG_UNREF(svm);
}

int main(int argc, char **argv)
{
    init_shogun(&print_message, &print_message, &print_message);

    /* build and show the example tree */
    CModelSelectionParameters* tree=create_param_tree();
    tree->print_tree();

    SG_SPRINT("----------------------------------\n");

    /* expand the tree into all parameter combinations and try each of them */
    CDynamicObjectArray<CParameterCombination>* combinations=
            tree->get_combinations();

    apply_parameter_tree(combinations);

    /* fetch every combination once more and release it again right away */
    for (index_t i=0; i<combinations->get_num_elements(); ++i)
    {
        CParameterCombination* combination=combinations->get_element(i);
        SG_UNREF(combination);
    }

    SG_UNREF(combinations);

    /* the tree is released last, after the combinations have been processed
     * (it holds the kernel object, which is not referenced anywhere else) */
    SG_UNREF(tree);

    exit_shogun();

    return 0;
}
/*
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 3 of the License, or
 * (at your option) any later version.
 *
 * Written (W) 2011 Heiko Strathmann
 * Copyright (C) 2011 Berlin Institute of Technology and Max-Planck-Society
 */

#include <shogun/base/init.h>
#include <shogun/evaluation/CrossValidation.h>
#include <shogun/evaluation/ContingencyTableEvaluation.h>
#include <shogun/evaluation/StratifiedCrossValidationSplitting.h>
#include <shogun/modelselection/GridSearchModelSelection.h>
#include <shogun/modelselection/ModelSelectionParameters.h>
#include <shogun/modelselection/ParameterCombination.h>
#include <shogun/features/Labels.h>
#include <shogun/features/SimpleFeatures.h>
#include <shogun/classifier/svm/LibSVM.h>
#include <shogun/kernel/GaussianKernel.h>
#include <shogun/kernel/PowerKernel.h>
#include <shogun/distance/MinkowskiMetric.h>

using namespace shogun;

/** message callback handed to init_shogun(); writes str to target verbatim */
void print_message(FILE* target, const char* str)
{
    fprintf(target, "%s", str);
}

/** builds the parameter tree searched by the grid search
 *
 * root
 *  +- "C1"
 *  +- "C2"
 *  +- "kernel" (CGaussianKernel)
 *  |   +- "width"
 *  +- "kernel" (CPowerKernel)
 *      +- "degree"
 *      +- "distance" (CMinkowskiMetric)
 *          +- "k"
 *
 * @return root node of the tree
 */
CModelSelectionParameters* create_param_tree()
{
    CModelSelectionParameters* root=new CModelSelectionParameters();

    /* the two regularisation parameters */
    CModelSelectionParameters* node_c1=new CModelSelectionParameters("C1");
    root->append_child(node_c1);
    node_c1->build_values(-1.0, 1.0, R_EXP);

    CModelSelectionParameters* node_c2=new CModelSelectionParameters("C2");
    root->append_child(node_c2);
    node_c2->build_values(-1.0, 1.0, R_EXP);

    /* first kernel candidate: Gaussian kernel with its width */
    CGaussianKernel* gauss=new CGaussianKernel();
    CModelSelectionParameters* node_gauss=
            new CModelSelectionParameters("kernel", gauss);
    CModelSelectionParameters* node_gauss_width=
            new CModelSelectionParameters("width");
    node_gauss_width->build_values(-1.0, 1.0, R_EXP, 1.0, 2.0);
    node_gauss->append_child(node_gauss_width);
    root->append_child(node_gauss);

    /* second kernel candidate: power kernel with its degree ... */
    CPowerKernel* power=new CPowerKernel();
    CModelSelectionParameters* node_power=
            new CModelSelectionParameters("kernel", power);
    root->append_child(node_power);

    CModelSelectionParameters* node_power_degree=
            new CModelSelectionParameters("degree");
    node_power_degree->build_values(1.0, 2.0, R_LINEAR);
    node_power->append_child(node_power_degree);

    /* ... and a Minkowski metric as its distance, with the metric's k */
    CMinkowskiMetric* minkowski=new CMinkowskiMetric(10);
    CModelSelectionParameters* node_power_distance=
            new CModelSelectionParameters("distance", minkowski);
    node_power->append_child(node_power_distance);

    CModelSelectionParameters* node_power_distance_k=
            new CModelSelectionParameters("k");
    node_power_distance_k->build_values(1.0, 2.0, R_LINEAR);
    node_power_distance->append_child(node_power_distance_k);

    return root;
}

/** grid search over SVM and kernel parameters on random two-class data,
 * followed by a cross-validation of the best combination */
int main(int argc, char **argv)
{
    init_shogun(&print_message, &print_message, &print_message);

    int32_t num_folds=3;
    int32_t num_examples=20;
    int32_t num_dims=3;

    /* random feature values and a label object of matching size */
    float64_t* values=SG_MALLOC(float64_t, num_examples*num_dims);
    CLabels* labels=new CLabels(num_examples);
    for (int32_t k=0; k<num_examples*num_dims; k++)
        values[k]=CMath::randn_double();

    /* num_examples vectors of dimension num_dims */
    CSimpleFeatures<float64_t>* features=new CSimpleFeatures<float64_t> ();
    features->set_feature_matrix(values, num_dims, num_examples);

    /* two classes: +1 for even indices, -1 for odd ones */
    for (index_t i=0; i<num_examples; ++i)
        labels->set_label(i, (i%2) ? -1 : 1);

    /* the machine to tune */
    CLibSVM* svm=new CLibSVM();

    /* stratified splitting into num_folds subsets */
    CStratifiedCrossValidationSplitting* splitting=
            new CStratifiedCrossValidationSplitting(labels, num_folds);

    /* evaluate by accuracy */
    CContingencyTableEvaluation* criterion=
            new CContingencyTableEvaluation(ACCURACY);

    /* cross-validation used inside the model selection; one run suffices */
    CCrossValidation* cross=new CCrossValidation(svm, features, labels,
            splitting, criterion);
    cross->set_num_runs(1);

    /* parameter tree; it is handed to the grid search below */
    CModelSelectionParameters* param_tree=create_param_tree();
    param_tree->print_tree();

    /* the grid search receives the tree and the cross-validation object */
    CGridSearchModelSelection* grid_search=new CGridSearchModelSelection(
            param_tree, cross);

    CParameterCombination* best=grid_search->select_model();
    SG_SPRINT("best parameter(s):\n");
    best->print_tree();

    best->apply_to_machine(svm);

    /* more runs for tighter confidence intervals */
    cross->set_num_runs(10);
    cross->set_conf_int_alpha(0.01);
    CrossValidationResult result=cross->evaluate();
    SG_SPRINT("result: ");
    result.print_result();

    /* clean up */
    SG_UNREF(best);
    SG_UNREF(grid_search);

    exit_shogun();

    return 0;
}
/*
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 3 of the License, or
 * (at your option) any later version.
 *
 * Written (W) 2011 Heiko Strathmann
 * Copyright (C) 2011 Berlin Institute of Technology and Max-Planck-Society
 */

#include <shogun/base/init.h>
#include <shogun/lib/config.h>
#include <shogun/evaluation/CrossValidation.h>
#include <shogun/evaluation/ContingencyTableEvaluation.h>
#include <shogun/evaluation/StratifiedCrossValidationSplitting.h>
#include <shogun/modelselection/GridSearchModelSelection.h>
#include <shogun/modelselection/ModelSelectionParameters.h>
#include <shogun/modelselection/ParameterCombination.h>
#include <shogun/features/Labels.h>
#include <shogun/features/SimpleFeatures.h>
#include <shogun/classifier/svm/LibLinear.h>

using namespace shogun;

/** message callback handed to init_shogun(); writes str to target verbatim */
void print_message(FILE* target, const char* str)
{
    fprintf(target, "%s", str);
}

/** builds a parameter tree with the two children "C1" and "C2", each with
 * values built from (-2.0, 2.0, R_EXP)
 *
 * @return root node of the tree
 */
CModelSelectionParameters* create_param_tree()
{
    CModelSelectionParameters* root=new CModelSelectionParameters();

    CModelSelectionParameters* node_c1=new CModelSelectionParameters("C1");
    root->append_child(node_c1);
    node_c1->build_values(-2.0, 2.0, R_EXP);

    CModelSelectionParameters* node_c2=new CModelSelectionParameters("C2");
    root->append_child(node_c2);
    node_c2->build_values(-2.0, 2.0, R_EXP);

    return root;
}

/** grid search over C1/C2 of a LibLinear classifier, followed by a
 * cross-validation of the best combination (only with HAVE_LAPACK) */
int main(int argc, char **argv)
{
    init_shogun(&print_message, &print_message, &print_message);

#ifdef HAVE_LAPACK
    int32_t num_subsets=5;

    /* number of feature vectors, despite its name */
    int32_t num_features=11;

    /* raw values 0, 1, 2, ... */
    float64_t* values=SG_MALLOC(float64_t, num_features*2);
    for (int32_t k=0; k<num_features*2; k++)
        values[k]=k;

    /* num_features 2-dimensional vectors */
    CSimpleFeatures<float64_t>* features=new CSimpleFeatures<float64_t> ();
    features->set_feature_matrix(values, 2, num_features);

    /* one label per vector: +1 for even indices, -1 for odd ones */
    CLabels* labels=new CLabels(num_features);
    for (index_t i=0; i<num_features; ++i)
        labels->set_label(i, (i%2) ? -1 : 1);

    /* linear classifier (use -s 2 option to avoid warnings) */
    CLibLinear* linear=new CLibLinear(L2R_L2LOSS_SVC);

    /* stratified splitting into num_subsets subsets */
    CStratifiedCrossValidationSplitting* splitting=
            new CStratifiedCrossValidationSplitting(labels, num_subsets);

    /* evaluate by accuracy */
    CContingencyTableEvaluation* criterion=
            new CContingencyTableEvaluation(ACCURACY);

    /* cross-validation used inside the model selection */
    CCrossValidation* cross=new CCrossValidation(linear, features, labels,
            splitting, criterion);

    /* parameter tree; it is handed to the grid search below */
    CModelSelectionParameters* param_tree=create_param_tree();
    param_tree->print_tree();

    /* the grid search receives the tree and the cross-validation object */
    CGridSearchModelSelection* grid_search=new CGridSearchModelSelection(
            param_tree, cross);

    CParameterCombination* best=grid_search->select_model();
    SG_SPRINT("best parameter(s):\n");
    best->print_tree();

    best->apply_to_machine(linear);
    CrossValidationResult result=cross->evaluate();
    result.print_result();

    /* clean up */
    SG_UNREF(best);
    SG_UNREF(grid_search);
#endif // HAVE_LAPACK

    exit_shogun();

    return 0;
}
/*
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 3 of the License, or
 * (at your option) any later version.
 *
 * Written (W) 2011 Heiko Strathmann
 * Copyright (C) 2011 Berlin Institute of Technology and Max-Planck-Society
 */

#include <shogun/base/init.h>
#include <shogun/evaluation/CrossValidation.h>
#include <shogun/evaluation/ContingencyTableEvaluation.h>
#include <shogun/evaluation/StratifiedCrossValidationSplitting.h>
#include <shogun/modelselection/GridSearchModelSelection.h>
#include <shogun/modelselection/ModelSelectionParameters.h>
#include <shogun/modelselection/ParameterCombination.h>
#include <shogun/features/Labels.h>
#include <shogun/features/StringFeatures.h>
#include <shogun/classifier/svm/LibSVM.h>
#include <shogun/kernel/DistantSegmentsKernel.h>

using namespace shogun;

/** Output callback handed to init_shogun(): writes str verbatim to target.
 *
 * @param target stream to write to
 * @param str zero-terminated message
 */
void print_message(FILE* target, const char* str)
{
    fprintf(target, "%s", str);
}

/** Builds the parameter tree that the grid search iterates over.
 *
 * The root has the value children "C1" and "C2" and one "kernel" child that
 * wraps a CDistantSegmentsKernel with the value children "delta" and "theta".
 *
 * @return root node of the newly allocated tree
 */
CModelSelectionParameters* create_param_tree()
{
    CModelSelectionParameters* root=new CModelSelectionParameters();

    CModelSelectionParameters* c1=new CModelSelectionParameters("C1");
    root->append_child(c1);
    c1->build_values(1.0, 2.0, R_EXP);

    CModelSelectionParameters* c2=new CModelSelectionParameters("C2");
    root->append_child(c2);
    c2->build_values(1.0, 2.0, R_EXP);

    CDistantSegmentsKernel* ds_kernel=new CDistantSegmentsKernel();
    CModelSelectionParameters* param_ds_kernel=
            new CModelSelectionParameters("kernel", ds_kernel);
    root->append_child(param_ds_kernel);

    /* integer literals on purpose: they are kept exactly as in the
     * original example so that the same build_values() call is made */
    CModelSelectionParameters* ds_kernel_delta=
            new CModelSelectionParameters("delta");
    ds_kernel_delta->build_values(1, 2, R_LINEAR);
    param_ds_kernel->append_child(ds_kernel_delta);

    CModelSelectionParameters* ds_kernel_theta=
            new CModelSelectionParameters("theta");
    ds_kernel_theta->build_values(1, 2, R_LINEAR);
    param_ds_kernel->append_child(ds_kernel_theta);

    return root;
}

/** Runs a grid search over the tree from create_param_tree() for a LibSVM
 * on random uppercase strings, then re-evaluates the best combination with
 * more cross-validation runs.
 */
int main(int argc, char **argv)
{
    init_shogun(&print_message, &print_message, &print_message);

    index_t num_strings=10;
    index_t max_string_length=20;
    index_t min_string_length=max_string_length/2;
    index_t num_subsets=num_strings/3;

    SGStringList<char> strings(num_strings, max_string_length);

    for (index_t i=0; i<num_strings; ++i)
    {
        index_t len=CMath::random(min_string_length, max_string_length);
        SGString<char> current(len);

        SG_SPRINT("string %i: \"", i);
        /* fill with random uppercase letters (ASCII) */
        for (index_t j=0; j<len; ++j)
        {
            current.string[j]=(char)CMath::random('A', 'Z');

            /* print the character directly; no temporary heap buffer is
             * needed to emit a single char */
            SG_SPRINT("%c", current.string[j]);
        }

        SG_SPRINT("\"\n");
        strings.strings[i]=current;
    }

    /* create string features over the ALPHANUM alphabet */
    CStringFeatures<char>* features=
            new CStringFeatures<char>(strings, ALPHANUM);

    /* create labels, two classes (alternating +1/-1) */
    CLabels* labels=new CLabels(num_strings);
    for (index_t i=0; i<num_strings; ++i)
        labels->set_label(i, i%2==0 ? 1 : -1);

    /* create svm classifier */
    CLibSVM* classifier=new CLibSVM();

    /* splitting strategy */
    CStratifiedCrossValidationSplitting* splitting_strategy=
            new CStratifiedCrossValidationSplitting(labels, num_subsets);

    /* accuracy evaluation */
    CContingencyTableEvaluation* evaluation_criterium=
            new CContingencyTableEvaluation(ACCURACY);

    /* cross validation class for evaluation in model selection */
    CCrossValidation* cross=new CCrossValidation(classifier, features, labels,
            splitting_strategy, evaluation_criterium);
    cross->set_num_runs(2);

    /* model parameter selection, deletion is handled by modsel class
     * (SG_UNREF) */
    CModelSelectionParameters* param_tree=create_param_tree();
    param_tree->print_tree();

    /* handles all of the above structures in memory */
    CGridSearchModelSelection* grid_search=new CGridSearchModelSelection(
            param_tree, cross);

    CParameterCombination* best_combination=grid_search->select_model();
    SG_SPRINT("best parameter(s):\n");
    best_combination->print_tree();

    best_combination->apply_to_machine(classifier);

    /* larger number of runs to have tighter confidence intervals */
    cross->set_num_runs(10);
    cross->set_conf_int_alpha(0.01);
    CrossValidationResult result=cross->evaluate();
    SG_SPRINT("result: ");
    result.print_result();

    /* clean up */
    SG_UNREF(best_combination);
    SG_UNREF(grid_search);

    exit_shogun();

    return 0;
}
/*
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 3 of the License, or
 * (at your option) any later version.
 *
 * Written (W) 2011 Heiko Strathmann
 * Copyright (C) 2011 Berlin Institute of Technology and Max-Planck-Society
 */

#include <shogun/base/init.h>
#include <shogun/modelselection/ModelSelectionParameters.h>
#include <shogun/modelselection/ParameterCombination.h>
#include <shogun/kernel/GaussianKernel.h>
#include <shogun/kernel/PowerKernel.h>
#include <shogun/distance/MinkowskiMetric.h>
#include <shogun/kernel/DistantSegmentsKernel.h>

using namespace shogun;

/* message callback for init_shogun(): forwards the text to the stream */
void print_message(FILE* target, const char* str)
{
    fprintf(target, "%s", str);
}

/* root with a "C" value, a power kernel (with "degree" and a Minkowski
 * "distance" that has a "k" value), a Gaussian kernel (with "width") and a
 * distant segments kernel (with "delta" and "theta") */
CModelSelectionParameters* build_complex_example_tree()
{
    CModelSelectionParameters* top=new CModelSelectionParameters();

    CModelSelectionParameters* node_c=new CModelSelectionParameters("C");
    top->append_child(node_c);
    node_c->build_values(1.0, 1.0, R_EXP);

    /* power kernel sub-tree */
    CPowerKernel* pow_kern=new CPowerKernel();
    CModelSelectionParameters* node_pow=
            new CModelSelectionParameters("kernel", pow_kern);
    top->append_child(node_pow);

    CModelSelectionParameters* node_pow_degree=
            new CModelSelectionParameters("degree");
    node_pow_degree->build_values(1.0, 1.0, R_EXP);
    node_pow->append_child(node_pow_degree);

    CMinkowskiMetric* minkowski=new CMinkowskiMetric(10);
    CModelSelectionParameters* node_pow_metric=
            new CModelSelectionParameters("distance", minkowski);
    node_pow->append_child(node_pow_metric);

    CModelSelectionParameters* node_pow_metric_k=
            new CModelSelectionParameters("k");
    node_pow_metric_k->build_values(1.0, 12.0, R_LINEAR);
    node_pow_metric->append_child(node_pow_metric_k);

    /* gaussian kernel sub-tree */
    CGaussianKernel* gauss_kern=new CGaussianKernel();
    CModelSelectionParameters* node_gauss=
            new CModelSelectionParameters("kernel", gauss_kern);
    top->append_child(node_gauss);

    CModelSelectionParameters* node_gauss_width=
            new CModelSelectionParameters("width");
    node_gauss_width->build_values(1.0, 2.0, R_EXP);
    node_gauss->append_child(node_gauss_width);

    /* distant segments kernel sub-tree */
    CDistantSegmentsKernel* dist_seg_kern=new CDistantSegmentsKernel();
    CModelSelectionParameters* node_ds=
            new CModelSelectionParameters("kernel", dist_seg_kern);
    top->append_child(node_ds);

    CModelSelectionParameters* node_ds_delta=
            new CModelSelectionParameters("delta");
    node_ds_delta->build_values(1.0, 2.0, R_EXP);
    node_ds->append_child(node_ds_delta);

    CModelSelectionParameters* node_ds_theta=
            new CModelSelectionParameters("theta");
    node_ds_theta->build_values(1.0, 2.0, R_EXP);
    node_ds->append_child(node_ds_theta);

    return top;
}

/* a single "kernel" node wrapping a power kernel, without children */
CModelSelectionParameters* build_sgobject_no_childs_tree()
{
    CPowerKernel* pow_kern=new CPowerKernel();
    return new CModelSelectionParameters("kernel", pow_kern);
}

/* a single value node "C1" */
CModelSelectionParameters* build_leaf_node_tree()
{
    CModelSelectionParameters* leaf=new CModelSelectionParameters("C1");
    leaf->build_values(1.0, 1.0, R_EXP);
    return leaf;
}

/* an unnamed root node with nothing attached */
CModelSelectionParameters* build_root_no_childs_tree()
{
    CModelSelectionParameters* lonely_root=new CModelSelectionParameters();
    return lonely_root;
}

/* root with the two value children "C1" and "C2" */
CModelSelectionParameters* build_root_value_childs_tree()
{
    CModelSelectionParameters* top=new CModelSelectionParameters();

    CModelSelectionParameters* first=new CModelSelectionParameters("C1");
    top->append_child(first);
    first->build_values(1.0, 1.0, R_EXP);

    CModelSelectionParameters* second=new CModelSelectionParameters("C2");
    top->append_child(second);
    second->build_values(1.0, 1.0, R_EXP);

    return top;
}

/* root with one "kernel" child wrapping a power kernel */
CModelSelectionParameters* build_root_sg_object_child_tree()
{
    CModelSelectionParameters* top=new CModelSelectionParameters();

    CPowerKernel* pow_kern=new CPowerKernel();
    CModelSelectionParameters* node_pow=
            new CModelSelectionParameters("kernel", pow_kern);
    top->append_child(node_pow);

    return top;
}

/* root with a "C" value child followed by a "kernel" (power kernel) child */
CModelSelectionParameters* build_root_sg_object_child_value_child_tree()
{
    CModelSelectionParameters* top=new CModelSelectionParameters();

    CPowerKernel* pow_kern=new CPowerKernel();
    CModelSelectionParameters* node_pow=
            new CModelSelectionParameters("kernel", pow_kern);

    CModelSelectionParameters* node_c=new CModelSelectionParameters("C");
    top->append_child(node_c);
    node_c->build_values(1.0, 1.0, R_EXP);

    /* the kernel node is attached after the value node */
    top->append_child(node_pow);

    return top;
}

/* prints the given tree, then prints and releases every combination that
 * get_combinations() produces for it */
void test_get_combinations(CModelSelectionParameters* tree)
{
    tree->print_tree();

    /* build combinations of parameter trees */
    CDynamicObjectArray<CParameterCombination>* all=tree->get_combinations();

    /* print and directly delete them all */
    SG_SPRINT("----------------------------------\n");
    index_t idx=0;
    while (idx<all->get_num_elements())
    {
        CParameterCombination* one=all->get_element(idx);
        one->print_tree();
        SG_UNREF(one);
        ++idx;
    }

    SG_UNREF(all);
}

/* signature shared by all build_*_tree() functions above */
typedef CModelSelectionParameters* (*tree_builder_t)();

int main(int argc, char **argv)
{
    init_shogun(&print_message, &print_message, &print_message);

    /* the trees are processed in exactly this order */
    const tree_builder_t builders[]=
    {
        &build_root_no_childs_tree,
        &build_leaf_node_tree,
        &build_sgobject_no_childs_tree,
        &build_root_value_childs_tree,
        &build_root_sg_object_child_tree,
        &build_root_sg_object_child_value_child_tree,
        &build_complex_example_tree
    };
    const index_t num_builders=sizeof(builders)/sizeof(builders[0]);

    for (index_t b=0; b<num_builders; ++b)
    {
        CModelSelectionParameters* tree=builders[b]();
        SG_REF(tree);
        test_get_combinations(tree);
        SG_UNREF(tree);
    }

    exit_shogun();

    return 0;
}
/* * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 3 of the License, or * (at your option) any later version. * * Written (W) 2011 Heiko Strathmann * Copyright (C) 2011 Berlin Institute of Technology and Max-Planck-Society */ #include <shogun/base/init.h> #include <shogun/modelselection/ParameterCombination.h> #include <shogun/lib/DynamicObjectArray.h> #include <stdlib.h> using namespace std; using namespace shogun; void print_message(FILE* target, const char* str) { fprintf(target, "%s", str); } void test_parameter_set_multiplication() { SG_SPRINT("\ntest_parameter_set_multiplication()\n"); DynArray<Parameter*> set1; DynArray<Parameter*> set2; SGVector<float64_t> param_vector(8, true); CMath::range_fill_vector(param_vector.vector, param_vector.vlen); SGVector<Parameter*> parameters(4, true); parameters.vector[0]=new Parameter(); parameters.vector[0]->add(¶m_vector.vector[0], "0"); parameters.vector[0]->add(¶m_vector.vector[1], "1"); set1.append_element(parameters.vector[0]); parameters.vector[1]=new Parameter(); parameters.vector[1]->add(¶m_vector.vector[2], "2"); parameters.vector[1]->add(¶m_vector.vector[3], "3"); set1.append_element(parameters.vector[1]); parameters.vector[2]=new Parameter(); parameters.vector[2]->add(¶m_vector.vector[4], "4"); parameters.vector[2]->add(¶m_vector.vector[5], "5"); set2.append_element(parameters.vector[2]); parameters.vector[3]=new Parameter(); parameters.vector[3]->add(¶m_vector.vector[6], "6"); parameters.vector[3]->add(¶m_vector.vector[7], "7"); set2.append_element(parameters.vector[3]); DynArray<Parameter*>* result=new DynArray<Parameter*>();//CParameterCombination::parameter_set_multiplication(set1, set2); for (index_t i=0; i<result->get_num_elements(); ++i) { Parameter* p=result->get_element(i); for (index_t j=0; j<p->get_num_parameters(); ++j) SG_SPRINT("%s ", 
p->get_parameter(j)->m_name); SG_SPRINT("\n"); delete p; } delete result; for (index_t i=0; i<4; ++i) delete parameters.vector[i]; param_vector.free_vector(); parameters.free_vector(); } void test_leaf_sets_multiplication() { SG_SPRINT("\ntest_leaf_sets_multiplication()\n"); SGVector<float64_t> param_vector(6, true); CMath::range_fill_vector(param_vector.vector, param_vector.vlen); CDynamicObjectArray<CDynamicObjectArray<CParameterCombination> > sets; CParameterCombination* new_root=new CParameterCombination(); SG_REF(new_root); CDynamicObjectArray<CParameterCombination>* current=new CDynamicObjectArray< CParameterCombination>(); sets.append_element(current); Parameter* p=new Parameter(); p->add(¶m_vector.vector[0], "0"); CParameterCombination* pc=new CParameterCombination(p); current->append_element(pc); p=new Parameter(); p->add(¶m_vector.vector[1], "1"); pc=new CParameterCombination(p); current->append_element(pc); /* first case: one element */ CDynamicObjectArray<CParameterCombination>* result_simple= CParameterCombination::leaf_sets_multiplication(sets, new_root); SG_SPRINT("one set\n"); for (index_t i=0; i<result_simple->get_num_elements(); ++i) { CParameterCombination* current=result_simple->get_element(i); current->print_tree(); SG_UNREF(current); } SG_UNREF(result_simple); /* now more elements are created */ current=new CDynamicObjectArray<CParameterCombination>(); sets.append_element(current); p=new Parameter(); p->add(¶m_vector.vector[2], "2"); pc=new CParameterCombination(p); current->append_element(pc); p=new Parameter(); p->add(¶m_vector.vector[3], "3"); pc=new CParameterCombination(p); current->append_element(pc); current=new CDynamicObjectArray<CParameterCombination>(); sets.append_element(current); p=new Parameter(); p->add(¶m_vector.vector[4], "4"); pc=new CParameterCombination(p); current->append_element(pc); p=new Parameter(); p->add(¶m_vector.vector[5], "5"); pc=new CParameterCombination(p); current->append_element(pc); /* second case: more 
element */ CDynamicObjectArray<CParameterCombination>* result_complex= CParameterCombination::leaf_sets_multiplication(sets, new_root); SG_SPRINT("more sets\n"); for (index_t i=0; i<result_complex->get_num_elements(); ++i) { CParameterCombination* current=result_complex->get_element(i); current->print_tree(); SG_UNREF(current); } SG_UNREF(result_complex); SG_UNREF(new_root); param_vector.free_vector(); } int main(int argc, char **argv) { init_shogun(&print_message, &print_message, &print_message); test_parameter_set_multiplication(); test_leaf_sets_multiplication(); exit_shogun(); return 0; }
/*
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 3 of the License, or
 * (at your option) any later version.
 *
 * Written (W) 2011 Heiko Strathmann
 * Copyright (C) 2011 Berlin Institute of Technology and Max-Planck-Society
 */

#include <shogun/base/init.h>
#include <shogun/modelselection/ModelSelectionParameters.h>
#include <shogun/modelselection/ParameterCombination.h>
#include <shogun/kernel/GaussianKernel.h>
#include <shogun/kernel/PowerKernel.h>
#include <shogun/distance/MinkowskiMetric.h>
#include <shogun/kernel/DistantSegmentsKernel.h>

using namespace shogun;

/* message callback for init_shogun(): forwards the text to the stream */
void print_message(FILE* target, const char* str)
{
    fprintf(target, "%s", str);
}

/* root with a "C" value, a power kernel (with "degree" and a Minkowski
 * "distance" that has a "k" value), a Gaussian kernel (with "width") and a
 * distant segments kernel (with "delta" and "theta") */
CModelSelectionParameters* create_param_tree()
{
    CModelSelectionParameters* top=new CModelSelectionParameters();

    CModelSelectionParameters* node_c=new CModelSelectionParameters("C");
    top->append_child(node_c);
    node_c->build_values(1, 2, R_EXP);

    /* power kernel sub-tree */
    CPowerKernel* pow_kern=new CPowerKernel();
    CModelSelectionParameters* node_pow=
            new CModelSelectionParameters("kernel", pow_kern);
    top->append_child(node_pow);

    CModelSelectionParameters* node_pow_degree=
            new CModelSelectionParameters("degree");
    node_pow_degree->build_values(1, 2, R_EXP);
    node_pow->append_child(node_pow_degree);

    CMinkowskiMetric* minkowski=new CMinkowskiMetric(10);
    CModelSelectionParameters* node_pow_metric=
            new CModelSelectionParameters("distance", minkowski);
    node_pow->append_child(node_pow_metric);

    CModelSelectionParameters* node_pow_metric_k=
            new CModelSelectionParameters("k");
    node_pow_metric_k->build_values(1, 2, R_LINEAR);
    node_pow_metric->append_child(node_pow_metric_k);

    /* gaussian kernel sub-tree */
    CGaussianKernel* gauss_kern=new CGaussianKernel();
    CModelSelectionParameters* node_gauss=
            new CModelSelectionParameters("kernel", gauss_kern);
    top->append_child(node_gauss);

    CModelSelectionParameters* node_gauss_width=
            new CModelSelectionParameters("width");
    node_gauss_width->build_values(1, 2, R_EXP);
    node_gauss->append_child(node_gauss_width);

    /* distant segments kernel sub-tree */
    CDistantSegmentsKernel* dist_seg_kern=new CDistantSegmentsKernel();
    CModelSelectionParameters* node_ds=
            new CModelSelectionParameters("kernel", dist_seg_kern);
    top->append_child(node_ds);

    CModelSelectionParameters* node_ds_delta=
            new CModelSelectionParameters("delta");
    node_ds_delta->build_values(1, 2, R_EXP);
    node_ds->append_child(node_ds_delta);

    CModelSelectionParameters* node_ds_theta=
            new CModelSelectionParameters("theta");
    node_ds_theta->build_values(1, 2, R_EXP);
    node_ds->append_child(node_ds_theta);

    return top;
}

/* prints the example tree followed by every combination derived from it */
int main(int argc, char **argv)
{
    init_shogun(&print_message, &print_message, &print_message);

    /* create example tree */
    CModelSelectionParameters* example=create_param_tree();
    SG_REF(example);
    example->print_tree();

    /* build combinations of parameter trees */
    CDynamicObjectArray<CParameterCombination>* all=
            example->get_combinations();

    /* print and directly delete them all */
    SG_SPRINT("----------------------------------\n");
    index_t idx=0;
    while (idx<all->get_num_elements())
    {
        CParameterCombination* one=all->get_element(idx);
        one->print_tree();
        SG_UNREF(one);
        ++idx;
    }

    SG_UNREF(all);

    /* delete example tree */
    SG_UNREF(example);

    exit_shogun();

    return 0;
}
/*
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 3 of the License, or
 * (at your option) any later version.
 *
 * Written (W) 2011 Heiko Strathmann
 * Copyright (C) 2011 Berlin Institute of Technology and Max-Planck-Society
 */

#include <shogun/features/Labels.h>
#include <shogun/features/SimpleFeatures.h>
#include <shogun/kernel/GaussianKernel.h>
#include <shogun/classifier/svm/LibSVM.h>
#include <shogun/base/init.h>
#include <shogun/lib/common.h>
#include <shogun/io/SGIO.h>

using namespace shogun;

/* message callback for init_shogun(): forwards the text to the stream */
void print_message(FILE* target, const char* str)
{
    fprintf(target, "%s", str);
}

/* number of features and their dimension */
int32_t n=6;

/* trains a LibSVM on random data for ten kernel widths 2^0 .. 2^9, where the
 * width is changed through the kernel's parameter list */
int main(int argc, char** argv)
{
    init_shogun(&print_message);

    /* create some random data */
    float64_t* data=SG_MALLOC(float64_t, n*n);
    for (int32_t pos=0; pos<n*n; ++pos)
        data[pos]=CMath::random((float64_t)-n,(float64_t)n);

    CMath::display_matrix(data, n, n);

    /* create n n-dimensional feature vectors */
    CSimpleFeatures<float64_t>* feats=new CSimpleFeatures<float64_t>();
    feats->set_feature_matrix(data, n, n);

    /* create gaussian kernel with cache 10MB, width will be changed later */
    CGaussianKernel* gauss=new CGaussianKernel(10, 0);
    gauss->init(feats, feats);

    /* create n labels (+1,-1,+1,-1,...) */
    CLabels* targets=new CLabels(n);
    for (int32_t idx=0; idx<n; ++idx)
    {
        if (idx%2==0)
            targets->set_label(idx, +1);
        else
            targets->set_label(idx, -1);
    }

    /* create libsvm with C=10 and produced labels */
    CLibSVM* machine=new CLibSVM(10, gauss, targets);

    /* iterate over different width parameters */
    for (int32_t exponent=0; exponent<10; ++exponent)
    {
        float64_t width=CMath::pow(2.0,exponent);
        SG_SPRINT("\n\ncurrent kernel width: 2^%d=%f\n", exponent, width);

        /* create parameter to change current kernel width */
        Parameter* width_param=new Parameter();
        width_param->add(&width, "width", "");

        /* tell kernel to use the newly produced parameter */
        gauss->m_parameters->set_from_parameters(width_param);

        /* print kernel matrix */
        for (int32_t row=0; row<n; row++)
        {
            for (int32_t col=0; col<n; col++)
                SG_SPRINT("%f ", gauss->kernel(row,col));

            SG_SPRINT("\n");
        }

        /* train and classify */
        machine->train();
        for (int32_t sample=0; sample<n; ++sample)
        {
            SG_SPRINT("output[%d]=%f\treal[%d]=%f\n", sample,
                    machine->apply(sample), sample,
                    targets->get_label(sample));
        }

        delete width_param;
    }

    /* free up memory */
    SG_UNREF(machine);

    exit_shogun();

    return 0;
}
/*
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 3 of the License, or
 * (at your option) any later version.
 *
 * Written (W) 2011 Heiko Strathmann
 * Copyright (C) 2011 Berlin Institute of Technology and Max-Planck-Society
 */

#include <shogun/features/Labels.h>
#include <shogun/features/SimpleFeatures.h>
#include <shogun/kernel/GaussianKernel.h>
#include <shogun/classifier/svm/LibSVM.h>
#include <shogun/base/init.h>
#include <shogun/lib/common.h>
#include <shogun/io/SGIO.h>

using namespace shogun;

/* message callback for init_shogun(): forwards the text to the stream */
void print_message(FILE* target, const char* str)
{
    fprintf(target, "%s", str);
}

/* number of features and their dimension, number of kernels */
const int32_t n=7;

/* trains one LibSVM with n different Gaussian kernels in turn, where each
 * kernel is handed to the SVM through a Parameter instance */
int main(int argc, char** argv)
{
    init_shogun(&print_message);

    /* create some random data and hand it to each kernel */
    float64_t* data=SG_MALLOC(float64_t, n*n);
    for (int32_t pos=0; pos<n*n; ++pos)
        data[pos]=CMath::random((float64_t) -n, (float64_t) n);

    SG_SPRINT("feature data:\n");
    CMath::display_matrix(data, n, n);

    CSimpleFeatures<float64_t>* feats=new CSimpleFeatures<float64_t> ();
    feats->set_feature_matrix(data, n, n);

    /* create n kernels with n features each */
    CGaussianKernel** kernels=SG_MALLOC(CGaussianKernel*, n);
    for (int32_t k=0; k<n; ++k)
    {
        CGaussianKernel* fresh=
                new CGaussianKernel(10, CMath::random(0.0, (float64_t)n*n));

        /* hand data to kernel */
        fresh->init(feats, feats);
        kernels[k]=fresh;
    }

    /* create n parameter instances, each with one kernel */
    Parameter** parameters=SG_MALLOC(Parameter*, n);
    for (int32_t k=0; k<n; ++k)
    {
        Parameter* holder=new Parameter();
        holder->add((CSGObject**)&kernels[k], "kernel", "");
        parameters[k]=holder;
    }

    /* create n labels (+1,-1,+1,-1,...) */
    CLabels* targets=new CLabels(n);
    for (int32_t idx=0; idx<n; ++idx)
    {
        if (idx%2==0)
            targets->set_label(idx, +1);
        else
            targets->set_label(idx, -1);
    }

    /* create libsvm with C=10 and produced labels */
    CLibSVM* machine=new CLibSVM(10, NULL, targets);

    /* iterate over all parameter instances and set them as subkernel */
    for (int32_t k=0; k<n; ++k)
    {
        SG_SPRINT("\nkernel %d has width %f\n", k, kernels[k]->get_width());

        /* change kernel, old one is UNREF'ed, new one is REF'ed */
        machine->m_parameters->set_from_parameters(parameters[k]);

        /* train and classify with the different kernels */
        machine->train();
        for (int32_t sample=0; sample<n; ++sample)
        {
            SG_SPRINT("output[%d]=%f\treal[%d]=%f\n", sample,
                    machine->apply(sample), sample,
                    targets->get_label(sample));
        }
    }

    /* free up memory: delete all Parameter instances */
    for (int32_t k=0; k<n; ++k)
        delete parameters[k];

    /* delete created arrays */
    SG_FREE(kernels);
    SG_FREE(parameters);

    /* this also handles features, labels, and last kernel in kernels[n-1] */
    SG_UNREF(machine);

    exit_shogun();

    return 0;
}
/*
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 3 of the License, or
 * (at your option) any later version.
 *
 * Written (W) 2011 Heiko Strathmann
 * Copyright (C) 2011 Berlin Institute of Technology and Max-Planck-Society
 */

#include <shogun/lib/config.h>
#include <shogun/classifier/svm/LibSVM.h>
#include <shogun/classifier/svm/LibLinear.h>
#include <shogun/kernel/DistantSegmentsKernel.h>
#include <shogun/kernel/GaussianKernel.h>
#include <shogun/kernel/PowerKernel.h>
#include <shogun/distance/MinkowskiMetric.h>

using namespace shogun;

/** Output callback handed to init_shogun(): writes str verbatim to target.
 *
 * @param target stream to write to
 * @param str zero-terminated message
 */
void print_message(FILE* target, const char* str)
{
    fprintf(target, "%s", str);
}

/** Prints name, description and data type of every parameter that the given
 * object reports through get_modelsel_names().
 *
 * @param object object whose model selection parameters are listed
 */
void print_modsel_parameters(CSGObject* object)
{
    SGVector<char*> modsel_params=object->get_modelsel_names();

    SG_SPRINT("Parameters of %s available for model selection:\n",
            object->get_name());

    /* scratch buffer that receives the textual form of each data type */
    char* type_string=SG_MALLOC(char, 100);
    for (index_t i=0; i<modsel_params.vlen; ++i)
    {
        /* extract current name, description and type, and print them */
        const char* name=modsel_params.vector[i];
        index_t index=object->get_modsel_param_index(name);
        TSGDataType type=object->m_model_selection_parameters->get_parameter(
                index)->m_datatype;
        type.to_string(type_string, 100);
        SG_SPRINT("\"%s\": \"%s\", %s\n", name,
                object->get_modsel_param_descr(name), type_string);
    }
    SG_FREE(type_string);

    modsel_params.destroy_vector();
    SG_SPRINT("\n");
}

/** Lists the model selection parameters of several classifiers, kernels and
 * a distance. The listing is only compiled when HAVE_LAPACK is defined.
 */
int main(int argc, char** argv)
{
    init_shogun(&print_message);

    /* NOTE(review): the guard was `#ifndef`, which contradicts the closing
     * comment and disables the listing whenever LAPACK is present; CLibLinear
     * presumably needs LAPACK - confirm against LibLinear.h */
#ifdef HAVE_LAPACK
    CSGObject* object;

    object=new CLibSVM();
    print_modsel_parameters(object);
    SG_UNREF(object);

    object=new CLibLinear();
    print_modsel_parameters(object);
    SG_UNREF(object);

    object=new CDistantSegmentsKernel();
    print_modsel_parameters(object);
    SG_UNREF(object);

    object=new CGaussianKernel();
    print_modsel_parameters(object);
    SG_UNREF(object);

    object=new CPowerKernel();
    print_modsel_parameters(object);
    SG_UNREF(object);

    object=new CMinkowskiMetric();
    print_modsel_parameters(object);
    SG_UNREF(object);
#endif // HAVE_LAPACK

    exit_shogun();

    return 0;
}
/*
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 3 of the License, or
 * (at your option) any later version.
 *
 * Written (W) 2011 Heiko Strathmann
 * DS-Kernel implementation Written (W) 2008 Sébastien Boisvert under GPLv3
 * Copyright (C) 2011 Berlin Institute of Technology and Max-Planck-Society
 */

#include <shogun/io/SGIO.h>
#include <shogun/mathematics/Math.h>
#include <shogun/base/Parameter.h>
#include <shogun/kernel/DistantSegmentsKernel.h>
#include <shogun/kernel/GaussianKernel.h>

using namespace shogun;

/* length of the test vectors and side length of the test matrices */
int32_t max=3;

/* value stored in the original parameters / in the replacement parameters */
const float64_t initial_value=1;
const float64_t another_value=2;

/** Output callback handed to init_shogun(): writes str verbatim to target.
 *
 * @param target stream to write to
 * @param str zero-terminated message
 */
void print_message(FILE* target, const char* str)
{
    fprintf(target, "%s", str);
}

/** Checks set_from_parameters() for a float64_t scalar.
 *
 * @return true if the original value equals another_value afterwards
 */
bool test_float_scalar()
{
    bool result=true;

    Parameter* original_parameter_list=new Parameter();
    float64_t original_parameter=initial_value;
    original_parameter_list->add(&original_parameter, "param", "");

    float64_t new_parameter=another_value;
    Parameter* new_parameter_list=new Parameter();
    new_parameter_list->add(&new_parameter, "param", "");

    original_parameter_list->set_from_parameters(new_parameter_list);

    result&=original_parameter==another_value;

    delete original_parameter_list;
    delete new_parameter_list;

    return result;
}

/** Checks set_from_parameters() for a float64_t vector of length max.
 *
 * @return true if every original element equals another_value afterwards
 */
bool test_float_vector()
{
    bool result=true;

    Parameter* original_parameter_list=new Parameter();
    float64_t* original_parameter=SG_MALLOC(float64_t, max);
    CMath::fill_vector(original_parameter, max, initial_value);

    original_parameter_list->add_vector(&original_parameter, &max, "param",
            "");

    float64_t* new_parameter=SG_MALLOC(float64_t, max);
    CMath::fill_vector(new_parameter, max, another_value);

    Parameter* new_parameter_list=new Parameter();
    new_parameter_list->add_vector(&new_parameter, &max, "param", "");

    original_parameter_list->set_from_parameters(new_parameter_list);

    for (int32_t i=0; i<max; ++i)
        result&=original_parameter[i]==another_value;

    /* buffers come from SG_MALLOC, so they are released with SG_FREE */
    SG_FREE(original_parameter);
    SG_FREE(new_parameter);
    delete original_parameter_list;
    delete new_parameter_list;

    return result;
}

/** Checks set_from_parameters() for a max x max float64_t matrix.
 *
 * @return true if every original element equals another_value afterwards
 */
bool test_float_matrix()
{
    bool result=true;

    Parameter* original_parameter_list=new Parameter();
    float64_t* original_parameter=SG_MALLOC(float64_t, max*max);
    CMath::fill_vector(original_parameter, max*max, initial_value);

    original_parameter_list->add_matrix(&original_parameter, &max, &max,
            "param", "");

    float64_t* new_parameter=SG_MALLOC(float64_t, max*max);
    CMath::fill_vector(new_parameter, max*max, another_value);

    Parameter* new_parameter_list=new Parameter();
    new_parameter_list->add_matrix(&new_parameter, &max, &max, "param", "");

    original_parameter_list->set_from_parameters(new_parameter_list);

    for (int32_t i=0; i<max*max; ++i)
        result&=original_parameter[i]==another_value;

    /* buffers come from SG_MALLOC, so they are released with SG_FREE */
    SG_FREE(original_parameter);
    SG_FREE(new_parameter);
    delete original_parameter_list;
    delete new_parameter_list;

    return result;
}

/** Checks set_from_parameters() for a single CSGObject pointer.
 *
 * @return true if the original pointer equals the new pointer afterwards
 */
bool test_sgobject_scalar()
{
    bool result=true;

    Parameter* original_parameter_list=new Parameter();
    CSGObject* original_parameter=new CGaussianKernel(10, 10);
    SG_REF(original_parameter);
    original_parameter_list->add(&original_parameter, "kernel", "");

    CSGObject* new_parameter=new CDistantSegmentsKernel(10, 10, 10);
    Parameter* new_parameter_list=new Parameter();
    new_parameter_list->add(&new_parameter, "kernel", "");

    /* note: old_parameter is SG_UNREF'ed, new one SG_REF'ed */
    original_parameter_list->set_from_parameters(new_parameter_list);

    result&=original_parameter==new_parameter;

    /* old original kernel was deleted by shogun's SG_UNREF */
    SG_UNREF(new_parameter);
    delete original_parameter_list;
    delete new_parameter_list;

    return result;
}

/** Checks set_from_parameters() for a vector of max CSGObject pointers.
 *
 * @return true if every original pointer equals the corresponding new one
 */
bool test_sgobject_vector()
{
    bool result=true;

    Parameter* original_parameter_list=new Parameter();
    CSGObject** original_parameter=SG_MALLOC(CSGObject*, max);
    for (int32_t i=0; i<max; ++i)
    {
        original_parameter[i]=new CDistantSegmentsKernel(1, 1, 1);
        SG_REF(original_parameter[i]);
    }

    original_parameter_list->add_vector(&original_parameter, &max, "param",
            "");

    CSGObject** new_parameter=SG_MALLOC(CSGObject*, max);
    for (int32_t i=0; i<max; ++i)
        new_parameter[i]=new CDistantSegmentsKernel(2, 2, 2);

    Parameter* new_parameter_list=new Parameter();
    new_parameter_list->add_vector(&new_parameter, &max, "param", "");

    /* note: old_parameters are SG_UNREF'ed, new ones SG_REF'ed */
    original_parameter_list->set_from_parameters(new_parameter_list);

    for (int32_t i=0; i<max; ++i)
        result&=original_parameter[i]==new_parameter[i];

    /* old original kernels were deleted by shogun's SG_UNREF; only the
     * pointer array itself (from SG_MALLOC) is released here */
    SG_FREE(original_parameter);

    for (int32_t i=0; i<max; ++i)
        SG_UNREF(new_parameter[i]);

    SG_FREE(new_parameter);
    delete original_parameter_list;
    delete new_parameter_list;

    return result;
}

/** Checks set_from_parameters() for a max x max matrix of CSGObject pointers.
 *
 * @return true if every original pointer equals the corresponding new one
 */
bool test_sgobject_matrix()
{
    bool result=true;

    Parameter* original_parameter_list=new Parameter();
    CSGObject** original_parameter=SG_MALLOC(CSGObject*, max*max);
    for (int32_t i=0; i<max; ++i)
    {
        for (int32_t j=0; j<max; ++j)
        {
            original_parameter[j*max+i]=new CDistantSegmentsKernel(1, 1, 1);
            SG_REF(original_parameter[j*max+i]);
        }
    }

    original_parameter_list->add_matrix(&original_parameter, &max, &max,
            "param", "");

    CSGObject** new_parameter=SG_MALLOC(CSGObject*, max*max);
    for (int32_t i=0; i<max; ++i)
    {
        for (int32_t j=0; j<max; ++j)
            new_parameter[j*max+i]=new CDistantSegmentsKernel(1, 1, 1);
    }

    Parameter* new_parameter_list=new Parameter();
    new_parameter_list->add_matrix(&new_parameter, &max, &max, "param", "");

    /* note: old_parameters are SG_UNREF'ed, new ones SG_REF'ed */
    original_parameter_list->set_from_parameters(new_parameter_list);

    for (int32_t i=0; i<max; ++i)
    {
        for (int32_t j=0; j<max; ++j)
            result&=original_parameter[j*max+i]==new_parameter[j*max+i];
    }

    /* old original kernels were deleted by shogun's SG_UNREF; only the
     * pointer array itself (from SG_MALLOC) is released here */
    SG_FREE(original_parameter);

    for (int32_t i=0; i<max*max; ++i)
        SG_UNREF(new_parameter[i]);

    SG_FREE(new_parameter);
    delete original_parameter_list;
    delete new_parameter_list;

    return result;
}

/** Runs all tests and prints "SUCCESS!" if every one returned true,
 * "FAILURE!" otherwise. The exit code is 0 in both cases.
 */
int main(int argc, char **argv)
{
    init_shogun(&print_message, &print_message, &print_message);

    bool result=true;

    /* test whether set_from_parameters works for these types */
    result&=test_float_scalar();
    result&=test_sgobject_scalar();
    result&=test_sgobject_vector();
    result&=test_sgobject_matrix();
    result&=test_float_matrix();
    result&=test_float_vector();

    if (result)
        SG_SPRINT("SUCCESS!\n");
    else
        SG_SPRINT("FAILURE!\n");

    exit_shogun();

    return 0;
}
/*
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 3 of the License, or
 * (at your option) any later version.
 *
 * Written (W) 2008-2010 Soeren Sonnenburg, Alexander Binder
 * Copyright (C) 2008-2009 Fraunhofer Institute FIRST and Max Planck Society
 * Copyright (C) 2010 Berlin Institute of Technology
 */

#include <shogun/kernel/GaussianKernel.h>
#include <shogun/kernel/LinearKernel.h>
#include <shogun/preproc/RandomFourierGaussPreproc.h>
#include <shogun/features/SimpleFeatures.h>
#include <shogun/classifier/svm/LibSVM.h>
#include <shogun/lib/Mathematics.h>
#include <shogun/lib/common.h>
#include <shogun/base/init.h>

#include <stdlib.h>
#include <stdio.h>
#include <unistd.h> // sleep()

#include <vector>
#include <iostream>
#include <algorithm>
#include <ctime>

using namespace shogun;

/**
 * Generate a two-class toy data set and print it.
 *
 * Allocates lab (num entries) and feat (num*dims entries) with SG_MALLOC.
 * The first num/2 examples get label -1 and features random(0,1)+dist, the
 * remaining ones get label +1 and features random(0,1)-dist. Example i
 * occupies feat[i*dims] .. feat[i*dims+dims-1].
 *
 * @param feat output: newly allocated feature buffer
 * @param lab output: newly allocated label buffer
 * @param num number of examples
 * @param dims number of features per example
 * @param dist offset added to / subtracted from the two classes
 */
void gen_rand_data(float64_t* & feat, float64_t* & lab,const int32_t num,const int32_t dims,const float64_t dist)
{
	lab=SG_MALLOC(float64_t, num);
	feat=SG_MALLOC(float64_t, num*dims);

	for (int32_t i=0; i<num; i++)
	{
		if (i<num/2)
		{
			lab[i]=-1.0;

			for (int32_t j=0; j<dims; j++)
				feat[i*dims+j]=CMath::random(0.0,1.0)+dist;
		}
		else
		{
			lab[i]=1.0;

			for (int32_t j=0; j<dims; j++)
				feat[i*dims+j]=CMath::random(0.0,1.0)-dist;
		}
	}

	CMath::display_vector(lab,num);
	CMath::display_matrix(feat,dims, num);
}

/**
 * Compares an SVM trained on a Gaussian kernel with an SVM trained on a
 * linear kernel over features transformed by CRandomFourierGaussPreproc.
 *
 * Three scoring passes are made over the same test data:
 *  1. Gaussian kernel on the raw features,
 *  2. linear kernel, test features transformed by the preprocessor that was
 *     applied to the training features,
 *  3. linear kernel, test features transformed by a second preprocessor that
 *     received the first one's random coefficients via
 *     set_randomcoefficients().
 */
int main()
{
	time_t a,b;
	int32_t dims=6000;
	float64_t dist=0.5;

	// the typical application of the below preprocessor are cases with high
	// input dimensionalities of some thousands
	int32_t randomfourier_featurespace_dim=500;

	int32_t numtr=3000;
	int32_t numte=3000;

	const int32_t feature_cache=0;
	const int32_t kernel_cache=0;

	// important trick for RFgauss to work: kernel width is set such that
	// average inner kernel distance is close one
	// the rfgauss approximation breaks down if average inner kernel distances
	// (~~ kernel width to small compared to variance of data) are too large
	// try rbf_width=0.1 to see how it fails! - you will see the problem in the
	// large number of negative kernel entries (numnegratio) for the rfgauss
	// linear kernel
	const float64_t rbf_width=4000;
	const float64_t svm_C=10;
	const float64_t svm_eps=0.001;

	init_shogun();

	// ------------------------------------------------------------------
	// generate data
	// NOTE(review): the feature buffers are handed to set_feature_matrix()
	// below and are never SG_FREE'd here - presumably the feature objects
	// take ownership; confirm against CSimpleFeatures.
	// ------------------------------------------------------------------
	float64_t* feattr(NULL);
	float64_t* labtr(NULL);

	a=time(NULL);
	std::cout << "generating train data"<<std::endl;
	gen_rand_data(feattr,labtr,numtr,dims,dist);

	// second copy of the train data for the preprocessed feature object
	float64_t* feattr2=SG_MALLOC(float64_t, numtr*dims);
	std::copy(feattr,feattr+numtr*dims,feattr2);

	std::cout << "finished"<<std::endl;
	b=time(NULL);
	std::cout<< "elapsed time in seconds "<<b-a <<std::endl;

	float64_t* featte(NULL);
	float64_t* labte(NULL);

	a=time(NULL);
	std::cout << "generating test data"<<std::endl;
	gen_rand_data(featte,labte,numte,dims,dist);

	// copies of the TEST data: sized by numte (the test buffer holds
	// numte*dims values), not by the number of training examples
	float64_t* featte2=SG_MALLOC(float64_t, numte*dims);
	std::copy(featte,featte+numte*dims,featte2);

	float64_t* featte3=SG_MALLOC(float64_t, numte*dims);
	std::copy(featte,featte+numte*dims,featte3);

	std::cout << "finished"<<std::endl;
	b=time(NULL);
	std::cout<< "elapsed time in seconds "<<b-a <<std::endl;

	// create train labels
	CLabels* labelstr=new CLabels();
	labelstr->set_labels(labtr, numtr);
	SG_REF(labelstr);

	// ------------------------------------------------------------------
	// pass 1: gaussian kernel on the raw features
	// ------------------------------------------------------------------

	// create train features
	a=time(NULL);
	std::cout << "initializing shogun train feature"<<std::endl;
	CSimpleFeatures<float64_t>* featurestr1 = new CSimpleFeatures<float64_t>(feature_cache);
	SG_REF(featurestr1);
	featurestr1->set_feature_matrix(feattr, dims, numtr);
	std::cout << "finished"<<std::endl;

	// create gaussian kernel
	CGaussianKernel* kerneltr1 = new CGaussianKernel(kernel_cache, rbf_width);
	SG_REF(kerneltr1);
	kerneltr1->init(featurestr1, featurestr1);

	// create svm via libsvm and train
	CLibSVM* svm1 = new CLibSVM(svm_C, kerneltr1, labelstr);
	SG_REF(svm1);
	svm1->set_epsilon(svm_eps);

	a=time(NULL);
	std::cout << "training SVM over gaussian kernel"<<std::endl;
	svm1->train();
	std::cout << "finished"<<std::endl;
	b=time(NULL);
	std::cout<< "elapsed time in seconds "<<b-a <<std::endl;

	printf("num_sv:%d b:%f\n", svm1->get_num_support_vectors(), svm1->get_bias());

	a=time(NULL);
	std::cout << "initializing shogun test feature"<<std::endl;
	CSimpleFeatures<float64_t>* featureste1 = new CSimpleFeatures<float64_t>(feature_cache);
	SG_REF(featureste1);
	featureste1->set_feature_matrix(featte, dims, numte);
	std::cout << "finished"<<std::endl;

	// train-vs-test gaussian kernel used for scoring
	CGaussianKernel* kernelte1 = new CGaussianKernel(kernel_cache, rbf_width);
	SG_REF(kernelte1);
	kernelte1->init(featurestr1, featureste1);

	svm1->set_kernel(kernelte1);

	a=time(NULL);
	std::cout << "scoring gaussian test kernel"<<std::endl;
	std::vector<float64_t> scoreste1(numte);

	float64_t err1=0;
	for(int32_t i=0; i< numte ;++i)
	{
		scoreste1[i]=svm1->classify_example(i);
		// score and label of opposite sign count as an error
		if(scoreste1[i]*labte[i]<0)
		{
			err1+=1.0/numte;
		}
	}
	std::cout << "finished"<<std::endl;
	b=time(NULL);
	std::cout<< "elapsed time in seconds "<<b-a <<std::endl;

	// ------------------------------------------------------------------
	// pass 2: now WITH the preprocessor
	// ------------------------------------------------------------------
	a=time(NULL);
	std::cout << "initializing preprocessor"<<std::endl;
	CRandomFourierGaussPreproc *rfgauss=new CRandomFourierGaussPreproc;
	SG_REF(rfgauss);
	rfgauss->get_io()->set_loglevel(MSG_DEBUG);

	// set parameters of the preprocessor
	// !!! note the kernel width passed here: CMath::sqrt(rbf_width/2.0)
	rfgauss->set_kernelwidth( CMath::sqrt(rbf_width/2.0) );
	rfgauss->set_dim_input_space(dims);
	rfgauss->set_dim_feature_space(randomfourier_featurespace_dim);
	std::cout << "finished"<<std::endl;

	// create train features
	a=time(NULL);
	std::cout << "initializing shogun train feature again"<<std::endl;
	CSimpleFeatures<float64_t>* featurestr2 = new CSimpleFeatures<float64_t>(feature_cache);
	SG_REF(featurestr2);
	featurestr2->set_feature_matrix(feattr2, dims, numtr);
	std::cout << "finished"<<std::endl;

	// add preprocessor
	featurestr2->add_preproc(rfgauss);

	// apply preprocessor
	a=time(NULL);
	std::cout << "applying preprocessor to train feature"<<std::endl;
	featurestr2->apply_preproc();
	std::cout << "finished"<<std::endl;
	b=time(NULL);
	std::cout<< "elapsed time in seconds "<<b-a <<std::endl;

	// save random coefficients and state data of preprocessor for use with a
	// new preprocessor object (see pass 3 below, which shows the usage of
	// set_randomcoefficients)
	// Alternative: use built-in serialization to load and save state data
	// from/to a file!!!
	float64_t *randomcoeff_additive2, * randomcoeff_multiplicative2;
	int32_t dim_feature_space2,dim_input_space2;
	float64_t kernelwidth2;

	rfgauss->get_randomcoefficients(&randomcoeff_additive2,
			&randomcoeff_multiplicative2,
			&dim_feature_space2, &dim_input_space2, &kernelwidth2);

	// create linear kernel
	CLinearKernel* kerneltr2 = new CLinearKernel();
	SG_REF(kerneltr2);
	kerneltr2->init(featurestr2, featurestr2);

	// create svm via libsvm and train
	CLibSVM* svm2 = new CLibSVM(svm_C, kerneltr2, labelstr);
	SG_REF(svm2);
	svm2->set_epsilon(svm_eps);

	a=time(NULL);
	std::cout << "training SVM over linear kernel over preprocessed features"<<std::endl;
	svm2->train();
	std::cout << "finished"<<std::endl;
	b=time(NULL);
	std::cout<< "elapsed time in seconds "<<b-a <<std::endl;

	printf("num_sv:%d b:%f\n", svm2->get_num_support_vectors(), svm2->get_bias());

	a=time(NULL);
	std::cout << "initializing shogun test feature again"<<std::endl;
	CSimpleFeatures<float64_t>* featureste2 = new CSimpleFeatures<float64_t>(feature_cache);
	SG_REF(featureste2);
	featureste2->set_feature_matrix(featte2, dims, numte);
	std::cout << "finished"<<std::endl;

	// second preprocessor object; it is configured and used in pass 3
	CRandomFourierGaussPreproc *rfgauss2=new CRandomFourierGaussPreproc;
	SG_REF(rfgauss2);
	rfgauss2->get_io()->set_loglevel(MSG_DEBUG);

	// add preprocessor (the one already applied to the train features)
	featureste2->add_preproc(rfgauss);

	// apply preprocessor
	a=time(NULL);
	std::cout << "applying same preprocessor to test feature"<<std::endl;
	featureste2->apply_preproc();
	std::cout << "finished"<<std::endl;
	b=time(NULL);
	std::cout<< "elapsed time in seconds "<<b-a <<std::endl;

	// train-vs-test linear kernel over preprocessed features
	CLinearKernel* kernelte2 = new CLinearKernel();
	SG_REF(kernelte2);
	kernelte2->init(featurestr2, featureste2);

	svm2->set_kernel(kernelte2);

	a=time(NULL);
	std::cout << "scoring linear test kernel over preprocessed features"<<std::endl;
	std::vector<float64_t> scoreste2(numte);

	float64_t err2=0;
	for(int32_t i=0; i< numte ;++i)
	{
		scoreste2[i]=svm2->classify_example(i);
		if(scoreste2[i]*labte[i]<0)
		{
			err2+=1.0/numte;
		}
	}
	std::cout << "finished"<<std::endl;
	b=time(NULL);
	std::cout<< "elapsed time in seconds "<<b-a <<std::endl;

	std::cout << "pausing 12 seconds"<<std::endl;
	sleep(12);

	// ------------------------------------------------------------------
	// compare results of pass 1 and pass 2
	// ------------------------------------------------------------------
	int32_t num_labeldiffs=0;
	float64_t avg_scorediff=0;
	for(int32_t i=0; i< numte ;++i)
	{
		if( (int32_t)CMath::sign(scoreste1[i]) != (int32_t)CMath::sign(scoreste2[i]))
		{
			++num_labeldiffs;
		}
		avg_scorediff+=CMath::abs(scoreste1[i]-scoreste2[i])/numte;
		std::cout<< "at sample i"<< i <<" label 1= " << CMath::sign(scoreste1[i])
			<<" label 2= " << CMath::sign(scoreste2[i])
			<< " scorediff " << scoreste1[i] << " - " <<scoreste2[i]
			<<" = " << CMath::abs(scoreste1[i]-scoreste2[i])<<std::endl;
	}
	std::cout << "usedwidth for rbf kernel"<< kerneltr1->get_width() << " " << kernelte1->get_width()<<std::endl;

	std::cout<< "number of different labels between gaussian kernel and rfgauss "<< num_labeldiffs<< " out of "<< numte << " labels "<<std::endl;
	std::cout<< "average test sample SVM output score difference between gaussian kernel and rfgauss "<< avg_scorediff<<std::endl;
	std::cout<< "classification errors gaussian kernel and rfgauss "<< err1 << " " <<err2<<std::endl;

	a=time(NULL);
	std::cout << "computing effective kernel widths (means of inner distances)"<<std::endl;

	int32_t m, n;
	float64_t * kertr1;
	kerneltr1->get_kernel_matrix ( &kertr1, &m, &n);
	std::cout << "kernel size "<< m << " "<< n <<std::endl;

	// accumulate -log(k) over the entries with l<i only
	float64_t avgdist1=0;
	for(int i=0; i<m ;++i)
	{
		for(int l=0; l<i ;++l)
		{
			avgdist1+= -CMath::log(kertr1[i+l*m])*2.0/m/(m+1.0);
		}
	}

	float64_t * kertr2;
	kerneltr2->get_kernel_matrix (&kertr2,&m, &n);

	float64_t avgdist2=0;
	float64_t numnegratio=0;
	for(int i=0; i<m ;++i)
	{
		for(int l=0; l<i ;++l)
		{
			// non-positive entries have no logarithm: count them instead
			if(kertr2[i+l*m]<=0)
			{
				numnegratio+=2.0/m/(m+1.0);
			}
			else
			{
				avgdist2+= -CMath::log(std::max(kertr2[i+l*m],1e-10))*2.0/m/(m+1.0);
			}
		}
	}
	std::cout << "finished"<<std::endl;
	b=time(NULL);
	std::cout<< "elapsed time in seconds "<<b-a <<std::endl;

	std::cout << "effective kernel width for gaussian kernel and RFgauss "<< avgdist1 << " " <<avgdist2/(1.0-numnegratio) << std::endl<< " numnegratio (negative entries in RFgauss approx kernel)"<< numnegratio<<std::endl;

	// ------------------------------------------------------------------
	// pass 3: now the same with a new preprocessor to show the usage of
	// set_randomcoefficients
	// ------------------------------------------------------------------
	CSimpleFeatures<float64_t>* featureste3 = new CSimpleFeatures<float64_t>(feature_cache);
	SG_REF(featureste3);
	featureste3->set_feature_matrix(featte3, dims, numte);
	std::cout << "finished"<<std::endl;

	// hand the saved state of the first preprocessor to the second one
	rfgauss2->set_randomcoefficients(
			randomcoeff_additive2,
			randomcoeff_multiplicative2,
			dim_feature_space2, dim_input_space2, kernelwidth2);

	// add preprocessor
	featureste3->add_preproc(rfgauss2);

	// apply preprocessor
	a=time(NULL);
	std::cout << "applying same preprocessor to test feature"<<std::endl;
	featureste3->apply_preproc();
	std::cout << "finished"<<std::endl;
	b=time(NULL);
	std::cout<< "elapsed time in seconds "<<b-a <<std::endl;

	// Initialise the kernel that is actually handed to the SVM below. The
	// features must be attached to kernelte3 itself, otherwise svm2 scores
	// with a kernel that was never initialised.
	CLinearKernel* kernelte3 = new CLinearKernel();
	SG_REF(kernelte3);
	kernelte3->init(featurestr2, featureste3);

	svm2->set_kernel(kernelte3);

	a=time(NULL);
	std::cout << "scoring linear test kernel over preprocessed features"<<std::endl;
	std::vector<float64_t> scoreste3(numte);

	float64_t err3=0;
	for(int32_t i=0; i< numte ;++i)
	{
		scoreste3[i]=svm2->classify_example(i);
		if(scoreste3[i]*labte[i]<0)
		{
			err3+=1.0/numte;
		}
	}
	std::cout << "finished"<<std::endl;
	b=time(NULL);
	std::cout<< "elapsed time in seconds "<<b-a <<std::endl;

	std::cout << "pausing 12 seconds"<<std::endl;
	sleep(12);

	// ------------------------------------------------------------------
	// compare results of pass 1 and pass 3
	// ------------------------------------------------------------------
	num_labeldiffs=0;
	avg_scorediff=0;
	for(int32_t i=0; i< numte ;++i)
	{
		if( (int32_t)CMath::sign(scoreste1[i]) != (int32_t)CMath::sign(scoreste3[i]))
		{
			++num_labeldiffs;
		}
		avg_scorediff+=CMath::abs(scoreste1[i]-scoreste3[i])/numte;
		std::cout<< "at sample i"<< i <<" label 1= " << CMath::sign(scoreste1[i])
			<<" label 2= " << CMath::sign(scoreste3[i])
			<< " scorediff " << scoreste1[i] << " - " <<scoreste3[i]
			<<" = " << CMath::abs(scoreste1[i]-scoreste3[i])<<std::endl;
	}

	std::cout<< "number of different labels between gaussian kernel and rfgauss "<< num_labeldiffs<< " out of "<< numte << " labels "<<std::endl;
	std::cout<< "average test sample SVM output score difference between gaussian kernel and rfgauss "<< avg_scorediff<<std::endl;
	std::cout<< "classification errors gaussian kernel and rfgauss "<< err1 << " " <<err3<<std::endl;

	// ------------------------------------------------------------------
	// clean up
	// ------------------------------------------------------------------
	SG_FREE(randomcoeff_additive2);
	SG_FREE(randomcoeff_multiplicative2);

	SG_FREE(labtr);
	SG_FREE(labte);
	SG_FREE(kertr1);
	SG_FREE(kertr2);

	SG_UNREF(labelstr);
	SG_UNREF(kerneltr1);
	SG_UNREF(kerneltr2);
	SG_UNREF(kernelte1);
	SG_UNREF(kernelte2);
	SG_UNREF(kernelte3);
	SG_UNREF(featurestr1);
	SG_UNREF(featurestr2);
	SG_UNREF(featureste1);
	SG_UNREF(featureste2);
	SG_UNREF(featureste3);
	SG_UNREF(svm1);
	SG_UNREF(svm2);
	SG_UNREF(rfgauss);
	SG_UNREF(rfgauss2);

	exit_shogun();
	return 0;
}
/*
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 3 of the License, or
 * (at your option) any later version.
 *
 * Written (W) 2011 Heiko Strathmann
 * Copyright (C) 2011 Berlin Institute of Technology and Max-Planck-Society
 */

#include <shogun/base/init.h>
#include <shogun/base/Parameter.h>
#include <shogun/io/SerializableAsciiFile.h>
#include <shogun/io/SerializableJsonFile.h>
#include <shogun/io/SerializableXmlFile.h>
#include <shogun/io/SerializableHdf5File.h>

using namespace shogun;

/** Message callback registered with init_shogun(): writes str to target. */
void print_message(FILE* target, const char* str)
{
	fprintf(target, "%s", str);
}

/** File every serialization test writes to and reads back from. */
const char* filename="filename.txt";

/** Returns the SGVector registered as parameter 0 of p. */
static SGVector<float64_t>* vector_param(Parameter* p)
{
	return static_cast<SGVector<float64_t>*>(p->get_parameter(0)->m_parameter);
}

/** Returns the SGMatrix registered as parameter 1 of p. */
static SGMatrix<float64_t>* matrix_param(Parameter* p)
{
	return static_cast<SGMatrix<float64_t>*>(p->get_parameter(1)->m_parameter);
}

/**
 * Displays the vector (parameter 0) and the matrix (parameter 1) of p.
 *
 * @param p parameter collection laid out as in main()
 */
void print(Parameter* p)
{
	SGVector<float64_t>* v=vector_param(p);
	CMath::display_vector(v->vector, v->vlen, "vector:");

	SGMatrix<float64_t>* m=matrix_param(p);
	CMath::display_matrix(m->matrix, m->num_rows, m->num_cols, "matrix:");
}

/**
 * Asserts that the vector and matrix of both collections have the same
 * dimensions and element-wise equal content.
 *
 * @param save_param collection that was written
 * @param load_param collection that was read back
 */
void check_content_equal(Parameter* save_param, Parameter* load_param)
{
	SGVector<float64_t>* sv=vector_param(save_param);
	SGMatrix<float64_t>* sm=matrix_param(save_param);
	SGVector<float64_t>* lv=vector_param(load_param);
	SGMatrix<float64_t>* lm=matrix_param(load_param);

	ASSERT(sv->vlen==lv->vlen);
	ASSERT(sm->num_rows==lm->num_rows);
	ASSERT(sm->num_cols==lm->num_cols);

	for (index_t i=0; i<sv->vlen; ++i)
		ASSERT(sv->vector[i]==lv->vector[i]);

	for (index_t i=0; i<sm->num_cols*sm->num_rows; ++i)
		ASSERT(sm->matrix[i]==lm->matrix[i]);
}

/**
 * Shared body of all test_* functions: prints both collections, saves
 * save_param through a TFile opened for writing, loads load_param through a
 * TFile opened for reading, prints the result and checks it for equality.
 *
 * @tparam TFile serializable file class constructible from (filename, mode)
 * @param format_name format name inserted into the banner line
 * @param save_param collection to write
 * @param load_param collection to read into
 */
template <class TFile>
static void roundtrip_and_check(const char* format_name,
		Parameter* save_param, Parameter* load_param)
{
	SG_SPRINT("testing %s serialization\n", format_name);
	SG_SPRINT("to save:\n");
	print(save_param);
	SG_SPRINT("loaded before:\n");
	print(load_param);

	TFile* file=new TFile(filename, 'w');
	save_param->save(file);
	file->close();
	SG_UNREF(file);

	file=new TFile(filename, 'r');
	load_param->load(file);
	file->close();
	SG_UNREF(file);

	SG_SPRINT("loaded after:\n");
	print(load_param);
	check_content_equal(save_param, load_param);
}

/** Round trip through CSerializableAsciiFile. */
void test_acsii(Parameter* save_param, Parameter* load_param)
{
	roundtrip_and_check<CSerializableAsciiFile>("ascii", save_param, load_param);
}

/** Round trip through CSerializableHdf5File. */
void test_hdf5(Parameter* save_param, Parameter* load_param)
{
	/* TODO, HDF5 file leaks memory */
	roundtrip_and_check<CSerializableHdf5File>("hdf5", save_param, load_param);
}

/** Round trip through CSerializableJsonFile. */
void test_json(Parameter* save_param, Parameter* load_param)
{
	/* TODO, json file leaks memory, also save methods */
	roundtrip_and_check<CSerializableJsonFile>("json", save_param, load_param);
}

/** Round trip through CSerializableXmlFile. */
void test_xml(Parameter* save_param, Parameter* load_param)
{
	/* TODO, xml file leaks memory and produces a read error */
	roundtrip_and_check<CSerializableXmlFile>("xml", save_param, load_param);
}

/**
 * Puts both collections into a known state before a test: the save side gets
 * a range-filled vector of length 9 and a range-filled 3x3 matrix, the load
 * side a zero-filled vector of length 3 and a zero-filled 4x4 matrix, so a
 * successful load has to change both size and content.
 *
 * @param save_param collection to fill with test data
 * @param load_param collection to reset to zeros
 */
void reset_values(Parameter* save_param, Parameter* load_param)
{
	SGVector<float64_t>* sv=vector_param(save_param);
	SGMatrix<float64_t>* sm=matrix_param(save_param);
	SGVector<float64_t>* lv=vector_param(load_param);
	SGMatrix<float64_t>* lm=matrix_param(load_param);

	/* release whatever a previous test left behind */
	sv->destroy_vector();
	lv->destroy_vector();
	sm->destroy_matrix();
	lm->destroy_matrix();

	*sv=SGVector<float64_t>(9);
	*lv=SGVector<float64_t>(3);
	*sm=SGMatrix<float64_t>(3, 3);
	*lm=SGMatrix<float64_t>(4, 4);

	CMath::range_fill_vector(sv->vector, sv->vlen);
	CMath::range_fill_vector(sm->matrix, sm->num_rows*sm->num_cols);
	CMath::fill_vector(lv->vector, lv->vlen, 0.0);
	CMath::fill_vector(lm->matrix, lm->num_rows*lm->num_cols, 0.0);
}

/**
 * Registers one vector and one matrix in a "save" and a "load" Parameter
 * collection and runs the json, ascii, hdf5 and xml round trips on them.
 */
int main(int argc, char **argv)
{
	init_shogun(&print_message, &print_message, &print_message);

	/* for serialization */
	SGVector<float64_t> sv;
	SGMatrix<float64_t> sm;
	Parameter* sp=new Parameter();
	sp->add(&sv, "vector", "description");
	sp->add(&sm, "matrix", "description");

	/* for deserialization */
	SGVector<float64_t> lv;
	SGMatrix<float64_t> lm;
	Parameter* lp=new Parameter();
	lp->add(&lv, "vector", "description");
	lp->add(&lm, "matrix", "description");

	/* still leaks memory TODO */
	reset_values(sp, lp);
	test_json(sp, lp);

	reset_values(sp, lp);
	test_acsii(sp, lp);

	/* still leaks memory TODO */
	reset_values(sp, lp);
	test_hdf5(sp, lp);

	/* still leaks memory TODO */
	reset_values(sp, lp);
	test_xml(sp, lp);

	/* clean up */
	sv.destroy_vector();
	sm.destroy_matrix();
	lv.destroy_vector();
	lm.destroy_matrix();

	delete sp;
	delete lp;

	exit_shogun();
	return 0;
}
/*
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 3 of the License, or
 * (at your option) any later version.
 *
 * Written (W) 2011 Heiko Strathmann
 * Copyright (C) 2011 Berlin Institute of Technology and Max-Planck-Society
 */

#include <shogun/base/init.h>
#include <shogun/evaluation/StratifiedCrossValidationSplitting.h>
#include <shogun/features/Labels.h>

using namespace shogun;

/** Message callback registered with init_shogun(): writes str to target. */
void print_message(FILE* target, const char* str)
{
	fprintf(target, "%s", str);
}

/**
 * Runs 50 randomized rounds: builds random labels, splits them with
 * CStratifiedCrossValidationSplitting, prints every subset and its inverse,
 * and asserts that the number of examples of each class differs by at most
 * one between subset 0 and every other subset.
 */
int main(int argc, char **argv)
{
	init_shogun(&print_message, &print_message, &print_message);

	index_t num_labels, num_classes, num_subsets;
	index_t runs=50;

	while (runs-->0)
	{
		num_labels=CMath::random(5, 100);
		num_classes=CMath::random(2, 10);
		num_subsets=CMath::random(1, 10);

		/* this will throw an error */
		if (num_labels<num_subsets)
			continue;

		SG_SPRINT("num_labels=%d\nnum_classes=%d\nnum_subsets=%d\n\n",
				num_labels, num_classes, num_subsets);

		/* build labels */
		CLabels* labels=new CLabels(num_labels);
		for (index_t i=0; i<num_labels; ++i)
		{
			labels->set_label(i, CMath::random()%num_classes);
			SG_SPRINT("label(%d)=%.18g\n", i, labels->get_label(i));
		}
		SG_SPRINT("\n");

		/* print classes */
		SGVector<float64_t> classes=labels->get_classes();
		CMath::display_vector(classes.vector, classes.vlen, "classes");
		classes.destroy_vector();

		/* build splitting strategy */
		CStratifiedCrossValidationSplitting* splitting=
				new CStratifiedCrossValidationSplitting(labels, num_subsets);

		for (index_t i=0; i<num_subsets; ++i)
		{
			SGVector<index_t> subset=splitting->generate_subset_indices(i);
			SGVector<index_t> inverse=splitting->generate_subset_inverse(i);

			/* each entry is printed as index(label); the label is looked up
			 * by the index stored in the subset, not by the loop position */
			SG_SPRINT("subset %d\n", i);
			for (index_t j=0; j<subset.vlen; ++j)
			{
				SG_SPRINT("%d(%d),", subset.vector[j],
						(int32_t)labels->get_label(subset.vector[j]));
			}
			SG_SPRINT("\n");

			SG_SPRINT("inverse %d\n", i);
			for (index_t j=0; j<inverse.vlen; ++j)
			{
				SG_SPRINT("%d(%d),", inverse.vector[j],
						(int32_t)labels->get_label(inverse.vector[j]));
			}
			SG_SPRINT("\n\n");

			subset.destroy_vector();
			inverse.destroy_vector();
		}

		/* check whether number of labels in every subset is nearly equal */
		for (index_t i=0; i<num_classes; ++i)
		{
			SG_SPRINT("checking class %d\n", i);

			/* count number of elements for this class (reference: subset 0) */
			SGVector<index_t> temp=splitting->generate_subset_indices(0);
			int32_t count=0;
			for (index_t j=0; j<temp.vlen; ++j)
			{
				if ((int32_t)labels->get_label(temp.vector[j])==i)
					++count;
			}
			temp.destroy_vector();

			/* check all subsets for same ratio */
			for (index_t j=0; j<num_subsets; ++j)
			{
				SGVector<index_t> subset=splitting->generate_subset_indices(j);
				int32_t temp_count=0;
				for (index_t k=0; k<subset.vlen; ++k)
				{
					if ((int32_t)labels->get_label(subset.vector[k])==i)
						++temp_count;
				}
				subset.destroy_vector();

				/* at most one difference */
				SG_SPRINT("number in subset %d: %d\n", j, temp_count);
				ASSERT(CMath::abs(temp_count-count)<=1);
			}
		}

		/* clean up */
		SG_UNREF(splitting);
	}

	exit_shogun();
	return 0;
}
/*
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 3 of the License, or
 * (at your option) any later version.
 *
 * Written (W) 2011 Shashwat Lal Das
 * Copyright (C) 2011 Berlin Institute of Technology and Max-Planck-Society
 *
 * This file demonstrates how a regular CSimpleFeatures object can
 * be used as input for the StreamingFeatures framework, effectively
 * making it suitable for using online learning algorithms.
 */

#include <shogun/features/StreamingSimpleFeatures.h>
#include <shogun/io/StreamingFileFromSimpleFeatures.h>
#include <shogun/mathematics/Math.h>
#include <shogun/lib/common.h>
#include <shogun/io/SGIO.h>
#include <shogun/base/init.h>

#include <stdlib.h>
#include <stdio.h>

using namespace shogun;

#define NUM 100
#define DIMS 2
#define DIST 0.5

float32_t* feat;
float64_t* lab;

/**
 * Fills the global feat / lab buffers with a two-cluster toy data set:
 * the first NUM/2 examples are random(0,1)+DIST with label 0, the rest
 * random(0,1)-DIST with label 1. The feature matrix is displayed afterwards.
 */
void gen_rand_data()
{
	feat=SG_MALLOC(float32_t, NUM*DIMS);
	lab=SG_MALLOC(float64_t, NUM);

	for (int32_t example=0; example<NUM; example++)
	{
		const bool in_first_half=(example<NUM/2);
		float32_t* row=feat+example*DIMS;

		for (int32_t d=0; d<DIMS; d++)
		{
			if (in_first_half)
				row[d]=CMath::random(0.0,1.0)+DIST;
			else
				row[d]=CMath::random(0.0,1.0)-DIST;
		}

		lab[example]=in_first_half ? 0 : 1;
	}

	CMath::display_matrix(feat,DIMS, NUM);
}

/**
 * Wraps a CSimpleFeatures object in CStreamingSimpleFeatures and walks over
 * all examples, printing each vector, its label and its dot product with
 * itself.
 */
int main()
{
	init_shogun_with_defaults();

	// Random features and labels end up in the globals feat / lab
	gen_rand_data();

	// Plain in-memory features built on the generated matrix
	CSimpleFeatures<float32_t>* dense = new CSimpleFeatures<float32_t>();
	SG_REF(dense);
	dense->set_feature_matrix(feat, DIMS, NUM);

	// Streaming view on top of the features above; passing the labels
	// (float64_t*) is optional
	CStreamingSimpleFeatures<float32_t>* stream =
		new CStreamingSimpleFeatures<float32_t>(dense, lab);
	SG_REF(stream);

	// Parsing is trivial here: each vector of the SimpleFeatures object is
	// returned in turn
	stream->start_parser();

	SG_SPRINT("Processing examples...\n\n");

	// A streaming features object has no predefined number of vectors, so
	// keep asking for the next example until there is none left.
	for (int32_t example_no=1; stream->get_next_example(); ++example_no)
	{
		// Only the current vector is accessible
		SGVector<float32_t> current = stream->get_vector();
		float64_t current_label = stream->get_label();

		SG_SPRINT("Vector %d: [\t", example_no);
		for (int32_t d=0; d<current.vlen; d++)
			SG_SPRINT("%f\t", current.vector[d]);

		SG_SPRINT("Label=%f\t", current_label);

		// Dot product of the current vector (held by the streaming object)
		// with the vector passed as argument, i.e. with itself
		float32_t self_dot = stream->dense_dot(current.vector, current.vlen);

		SG_SPRINT("]\nDot product of the vector with itself: %f", self_dot);
		SG_SPRINT("\n\n");

		// Done with this example
		stream->release_example();
	}

	// All examples have been consumed
	stream->end_parser();

	SG_FREE(lab);

	SG_UNREF(stream);
	SG_UNREF(dense);

	exit_shogun();
	return 0;
}
/*
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 3 of the License, or
 * (at your option) any later version.
 *
 * Written (W) 2011 Shashwat Lal Das
 * Copyright (C) 2011 Berlin Institute of Technology and Max-Planck-Society
 *
 * This example demonstrates use of the online variant of SGD which
 * relies on the streaming features framework.
 */

#include <shogun/lib/common.h>
#include <shogun/io/StreamingAsciiFile.h>
#include <shogun/features/StreamingSparseFeatures.h>
#include <shogun/classifier/svm/OnlineSVMSGD.h>

using namespace shogun;

/**
 * Trains COnlineSVMSGD on a labelled sparse file, applies it to an
 * unlabelled test file and prints the predicted label of every example.
 */
int main()
{
	init_shogun_with_defaults();

	// Create a StreamingAsciiFile from the training data
	char* train_file_name = "../data/train_sparsereal.light";
	CStreamingAsciiFile* train_file = new CStreamingAsciiFile(train_file_name);
	SG_REF(train_file);

	// Create a StreamingSparseFeatures from the StreamingAsciiFile.
	// The bool value is true if examples are labelled.
	// 1024 is a good standard value for the number of examples for the
	// parser to hold at a time.
	CStreamingSparseFeatures<float64_t>* train_features =
		new CStreamingSparseFeatures<float64_t>(train_file, true, 1024);
	SG_REF(train_features);

	// Create an OnlineSVMSGD object from the features. The first parameter
	// is 'C'.
	COnlineSVMSGD* sgd = new COnlineSVMSGD(1, train_features);
	sgd->set_bias_enabled(false); // Enable/disable bias
	sgd->set_lambda(0.1);         // Choose lambda
	sgd->train();                 // Train

	train_file->close();

	// Now we want to test on other data
	char* test_file_name = "../data/fm_test_sparsereal.dat";
	CStreamingAsciiFile* test_file = new CStreamingAsciiFile(test_file_name);
	SG_REF(test_file);

	// Similar, but 'false' since the file contains unlabelled examples
	CStreamingSparseFeatures<float64_t>* test_features =
		new CStreamingSparseFeatures<float64_t>(test_file, false, 1024);
	SG_REF(test_features);

	// Apply on all examples and return a CLabels*
	CLabels* test_labels = sgd->apply(test_features);

	for (int32_t i=0; i<test_labels->get_num_labels(); i++)
		SG_SPRINT("For example %d, predicted label is %f.\n", i, test_labels->get_label(i));

	// The labels object returned by apply() has to be released as well,
	// otherwise it is leaked
	SG_UNREF(test_labels);

	SG_UNREF(test_features);
	SG_UNREF(test_file);
	SG_UNREF(train_features);
	SG_UNREF(train_file);
	SG_UNREF(sgd);

	exit_shogun();
	return 0;
}
// This example simply demonstrates the use/working of StreamingStringFeatures #include <shogun/io/StreamingAsciiFile.h> #include <shogun/features/StreamingStringFeatures.h> using namespace shogun; void display_vector(const SGString<char> &vec) { printf("\nNew Vector\n------------------\n"); printf("Length=%d.\n", vec.slen); for (int32_t i=0; i<vec.slen; i++) { printf("%c", vec.string[i]); } printf("\n"); } int main(int argc, char **argv) { init_shogun_with_defaults(); // Create a StreamingAsciiFile from our input file CStreamingAsciiFile* file = new CStreamingAsciiFile("../data/fm_train_dna.dat"); // This file contains unlabelled data, so the second arg is `false'. CStreamingStringFeatures<char>* feat = new CStreamingStringFeatures<char>(file, false, 1024); // Alphabet to use is DNA feat->use_alphabet(DNA); // Loop over all examples and simply display each example feat->start_parser(); while (feat->get_next_example()) { SGString<char> vec = feat->get_vector(); display_vector(vec); feat->release_example(); } feat->end_parser(); // Get the alphabet and display the histogram CAlphabet* alpha = feat->get_alphabet(); printf("\nThe histogram is:\n"); alpha->print_histogram(); SG_UNREF(alpha); SG_UNREF(feat); SG_UNREF(file); exit_shogun(); return 0; }
/*
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 3 of the License, or
 * (at your option) any later version.
 *
 * Written (W) 2011 Shashwat Lal Das
 * Copyright (C) 2011 Berlin Institute of Technology and Max-Planck-Society
 *
 * This example demonstrates use of the Vowpal Wabbit learning algorithm.
 */

#include <shogun/lib/common.h>
#include <shogun/io/StreamingVwFile.h>
#include <shogun/features/StreamingVwFeatures.h>
#include <shogun/classifier/vw/VowpalWabbit.h>

using namespace shogun;

/**
 * Trains CVowpalWabbit on an SVMLight-formatted file and lets it write the
 * regressor to ./vw_regressor_text.dat.
 */
int main()
{
	init_shogun_with_defaults();

	// Input: the training file, parsed as SVMLight format
	char* input_path = "../data/train_sparsereal.light";
	CStreamingVwFile* input = new CStreamingVwFile(input_path);
	input->set_parser_type(T_SVMLIGHT);
	SG_REF(input);

	// Streaming features on top of the file (labelled, parser size 1024)
	CStreamingVwFeatures* examples = new CStreamingVwFeatures(input, true, 1024);
	SG_REF(examples);

	// Learner configuration
	CVowpalWabbit* learner = new CVowpalWabbit(examples);

	// Save regressor to this file
	learner->set_regressor_out("./vw_regressor_text.dat");

	// Called with false, i.e. adaptive learning is not switched on here
	learner->set_adaptive(false);

	learner->train_machine();

	SG_SPRINT("Weights have been output in text form to vw_regressor_text.dat.\n");

	input->close();

	SG_UNREF(examples);
	SG_UNREF(input);
	SG_UNREF(learner);

	exit_shogun();
	return 0;
}
/*
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 3 of the License, or
 * (at your option) any later version.
 *
 * Written (W) 2011 Shashwat Lal Das
 * Copyright (C) 2011 Berlin Institute of Technology and Max-Planck-Society
 *
 * This example demonstrates use of online SGD with CStreamingVwFeatures
 * as the features object.
 */

#include <shogun/lib/common.h>
#include <shogun/io/StreamingVwFile.h>
#include <shogun/features/StreamingVwFeatures.h>
#include <shogun/classifier/svm/OnlineSVMSGD.h>

using namespace shogun;

/**
 * Trains COnlineSVMSGD on CStreamingVwFeatures read from an SVMLight file,
 * applies it to an unlabelled test file and prints the predicted label of
 * every example.
 */
int main()
{
	init_shogun_with_defaults();

	char* train_file_name = "../data/train_sparsereal.light";
	CStreamingVwFile* train_file = new CStreamingVwFile(train_file_name);
	train_file->set_parser_type(T_SVMLIGHT); // Treat the file as SVMLight format
	SG_REF(train_file);

	// 'true': the training examples are labelled
	CStreamingVwFeatures* train_features = new CStreamingVwFeatures(train_file, true, 1024);
	SG_REF(train_features);

	COnlineSVMSGD* sgd = new COnlineSVMSGD(1, train_features);
	sgd->set_bias_enabled(false);
	sgd->set_lambda(0.1);
	sgd->train();

	train_file->close();

	// Now we want to test on other data
	char* test_file_name = "../data/fm_test_sparsereal.dat";
	CStreamingVwFile* test_file = new CStreamingVwFile(test_file_name);
	test_file->set_parser_type(T_SVMLIGHT);
	SG_REF(test_file);

	// Similar, but 'false' since the file contains unlabelled examples
	CStreamingVwFeatures* test_features = new CStreamingVwFeatures(test_file, false, 1024);
	SG_REF(test_features);

	// Apply on all examples and return a CLabels*
	CLabels* test_labels = sgd->apply(test_features);

	for (int32_t i=0; i<test_labels->get_num_labels(); i++)
		SG_SPRINT("For example %d, predicted label is %f.\n", i, test_labels->get_label(i));

	// The labels object returned by apply() has to be released as well,
	// otherwise it is leaked
	SG_UNREF(test_labels);

	SG_UNREF(test_features);
	SG_UNREF(test_file);
	SG_UNREF(train_features);
	SG_UNREF(train_file);
	SG_UNREF(sgd);

	exit_shogun();
	return 0;
}