This page lists ready-to-run Shogun examples for the C++ libshogun interface.
To run the examples you will need to manually compile them via
g++ name_of_example.cpp -lshogun
If you installed libshogun to a nonstandard directory, you will need to specify the appropriate library and include paths, e.g.
g++ -I/path/to/libshogun/includes name_of_example.cpp -L/path/to/libshogun/sofile -lshogun
Then the examples are standard binary executables and can be started via
./name_of_example
or, if the libraries are in a nonstandard location (such that they cannot be found by the dynamic linker), via
LD_LIBRARY_PATH=path/to/libshogun ./name_of_example
/* * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 3 of the License, or * (at your option) any later version. * * Written (W) 2011 Shashwat Lal Das * Copyright (C) 2011 Berlin Institute of Technology and Max-Planck-Society * * This example demonstrates use of the Vowpal Wabbit learning algorithm. */ #include <shogun/lib/common.h> #include <shogun/io/streaming/StreamingAsciiFile.h> #include <shogun/features/streaming/StreamingDenseFeatures.h> #include <shogun/multiclass/tree/BalancedConditionalProbabilityTree.h> using namespace shogun; int main(int argc, char **argv) { init_shogun_with_defaults(); const char* train_file_name = "../data/7class_example4_train.dense"; const char* test_file_name = "../data/7class_example4_test.dense"; CStreamingAsciiFile* train_file = new CStreamingAsciiFile(train_file_name); SG_REF(train_file); CStreamingDenseFeatures<float32_t>* train_features = new CStreamingDenseFeatures<float32_t>(train_file, true, 1024); SG_REF(train_features); CBalancedConditionalProbabilityTree *cpt = new CBalancedConditionalProbabilityTree(); cpt->set_num_passes(1); cpt->set_features(train_features); if (argc > 1) { float64_t alpha = 0.5; sscanf(argv[1], "%lf", &alpha); SG_SPRINT("Setting alpha to %.2lf\n", alpha); cpt->set_alpha(alpha); } cpt->train(); cpt->print_tree(); CStreamingAsciiFile* test_file = new CStreamingAsciiFile(test_file_name); SG_REF(test_file); CStreamingDenseFeatures<float32_t>* test_features = new CStreamingDenseFeatures<float32_t>(test_file, true, 1024); SG_REF(test_features); CMulticlassLabels *pred = cpt->apply_multiclass(test_features); test_features->reset_stream(); SG_SPRINT("num_labels = %d\n", pred->get_num_labels()); SG_UNREF(test_features); SG_UNREF(test_file); test_file = new CStreamingAsciiFile(test_file_name); SG_REF(test_file); test_features = new 
CStreamingDenseFeatures<float32_t>(test_file, true, 1024); SG_REF(test_features); CMulticlassLabels *gnd = new CMulticlassLabels(pred->get_num_labels()); SG_REF(gnd); test_features->start_parser(); for (int32_t i=0; i < pred->get_num_labels(); ++i) { test_features->get_next_example(); gnd->set_int_label(i, test_features->get_label()); test_features->release_example(); } test_features->end_parser(); int32_t n_correct = 0; for (index_t i=0; i < pred->get_num_labels(); ++i) { if (pred->get_int_label(i) == gnd->get_int_label(i)) n_correct++; //SG_SPRINT("%d-%d ", pred->get_int_label(i), gnd->get_int_label(i)); } SG_SPRINT("\n"); SG_SPRINT("Multiclass Accuracy = %.2f%%\n", 100.0*n_correct / gnd->get_num_labels()); SG_UNREF(gnd); SG_UNREF(train_features); SG_UNREF(test_features); SG_UNREF(train_file); SG_UNREF(test_file); SG_UNREF(cpt); SG_UNREF(pred); exit_shogun(); return 0; }
/*
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 3 of the License, or
 * (at your option) any later version.
 *
 * Written (W) 2011 Heiko Strathmann
 * Copyright (C) 2011 Berlin Institute of Technology and Max-Planck-Society
 */

#include <shogun/base/init.h>
#include <shogun/base/Parameter.h>
#include <shogun/io/SerializableAsciiFile.h>
#include <shogun/base/ParameterMap.h>
#include <shogun/features/DenseFeatures.h>

using namespace shogun;

void print_message(FILE* target, const char* str)
{
    fprintf(target, "%s", str);
}

/* test class whose registered parameters are all int-typed; this plays the
 * role of the "old" parameter version that gets serialized to file */
class CTestClassInt : public CSGObject
{
public:
    CTestClassInt()
    {
        m_number=10;
        m_parameters->add(&m_number, "number", "Test number");

        m_vector_length=3;
        m_vector=SG_MALLOC(int32_t, m_vector_length);
        SGVector<int32_t>::fill_vector(m_vector, m_vector_length, 10);
        m_parameters->add_vector(&m_vector, &m_vector_length, "vector",
                "Test vector");

        m_matrix_rows=2;
        m_matrix_cols=3;
        m_matrix=SG_MALLOC(int32_t, m_matrix_rows*m_matrix_cols);
        SGVector<int32_t>::range_fill_vector(m_matrix,
                m_matrix_rows*m_matrix_cols);
        m_parameters->add_matrix(&m_matrix, &m_matrix_rows, &m_matrix_cols,
                "matrix", "Test matrix");

        SGMatrix<int32_t> features=SGMatrix<int32_t>(2, 3);
        SGVector<int32_t>::range_fill_vector(features.matrix,
                features.num_rows*features.num_cols, 3);
        m_features=new CDenseFeatures<int32_t>(features);
        SG_REF(m_features);
        m_parameters->add((CSGObject**)&m_features, "int_features",
                "Test features");
    }

    virtual ~CTestClassInt()
    {
        SG_FREE(m_vector);
        SG_FREE(m_matrix);
        SG_UNREF(m_features);
    }

    int32_t m_number;
    int32_t* m_vector;
    int32_t m_vector_length;
    int32_t* m_matrix;
    int32_t m_matrix_rows;
    int32_t m_matrix_cols;
    CDenseFeatures<int32_t>* m_features;

    virtual const char* get_name() const { return "TestClassInt"; }
};

/* test class whose parameters are float-typed; plays the role of the "new"
 * parameter version and declares the mappings back to the int version */
class CTestClassFloat : public CSGObject
{
public:
    CTestClassFloat()
    {
        m_number=3.2;
        m_vector=SGVector<float64_t>(10);
        m_matrix=SGMatrix<float64_t>(2, 3);

        m_parameters->add(&m_number, "number", "Test number");
        m_parameters->add(&m_vector, "vector", "Test vector");
        m_parameters->add(&m_matrix, "matrix", "Test matrix");

        SGMatrix<float64_t> features=SGMatrix<float64_t>(2, 3);
        SGVector<float64_t>::range_fill_vector(features.matrix,
                features.num_rows*features.num_cols, 3.0);
        m_features=new CDenseFeatures<float64_t>(features);
        SG_REF(m_features);
        m_parameters->add((CSGObject**)&m_features, "float_features",
                "Test features");

        /* add some parameter mappings for number, here: type changes */
        m_parameter_map->put(
                new SGParamInfo("number", CT_SCALAR, ST_NONE, PT_FLOAT64, 1),
                new SGParamInfo("number", CT_SCALAR, ST_NONE, PT_INT8, 0)
        );
        m_parameter_map->put(
                new SGParamInfo("number", CT_SCALAR, ST_NONE, PT_INT8, 0),
                new SGParamInfo("number", CT_SCALAR, ST_NONE, PT_INT32, -1)
        );

        /* changes for vector: from int32_t vector to float64_t SG_VECTOR */
        m_parameter_map->put(
                new SGParamInfo("vector", CT_SGVECTOR, ST_NONE, PT_FLOAT64, 1),
                new SGParamInfo("vector", CT_SGVECTOR, ST_NONE, PT_INT32, 0)
        );

        /* from normal vector to SG_VECTOR of same type */
        m_parameter_map->put(
                new SGParamInfo("vector", CT_SGVECTOR, ST_NONE, PT_INT32, 0),
                new SGParamInfo("vector", CT_VECTOR, ST_NONE, PT_INT32, -1)
        );

        /* changes for matrix: from int32_t matrix to float64_t SG_MATRIX */
        m_parameter_map->put(
                new SGParamInfo("matrix", CT_SGMATRIX, ST_NONE, PT_FLOAT64, 1),
                new SGParamInfo("matrix", CT_SGMATRIX, ST_NONE, PT_INT32, 0)
        );

        /* from normal matrix to SG_MATRIX of same type */
        m_parameter_map->put(
                new SGParamInfo("matrix", CT_SGMATRIX, ST_NONE, PT_INT32, 0),
                new SGParamInfo("matrix", CT_MATRIX, ST_NONE, PT_INT32, -1)
        );

        /* name change for sgobject */
        m_parameter_map->put(
                new SGParamInfo("float_features", CT_SCALAR, ST_NONE,
                        PT_SGOBJECT, 1),
                new SGParamInfo("int_features", CT_SCALAR, ST_NONE,
                        PT_SGOBJECT, 0)
        );

        m_parameter_map->finalize_map();
    }

    virtual ~CTestClassFloat()
    {
        SG_UNREF(m_features);
    }

    float64_t m_number;
    SGVector<float64_t> m_vector;
    SGMatrix<float64_t> m_matrix;
    CDenseFeatures<float64_t>* m_features;

    virtual const char* get_name() const { return "TestClassFloat"; }
};

const char* filename="test.txt";

/* Serializes an int instance, then loads all its file parameters through the
 * float instance's parameter map and checks the loaded data and its order. */
void test_load_file_parameter()
{
    /* create one instance of each class */
    CTestClassInt* int_instance=new CTestClassInt();
    CTestClassFloat* float_instance=new CTestClassFloat();

    CSerializableAsciiFile* file;

    /* serialize int instance */
    file=new CSerializableAsciiFile(filename, 'w');
    int_instance->save_serializable(file);
    file->close();
    SG_UNREF(file);

    /* reopen file for reading */
    file=new CSerializableAsciiFile(filename, 'r');

    int32_t file_version=-1;

    /* load all parameter data, current version is set to 1 here */
    DynArray<TParameter*>* params=
            float_instance->load_all_file_parameters(file_version, 1, file, "");

    /* test the result: data must equal that of the int instance */
    for (index_t i=0; i<params->get_num_elements(); ++i)
    {
        TParameter* current=params->get_element(i);

        if (!strcmp(current->m_name, "number"))
        {
            int32_t value_number=*((int32_t*)current->m_parameter);
            SG_SPRINT("%i\n", value_number);
            /* NOTE: original used "=" here, which always passed */
            ASSERT(value_number==int_instance->m_number);
        }
        else if (!strcmp(current->m_name, "vector"))
        {
            int32_t* value_vector=*((int32_t**)current->m_parameter);
            SGVector<int32_t>::display_vector(value_vector,
                    int_instance->m_vector_length);
            /* inner index renamed to avoid shadowing the outer loop's i;
             * original also used "=" instead of "==" */
            for (index_t j=0; j<int_instance->m_vector_length; ++j)
                ASSERT(value_vector[j]==int_instance->m_vector[j]);
        }
        else if (!strcmp(current->m_name, "matrix"))
        {
            int32_t* value_matrix=*((int32_t**)current->m_parameter);
            SGMatrix<int32_t>::display_matrix(value_matrix,
                    int_instance->m_matrix_rows, int_instance->m_matrix_cols);
            for (index_t j=0;
                    j<int_instance->m_matrix_rows*int_instance->m_matrix_cols;
                    ++j)
            {
                ASSERT(value_matrix[j]==int_instance->m_matrix[j]);
            }
        }
        else if (!strcmp(current->m_name, "int_features"))
        {
            CDenseFeatures<int32_t>* features=
                    *((CDenseFeatures<int32_t>**)current->m_parameter);
            SGMatrix<int32_t> feature_matrix_loaded=
                    features->get_feature_matrix();
            SGMatrix<int32_t> feature_matrix_original=
                    int_instance->m_features->get_feature_matrix();

            SGMatrix<int32_t>::display_matrix(feature_matrix_loaded.matrix,
                    feature_matrix_loaded.num_rows,
                    feature_matrix_loaded.num_cols,
                    "features");
            for (index_t j=0;
                    j<int_instance->m_matrix_rows*int_instance->m_matrix_cols;
                    ++j)
            {
                ASSERT(feature_matrix_original.matrix[j]==
                        feature_matrix_loaded.matrix[j]);
            }
        }
    }

    /* assert that parameter data is sorted */
    for (index_t i=1; i<params->get_num_elements(); ++i)
    {
        /* assert via TParameter < and == operator */
        TParameter* t1=params->get_element(i-1);
        TParameter* t2=params->get_element(i);
        ASSERT((*t1)<(*t2) || (*t1)==(*t2));

        /* assert via name (which is used in the operator), but to be sure */
        const char* s1=t1->m_name;
        const char* s2=t2->m_name;
        SG_SPRINT("param \"%s\" <= \"%s\" ? ... ", s1, s2);
        ASSERT(strcmp(s1, s2)<=0);
        SG_SPRINT("yes\n");
    }

    /* clean up */
    for (index_t i=0; i<params->get_num_elements(); ++i)
        delete params->get_element(i);

    delete params;

    file->close();
    SG_UNREF(file);
    SG_UNREF(int_instance);
    SG_UNREF(float_instance);
}

int main(int argc, char **argv)
{
    init_shogun(&print_message, &print_message, &print_message);

    test_load_file_parameter();

    exit_shogun();
    return 0;
}
/*
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 3 of the License, or
 * (at your option) any later version.
 *
 * Written (W) 2011 Heiko Strathmann
 * Copyright (C) 2011 Berlin Institute of Technology and Max-Planck-Society
 */

#include <shogun/base/init.h>
#include <shogun/base/Parameter.h>
#include <shogun/io/SerializableAsciiFile.h>
#include <shogun/base/ParameterMap.h>
#include <shogun/features/DenseFeatures.h>

using namespace shogun;

void print_message(FILE* target, const char* str)
{
    fprintf(target, "%s", str);
}

/* test class whose registered parameters are all int-typed; this plays the
 * role of the "old" parameter version that gets serialized to file */
class CTestClassInt : public CSGObject
{
public:
    CTestClassInt()
    {
        m_number=10;
        m_parameters->add(&m_number, "number", "Test number");

        m_vector_length=3;
        m_vector=SG_MALLOC(int32_t, m_vector_length);
        SGVector<int32_t>::fill_vector(m_vector, m_vector_length, 10);
        m_parameters->add_vector(&m_vector, &m_vector_length, "vector",
                "Test vector");

        m_matrix_rows=2;
        m_matrix_cols=3;
        m_matrix=SG_MALLOC(int32_t, m_matrix_rows*m_matrix_cols);
        SGVector<int32_t>::range_fill_vector(m_matrix,
                m_matrix_rows*m_matrix_cols);
        m_parameters->add_matrix(&m_matrix, &m_matrix_rows, &m_matrix_cols,
                "matrix", "Test matrix");

        SGMatrix<int32_t> features=SGMatrix<int32_t>(2, 3);
        SGVector<int32_t>::range_fill_vector(features.matrix,
                features.num_rows*features.num_cols, 3);
        m_features=new CDenseFeatures<int32_t>(features);
        SG_REF(m_features);
        m_parameters->add((CSGObject**)&m_features, "int_features",
                "Test features");
    }

    virtual ~CTestClassInt()
    {
        SG_FREE(m_vector);
        SG_FREE(m_matrix);
        SG_UNREF(m_features);
    }

    int32_t m_number;
    int32_t* m_vector;
    int32_t m_vector_length;
    int32_t* m_matrix;
    int32_t m_matrix_rows;
    int32_t m_matrix_cols;
    CDenseFeatures<int32_t>* m_features;

    virtual const char* get_name() const { return "TestClassInt"; }
};

/* test class whose parameters are float-typed; plays the role of the "new"
 * parameter version and declares the mappings back to the int version */
class CTestClassFloat : public CSGObject
{
public:
    CTestClassFloat()
    {
        m_number=3.2;
        m_vector=SGVector<float64_t>(10);
        m_matrix=SGMatrix<float64_t>(2, 3);

        m_parameters->add(&m_number, "number", "Test number");
        m_parameters->add(&m_vector, "vector", "Test vector");
        m_parameters->add(&m_matrix, "matrix", "Test matrix");

        SGMatrix<float64_t> features=SGMatrix<float64_t>(2, 3);
        SGVector<float64_t>::range_fill_vector(features.matrix,
                features.num_rows*features.num_cols, 3.0);
        m_features=new CDenseFeatures<float64_t>(features);
        SG_REF(m_features);
        m_parameters->add((CSGObject**)&m_features, "float_features",
                "Test features");

        /* add some parameter mappings for number, here: type changes */
        m_parameter_map->put(
                new const SGParamInfo("number", CT_SCALAR, ST_NONE,
                        PT_FLOAT64, 1),
                new const SGParamInfo("number", CT_SCALAR, ST_NONE, PT_INT8, 0)
        );
        m_parameter_map->put(
                new const SGParamInfo("number", CT_SCALAR, ST_NONE, PT_INT8, 0),
                new const SGParamInfo("number", CT_SCALAR, ST_NONE,
                        PT_INT32, -1)
        );

        /* changes for vector: from int32_t vector to float64_t SG_VECTOR */
        m_parameter_map->put(
                new const SGParamInfo("vector", CT_SGVECTOR, ST_NONE,
                        PT_FLOAT64, 1),
                new const SGParamInfo("vector", CT_SGVECTOR, ST_NONE,
                        PT_INT32, 0)
        );

        /* from normal vector to SG_VECTOR of same type */
        m_parameter_map->put(
                new const SGParamInfo("vector", CT_SGVECTOR, ST_NONE,
                        PT_INT32, 0),
                new const SGParamInfo("vector", CT_VECTOR, ST_NONE,
                        PT_INT32, -1)
        );

        /* changes for matrix: from int32_t matrix to float64_t SG_MATRIX */
        m_parameter_map->put(
                new const SGParamInfo("matrix", CT_SGMATRIX, ST_NONE,
                        PT_FLOAT64, 1),
                new const SGParamInfo("matrix", CT_SGMATRIX, ST_NONE,
                        PT_INT32, 0)
        );

        /* from normal matrix to SG_MATRIX of same type */
        m_parameter_map->put(
                new const SGParamInfo("matrix", CT_SGMATRIX, ST_NONE,
                        PT_INT32, 0),
                new const SGParamInfo("matrix", CT_MATRIX, ST_NONE,
                        PT_INT32, -1)
        );

        /* name change for sgobject */
        m_parameter_map->put(
                new const SGParamInfo("float_features", CT_SCALAR, ST_NONE,
                        PT_SGOBJECT, 1),
                new const SGParamInfo("int_features", CT_SCALAR, ST_NONE,
                        PT_SGOBJECT, 0)
        );

        m_parameter_map->finalize_map();
    }

    virtual ~CTestClassFloat()
    {
        SG_UNREF(m_features);
    }

    float64_t m_number;
    SGVector<float64_t> m_vector;
    SGMatrix<float64_t> m_matrix;
    CDenseFeatures<float64_t>* m_features;

    virtual const char* get_name() const { return "TestClassFloat"; }
};

const char* filename="test.txt";

/* Serializes an int instance, then loads single parameters of the float
 * instance from that file via backward-mapped parameter infos and checks
 * the loaded data against the int instance. */
void test_load_file_parameters()
{
    /* create one instance of each class */
    CTestClassInt* int_instance=new CTestClassInt();
    CTestClassFloat* float_instance=new CTestClassFloat();

    CSerializableAsciiFile* file;

    /* serialize int instance */
    file=new CSerializableAsciiFile(filename, 'w');
    int_instance->save_serializable(file);
    file->close();
    SG_UNREF(file);

    /* reopen file for reading */
    file=new CSerializableAsciiFile(filename, 'r');

    /* build parameter info for parameter of the OTHER instance, start from
     * version 1 */
    const SGParamInfo param_info_number(
            float_instance->m_parameters->get_parameter(0), 1);
    const SGParamInfo param_info_vector(
            float_instance->m_parameters->get_parameter(1), 1);
    const SGParamInfo param_info_matrix(
            float_instance->m_parameters->get_parameter(2), 1);
    const SGParamInfo param_info_sgobject(
            float_instance->m_parameters->get_parameter(3), 1);

    int32_t file_version=-1;

    /* now, here the magic happens, the parameter info of the float instance is
     * mapped backwards (see its parameter map above) until the parameter
     * info of the file is found. Then the parameters with the file version
     * are loaded into memory. This will be used for migration.
     * Note that only one parameter is in the array here for testing.
     * (The original code had these "&" characters garbled to "&para;".) */
    DynArray<TParameter*>* file_loaded_number=
            float_instance->load_file_parameters(&param_info_number,
                    file_version, file);
    DynArray<TParameter*>* file_loaded_vector=
            float_instance->load_file_parameters(&param_info_vector,
                    file_version, file);
    DynArray<TParameter*>* file_loaded_matrix=
            float_instance->load_file_parameters(&param_info_matrix,
                    file_version, file);
    DynArray<TParameter*>* file_loaded_sgobject=
            float_instance->load_file_parameters(&param_info_sgobject,
                    file_version, file);

    /* Note that there is only ONE element in array here (old test) */
    TParameter* current;

    /* ensure that its the same as of the instance
     * (original used "=" instead of "==", which always passed) */
    current=file_loaded_number->get_element(0);
    int32_t value_number=*((int32_t*)current->m_parameter);
    SG_SPRINT("%i\n", value_number);
    ASSERT(value_number==int_instance->m_number);

    /* same for the vector (original also used "=" in the ASSERT) */
    current=file_loaded_vector->get_element(0);
    int32_t* value_vector=*((int32_t**)current->m_parameter);
    SGVector<int32_t>::display_vector(value_vector,
            int_instance->m_vector_length);
    for (index_t i=0; i<int_instance->m_vector_length; ++i)
        ASSERT(value_vector[i]==int_instance->m_vector[i]);

    /* and for the matrix */
    current=file_loaded_matrix->get_element(0);
    int32_t* value_matrix=*((int32_t**)current->m_parameter);
    SGMatrix<int32_t>::display_matrix(value_matrix,
            int_instance->m_matrix_rows, int_instance->m_matrix_cols);
    for (index_t i=0;
            i<int_instance->m_matrix_rows*int_instance->m_matrix_cols; ++i)
    {
        ASSERT(value_matrix[i]==int_instance->m_matrix[i]);
    }

    /* and for the feature object */
    current=file_loaded_sgobject->get_element(0);
    CDenseFeatures<int32_t>* features=
            *((CDenseFeatures<int32_t>**)current->m_parameter);
    SGMatrix<int32_t> feature_matrix_loaded=
            features->get_feature_matrix();
    SGMatrix<int32_t> feature_matrix_original=
            int_instance->m_features->get_feature_matrix();

    SGMatrix<int32_t>::display_matrix(feature_matrix_loaded.matrix,
            feature_matrix_loaded.num_rows,
            feature_matrix_loaded.num_cols,
            "features");
    for (index_t i=0;
            i<int_instance->m_matrix_rows*int_instance->m_matrix_cols; ++i)
    {
        ASSERT(feature_matrix_original.matrix[i]==
                feature_matrix_loaded.matrix[i]);
    }

    /* only the TParameter instances have to be deleted, data, data pointer,
     * and possible length variables are deleted automatically */
    for (index_t i=0; i<file_loaded_number->get_num_elements(); ++i)
        delete file_loaded_number->get_element(i);
    for (index_t i=0; i<file_loaded_vector->get_num_elements(); ++i)
        delete file_loaded_vector->get_element(i);
    for (index_t i=0; i<file_loaded_matrix->get_num_elements(); ++i)
        delete file_loaded_matrix->get_element(i);
    for (index_t i=0; i<file_loaded_sgobject->get_num_elements(); ++i)
        delete file_loaded_sgobject->get_element(i);

    /* also delete arrays */
    delete file_loaded_number;
    delete file_loaded_vector;
    delete file_loaded_matrix;
    delete file_loaded_sgobject;

    file->close();
    SG_UNREF(file);
    SG_UNREF(int_instance);
    SG_UNREF(float_instance);
}

int main(int argc, char **argv)
{
    init_shogun(&print_message, &print_message, &print_message);

    test_load_file_parameters();

    exit_shogun();
    return 0;
}
/*
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 3 of the License, or
 * (at your option) any later version.
 *
 * Written (W) 2012 Heiko Strathmann
 * Copyright (C) 2012 Berlin Institute of Technology and Max-Planck-Society
 */

#include <shogun/base/init.h>
#include <shogun/base/Parameter.h>
#include <shogun/io/SerializableAsciiFile.h>
#include <shogun/base/ParameterMap.h>
#include <shogun/features/DenseFeatures.h>

using namespace shogun;

void print_message(FILE* target, const char* str)
{
    fprintf(target, "%s", str);
}

/* test class whose registered parameters are all int-typed; this plays the
 * role of the "old" parameter version that gets serialized to file */
class CTestClassInt : public CSGObject
{
public:
    CTestClassInt()
    {
        m_number=10;
        m_parameters->add(&m_number, "number", "Test number");

        m_vector_length=3;
        m_vector=SG_MALLOC(int32_t, m_vector_length);
        SGVector<int32_t>::fill_vector(m_vector, m_vector_length, 10);
        m_parameters->add_vector(&m_vector, &m_vector_length, "vector",
                "Test vector");

        m_matrix_rows=2;
        m_matrix_cols=3;
        m_matrix=SG_MALLOC(int32_t, m_matrix_rows*m_matrix_cols);
        SGVector<int32_t>::range_fill_vector(m_matrix,
                m_matrix_rows*m_matrix_cols);
        m_parameters->add_matrix(&m_matrix, &m_matrix_rows, &m_matrix_cols,
                "matrix", "Test matrix");

        /* cache size 10 and a non-default feature weight so the test below
         * can verify that both survive the mapping */
        SGMatrix<int32_t> features=SGMatrix<int32_t>(2, 3);
        SGVector<int32_t>::range_fill_vector(features.matrix,
                features.num_rows*features.num_cols, 3);
        m_features=new CDenseFeatures<int32_t>(10);
        m_features->set_feature_matrix(features);
        m_features->set_combined_feature_weight(5.0);
        SG_REF(m_features);
        m_parameters->add((CSGObject**)&m_features, "int_features",
                "Test features");
    }

    virtual ~CTestClassInt()
    {
        SG_FREE(m_vector);
        SG_FREE(m_matrix);
        SG_UNREF(m_features);
    }

    int32_t m_number;
    int32_t* m_vector;
    int32_t m_vector_length;
    int32_t* m_matrix;
    int32_t m_matrix_rows;
    int32_t m_matrix_cols;
    CDenseFeatures<int32_t>* m_features;

    virtual const char* get_name() const { return "TestClassInt"; }
};

/* "new" parameter version of the class above; declares the backward
 * parameter mappings and implements the corresponding migration logic */
class CTestClassFloat : public CSGObject
{
public:
    CTestClassFloat()
    {
        m_number=3.2;
        m_vector=SGVector<float64_t>(10);
        m_matrix=SGMatrix<float64_t>(2, 3);

        m_parameters->add(&m_number, "number", "Test number");
        m_parameters->add(&m_vector, "vector", "Test vector");
        m_parameters->add(&m_matrix, "matrix", "Test matrix");

        SGMatrix<int32_t> features=SGMatrix<int32_t>(2, 3);
        SGVector<int32_t>::range_fill_vector(features.matrix,
                features.num_rows*features.num_cols, 3);
        m_features=new CDenseFeatures<int32_t>(features);
        SG_REF(m_features);
        m_parameters->add((CSGObject**)&m_features, "float_features",
                "Test features");

        /* add some parameter mappings for number, here: type changes */
        m_parameter_map->put(
                new SGParamInfo("number", CT_SCALAR, ST_NONE, PT_FLOAT64, 1),
                new SGParamInfo("number", CT_SCALAR, ST_NONE, PT_INT8, 0)
        );
        m_parameter_map->put(
                new SGParamInfo("number", CT_SCALAR, ST_NONE, PT_INT8, 0),
                new SGParamInfo("number", CT_SCALAR, ST_NONE, PT_INT32, -1)
        );

        /* changes for vector: from int32_t vector to float64_t SG_VECTOR */
        m_parameter_map->put(
                new SGParamInfo("vector", CT_SGVECTOR, ST_NONE, PT_FLOAT64, 1),
                new SGParamInfo("vector", CT_SGVECTOR, ST_NONE, PT_INT32, 0)
        );

        /* from normal vector to SG_VECTOR of same type */
        m_parameter_map->put(
                new SGParamInfo("vector", CT_SGVECTOR, ST_NONE, PT_INT32, 0),
                new SGParamInfo("vector", CT_VECTOR, ST_NONE, PT_INT32, -1)
        );

        /* changes for matrix: from int32_t matrix to float64_t SG_MATRIX */
        m_parameter_map->put(
                new SGParamInfo("matrix", CT_SGMATRIX, ST_NONE, PT_FLOAT64, 1),
                new SGParamInfo("matrix", CT_SGMATRIX, ST_NONE, PT_INT32, 0)
        );

        /* from normal matrix to SG_MATRIX of same type */
        m_parameter_map->put(
                new SGParamInfo("matrix", CT_SGMATRIX, ST_NONE, PT_INT32, 0),
                new SGParamInfo("matrix", CT_MATRIX, ST_NONE, PT_INT32, -1)
        );

        /* CSGObject mapping is not yet done */
        /* name change for sgobject */
        m_parameter_map->put(
                new SGParamInfo("float_features", CT_SCALAR, ST_NONE,
                        PT_SGOBJECT, 1),
                new SGParamInfo("int_features", CT_SCALAR, ST_NONE,
                        PT_SGOBJECT, 0)
        );

        m_parameter_map->finalize_map();
    }

    virtual ~CTestClassFloat()
    {
        SG_UNREF(m_features);
    }

    float64_t m_number;
    SGVector<float64_t> m_vector;
    SGMatrix<float64_t> m_matrix;

    /* no type change here */
    CDenseFeatures<int32_t>* m_features;

    virtual const char* get_name() const { return "TestClassFloat"; }

    /* Migrates one old-version parameter to its new-version representation.
     * (Original declared an unused TSGDataType local and re-declared
     * to_migrate inside several branches, shadowing the outer variable;
     * both removed.) */
    virtual TParameter* migrate(DynArray<TParameter*>* param_base,
            const SGParamInfo* target)
    {
        TParameter* result=NULL;
        TParameter* to_migrate=NULL;

        if (*target==SGParamInfo("number", CT_SCALAR, ST_NONE, PT_FLOAT64, 1))
        {
            one_to_one_migration_prepare(param_base, target, result,
                    to_migrate);

            /* here: simply copy (and cast) data because nothing has changed */
            *((float64_t*)result->m_parameter)=
                    *((int8_t*)to_migrate->m_parameter);
        }
        else if (*target==SGParamInfo("number", CT_SCALAR, ST_NONE, PT_INT8, 0))
        {
            one_to_one_migration_prepare(param_base, target, result,
                    to_migrate);

            /* here: simply copy (and cast) data because nothing has changed */
            *((int8_t*)result->m_parameter)=
                    *((int32_t*)to_migrate->m_parameter);
        }
        else if (*target==SGParamInfo("vector", CT_SGVECTOR, ST_NONE,
                PT_FLOAT64, 1))
        {
            one_to_one_migration_prepare(param_base, target, result,
                    to_migrate);

            /* here: copy data element wise because type changes */
            float64_t* array_to=*((float64_t**)result->m_parameter);
            int32_t* array_from=*((int32_t**)to_migrate->m_parameter);
            for (index_t i=0; i<*to_migrate->m_datatype.m_length_y; ++i)
                array_to[i]=array_from[i];
        }
        else if (*target==SGParamInfo("vector", CT_SGVECTOR, ST_NONE,
                PT_INT32, 0))
        {
            one_to_one_migration_prepare(param_base, target, result,
                    to_migrate);

            /* here: copy data complete because its just wrapper type change */
            int32_t* array_to=*((int32_t**)result->m_parameter);
            int32_t* array_from=*((int32_t**)to_migrate->m_parameter);
            memcpy(array_to, array_from, to_migrate->m_datatype.get_size());
        }
        else if (*target==SGParamInfo("matrix", CT_SGMATRIX, ST_NONE,
                PT_INT32, 0))
        {
            one_to_one_migration_prepare(param_base, target, result,
                    to_migrate);

            /* here: copy data complete because its just wrapper type change */
            int32_t* array_to=*((int32_t**)result->m_parameter);
            int32_t* array_from=*((int32_t**)to_migrate->m_parameter);
            memcpy(array_to, array_from, to_migrate->m_datatype.get_size());
        }
        else if (*target==SGParamInfo("matrix", CT_SGMATRIX, ST_NONE,
                PT_FLOAT64, 1))
        {
            one_to_one_migration_prepare(param_base, target, result,
                    to_migrate);

            /* here: copy data element wise because type changes */
            float64_t* array_to=*((float64_t**)result->m_parameter);
            int32_t* array_from=*((int32_t**)to_migrate->m_parameter);
            for (index_t i=0; i<to_migrate->m_datatype.get_num_elements(); ++i)
                array_to[i]=array_from[i];
        }
        else if (*target==SGParamInfo("float_features", CT_SCALAR, ST_NONE,
                PT_SGOBJECT, 1))
        {
            /* specify name change and thats it */
            one_to_one_migration_prepare(param_base, target, result,
                    to_migrate, (char*) "int_features");
        }

        if (result)
            return result;
        else
            return CSGObject::migrate(param_base, target);
    }
};

const char* filename="test.txt";

/* Serializes an int instance, loads all its file parameters through the
 * float instance and maps them to the current version, then verifies the
 * mapped data against the int instance. */
void test_load_file_parameter()
{
    /* create one instance of each class */
    CTestClassInt* int_instance=new CTestClassInt();
    CTestClassFloat* float_instance=new CTestClassFloat();

    CSerializableAsciiFile* file;

    /* serialize int instance */
    file=new CSerializableAsciiFile(filename, 'w');
    int_instance->save_serializable(file);
    file->close();
    SG_UNREF(file);

    /* reopen file for reading */
    file=new CSerializableAsciiFile(filename, 'r');

    /* versions that are used in this example */
    int32_t file_version=-1;
    int32_t current_version=1;

    /* load all parameter data, current version is set to 1 here */
    DynArray<TParameter*>* params=
            float_instance->load_all_file_parameters(file_version,
                    current_version, file, "");

    /* create an array of param infos from float instance parameters */
    DynArray<const SGParamInfo*>* param_infos=
            new DynArray<const SGParamInfo*>();
    for (index_t i=0;
            i<float_instance->m_parameters->get_num_parameters(); ++i)
    {
        param_infos->append_element(
                new SGParamInfo(float_instance->m_parameters->get_parameter(i),
                        current_version));
    }

    /* here the magic mapping happens */
    float_instance->map_parameters(params, file_version, param_infos);

    /* assert equalness of all parameters
     * alphabetical order is "float_features", "matrix", "number", "vector" */
    TParameter* current=NULL;

    /* "float_features" (no type change here) */
    current=params->get_element(0);
    SG_SPRINT("checking \"float_features\":\n");
    ASSERT(!strcmp(current->m_name, "float_features"));

    /* cast to simple features */
    CDenseFeatures<int32_t>* features=
            *((CDenseFeatures<int32_t>**)current->m_parameter);

    SG_SPRINT("checking address (mapped!=original): %p!=%p\n", features,
            int_instance->m_features);
    ASSERT((void*)features!=(void*)int_instance->m_features);

    SG_SPRINT("checking cache size: %d==%d\n", features->get_cache_size(),
            int_instance->m_features->get_cache_size());
    ASSERT(features->get_cache_size()==
            int_instance->m_features->get_cache_size());

    SG_SPRINT("checking combined feature weight: %f==%f\n",
            features->get_combined_feature_weight(),
            int_instance->m_features->get_combined_feature_weight());
    ASSERT(features->get_combined_feature_weight()==
            int_instance->m_features->get_combined_feature_weight());

    SG_SPRINT("checking feature matrix:\n");
    SGMatrix<int32_t> int_matrix=
            int_instance->m_features->get_feature_matrix();
    SGMatrix<int32_t> float_matrix=features->get_feature_matrix();

    SG_SPRINT("number of rows: %d==%d\n", int_matrix.num_rows,
            float_matrix.num_rows);
    ASSERT(int_matrix.num_rows==float_matrix.num_rows);

    SG_SPRINT("number of cols: %d==%d\n", int_matrix.num_cols,
            float_matrix.num_cols);
    ASSERT(int_matrix.num_cols==float_matrix.num_cols);

    SGMatrix<int32_t>::display_matrix(float_matrix.matrix,
            float_matrix.num_rows, float_matrix.num_cols, "mapped");
    SGMatrix<int32_t>::display_matrix(int_matrix.matrix,
            int_matrix.num_rows, int_matrix.num_cols, "original");

    for (index_t i=0; i<int_matrix.num_rows*int_matrix.num_cols; ++i)
        ASSERT(int_matrix.matrix[i]==float_matrix.matrix[i]);

    /* "matrix" */
    current=params->get_element(1);
    ASSERT(!strcmp(current->m_name, "matrix"));
    SGMatrix<float64_t> matrix(*(float64_t**)current->m_parameter,
            *current->m_datatype.m_length_y, *current->m_datatype.m_length_x);
    SG_SPRINT("checking \"matrix:\n");
    SG_SPRINT("number of rows: %d==%d\n", *current->m_datatype.m_length_y,
            int_instance->m_matrix_rows);
    ASSERT(*current->m_datatype.m_length_y==int_instance->m_matrix_rows);
    SGMatrix<float64_t>::display_matrix(matrix.matrix, matrix.num_rows,
            matrix.num_cols, "mapped");
    SGMatrix<int32_t>::display_matrix(int_instance->m_matrix,
            int_instance->m_matrix_rows, int_instance->m_matrix_cols,
            "original");
    for (index_t i=0;
            i<int_instance->m_matrix_rows*int_instance->m_matrix_cols; ++i)
    {
        ASSERT(matrix.matrix[i]==int_instance->m_matrix[i]);
    }

    /* "number" */
    current=params->get_element(2);
    ASSERT(!strcmp(current->m_name, "number"));
    float64_t number=*((float64_t*)current->m_parameter);
    SG_SPRINT("checking \"number\": %f == %d\n", number,
            int_instance->m_number);
    ASSERT(number==int_instance->m_number);

    /* "vector" */
    current=params->get_element(3);
    ASSERT(!strcmp(current->m_name, "vector"));
    SGVector<float64_t> vector(*(float64_t**)current->m_parameter,
            *current->m_datatype.m_length_y, false);
    SG_SPRINT("checking \"vector:\n");
    SG_SPRINT("length: %d==%d\n", *current->m_datatype.m_length_y,
            int_instance->m_vector_length);
    ASSERT(*current->m_datatype.m_length_y==int_instance->m_vector_length);
    SGVector<float64_t>::display_vector(vector.vector, vector.vlen, "mapped");
    SGVector<int32_t>::display_vector(int_instance->m_vector,
            int_instance->m_vector_length, "original");
    for (index_t i=0; i<int_instance->m_vector_length; ++i)
        ASSERT(vector.vector[i]==int_instance->m_vector[i]);

    /* clean up */
    for (index_t i=0; i<param_infos->get_num_elements(); ++i)
        delete param_infos->get_element(i);

    delete param_infos;

    for (index_t i=0; i<params->get_num_elements(); ++i)
    {
        /* delete data of TParameters because they were mapped */
        params->get_element(i)->m_delete_data=true;
        delete params->get_element(i);
    }

    delete params;

    file->close();
    SG_UNREF(file);
    SG_UNREF(int_instance);
    SG_UNREF(float_instance);
}

int main(int argc, char **argv)
{
    init_shogun(&print_message, &print_message, &print_message);

    test_load_file_parameter();

    exit_shogun();
    return 0;
}
/*
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 3 of the License, or
 * (at your option) any later version.
 *
 * Written (W) 2012 Heiko Strathmann
 * Copyright (C) 2012 Berlin Institute of Technology and Max-Planck-Society
 */

#include <shogun/base/init.h>
#include <shogun/base/Parameter.h>
#include <shogun/io/SerializableAsciiFile.h>
#include <shogun/base/ParameterMap.h>

using namespace shogun;

void print_message(FILE* target, const char* str)
{
    fprintf(target, "%s", str);
}

/* "old" class (parameter version 0): one parameter will be dropped, one
 * will be kept under a new name in the new version */
class CTestClassOld : public CSGObject
{
public:
    CTestClassOld()
    {
        m_number_to_drop=10;
        m_parameters->add(&m_number_to_drop, "m_number_to_drop", "");

        m_number_to_keep=10;
        m_parameters->add(&m_number_to_keep, "m_number_to_keep", "");
    }

    int32_t m_number_to_drop;
    int32_t m_number_to_keep;

    virtual const char* get_name() const { return "TestClassOld"; }
};

/* "new" class (parameter version 1): renames the kept parameter and adds a
 * parameter that did not exist in the old version */
class CTestClassNew : public CSGObject
{
public:
    CTestClassNew()
    {
        m_number=3;
        m_parameters->add(&m_number, "m_number", "");

        m_number_new=4;
        m_parameters->add(&m_number_new, "m_number_new", "");

        /* change name of to be kept number */
        m_parameter_map->put(
                new SGParamInfo("m_number", CT_SCALAR, ST_NONE, PT_INT32, 1),
                new SGParamInfo("m_number_to_keep", CT_SCALAR, ST_NONE,
                        PT_INT32, 0)
        );

        /* this parameter is new in this version, mapping from "nowhere" */
        m_parameter_map->put(
                new SGParamInfo("m_number_new", CT_SCALAR, ST_NONE,
                        PT_INT32, 1),
                new SGParamInfo()
        );

        /* note that dropped parameters need not be considered, just ignored */

        /* needed if more than one element */
        m_parameter_map->finalize_map();
    }

    int32_t m_number;
    int32_t m_number_new;

    virtual const char* get_name() const { return "TestClassNew"; }

    /* Migrates the renamed parameter from the old version; the new parameter
     * needs no case distinction and falls through to the base class. */
    virtual TParameter* migrate(DynArray<TParameter*>* param_base,
            const SGParamInfo* target)
    {
        TParameter* result=NULL;
        TParameter* to_migrate=NULL;

        if (*target==SGParamInfo("m_number", CT_SCALAR, ST_NONE, PT_INT32, 1))
        {
            /* specify name change here (again, was also done in mappings) */
            char* old_name=(char*) "m_number_to_keep";
            one_to_one_migration_prepare(param_base, target, result,
                    to_migrate, old_name);

            /* here: simply copy data because nothing has changed */
            *((int32_t*)result->m_parameter)=
                    *((int32_t*)to_migrate->m_parameter);
        }

        /* note there has to be no case distinction for the new parameter */

        if (result)
            return result;
        else
            return CSGObject::migrate(param_base, target);
    }
};

/* Checks that the kept parameter survived the migration unchanged and prints
 * the value of the newly introduced parameter. */
void check_equalness(CTestClassOld* old_instance, CTestClassNew* new_instance)
{
    /* number */
    SG_SPRINT("checking \"m_number\":\n");
    SG_SPRINT("\t%d==%d\n", old_instance->m_number_to_keep,
            new_instance->m_number);
    ASSERT(old_instance->m_number_to_keep==new_instance->m_number);

    /* new element */
    SG_SPRINT("checking \"m_number_new\":\n");
    SG_SPRINT("\t%d\n", new_instance->m_number_new);
}

/* Serializes an old-version instance, loads it into a new-version instance
 * (triggering the migration above), and verifies the result. */
void test_migration()
{
    const char* filename="test.txt";

    /* create one instance of each class */
    CTestClassOld* old_instance=new CTestClassOld();
    CTestClassNew* new_instance=new CTestClassNew();

    CSerializableAsciiFile* file;

    /* serialize old instance, use custom parameter version 0 */
    file=new CSerializableAsciiFile(filename, 'w');
    old_instance->save_serializable(file, "", 0);
    file->close();
    SG_UNREF(file);

    /* de-serialize new instance, use custom parameter version 1 */
    file=new CSerializableAsciiFile(filename, 'r');
    new_instance->load_serializable(file, "", 1);
    file->close();
    SG_UNREF(file);

    /* assert that content is equal */
    check_equalness(old_instance, new_instance);

    SG_UNREF(old_instance);
    SG_UNREF(new_instance);
    /* NOTE: the original called SG_UNREF(file) a second time here, after the
     * handle had already been unreferenced above — an over-unref that could
     * corrupt the refcount; removed */
}

int main(int argc, char **argv)
{
    init_shogun(&print_message, &print_message, &print_message);

    test_migration();

    exit_shogun();
    return 0;
}
/*
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 3 of the License, or
 * (at your option) any later version.
 *
 * Written (W) 2012 Heiko Strathmann
 * Copyright (C) 2012 Berlin Institute of Technology and Max-Planck-Society
 *
 * Example: parameter-version migration where ONE new parameter depends on
 * TWO old parameters (they are merged — here summed — during migration).
 */

#include <shogun/base/init.h>
#include <shogun/base/Parameter.h>
#include <shogun/io/SerializableAsciiFile.h>
#include <shogun/base/ParameterMap.h>

using namespace shogun;

/* printing callback handed to init_shogun for messages/warnings/errors */
void print_message(FILE* target, const char* str)
{
    fprintf(target, "%s", str);
}

/* "Old" class layout (parameter version 0): two separate numbers that the
 * new layout merges into one. */
class CTestClassOld : public CSGObject
{
public:
    CTestClassOld()
    {
        m_number_1=1;
        m_number_2=2;
        m_parameters->add(&m_number_1, "m_number_1", "");
        m_parameters->add(&m_number_2, "m_number_2", "");
    }

    int32_t m_number_1;
    int32_t m_number_2;

    virtual const char* get_name() const { return "TestClassOld"; }
};

/* "New" class layout (parameter version 1): a single merged number. */
class CTestClassNew : public CSGObject
{
public:
    CTestClassNew()
    {
        m_number=0;
        m_parameters->add(&m_number, "m_number", "");

        /* number_1 in old version will become new, merged number */
        m_parameter_map->put(
            new SGParamInfo("m_number", CT_SCALAR, ST_NONE, PT_INT32, 1),
            new SGParamInfo("m_number_1", CT_SCALAR, ST_NONE, PT_INT32, 0)
        );

        /* Note that here, two mappings for one parameter are added. This
         * means that m_number both depends on m_number_1 and m_number_2 */
        m_parameter_map->put(
            new SGParamInfo("m_number", CT_SCALAR, ST_NONE, PT_INT32, 1),
            new SGParamInfo("m_number_2", CT_SCALAR, ST_NONE, PT_INT32, 0)
        );

        /* note that dropped parameters need not be considered, just ignored */

        /* needed if more than one element */
        m_parameter_map->finalize_map();
    }

    int32_t m_number;
    int32_t m_number_new;

    virtual const char* get_name() const { return "TestClassNew"; }

    /* Builds the merged parameter: looks up both old numbers in the loaded
     * parameter base and stores their sum into the new parameter. */
    virtual TParameter* migrate(DynArray<TParameter*>* param_base,
            const SGParamInfo* target)
    {
        TParameter* result=NULL;

        if (*target==SGParamInfo("m_number", CT_SCALAR, ST_NONE, PT_INT32, 1))
        {
            /* one to one migration may not be used here because two
             * parameters are merged into one parameter. Here the new
             * parameter will contain the sum of the two old ones. */

            /* generate type of target structure */
            TSGDataType type(target->m_ctype, target->m_stype,
                    target->m_ptype);

            /* find elements that are needed for migration, in this case the
             * two numbers of the base */
            char* name_1=(char*) "m_number_1";
            char* name_2=(char*) "m_number_2";

            /* dummy elements for searching */
            TParameter* t_1=new TParameter(&type, NULL, name_1, "");
            TParameter* t_2=new TParameter(&type, NULL, name_2, "");
            index_t i_1=CMath::binary_search(param_base->get_array(),
                    param_base->get_num_elements(), t_1);
            index_t i_2=CMath::binary_search(param_base->get_array(),
                    param_base->get_num_elements(), t_2);
            delete t_1;
            delete t_2;

            /* gather search results and tell them that they are to be
             * deleted because they will be replaced */
            ASSERT(i_1>=0 && i_2>=0);
            TParameter* to_migrate_1=param_base->get_element(i_1);
            TParameter* to_migrate_2=param_base->get_element(i_2);
            to_migrate_1->m_delete_data=true;
            to_migrate_2->m_delete_data=true;

            /* create result structure and allocate data for it */
            result=new TParameter(&type, NULL, target->m_name,
                    "New description");

            /* scalar value has length one */
            result->allocate_data_from_scratch(1, 1);

            /* merged element contains sum of both to be merged elements */
            *((int32_t*)result->m_parameter)=
                    *((int32_t*)to_migrate_1->m_parameter)+
                    *((int32_t*)to_migrate_2->m_parameter);
        }

        /* fall back to base-class migration for anything not handled above */
        if (result)
            return result;
        else
            return CSGObject::migrate(param_base, target);
    }
};

/* Serialize an old-layout instance, de-serialize into the new layout, and
 * verify that the merged number equals the sum of the two old numbers. */
void test_migration()
{
    const char* filename="test.txt";

    /* create one instance of each class */
    CTestClassOld* old_instance=new CTestClassOld();
    CTestClassNew* new_instance=new CTestClassNew();

    CSerializableAsciiFile* file;

    /* serialize int instance, use custom parameter version */
    file=new CSerializableAsciiFile(filename, 'w');
    old_instance->save_serializable(file, "", 0);
    file->close();
    SG_UNREF(file);

    /* de-serialize float instance, use custom parameter version */
    file=new CSerializableAsciiFile(filename, 'r');
    new_instance->load_serializable(file, "", 1);
    file->close();
    SG_UNREF(file);

    /* check that merged number is sum old to be merged ones */
    SG_SPRINT("checking \"m_number\":\n");
    SG_SPRINT("\t%d==%d+%d\n", new_instance->m_number,
            old_instance->m_number_1, old_instance->m_number_2);
    ASSERT(new_instance->m_number==old_instance->m_number_1+
            old_instance->m_number_2);

    SG_UNREF(old_instance);
    SG_UNREF(new_instance);
    /* NOTE(review): file was already SG_UNREF'd above and the macro NULLs
     * its argument, so this final unref appears to be a no-op — confirm */
    SG_UNREF(file);
}

int main(int argc, char **argv)
{
    init_shogun(&print_message, &print_message, &print_message);

    /* this is a more complex example, where a parameter is based on two
     * old parameter */
    test_migration();

    exit_shogun();

    return 0;
}
/*
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 3 of the License, or
 * (at your option) any later version.
 *
 * Written (W) 2012 Heiko Strathmann
 * Copyright (C) 2012 Berlin Institute of Technology and Max-Planck-Society
 *
 * Example: chained parameter-version migrations involving TYPE changes:
 * a scalar goes int32 -> int8 -> float64, a raw vector becomes an SGVector
 * (and changes element type), a raw matrix becomes an SGMatrix, and a
 * CSGObject-typed parameter is renamed. CTestClassInt plays the role of the
 * "old" version of CTestClassFloat.
 *
 * Fix vs. previous revision: display label "oiginal" -> "original".
 */

#include <shogun/base/init.h>
#include <shogun/base/Parameter.h>
#include <shogun/io/SerializableAsciiFile.h>
#include <shogun/base/ParameterMap.h>
#include <shogun/features/DenseFeatures.h>

using namespace shogun;

/* printing callback handed to init_shogun for messages/warnings/errors */
void print_message(FILE* target, const char* str)
{
    fprintf(target, "%s", str);
}

/* "Old" layout: plain int32 scalar, raw int32 vector/matrix, and a
 * CDenseFeatures object registered under the name "int_features". */
class CTestClassInt : public CSGObject
{
public:
    CTestClassInt()
    {
        m_number=10;
        m_parameters->add(&m_number, "number", "Test number");

        m_vector_length=3;
        m_vector=SG_MALLOC(int32_t, m_vector_length);
        SGVector<int32_t>::fill_vector(m_vector, m_vector_length, 10);
        m_parameters->add_vector(&m_vector, &m_vector_length, "vector",
                "Test vector");

        m_matrix_rows=2;
        m_matrix_cols=3;
        m_matrix=SG_MALLOC(int32_t, m_matrix_rows*m_matrix_cols);
        SGVector<int32_t>::range_fill_vector(m_matrix,
                m_matrix_rows*m_matrix_cols);
        m_parameters->add_matrix(&m_matrix, &m_matrix_rows, &m_matrix_cols,
                "matrix", "Test matrix");

        SGMatrix<int32_t> features=SGMatrix<int32_t>(2, 3);
        SGVector<int32_t>::range_fill_vector(features.matrix,
                features.num_rows*features.num_cols, 3);
        m_features=new CDenseFeatures<int32_t>(10);
        m_features->set_feature_matrix(features);
        m_features->set_combined_feature_weight(5.0);
        SG_REF(m_features);
        m_parameters->add((CSGObject**)&m_features, "int_features",
                "Test features");
    }

    virtual ~CTestClassInt()
    {
        SG_FREE(m_vector);
        SG_FREE(m_matrix);
        SG_UNREF(m_features);
    }

    int32_t m_number;
    int32_t* m_vector;
    int32_t m_vector_length;
    int32_t* m_matrix;
    int32_t m_matrix_rows;
    int32_t m_matrix_cols;
    CDenseFeatures<int32_t>* m_features;

    virtual const char* get_name() const { return "TestClassInt"; }
};

/* "New" layout: float64 scalar, SGVector/SGMatrix wrappers, and the features
 * object renamed to "float_features". Registers the full migration chain. */
class CTestClassFloat : public CSGObject
{
public:
    CTestClassFloat()
    {
        m_number=3.2;
        m_vector=SGVector<float64_t>(10);
        SGVector<float64_t>::fill_vector(m_vector.vector, m_vector.vlen, 0.0);
        m_matrix=SGMatrix<float64_t>(3, 3);
        SGVector<float64_t>::range_fill_vector(m_matrix.matrix,
                m_matrix.num_rows*m_matrix.num_cols, 0.0);

        m_parameters->add(&m_number, "number", "Test number");
        m_parameters->add(&m_vector, "vector", "Test vector");
        m_parameters->add(&m_matrix, "matrix", "Test matrix");

        SGMatrix<int32_t> features=SGMatrix<int32_t>(2, 3);
        SGVector<int32_t>::range_fill_vector(features.matrix,
                features.num_rows*features.num_cols, 0);
        m_features=new CDenseFeatures<int32_t>(features);
        SG_REF(m_features);
        m_parameters->add((CSGObject**)&m_features, "float_features",
                "Test features");

        /* add some parameter mappings for number, here: type changes */
        m_parameter_map->put(
            new SGParamInfo("number", CT_SCALAR, ST_NONE, PT_FLOAT64, 1),
            new SGParamInfo("number", CT_SCALAR, ST_NONE, PT_INT8, 0)
        );
        m_parameter_map->put(
            new SGParamInfo("number", CT_SCALAR, ST_NONE, PT_INT8, 0),
            new SGParamInfo("number", CT_SCALAR, ST_NONE, PT_INT32, -1)
        );

        /* changes for vector: from int32_t vector to float64_t SG_VECTOR */
        m_parameter_map->put(
            new SGParamInfo("vector", CT_SGVECTOR, ST_NONE, PT_FLOAT64, 1),
            new SGParamInfo("vector", CT_SGVECTOR, ST_NONE, PT_INT32, 0)
        );

        /* from normal vector to SG_VECTOR of same type */
        m_parameter_map->put(
            new SGParamInfo("vector", CT_SGVECTOR, ST_NONE, PT_INT32, 0),
            new SGParamInfo("vector", CT_VECTOR, ST_NONE, PT_INT32, -1)
        );

        /* changes for vector: from int32_t vector to float64_t SG_VECTOR */
        m_parameter_map->put(
            new SGParamInfo("matrix", CT_SGMATRIX, ST_NONE, PT_FLOAT64, 1),
            new SGParamInfo("matrix", CT_SGMATRIX, ST_NONE, PT_INT32, 0)
        );

        /* from normal vector to SG_VECTOR of same type */
        m_parameter_map->put(
            new SGParamInfo("matrix", CT_SGMATRIX, ST_NONE, PT_INT32, 0),
            new SGParamInfo("matrix", CT_MATRIX, ST_NONE, PT_INT32, -1)
        );

        /* CSGObject mapping is not yet done */

        /* name change for sgobject */
        m_parameter_map->put(
            new SGParamInfo("float_features", CT_SCALAR, ST_NONE,
                PT_SGOBJECT, 1),
            new SGParamInfo("int_features", CT_SCALAR, ST_NONE,
                PT_SGOBJECT, 0)
        );

        m_parameter_map->finalize_map();
    }

    virtual ~CTestClassFloat()
    {
        SG_UNREF(m_features);
    }

    float64_t m_number;
    SGVector<float64_t> m_vector;
    SGMatrix<float64_t> m_matrix;

    /* no type change here */
    CDenseFeatures<int32_t>* m_features;

    virtual const char* get_name() const { return "TestClassFloat"; }

    /* One case per registered mapping; each converts/copies old on-disk
     * data into the target type or name. */
    virtual TParameter* migrate(DynArray<TParameter*>* param_base,
            const SGParamInfo* target)
    {
        TParameter* result=NULL;
        TParameter* to_migrate=NULL;

        if (*target==SGParamInfo("number", CT_SCALAR, ST_NONE, PT_FLOAT64, 1))
        {
            one_to_one_migration_prepare(param_base, target, result,
                    to_migrate);

            /* here: simply copy (and cast) data because nothing has changed */
            *((float64_t*)result->m_parameter)=
                    *((int8_t*)to_migrate->m_parameter);
        }
        else if (*target==SGParamInfo("number", CT_SCALAR, ST_NONE,
                PT_INT8, 0))
        {
            one_to_one_migration_prepare(param_base, target, result,
                    to_migrate);

            /* here: simply copy (and cast) data because nothing has changed */
            *((int8_t*)result->m_parameter)=
                    *((int32_t*)to_migrate->m_parameter);
        }
        else if (*target==SGParamInfo("vector", CT_SGVECTOR, ST_NONE,
                PT_FLOAT64, 1))
        {
            one_to_one_migration_prepare(param_base, target, result,
                    to_migrate);

            /* here: copy data element wise because type changes */
            float64_t* array_to=*((float64_t**)result->m_parameter);
            int32_t* array_from=*((int32_t**)to_migrate->m_parameter);
            for (index_t i=0; i<*to_migrate->m_datatype.m_length_y; ++i)
                array_to[i]=array_from[i];
        }
        else if (*target==SGParamInfo("vector", CT_SGVECTOR, ST_NONE,
                PT_INT32, 0))
        {
            TParameter* to_migrate=NULL;
            one_to_one_migration_prepare(param_base, target, result,
                    to_migrate);

            /* here: copy data complete because its just wrapper type change */
            int32_t* array_to=*((int32_t**)result->m_parameter);
            int32_t* array_from=*((int32_t**)to_migrate->m_parameter);
            memcpy(array_to, array_from, to_migrate->m_datatype.get_size());
        }
        else if (*target==SGParamInfo("matrix", CT_SGMATRIX, ST_NONE,
                PT_INT32, 0))
        {
            TParameter* to_migrate=NULL;
            one_to_one_migration_prepare(param_base, target, result,
                    to_migrate);

            /* here: copy data complete because its just wrapper type change */
            int32_t* array_to=*((int32_t**)result->m_parameter);
            int32_t* array_from=*((int32_t**)to_migrate->m_parameter);
            memcpy(array_to, array_from, to_migrate->m_datatype.get_size());
        }
        else if (*target==SGParamInfo("matrix", CT_SGMATRIX, ST_NONE,
                PT_FLOAT64, 1))
        {
            TParameter* to_migrate=NULL;
            one_to_one_migration_prepare(param_base, target, result,
                    to_migrate);

            /* here: copy data element wise because type changes */
            float64_t* array_to=*((float64_t**)result->m_parameter);
            int32_t* array_from=*((int32_t**)to_migrate->m_parameter);
            for (index_t i=0; i<to_migrate->m_datatype.get_num_elements(); ++i)
                array_to[i]=array_from[i];
        }
        else if (*target==SGParamInfo("float_features", CT_SCALAR, ST_NONE,
                PT_SGOBJECT, 1))
        {
            TParameter* to_migrate=NULL;

            /* specify name change and thats it */
            char* new_name=(char*) "int_features";
            one_to_one_migration_prepare(param_base, target, result,
                    to_migrate, new_name);
        }

        if (result)
            return result;
        else
            return CSGObject::migrate(param_base, target);
    }
};

const char* filename="test.txt";

/* Compares every migrated member of the float instance against the original
 * int instance, printing both along the way. */
void check_equalness(CTestClassInt* int_instance,
        CTestClassFloat* float_instance)
{
    /* number */
    SG_SPRINT("checking \"number\":\n");
    SG_SPRINT("\t%d==%f\n", int_instance->m_number,
            float_instance->m_number);
    ASSERT(int_instance->m_number==float_instance->m_number);

    /* "vector" */
    SG_SPRINT("checking \"vector\":\n");
    SG_SPRINT("\tlength: %d==%d\n", int_instance->m_vector_length,
            float_instance->m_vector.vlen);
    ASSERT(int_instance->m_vector_length==float_instance->m_vector.vlen);
    SGVector<int32_t>::display_vector(int_instance->m_vector,
            int_instance->m_vector_length, "original", "\t");
    SGVector<float64_t>::display_vector(float_instance->m_vector.vector,
            float_instance->m_vector.vlen, "migrated", "\t");
    for (index_t i=0; i<int_instance->m_vector_length; ++i)
        ASSERT(int_instance->m_vector[i]==float_instance->m_vector.vector[i]);

    /* "matrix" */
    SG_SPRINT("checking \"matrix\":\n");
    SG_SPRINT("\trows: %d==%d\n", int_instance->m_matrix_rows,
            float_instance->m_matrix.num_rows);
    ASSERT(int_instance->m_matrix_rows==float_instance->m_matrix.num_rows);
    SG_SPRINT("\tcols: %d==%d\n", int_instance->m_matrix_cols,
            float_instance->m_matrix.num_cols);
    ASSERT(int_instance->m_matrix_cols==float_instance->m_matrix.num_cols);
    SGMatrix<int32_t>::display_matrix(int_instance->m_matrix,
            int_instance->m_matrix_rows, int_instance->m_matrix_cols,
            "original", "\t");
    SGMatrix<float64_t>::display_matrix(float_instance->m_matrix.matrix,
            float_instance->m_matrix.num_rows,
            float_instance->m_matrix.num_cols, "migrated", "\t");
    for (index_t i=0;
            i<int_instance->m_matrix_rows*int_instance->m_matrix_cols; ++i)
    {
        ASSERT(int_instance->m_matrix[i]==float_instance->m_matrix.matrix[i]);
    }

    /* "features" */
    SG_SPRINT("checking \"features\":\n");
    SG_SPRINT("\tchecking \"feature matrix\":\n");
    SGMatrix<int32_t> original_matrix=
            int_instance->m_features->get_feature_matrix();
    SGMatrix<int32_t> migrated_matrix=
            float_instance->m_features->get_feature_matrix();

    SG_SPRINT("\t\trows: %d==%d\n", original_matrix.num_rows,
            migrated_matrix.num_rows);
    ASSERT(original_matrix.num_rows==migrated_matrix.num_rows);
    SG_SPRINT("\t\tcols: %d==%d\n", original_matrix.num_cols,
            migrated_matrix.num_cols);
    ASSERT(original_matrix.num_cols==migrated_matrix.num_cols);
    SGMatrix<int32_t>::display_matrix(original_matrix.matrix,
            original_matrix.num_rows, original_matrix.num_cols,
            "original", "\t\t");
    SGMatrix<int32_t>::display_matrix(migrated_matrix.matrix,
            migrated_matrix.num_rows, migrated_matrix.num_cols,
            "migrated", "\t\t");
    for (index_t i=0;
            i<int_instance->m_matrix_rows*int_instance->m_matrix_cols; ++i)
    {
        ASSERT(original_matrix.matrix[i]==migrated_matrix.matrix[i]);
    }
}

/* Round-trips an int instance through the ASCII serializer into a float
 * instance, exercising all the type-change migrations above. */
void test_migration()
{
    /* create one instance of each class */
    CTestClassInt* int_instance=new CTestClassInt();
    CTestClassFloat* float_instance=new CTestClassFloat();

    CSerializableAsciiFile* file;

    /* serialize int instance, use custom parameter version */
    file=new CSerializableAsciiFile(filename, 'w');
    int_instance->save_serializable(file, "", -1);
    file->close();
    SG_UNREF(file);

    /* now the magic happens, the float instance is derserialized from file.
     * Note that the parameter types are different. they will all be mapped.
     * See migration methods. Everything is just converted, value is kept.
     * The float instance has different initial values for all members,
     * however, after de-serializing it from the int_instance file, the values
     * should be the same
     *
     * The parameter mappings are chosen in such way that CTestClassInt could
     * be seen as an old version of CTestClassFloat. */

    /* de-serialize float instance, use custom parameter version
     * Note that a warning will appear, complaining that there is no parameter
     * version in file. This is not true, the version is -1, which is used
     * here as custom version. Normally numbers >=0 are used. */
    file=new CSerializableAsciiFile(filename, 'r');
    float_instance->load_serializable(file, "", 1);
    file->close();
    SG_UNREF(file);

    /* assert that content is equal */
    check_equalness(int_instance, float_instance);

    SG_UNREF(int_instance);
    SG_UNREF(float_instance);
    /* NOTE(review): file was already SG_UNREF'd above and the macro NULLs
     * its argument, so this final unref appears to be a no-op — confirm */
    SG_UNREF(file);
}

int main(int argc, char **argv)
{
    init_shogun(&print_message, &print_message, &print_message);

    test_migration();

    exit_shogun();

    return 0;
}
/*
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 3 of the License, or
 * (at your option) any later version.
 *
 * Written (W) 2011 Heiko Strathmann
 * Copyright (C) 2011 Berlin Institute of Technology and Max-Planck-Society
 *
 * Exercises the ParameterMap container: insertion, finalization, lookup by
 * key, and multiple values per key.
 *
 * Fixes vs. previous revision: the "before finalization" banner contained a
 * broken escape ("\n\before" — a backspace character plus "efore"); an inner
 * loop index shadowed the outer one in test_mapping_1.
 */

#include <shogun/base/init.h>
#include <shogun/base/Parameter.h>
#include <shogun/base/ParameterMap.h>

using namespace shogun;

/* printing callback handed to init_shogun for messages/warnings/errors */
void print_message(FILE* target, const char* str)
{
    fprintf(target, "%s", str);
}

/* Basic put/finalize/get round-trip with hits and misses. */
void test_mapping_1()
{
    ParameterMap* map=new ParameterMap();

    map->put(
        new SGParamInfo("number", CT_SCALAR, ST_NONE, PT_FLOAT64, 2),
        new SGParamInfo("number", CT_SCALAR, ST_NONE, PT_INT32, 1)
    );

    map->put(
        new SGParamInfo("number", CT_SCALAR, ST_NONE, PT_INT32, 1),
        new SGParamInfo("number", CT_SCALAR, ST_NONE, PT_FLOAT64, 0)
    );

    map->put(
        new SGParamInfo("number_2", CT_SCALAR, ST_NONE, PT_INT32, 1),
        new SGParamInfo("number_to_keep", CT_SCALAR, ST_NONE, PT_INT32, 0)
    );

    /* finalizing the map is needed before accessing it */
    SG_SPRINT("\n\nbefore finalization:\n");
    map->finalize_map();

    SG_SPRINT("\n\nafter finalization:\n");
    map->print_map();
    SG_SPRINT("\n");

    /* get some elements from map, one/two ARE in map, three and four are
     * NOT */
    DynArray<SGParamInfo*> dummies;
    dummies.append_element(new SGParamInfo("number", CT_SCALAR, ST_NONE,
            PT_INT32, 1));
    dummies.append_element(new SGParamInfo("number", CT_SCALAR, ST_NONE,
            PT_FLOAT64, 2));
    dummies.append_element(new SGParamInfo("number", CT_SCALAR, ST_NONE,
            PT_INT32, 2));
    dummies.append_element(new SGParamInfo("number", CT_SCALAR, ST_NONE,
            PT_FLOAT64, 0));
    dummies.append_element(new SGParamInfo("number_2", CT_SCALAR, ST_NONE,
            PT_INT32, 1));

    for (index_t i=0; i<dummies.get_num_elements(); ++i)
    {
        SGParamInfo* current=dummies.get_element(i);

        char* s=current->to_string();
        SG_SPRINT("searching for: %s\n", s);
        SG_FREE(s);

        DynArray<const SGParamInfo*>* result=map->get(current);
        if (result)
        {
            /* j (not i) to avoid shadowing the outer loop index */
            for (index_t j=0; j<result->get_num_elements(); ++j)
            {
                s=result->get_element(j)->to_string();
                SG_SPRINT("found: %s\n\n", s);
                SG_FREE(s);
            }
        }
        else
            SG_SPRINT("nothing found\n\n");

        delete current;
    }

    delete map;
}

/* Prints a key and whatever value list the map holds for it. */
void print_value(const SGParamInfo* key, ParameterMap* map)
{
    DynArray<const SGParamInfo*>* current=map->get(key);
    key->print_param_info();
    SG_SPRINT("value: ");
    if (current)
    {
        for (index_t i=0; i<current->get_num_elements(); ++i)
            current->get_element(i)->print_param_info("\t");
    }
    else
        SG_SPRINT("no elements\n");

    SG_SPRINT("\n");
}

/* Lookups where each component of the key (name, container, struct,
 * primitive, version) is varied in turn; only exact matches may hit. */
void test_mapping_2()
{
    ParameterMap* map=new ParameterMap();

    EContainerType cfrom=CT_SCALAR;
    EContainerType cto=CT_MATRIX;

    EStructType sfrom=ST_NONE;
    EStructType sto=ST_STRING;

    EPrimitiveType pfrom=PT_BOOL;
    EPrimitiveType pto=PT_SGOBJECT;

    map->put(new SGParamInfo("1", cfrom, sfrom, pfrom, 2),
            new SGParamInfo("eins", cto, sto, pto, 1));
    map->put(new SGParamInfo("2", cfrom, sfrom, pfrom, 2),
            new SGParamInfo("zwei", cto, sto, pto, 1));
    map->put(new SGParamInfo("3", cfrom, sfrom, pfrom, 4),
            new SGParamInfo("drei", cto, sto, pto, 3));
    map->put(new SGParamInfo("4", cfrom, sfrom, pfrom, 4),
            new SGParamInfo("vier", cto, sto, pto, 3));

    map->finalize_map();

    SG_SPRINT("\n\nafter finalization:\n");
    map->print_map();

    const SGParamInfo* key;

    SG_SPRINT("\n\ntesting map\n");
    key=new SGParamInfo("1", cfrom, sfrom, pfrom, 1);
    print_value(key, map);
    delete key;

    key=new SGParamInfo("2", cfrom, sfrom, pfrom, 2);
    print_value(key, map);
    delete key;

    key=new SGParamInfo("2", cto, sfrom, pfrom, 2);
    print_value(key, map);
    delete key;

    key=new SGParamInfo("2", cfrom, sto, pfrom, 2);
    print_value(key, map);
    delete key;

    key=new SGParamInfo("2", cfrom, sfrom, pto, 2);
    print_value(key, map);
    delete key;

    key=new SGParamInfo("5", cfrom, sfrom, pfrom, 4);
    print_value(key, map);
    delete key;

    delete map;
}

/* test multiple values per key */
void test_mapping_0()
{
    ParameterMap* map=new ParameterMap();

    EContainerType cfrom=CT_SCALAR;
    EContainerType cto=CT_MATRIX;

    EStructType sfrom=ST_NONE;
    EStructType sto=ST_STRING;

    EPrimitiveType pfrom=PT_BOOL;
    EPrimitiveType pto=PT_SGOBJECT;

    /* 3 equal keys */
    map->put(new SGParamInfo("1", cfrom, sfrom, pfrom, 2),
            new SGParamInfo("eins a", cto, sto, pto, 1));
    map->put(new SGParamInfo("1", cfrom, sfrom, pfrom, 2),
            new SGParamInfo("eins b", cto, sto, pto, 1));
    map->put(new SGParamInfo("1", cfrom, sfrom, pfrom, 2),
            new SGParamInfo("eins c", cto, sto, pto, 1));

    /* 2 equal keys */
    map->put(new SGParamInfo("2", cfrom, sfrom, pfrom, 2),
            new SGParamInfo("zwei a", cto, sto, pto, 1));
    map->put(new SGParamInfo("2", cfrom, sfrom, pfrom, 2),
            new SGParamInfo("zwei b", cto, sto, pto, 1));

    map->finalize_map();
    SG_SPRINT("printing finalized map\n");
    map->print_map();

    /* assert that all is there */
    DynArray<const SGParamInfo*>* result;
    bool found;

    /* key 0 */
    result=map->get(SGParamInfo("1", cfrom, sfrom, pfrom, 2));
    ASSERT(result);

    /* first value element */
    found=false;
    for (index_t i=0; i<result->get_num_elements(); ++i)
    {
        if (*result->get_element(i) == SGParamInfo("eins a", cto, sto, pto, 1))
            found=true;
    }
    ASSERT(found);

    /* second value element */
    found=false;
    for (index_t i=0; i<result->get_num_elements(); ++i)
    {
        if (*result->get_element(i) == SGParamInfo("eins b", cto, sto, pto, 1))
            found=true;
    }
    ASSERT(found);

    /* third value element */
    found=false;
    for (index_t i=0; i<result->get_num_elements(); ++i)
    {
        if (*result->get_element(i) == SGParamInfo("eins c", cto, sto, pto, 1))
            found=true;
    }
    ASSERT(found);

    /* key 1 */
    result=map->get(SGParamInfo("2", cfrom, sfrom, pfrom, 2));
    ASSERT(result);

    /* first value element */
    found=false;
    for (index_t i=0; i<result->get_num_elements(); ++i)
    {
        if (*result->get_element(i) == SGParamInfo("zwei a", cto, sto, pto, 1))
            found=true;
    }
    ASSERT(found);

    /* second value element */
    found=false;
    for (index_t i=0; i<result->get_num_elements(); ++i)
    {
        if (*result->get_element(i) == SGParamInfo("zwei b", cto, sto, pto, 1))
            found=true;
    }
    ASSERT(found);

    delete map;
}

int main(int argc, char **argv)
{
    init_shogun(&print_message, &print_message, &print_message);

    test_mapping_0();
    test_mapping_1();
    test_mapping_2();

    exit_shogun();

    return 0;
}
#include <shogun/base/init.h>

using namespace shogun;

/* Smallest possible libshogun program: initialise the library and shut it
 * down again. Useful as a compile/link smoke test for an installation. */
int main(int argc, char** argv)
{
    init_shogun();
    exit_shogun();
    return 0;
}
#include <shogun/labels/MulticlassLabels.h>
#include <shogun/features/DenseFeatures.h>
#include <shogun/multiclass/ConjugateIndex.h>
#include <shogun/base/init.h>
#include <shogun/lib/common.h>
#include <shogun/io/SGIO.h>

using namespace shogun;

/* Minimal CConjugateIndex demo: train on three 2-dimensional vectors with
 * multiclass labels {0, 1, 0}, then print the prediction for each training
 * example. */
int main(int argc, char** argv)
{
    init_shogun_with_defaults();

    /* three 2-dimensional vectors, entries filled with 0..5 */
    SGMatrix<float64_t> data(2, 3);
    for (int32_t idx=0; idx<6; idx++)
        data.matrix[idx]=idx;

    /* the features object takes ownership of the matrix */
    CDenseFeatures<float64_t>* feats=
            new CDenseFeatures<float64_t>(data);

    /* one label per column: classes 0, 1, 0 */
    CMulticlassLabels* lbls=new CMulticlassLabels(3);
    lbls->set_label(0, 0);
    lbls->set_label(1, +1);
    lbls->set_label(2, 0);

    CConjugateIndex* classifier=new CConjugateIndex(feats, lbls);
    classifier->train();

    /* classify the training examples themselves */
    for (int32_t idx=0; idx<3; idx++)
        SG_SPRINT("output[%d]=%f\n", idx, classifier->apply_one(idx));

    /* releasing the classifier also releases features/labels it holds */
    SG_UNREF(classifier);

    exit_shogun();
    return 0;
}
#include <shogun/labels/RegressionLabels.h>
#include <shogun/labels/BinaryLabels.h>
#include <shogun/features/DenseFeatures.h>
#include <shogun/classifier/FeatureBlockLogisticRegression.h>
#include <shogun/lib/IndexBlock.h>
#include <shogun/lib/IndexBlockTree.h>
#include <shogun/lib/IndexBlockGroup.h>
#include <shogun/base/init.h>
#include <shogun/lib/common.h>
#include <shogun/io/SGIO.h>

using namespace shogun;

/* Example: feature-block logistic regression on 4 examples with 4 features.
 * Trains first with a flat group relation over two feature blocks, then with
 * a tree relation (the same blocks nested under a root), printing the learnt
 * weight vector after each run.
 *
 * Fixes vs. previous revision: <shogun/labels/BinaryLabels.h> is now
 * included explicitly (CBinaryLabels was used but only available through a
 * transitive include), and an unused print_message() callback was removed
 * (init_shogun_with_defaults() is used instead of init_shogun()). */
int main(int argc, char** argv)
{
    init_shogun_with_defaults();

    /* 4 examples with 4 features each, entries 0..15 */
    SGMatrix<float64_t> matrix(4,4);
    for (int32_t i=0; i<4*4; i++)
        matrix.matrix[i]=i;

    CDenseFeatures<float64_t>* features=
            new CDenseFeatures<float64_t>(matrix);

    /* alternating binary labels */
    CBinaryLabels* labels=new CBinaryLabels(4);
    labels->set_label(0, -1);
    labels->set_label(1, +1);
    labels->set_label(2, -1);
    labels->set_label(3, +1);

    /* group relation: feature indices [0,2) and [2,4) form two blocks */
    CIndexBlock* first_block = new CIndexBlock(0,2);
    CIndexBlock* second_block = new CIndexBlock(2,4);
    CIndexBlockGroup* block_group = new CIndexBlockGroup();
    block_group->add_block(first_block);
    block_group->add_block(second_block);

    CFeatureBlockLogisticRegression* regressor =
            new CFeatureBlockLogisticRegression(0.5,features,labels,
                    block_group);
    regressor->train();

    regressor->get_w().display_vector();

    /* tree relation: the same two blocks nested under a root over [0,4) */
    CIndexBlock* root_block = new CIndexBlock(0,4);
    root_block->add_sub_block(first_block);
    root_block->add_sub_block(second_block);
    CIndexBlockTree* block_tree = new CIndexBlockTree(root_block);

    regressor->set_feature_relation(block_tree);
    regressor->train();

    regressor->get_w().display_vector();

    /* NOTE(review): block_group, block_tree and root_block are never
     * SG_UNREF'd here — presumably an intentional example simplification,
     * but verify against the index-block ownership semantics */
    SG_UNREF(regressor);
    exit_shogun();
    return 0;
}
#include <shogun/labels/MulticlassLabels.h>
#include <shogun/features/DenseFeatures.h>
#include <shogun/multiclass/GaussianNaiveBayes.h>
#include <shogun/base/init.h>
#include <shogun/lib/common.h>
#include <shogun/io/SGIO.h>

using namespace shogun;

/* Minimal Gaussian naive Bayes demo: train on three 2-dimensional vectors
 * with multiclass labels {0, 1, 2}, then print the prediction for each
 * training example. */
int main(int argc, char** argv)
{
    init_shogun_with_defaults();

    /* three 2-dimensional vectors, entries filled with 0..5 */
    SGMatrix<float64_t> data(2, 3);
    for (int32_t idx=0; idx<6; idx++)
        data.matrix[idx]=idx;

    /* the features object takes ownership of the matrix */
    CDenseFeatures<float64_t>* feats=
            new CDenseFeatures<float64_t>(data);

    /* one label per column: classes 0, 1, 2 */
    CMulticlassLabels* lbls=new CMulticlassLabels(3);
    lbls->set_label(0, 0);
    lbls->set_label(1, +1);
    lbls->set_label(2, +2);

    CGaussianNaiveBayes* classifier=new CGaussianNaiveBayes(feats, lbls);
    classifier->train();

    /* classify the training examples themselves */
    for (int32_t idx=0; idx<3; idx++)
        SG_SPRINT("output[%d]=%f\n", idx, classifier->apply_one(idx));

    /* releasing the classifier also releases features/labels it holds */
    SG_UNREF(classifier);

    exit_shogun();
    return 0;
}
#include <shogun/labels/MulticlassLabels.h>
#include <shogun/features/DenseFeatures.h>
#include <shogun/kernel/GaussianKernel.h>
#include <shogun/multiclass/LaRank.h>
#include <shogun/base/init.h>

using namespace shogun;

/* printing callback handed to init_shogun */
void print_message(FILE* target, const char* str)
{
    fprintf(target, "%s", str);
}

/* LaRank multiclass SVM demo: train on a tiny 2-feature dataset with a
 * Gaussian kernel, then check that batch classification and per-example
 * classification agree. */
int main(int argc, char** argv)
{
    init_shogun(&print_message);

    index_t n_vectors=3;
    index_t n_features=2;
    index_t n_classes=2;

    /* dataset: n_features x n_vectors matrix filled with 0,1,2,... */
    SGMatrix<float64_t> data(n_features, n_vectors);
    SGVector<float64_t>::range_fill_vector(data.matrix,
            n_features*n_vectors);

    /* the features object takes ownership of the matrix */
    CDenseFeatures<float64_t>* feats=new CDenseFeatures<float64_t>(data);

    /* label i gets class i modulo the class count */
    CMulticlassLabels* lbls=new CMulticlassLabels(n_vectors);
    for (index_t v=0; v<n_vectors; ++v)
        lbls->set_label(v, v%n_classes);

    /* create gaussian kernel with cache 10MB, width 0.5 */
    CGaussianKernel* kernel = new CGaussianKernel(10, 0.5);
    kernel->init(feats, feats);

    /* create LaRank machine with C=10 and train */
    CLaRank* svm = new CLaRank(10, kernel, lbls);
    svm->train();
    /* NOTE(review): train() is invoked a second time, apparently to
     * exercise re-training — confirm this is intentional */
    svm->train();

    /* classify on training examples in one batch */
    CMulticlassLabels* output=
            CMulticlassLabels::obtain_from_generic(svm->apply());
    SGVector<float64_t>::display_vector(output->get_labels().vector,
            output->get_num_labels(), "batch output");

    /* assert that batch apply and apply(index_t) give same result */
    for (index_t v=0; v<output->get_num_labels(); ++v)
    {
        float64_t label=svm->apply_one(v);
        SG_SPRINT("single output[%d]=%f\n", v, label);
        ASSERT(output->get_label(v)==label);
    }
    SG_UNREF(output);

    /* free up memory */
    SG_UNREF(svm);

    exit_shogun();
    return 0;
}
#include <shogun/labels/LatentLabels.h>
#include <shogun/features/LatentFeatures.h>
#include <shogun/latent/LatentSVM.h>
#include <shogun/features/DenseFeatures.h>
#include <shogun/base/init.h>
#include <shogun/lib/common.h>
#include <shogun/io/SGIO.h>

#include <libgen.h>

using namespace shogun;

#define MAX_LINE_LENGTH 4096
#define HOG_SIZE 1488

/* Latent variable: the (x,y) position of an object's bounding box. */
struct CBoundingBox : public CData
{
	CBoundingBox(int32_t x, int32_t y) : CData(), x_pos(x), y_pos(y) {};

	int32_t x_pos, y_pos;

	/** @return name of SGSerializable */
	virtual const char* get_name() const { return "BoundingBox"; }
};

/* One sample: a width x height grid of HOG descriptors, each of
 * HOG_SIZE doubles (hog[x][y] is one descriptor). */
struct CHOGFeatures : public CData
{
	CHOGFeatures(int32_t w, int32_t h) : CData(), width(w), height(h) {};

	int32_t width, height;
	float64_t ***hog;

	/** @return name of SGSerializable */
	virtual const char* get_name() const { return "HOGFeatures"; }
};

/* Latent model for object detection: psi(x,h) is the HOG descriptor at
 * the current bounding-box position; inference picks the position with
 * the highest score under the current weight vector. */
class CObjectDetector: public CLatentModel
{
	public:
		CObjectDetector() {}
		CObjectDetector(CLatentFeatures* feat, CLatentLabels* labels)
			: CLatentModel(feat, labels) {}

		virtual ~CObjectDetector() {}

		// Dimensionality of the joint feature vector (one HOG descriptor).
		virtual int32_t get_dim() const { return HOG_SIZE; }

		/* Builds the psi feature matrix: for each example, copy the HOG
		 * descriptor at the currently-imputed bounding-box position into
		 * one column of a dim x num_examples matrix. */
		virtual CDotFeatures* get_psi_feature_vectors()
		{
			int32_t num_examples = this->get_num_vectors();
			int32_t dim = this->get_dim();
			SGMatrix<float64_t> psi_m(dim, num_examples);
			for (int32_t i = 0; i < num_examples; ++i)
			{
				CHOGFeatures* hf = (CHOGFeatures*) m_features->get_sample(i);
				CBoundingBox* bb = (CBoundingBox*) m_labels->get_latent_label(i);
				// psi_m.matrix+i*dim is column i — relies on SGMatrix
				// being contiguous column-major storage.
				memcpy(psi_m.matrix+i*dim, hf->hog[bb->x_pos][bb->y_pos], dim*sizeof(float64_t));
			}
			CDenseFeatures<float64_t>* psi_feats = new CDenseFeatures<float64_t>(psi_m);
			return psi_feats;
		}

		/* Exhaustive inference: scan every grid position of example idx
		 * and return the bounding box with the highest dot product
		 * against the weight vector w. */
		virtual CData* infer_latent_variable(const SGVector<float64_t>& w, index_t idx)
		{
			int32_t pos_x = 0, pos_y = 0;
			float64_t max_score = -CMath::INFTY;

			CHOGFeatures* hf = (CHOGFeatures*) m_features->get_sample(idx);
			for (int i = 0; i < hf->width; ++i)
			{
				for (int j = 0; j < hf->height; ++j)
				{
					float64_t score = w.dot(w.vector, hf->hog[i][j], w.vlen);

					if (score > max_score)
					{
						pos_x = i;
						pos_y = j;
						max_score = score;
					}
				}
			}
			SG_SDEBUG("%d %d %f\n", pos_x, pos_y, max_score);

			// NOTE(review): the new latent label is SG_REF'd before being
			// returned — presumably the caller takes ownership of this
			// reference; confirm against the CLatentModel contract.
			CBoundingBox* h = new CBoundingBox(pos_x, pos_y);
			SG_REF(h);

			return h;
		}
};

/* Parses the dataset file: first line is the example count; each
 * following line is "<name> <label> <width> <height>". For every
 * example a random initial bounding box is drawn and the per-position
 * HOG descriptors are loaded from "<dir>/<name>.<x>.<y>.txt" files
 * next to the dataset file. Outputs feats/labels (both SG_REF'd for
 * the caller). */
static void read_dataset(char* fname, CLatentFeatures*& feats, CLatentLabels*& labels)
{
	FILE* fd = fopen(fname, "r");
	char line[MAX_LINE_LENGTH];
	char *pchar, *last_pchar;
	int num_examples,label,height,width;

	// dirname() may modify fname in place — deliberate here, fname is argv.
	char* path = dirname(fname);

	if (fd == NULL)
		SG_SERROR("Cannot open input file %s!\n", fname);

	// NOTE(review): fgets/fscanf return values are ignored throughout;
	// a truncated file would be silently misparsed.
	fgets(line, MAX_LINE_LENGTH, fd);
	num_examples = atoi(line);

	labels = new CLatentLabels(num_examples);
	SG_REF(labels);
	CBinaryLabels* ys = new CBinaryLabels(num_examples);

	feats = new CLatentFeatures(num_examples);
	SG_REF(feats);

	CMath::init_random();
	for (int i = 0; (!feof(fd)) && (i < num_examples); ++i)
	{
		fgets(line, MAX_LINE_LENGTH, fd);

		// Tokenize in place: terminate the example name...
		pchar = line;
		while ((*pchar)!=' ') pchar++;
		*pchar = '\0';
		pchar++;

		/* label: {-1, 1} */
		last_pchar = pchar;
		while ((*pchar)!=' ') pchar++;
		*pchar = '\0';
		// Even class ids map to +1, odd to -1.
		label = (atoi(last_pchar) % 2 == 0) ? 1 : -1;
		pchar++;

		if (ys->set_label(i, label) == false)
			SG_SERROR("Couldn't set label for element %d\n", i);

		// ...then the grid width...
		last_pchar = pchar;
		while ((*pchar)!=' ') pchar++;
		*pchar = '\0';
		width = atoi(last_pchar);
		pchar++;

		// ...and the grid height (last field, newline-terminated).
		last_pchar = pchar;
		while ((*pchar)!='\n') pchar++;
		*pchar = '\0';
		height = atoi(last_pchar);

		/* create latent label */
		// Random initial bounding-box position inside the grid.
		int x = CMath::random(0, width-1);
		int y = CMath::random(0, height-1);
		CBoundingBox* bb = new CBoundingBox(x,y);
		labels->add_latent_label(bb);

		SG_SPROGRESS(i, 0, num_examples);
		CHOGFeatures* hog = new CHOGFeatures(width, height);
		hog->hog = SG_CALLOC(float64_t**, hog->width);
		for (int j = 0; j < width; ++j)
		{
			hog->hog[j] = SG_CALLOC(float64_t*, hog->height);
			for (int k = 0; k < height; ++k)
			{
				char filename[MAX_LINE_LENGTH];
				hog->hog[j][k] = SG_CALLOC(float64_t, HOG_SIZE);

				// 'line' now holds only the NUL-terminated example name.
				sprintf(filename,"%s/%s.%03d.%03d.txt",path,line,j,k);
				FILE* f = fopen(filename, "r");
				if (f == NULL)
					SG_SERROR("Could not open file: %s\n", filename);
				for (int l = 0; l < HOG_SIZE; ++l)
					fscanf(f,"%lf",&hog->hog[j][k][l]);
				fclose(f);
			}
		}
		feats->add_sample(hog);
	}
	fclose(fd);
	labels->set_labels(ys);

	SG_SDONE();
}

/* Usage: <binary> <train-file> <test-file>. Trains a latent SVM object
 * detector; the test-set evaluation is currently commented out and the
 * model is applied to the training features instead. */
int main(int argc, char** argv)
{
	init_shogun_with_defaults();
	sg_io->set_loglevel(MSG_DEBUG);

	/* check whether the train/test args are given */
	if (argc < 3)
	{
		SG_SERROR("not enough arguements given\n");
	}

	CLatentFeatures* train_feats = NULL;
	CLatentLabels* train_labels = NULL;
	/* read train data set */
	read_dataset(argv[1], train_feats, train_labels);

	/* train the classifier */
	float64_t C = 10.0;

	CObjectDetector* od = new CObjectDetector(train_feats, train_labels);
	CLatentSVM llm(od, C);
	llm.train();

	//	CLatentFeatures* test_feats = NULL;
	//	CLatentLabels* test_labels = NULL;
	//	read_dataset(argv[2], test_feats, test_labels);
	SG_SPRINT("Testing with the test set\n");
	llm.apply(train_feats);

	exit_shogun();
	return 0;
}
/*
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 3 of the License, or
 * (at your option) any later version.
 *
 * Written (W) 2008-2009 Soeren Sonnenburg
 * Written (W) 2012 Heiko Strathmann
 * Copyright (C) 2008-2009 Fraunhofer Institute FIRST and Max Planck Society
 */

#include <shogun/kernel/GaussianKernel.h>
#include <shogun/labels/BinaryLabels.h>
#include <shogun/features/DenseFeatures.h>
#include <shogun/classifier/svm/LibSVM.h>

using namespace shogun;

/* Fills lab/feat with a two-class toy problem: the first half of the
 * examples gets label -1 and features shifted by +dist, the second
 * half label +1 and features shifted by -dist. The RNG call order
 * determines the exact values, so the fill order matters. */
void gen_rand_data(SGVector<float64_t> lab, SGMatrix<float64_t> feat,
		float64_t dist)
{
	index_t dims=feat.num_rows;
	index_t num=lab.vlen;

	for (int32_t i=0; i<num; i++)
	{
		if (i<num/2)
		{
			lab[i]=-1.0;

			for (int32_t j=0; j<dims; j++)
				feat(j, i)=CMath::random(0.0, 1.0)+dist;
		}
		else
		{
			lab[i]=1.0;

			for (int32_t j=0; j<dims; j++)
				feat(j, i)=CMath::random(0.0, 1.0)-dist;
		}
	}
	lab.display_vector("lab");
	feat.display_matrix("feat");
}

/* Trains a Gaussian-kernel LibSVM on the random toy data and prints
 * the prediction (and its raw value) for every training example. */
void test_libsvm()
{
	const int32_t feature_cache=0;
	const int32_t kernel_cache=0;
	const float64_t rbf_width=10;
	const float64_t svm_C=10;
	const float64_t svm_eps=0.001;

	index_t num=100;
	index_t dims=2;
	float64_t dist=0.5;

	SGVector<float64_t> lab(num);
	SGMatrix<float64_t> feat(dims, num);

	gen_rand_data(lab, feat, dist);

	// create train labels
	CLabels* labels=new CBinaryLabels(lab);

	// create train features
	CDenseFeatures<float64_t>* features=new CDenseFeatures<float64_t>(
			feature_cache);
	SG_REF(features);
	features->set_feature_matrix(feat);

	// create gaussian kernel
	CGaussianKernel* kernel=new CGaussianKernel(kernel_cache, rbf_width);
	SG_REF(kernel);
	kernel->init(features, features);

	// create svm via libsvm and train
	CLibSVM* svm=new CLibSVM(svm_C, kernel, labels);
	SG_REF(svm);
	svm->set_epsilon(svm_eps);
	svm->train();

	SG_SPRINT("num_sv:%d b:%f\n", svm->get_num_support_vectors(),
			svm->get_bias());

	// classify + display output
	CBinaryLabels* out_labels=CBinaryLabels::obtain_from_generic(svm->apply());

	for (int32_t i=0; i<num; i++)
	{
		SG_SPRINT("out[%d]=%f (%f)\n", i, out_labels->get_label(i),
				out_labels->get_value(i));
	}

	// release local references; the svm still holds kernel/labels until
	// it is unref'd last
	SG_UNREF(out_labels);
	SG_UNREF(kernel);
	SG_UNREF(features);
	SG_UNREF(svm);
}

int main()
{
	init_shogun();

	test_libsvm();

	exit_shogun();
	return 0;
}
#include <shogun/labels/BinaryLabels.h>
#include <shogun/features/DenseFeatures.h>
#include <shogun/kernel/GaussianKernel.h>
#include <shogun/classifier/svm/LibSVM.h>
#include <shogun/base/init.h>
#include <shogun/lib/common.h>
#include <shogun/io/SGIO.h>

using namespace shogun;

/* Forward all shogun messages to the given stream unchanged. */
void print_message(FILE* target, const char* str)
{
	fprintf(target, "%s", str);
}

/* Minimal LibSVM demo: three 2-d vectors labelled -1/+1/-1, trained
 * with a Gaussian kernel and classified on the same data. */
int main(int argc, char** argv)
{
	init_shogun(&print_message);

	// Column-major 2x3 matrix holding the values 0..5, i.e. three
	// 2-dimensional training vectors.
	SGMatrix<float64_t> data(2, 3);
	int32_t pos = 0;
	while (pos < 6)
	{
		data.matrix[pos] = pos;
		++pos;
	}

	// shogun will now own the matrix once it is handed over.
	CDenseFeatures<float64_t>* feats = new CDenseFeatures<float64_t>();
	feats->set_feature_matrix(data);

	// Alternating binary labels for the three vectors.
	const float64_t target_labels[3] = { -1, +1, -1 };
	CBinaryLabels* lab = new CBinaryLabels(3);
	for (int32_t idx = 0; idx < 3; ++idx)
		lab->set_label(idx, target_labels[idx]);

	// Gaussian kernel with a 10MB cache and width 0.5.
	CGaussianKernel* kern = new CGaussianKernel(10, 0.5);
	kern->init(feats, feats);

	// LibSVM with regularization constant C=10.
	CLibSVM* svm = new CLibSVM(10, kern, lab);
	svm->train();

	// Classify the training examples themselves.
	for (int32_t i = 0; i < 3; i++)
		SG_SPRINT("output[%d]=%f\n", i, svm->apply_one(i));

	// Dropping the SVM also releases the kernel and labels it holds.
	SG_UNREF(svm);

	exit_shogun();
	return 0;
}
/* * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 3 of the License, or * (at your option) any later version. * * Written (W) 2009 Alexander Binder * Copyright (C) 2009 Fraunhofer Institute FIRST and Max-Planck-Society */ #include <iostream> #include <shogun/io/SGIO.h> #include <shogun/lib/ShogunException.h> #include <shogun/labels/MulticlassLabels.h> #include <shogun/kernel/CustomKernel.h> #include <shogun/kernel/CombinedKernel.h> #include <shogun/classifier/mkl/MKLMulticlass.h> // g++ -Wall -O3 classifier_mklmulticlass.cpp -I /home/theseus/private/alx/shoguntrunk/compiledtmp/include -L/home/theseus/private/alx/shoguntrunk/compiledtmp/lib -lshogun using namespace shogun; void print_message(FILE* target, const char* str) { fprintf(target, "%s", str); } void print_warning(FILE* target, const char* str) { fprintf(target, "%s", str); } void print_error(FILE* target, const char* str) { fprintf(target, "%s", str); } void getgauss(float64_t & y1, float64_t & y2) { float x1, x2, w; do { x1 = 2.0 * rand()/(float64_t)RAND_MAX - 1.0; x2 = 2.0 * rand()/(float64_t)RAND_MAX - 1.0; w = x1 * x1 + x2 * x2; } while ( (w >= 1.0)|| (w<1e-9) ); w = sqrt( (-2.0 * log( w ) ) / w ); y1 = x1 * w; y2 = x2 * w; } void gendata(std::vector<float64_t> & x,std::vector<float64_t> & y, CMulticlassLabels*& lab) { int32_t totalsize=240; int32_t class1size=80; int32_t class2size=70; //generating three class data set x.resize(totalsize); y.resize(totalsize); for(size_t i=0; i< x.size();++i) getgauss(x[i], y[i]); for(size_t i=0; i< x.size();++i) { if((int32_t)i < class1size) { x[i]+=0; y[i]+=0; } else if( (int32_t)i< class1size+class2size) { x[i]+=+1; y[i]+=-1; } else { x[i]+=-1; y[i]+=+1; } } //set labels lab=new CMulticlassLabels(x.size()); for(size_t i=0; i< x.size();++i) { if((int32_t)i < class1size) lab->set_int_label(i,0); else if( (int32_t)i< 
class1size+class2size) lab->set_int_label(i,1); else lab->set_int_label(i,2); } } void gentrainkernel(float64_t * & ker1 ,float64_t * & ker2, float64_t * & ker3 ,float64_t & autosigma,float64_t & n1,float64_t & n2, float64_t & n3, const std::vector<float64_t> & x, const std::vector<float64_t> & y) { autosigma=0; for(size_t l=0; l< x.size();++l) { for(size_t r=0; r<= l;++r) { float64_t dist=((x[l]-x[r])*(x[l]-x[r]) + (y[l]-y[r])*(y[l]-y[r])); autosigma+=dist*2.0/(float64_t)x.size()/((float64_t)x.size()+1); } } float64_t fm1=0, mean1=0,fm2=0, mean2=0,fm3=0, mean3=0; ker1=SG_MALLOC(float64_t, x.size()*x.size()); ker2=SG_MALLOC(float64_t, x.size()*x.size()); ker3=SG_MALLOC(float64_t, x.size()*x.size()); for(size_t l=0; l< x.size();++l) { for(size_t r=0; r< x.size();++r) { float64_t dist=((x[l]-x[r])*(x[l]-x[r]) + (y[l]-y[r])*(y[l]-y[r])); ker1[l +r*x.size()]= exp( -dist/autosigma/autosigma) ; //ker2[l +r*x.size()]= exp( -dist/sigma2/sigma2) ; ker2[l +r*x.size()]= x[l]*x[r] + y[l]*y[r]; ker3[l +r*x.size()]= (x[l]*x[r] + y[l]*y[r]+1)*(x[l]*x[r] + y[l]*y[r]+1); fm1+=ker1[l +r*x.size()]/(float64_t)x.size()/((float64_t)x.size()); fm2+=ker2[l +r*x.size()]/(float64_t)x.size()/((float64_t)x.size()); fm3+=ker3[l +r*x.size()]/(float64_t)x.size()/((float64_t)x.size()); if(l==r) { mean1+=ker1[l +r*x.size()]/(float64_t)x.size(); mean2+=ker2[l +r*x.size()]/(float64_t)x.size(); mean3+=ker3[l +r*x.size()]/(float64_t)x.size(); } } } n1=(mean1-fm1); n2=(mean2-fm2); n3=(mean3-fm3); for(size_t l=0; l< x.size();++l) { for(size_t r=0; r< x.size();++r) { ker1[l +r*x.size()]=ker1[l +r*x.size()]/n1; ker2[l +r*x.size()]=ker2[l +r*x.size()]/n2; ker3[l +r*x.size()]=ker3[l +r*x.size()]/n3; } } } void gentestkernel(float64_t * & ker1 ,float64_t * & ker2,float64_t * & ker3, const float64_t autosigma,const float64_t n1,const float64_t n2, const float64_t n3, const std::vector<float64_t> & x,const std::vector<float64_t> & y, const std::vector<float64_t> & tx,const std::vector<float64_t> & ty) { 
ker1=SG_MALLOC(float64_t, x.size()*tx.size()); ker2=SG_MALLOC(float64_t, x.size()*tx.size()); ker3=SG_MALLOC(float64_t, x.size()*tx.size()); for(size_t l=0; l< x.size();++l) { for(size_t r=0; r< tx.size();++r) { float64_t dist=((x[l]-tx[r])*(x[l]-tx[r]) + (y[l]-ty[r])*(y[l]-ty[r])); ker1[l +r*x.size()]= exp( -dist/autosigma/autosigma) ; ker2[l +r*x.size()]= x[l]*tx[r] + y[l]*ty[r]; ker3[l +r*x.size()]= (x[l]*tx[r] + y[l]*ty[r]+1)*(x[l]*tx[r] + y[l]*ty[r]+1); } } for(size_t l=0; l< x.size();++l) { for(size_t r=0; r< tx.size();++r) { ker1[l +r*x.size()]=ker1[l +r*x.size()]/n1; ker2[l +r*x.size()]=ker2[l +r*x.size()]/n2; ker3[l +r*x.size()]=ker3[l +r*x.size()]/n2; } } } void tester() { CMulticlassLabels* lab=NULL; std::vector<float64_t> x,y; gendata(x,y, lab); SG_REF(lab); float64_t* ker1=NULL; float64_t* ker2=NULL; float64_t* ker3=NULL; float64_t autosigma=1; float64_t n1=0; float64_t n2=0; float64_t n3=0; int32_t numdata=0; gentrainkernel( ker1 , ker2, ker3 , autosigma, n1, n2, n3,x,y); numdata=x.size(); CCombinedKernel* ker=new CCombinedKernel(); CCustomKernel* kernel1=new CCustomKernel(); CCustomKernel* kernel2=new CCustomKernel(); CCustomKernel* kernel3=new CCustomKernel(); kernel1->set_full_kernel_matrix_from_full(SGMatrix<float64_t>(ker1, numdata,numdata,false)); kernel2->set_full_kernel_matrix_from_full(SGMatrix<float64_t>(ker2, numdata,numdata,false)); kernel3->set_full_kernel_matrix_from_full(SGMatrix<float64_t>(ker3, numdata,numdata,false)); SG_FREE(ker1); SG_FREE(ker2); SG_FREE(ker3); ker->append_kernel(kernel1); ker->append_kernel(kernel2); ker->append_kernel(kernel3); //here comes the core stuff float64_t regconst=1.0; CMKLMulticlass* tsvm =new CMKLMulticlass(regconst, ker, lab); tsvm->set_epsilon(0.0001); // SVM epsilon // MKL parameters tsvm->set_mkl_epsilon(0.01); // subkernel weight L2 norm termination criterion tsvm->set_max_num_mkliters(120); // well it will be just three iterations tsvm->set_mkl_norm(1.5); // mkl norm //starting svm training 
tsvm->train(); SG_SPRINT("finished svm training\n"); //starting svm testing on training data CMulticlassLabels* res=CMulticlassLabels::obtain_from_generic(tsvm->apply()); ASSERT(res); float64_t err=0; for(int32_t i=0; i<numdata;++i) { ASSERT(i< res->get_num_labels()); if (lab->get_int_label(i)!=res->get_int_label(i)) err+=1; } err/=(float64_t)res->get_num_labels(); SG_SPRINT("prediction error on training data (3 classes): %f ",err); SG_SPRINT("random guess error would be: %f \n",2/3.0); //generate test data CMulticlassLabels* tlab=NULL; std::vector<float64_t> tx,ty; gendata( tx,ty,tlab); SG_REF(tlab); float64_t* tker1=NULL; float64_t* tker2=NULL; float64_t* tker3=NULL; gentestkernel(tker1,tker2,tker3, autosigma, n1,n2,n3, x,y, tx,ty); int32_t numdatatest=tx.size(); CCombinedKernel* tker=new CCombinedKernel(); SG_REF(tker); CCustomKernel* tkernel1=new CCustomKernel(); CCustomKernel* tkernel2=new CCustomKernel(); CCustomKernel* tkernel3=new CCustomKernel(); tkernel1->set_full_kernel_matrix_from_full(SGMatrix<float64_t>(tker1,numdata, numdatatest, false)); tkernel2->set_full_kernel_matrix_from_full(SGMatrix<float64_t>(tker2,numdata, numdatatest, false)); tkernel3->set_full_kernel_matrix_from_full(SGMatrix<float64_t>(tker2,numdata, numdatatest, false)); SG_FREE(tker1); SG_FREE(tker2); SG_FREE(tker3); tker->append_kernel(tkernel1); tker->append_kernel(tkernel2); tker->append_kernel(tkernel3); int32_t numweights; float64_t* weights=tsvm->getsubkernelweights(numweights); SG_SPRINT("test kernel weights\n"); for(int32_t i=0; i< numweights;++i) SG_SPRINT("%f ", weights[i]); SG_SPRINT("\n"); //set kernel tker->set_subkernel_weights(SGVector<float64_t>(weights, numweights)); tsvm->set_kernel(tker); //compute classification error, check mem CMulticlassLabels* tres=CMulticlassLabels::obtain_from_generic(tsvm->apply()); float64_t terr=0; for(int32_t i=0; i<numdatatest;++i) { ASSERT(i< tres->get_num_labels()); if(tlab->get_int_label(i)!=tres->get_int_label(i)) terr+=1; } 
terr/=(float64_t) tres->get_num_labels(); SG_SPRINT("prediction error on test data (3 classes): %f ",terr); SG_SPRINT("random guess error would be: %f \n",2/3.0); SG_UNREF(tsvm); SG_UNREF(res); SG_UNREF(tres); SG_UNREF(lab); SG_UNREF(tlab); SG_UNREF(tker); SG_SPRINT( "finished \n"); } namespace shogun { extern Version* sg_version; extern SGIO* sg_io; } int main() { init_shogun(&print_message, &print_warning, &print_error); try { sg_version->print_version(); sg_io->set_loglevel(MSG_INFO); tester(); } catch(ShogunException & sh) { printf("%s",sh.get_exception_string()); } exit_shogun(); return 0; }
#include <shogun/labels/MulticlassLabels.h>
#include <shogun/io/streaming/StreamingAsciiFile.h>
#include <shogun/io/SGIO.h>
#include <shogun/features/streaming/StreamingDenseFeatures.h>
#include <shogun/features/DenseFeatures.h>
#include <shogun/multiclass/ecoc/ECOCStrategy.h>
#include <shogun/multiclass/ecoc/ECOCOVREncoder.h>
#include <shogun/multiclass/ecoc/ECOCHDDecoder.h>
#include <shogun/machine/LinearMulticlassMachine.h>
#include <shogun/classifier/svm/LibLinear.h>
#include <shogun/base/init.h>

#define EPSILON 1e-5

using namespace shogun;

/* ECOC multiclass demo: streams up to 1000 dense examples and their
 * labels from disk, trains a linear multiclass machine built from
 * LibLinear binary machines with a one-vs-rest ECOC encoder and a
 * Hamming-distance decoder, then prints the predictions made on the
 * training data. */
void test()
{
	// Prepare to read a file for the training data
	char fname_feats[]  = "../data/fm_train_real.dat";
	char fname_labels[] = "../data/label_train_multiclass.dat";
	CStreamingAsciiFile* ffeats_train  = new CStreamingAsciiFile(fname_feats);
	CStreamingAsciiFile* flabels_train = new CStreamingAsciiFile(fname_labels);
	SG_REF(ffeats_train);
	SG_REF(flabels_train);

	CStreamingDenseFeatures< float64_t >* stream_features =
		new CStreamingDenseFeatures< float64_t >(ffeats_train, false, 1024);

	// 'true': the label file carries one label per line
	CStreamingDenseFeatures< float64_t >* stream_labels =
		new CStreamingDenseFeatures< float64_t >(flabels_train, true, 1024);

	SG_REF(stream_features);
	SG_REF(stream_labels);

	stream_features->start_parser();

	// Read the values from the file and store them in features
	CDenseFeatures< float64_t >* features=
		(CDenseFeatures< float64_t >*)
			stream_features->get_streamed_features(1000);

	stream_features->end_parser();

	CMulticlassLabels* labels = new CMulticlassLabels(features->get_num_vectors());
	SG_REF(features);
	SG_REF(labels);

	// Read the labels from the file
	int32_t idx = 0;
	stream_labels->start_parser();
	while ( stream_labels->get_next_example() )
	{
		labels->set_int_label( idx++, (int32_t)stream_labels->get_label() );
		stream_labels->release_example();
	}
	stream_labels->end_parser();

	// Create liblinear svm classifier with L2-regularized L2-loss
	CLibLinear* svm = new CLibLinear(L2R_L2LOSS_SVC);
	SG_REF(svm);

	// Add some configuration to the svm
	svm->set_epsilon(EPSILON);
	svm->set_bias_enabled(true);

	// Create a multiclass svm classifier that consists of several of the previous one
	CLinearMulticlassMachine* mc_svm = new CLinearMulticlassMachine(
			new CECOCStrategy(new CECOCOVREncoder(), new CECOCHDDecoder()),
			(CDotFeatures*) features, svm, labels);
	SG_REF(mc_svm);

	// Train the multiclass machine using the data passed in the constructor
	mc_svm->train();

	// Classify the training examples and show the results
	CMulticlassLabels* output = CMulticlassLabels::obtain_from_generic(mc_svm->apply());

	SGVector< int32_t > out_labels = output->get_int_labels();
	SGVector< int32_t >::display_vector(out_labels.vector, out_labels.vlen);

	// Free resources
	SG_UNREF(mc_svm);
	SG_UNREF(svm);
	SG_UNREF(output);
	SG_UNREF(features);
	SG_UNREF(labels);
	SG_UNREF(ffeats_train);
	SG_UNREF(flabels_train);
	SG_UNREF(stream_features);
	SG_UNREF(stream_labels);
}

int main(int argc, char** argv)
{
	init_shogun_with_defaults();

	test();

	exit_shogun();
	return 0;
}
#include <shogun/labels/MulticlassLabels.h>
#include <shogun/io/streaming/StreamingAsciiFile.h>
#include <shogun/io/SGIO.h>
#include <shogun/features/streaming/StreamingDenseFeatures.h>
#include <shogun/features/DenseFeatures.h>
#include <shogun/multiclass/ecoc/ECOCStrategy.h>
#include <shogun/multiclass/ecoc/ECOCDiscriminantEncoder.h>
#include <shogun/multiclass/ecoc/ECOCHDDecoder.h>
#include <shogun/machine/LinearMulticlassMachine.h>
#include <shogun/classifier/svm/LibLinear.h>
#include <shogun/base/init.h>

#define EPSILON 1e-5

using namespace shogun;

/* ECOC multiclass demo with a data-driven code book: like the OVR
 * variant, but the discriminant encoder needs the features and labels
 * up front to derive its encoding before the machine is trained. */
void test()
{
	// Prepare to read a file for the training data
	char fname_feats[]  = "../data/fm_train_real.dat";
	char fname_labels[] = "../data/label_train_multiclass.dat";
	CStreamingAsciiFile* ffeats_train  = new CStreamingAsciiFile(fname_feats);
	CStreamingAsciiFile* flabels_train = new CStreamingAsciiFile(fname_labels);
	SG_REF(ffeats_train);
	SG_REF(flabels_train);

	CStreamingDenseFeatures< float64_t >* stream_features =
		new CStreamingDenseFeatures< float64_t >(ffeats_train, false, 1024);

	// 'true': the label file carries one label per line
	CStreamingDenseFeatures< float64_t >* stream_labels =
		new CStreamingDenseFeatures< float64_t >(flabels_train, true, 1024);

	SG_REF(stream_features);
	SG_REF(stream_labels);

	stream_features->start_parser();

	// Read the values from the file and store them in features
	CDenseFeatures< float64_t >* features=
		(CDenseFeatures< float64_t >*)
			stream_features->get_streamed_features(1000);

	stream_features->end_parser();

	CMulticlassLabels* labels = new CMulticlassLabels(features->get_num_vectors());
	SG_REF(features);
	SG_REF(labels);

	// Read the labels from the file
	int32_t idx = 0;
	stream_labels->start_parser();
	while ( stream_labels->get_next_example() )
	{
		labels->set_int_label( idx++, (int32_t)stream_labels->get_label() );
		stream_labels->release_example();
	}
	stream_labels->end_parser();

	// Create liblinear svm classifier with L2-regularized L2-loss
	CLibLinear* svm = new CLibLinear(L2R_L2LOSS_SVC);
	SG_REF(svm);

	// Add some configuration to the svm
	svm->set_epsilon(EPSILON);
	svm->set_bias_enabled(true);

	// The discriminant encoder derives its code book from the data, so
	// it must see the features and labels before training.
	CECOCDiscriminantEncoder *encoder = new CECOCDiscriminantEncoder();
	encoder->set_features(features);
	encoder->set_labels(labels);

	// Create a multiclass svm classifier that consists of several of the previous one
	CLinearMulticlassMachine* mc_svm = new CLinearMulticlassMachine(
			new CECOCStrategy(encoder, new CECOCHDDecoder()),
			(CDotFeatures*) features, svm, labels);
	SG_REF(mc_svm);

	// Train the multiclass machine using the data passed in the constructor
	mc_svm->train();

	// Classify the training examples and show the results
	CMulticlassLabels* output = CMulticlassLabels::obtain_from_generic(mc_svm->apply());

	SGVector< int32_t > out_labels = output->get_int_labels();
	SGVector< int32_t >::display_vector(out_labels.vector, out_labels.vlen);

	// Free resources
	SG_UNREF(mc_svm);
	SG_UNREF(svm);
	SG_UNREF(output);
	SG_UNREF(features);
	SG_UNREF(labels);
	SG_UNREF(ffeats_train);
	SG_UNREF(flabels_train);
	SG_UNREF(stream_features);
	SG_UNREF(stream_labels);
}

int main(int argc, char** argv)
{
	init_shogun_with_defaults();

	test();

	exit_shogun();
	return 0;
}
#include <shogun/labels/MulticlassLabels.h>
#include <shogun/io/streaming/StreamingAsciiFile.h>
#include <shogun/io/SGIO.h>
#include <shogun/features/streaming/StreamingDenseFeatures.h>
#include <shogun/features/DenseFeatures.h>
#include <shogun/multiclass/ecoc/ECOCStrategy.h>
#include <shogun/multiclass/ecoc/ECOCRandomDenseEncoder.h>
// NOTE(review): the sparse encoder header is included but only the
// dense encoder is used below.
#include <shogun/multiclass/ecoc/ECOCRandomSparseEncoder.h>
#include <shogun/multiclass/ecoc/ECOCHDDecoder.h>
#include <shogun/machine/LinearMulticlassMachine.h>
#include <shogun/classifier/svm/LibLinear.h>
#include <shogun/base/init.h>

#define EPSILON 1e-5

using namespace shogun;

/* ECOC multiclass demo with a randomly generated dense code book:
 * streams up to 1000 training examples, trains LibLinear-based binary
 * machines under a random-dense ECOC encoding with Hamming decoding,
 * and prints the predictions on the training data. */
void test()
{
	// Prepare to read a file for the training data
	char fname_feats[]  = "../data/fm_train_real.dat";
	char fname_labels[] = "../data/label_train_multiclass.dat";
	CStreamingAsciiFile* ffeats_train  = new CStreamingAsciiFile(fname_feats);
	CStreamingAsciiFile* flabels_train = new CStreamingAsciiFile(fname_labels);
	SG_REF(ffeats_train);
	SG_REF(flabels_train);

	CStreamingDenseFeatures< float64_t >* stream_features =
		new CStreamingDenseFeatures< float64_t >(ffeats_train, false, 1024);

	// 'true': the label file carries one label per line
	CStreamingDenseFeatures< float64_t >* stream_labels =
		new CStreamingDenseFeatures< float64_t >(flabels_train, true, 1024);

	SG_REF(stream_features);
	SG_REF(stream_labels);

	stream_features->start_parser();

	// Read the values from the file and store them in features
	CDenseFeatures< float64_t >* features=
		(CDenseFeatures< float64_t >*)
			stream_features->get_streamed_features(1000);

	stream_features->end_parser();

	CMulticlassLabels* labels = new CMulticlassLabels(features->get_num_vectors());
	SG_REF(features);
	SG_REF(labels);

	// Read the labels from the file
	int32_t idx = 0;
	stream_labels->start_parser();
	while ( stream_labels->get_next_example() )
	{
		labels->set_int_label( idx++, (int32_t)stream_labels->get_label() );
		stream_labels->release_example();
	}
	stream_labels->end_parser();

	// Create liblinear svm classifier with L2-regularized L2-loss
	CLibLinear* svm = new CLibLinear(L2R_L2LOSS_SVC);
	SG_REF(svm);

	// Add some configuration to the svm
	svm->set_epsilon(EPSILON);
	svm->set_bias_enabled(true);

	// Create a multiclass svm classifier that consists of several of the previous one
	CLinearMulticlassMachine* mc_svm = new CLinearMulticlassMachine(
			new CECOCStrategy(new CECOCRandomDenseEncoder(), new CECOCHDDecoder()),
			(CDotFeatures*) features, svm, labels);
	SG_REF(mc_svm);

	// Train the multiclass machine using the data passed in the constructor
	mc_svm->train();

	// Classify the training examples and show the results
	CMulticlassLabels* output = CMulticlassLabels::obtain_from_generic(mc_svm->apply());

	SGVector< int32_t > out_labels = output->get_int_labels();
	SGVector< int32_t >::display_vector(out_labels.vector, out_labels.vlen);

	// Free resources
	SG_UNREF(mc_svm);
	SG_UNREF(svm);
	SG_UNREF(output);
	SG_UNREF(features);
	SG_UNREF(labels);
	SG_UNREF(ffeats_train);
	SG_UNREF(flabels_train);
	SG_UNREF(stream_features);
	SG_UNREF(stream_labels);
}

int main(int argc, char** argv)
{
	init_shogun_with_defaults();

	// sg_io->set_loglevel(MSG_DEBUG);

	test();

	exit_shogun();
	return 0;
}
#include <algorithm> #include <shogun/labels/MulticlassLabels.h> #include <shogun/io/streaming/StreamingAsciiFile.h> #include <shogun/io/SGIO.h> #include <shogun/features/streaming/StreamingDenseFeatures.h> #include <shogun/features/DenseFeatures.h> #include <shogun/features/DenseSubsetFeatures.h> #include <shogun/base/init.h> #include <shogun/multiclass/tree/RelaxedTree.h> #include <shogun/multiclass/MulticlassLibLinear.h> #include <shogun/evaluation/MulticlassAccuracy.h> #include <shogun/kernel/GaussianKernel.h> #define EPSILON 1e-5 using namespace shogun; int main(int argc, char** argv) { int32_t num_vectors = 0; int32_t num_feats = 0; init_shogun_with_defaults(); const char*fname_train = "../data/7class_example4_train.dense"; CStreamingAsciiFile *train_file = new CStreamingAsciiFile(fname_train); SG_REF(train_file); CStreamingDenseFeatures<float64_t> *stream_features = new CStreamingDenseFeatures<float64_t>(train_file, true, 1024); SG_REF(stream_features); SGMatrix<float64_t> mat; SGVector<float64_t> labvec(1000); stream_features->start_parser(); SGVector< float64_t > vec; int32_t num_vec=0; while (stream_features->get_next_example()) { vec = stream_features->get_vector(); if (num_feats == 0) { num_feats = vec.vlen; mat = SGMatrix<float64_t>(num_feats, 1000); } std::copy(vec.vector, vec.vector+vec.vlen, mat.get_column_vector(num_vectors)); labvec[num_vectors] = stream_features->get_label(); num_vectors++; stream_features->release_example(); num_vec++; if (num_vec > 20000) break; } stream_features->end_parser(); mat.num_cols = num_vectors; labvec.vlen = num_vectors; CMulticlassLabels* labels = new CMulticlassLabels(labvec); SG_REF(labels); // Create features with the useful values from mat CDenseFeatures< float64_t >* features = new CDenseFeatures<float64_t>(mat); SG_REF(features); // Create RelaxedTree Machine CRelaxedTree *machine = new CRelaxedTree(); SG_REF(machine); machine->set_labels(labels); CKernel *kernel = new CGaussianKernel(); SG_REF(kernel); 
machine->set_kernel(kernel); CMulticlassLibLinear *svm = new CMulticlassLibLinear(); machine->set_machine_for_confusion_matrix(svm); machine->train(features); CMulticlassLabels* output = CMulticlassLabels::obtain_from_generic(machine->apply()); CMulticlassAccuracy *evaluator = new CMulticlassAccuracy(); SG_SPRINT("Accuracy = %.4f\n", evaluator->evaluate(output, labels)); // Free resources SG_UNREF(machine); SG_UNREF(output); SG_UNREF(features); SG_UNREF(labels); SG_UNREF(train_file); SG_UNREF(stream_features); SG_UNREF(evaluator); SG_UNREF(kernel); exit_shogun(); return 0; }
#include <algorithm> #include <shogun/labels/MulticlassLabels.h> #include <shogun/io/StreamingAsciiFile.h> #include <shogun/io/SGIO.h> #include <shogun/features/StreamingDenseFeatures.h> #include <shogun/features/DenseFeatures.h> #include <shogun/features/DenseSubsetFeatures.h> #include <shogun/base/init.h> #include <shogun/multiclass/ShareBoost.h> #define EPSILON 1e-5 using namespace shogun; int main(int argc, char** argv) { int32_t num_vectors = 0; int32_t num_feats = 0; init_shogun_with_defaults(); const char*fname_train = "../data/7class_example4_train.dense"; CStreamingAsciiFile *train_file = new CStreamingAsciiFile(fname_train); SG_REF(train_file); CStreamingDenseFeatures<float64_t> *stream_features = new CStreamingDenseFeatures<float64_t>(train_file, true, 1024); SG_REF(stream_features); SGMatrix<float64_t> mat; SGVector<float64_t> labvec(1000); stream_features->start_parser(); SGVector< float64_t > vec; while (stream_features->get_next_example()) { vec = stream_features->get_vector(); if (num_feats == 0) { num_feats = vec.vlen; mat = SGMatrix<float64_t>(num_feats, 1000); } std::copy(vec.vector, vec.vector+vec.vlen, mat.get_column_vector(num_vectors)); labvec[num_vectors] = stream_features->get_label(); num_vectors++; stream_features->release_example(); } stream_features->end_parser(); mat.num_cols = num_vectors; labvec.vlen = num_vectors; CMulticlassLabels* labels = new CMulticlassLabels(labvec); SG_REF(labels); // Create features with the useful values from mat CDenseFeatures< float64_t >* features = new CDenseFeatures<float64_t>(mat); SG_REF(features); SG_SPRINT("Performing ShareBoost on a %d-class problem\n", labels->get_num_classes()); // Create ShareBoost Machine CShareBoost *machine = new CShareBoost(features, labels, 10); SG_REF(machine); machine->train(); SGVector<int32_t> activeset = machine->get_activeset(); SG_SPRINT("%d out of %d features are selected:\n", activeset.vlen, mat.num_rows); for (int32_t i=0; i < activeset.vlen; ++i) 
SG_SPRINT("activeset[%02d] = %d\n", i, activeset[i]); CDenseSubsetFeatures<float64_t> *subset_fea = new CDenseSubsetFeatures<float64_t>(features, machine->get_activeset()); SG_REF(subset_fea); CMulticlassLabels* output = CMulticlassLabels::obtain_from_generic(machine->apply(subset_fea)); int32_t correct = 0; for (int32_t i=0; i < output->get_num_labels(); ++i) if (output->get_int_label(i) == labels->get_int_label(i)) correct++; SG_SPRINT("Accuracy = %.4f\n", float64_t(correct)/labels->get_num_labels()); // Free resources SG_UNREF(machine); SG_UNREF(output); SG_UNREF(subset_fea); SG_UNREF(features); SG_UNREF(labels); SG_UNREF(train_file); SG_UNREF(stream_features); exit_shogun(); return 0; }
#include <shogun/labels/MulticlassLabels.h>
#include <shogun/features/DenseFeatures.h>
#include <shogun/kernel/GaussianKernel.h>
#include <shogun/multiclass/MulticlassLibSVM.h>
#include <shogun/base/init.h>

using namespace shogun;

/* Forward shogun's messages to the given output stream unchanged. */
void print_message(FILE* target, const char* str)
{
	fprintf(target, "%s", str);
}

/* Multiclass LibSVM demo: train on three tiny 2-d vectors and verify
 * that batch apply() agrees with per-example apply_one(). */
int main(int argc, char** argv)
{
	init_shogun(&print_message);

	index_t n_vectors = 3;
	index_t n_features = 2;
	index_t n_classes = 2;

	// Training matrix filled column-major with 0..n_features*n_vectors-1;
	// shogun takes ownership of the matrix once it is handed over.
	SGMatrix<float64_t> data(n_features, n_vectors);
	SGVector<float64_t>::range_fill_vector(data.matrix, n_features*n_vectors);

	CDenseFeatures<float64_t>* feats = new CDenseFeatures<float64_t>(data);

	// Label vector cycling through the available classes.
	CMulticlassLabels* lab = new CMulticlassLabels(n_vectors);
	for (index_t v = 0; v < n_vectors; ++v)
		lab->set_label(v, v % n_classes);

	// Gaussian kernel with a 10MB cache and width 0.5.
	CGaussianKernel* kern = new CGaussianKernel(10, 0.5);
	kern->init(feats, feats);

	// Multiclass LibSVM with regularization constant C=10.
	CMulticlassLibSVM* classifier = new CMulticlassLibSVM(10, kern, lab);
	classifier->train();

	// Classify all training examples in one batch call.
	CMulticlassLabels* batch =
		CMulticlassLabels::obtain_from_generic(classifier->apply());
	SGVector<float64_t>::display_vector(batch->get_labels().vector,
			batch->get_num_labels(), "batch output");

	/* assert that batch apply and apply(index_t) give same result */
	for (index_t i = 0; i < batch->get_num_labels(); ++i)
	{
		float64_t label = classifier->apply_one(i);
		SG_SPRINT("single output[%d]=%f\n", i, label);
		ASSERT(batch->get_label(i) == label);
	}
	SG_UNREF(batch);

	// Releasing the machine also drops kernel and labels it holds.
	SG_UNREF(classifier);

	exit_shogun();
	return 0;
}
#include <shogun/labels/MulticlassLabels.h>
#include <shogun/io/streaming/StreamingAsciiFile.h>
#include <shogun/io/SGIO.h>
#include <shogun/features/streaming/StreamingDenseFeatures.h>
#include <shogun/features/DenseFeatures.h>
#include <shogun/multiclass/MulticlassOneVsOneStrategy.h>
#include <shogun/machine/LinearMulticlassMachine.h>
#include <shogun/classifier/svm/LibLinear.h>
#include <shogun/base/init.h>

// Convergence tolerance handed to the LibLinear solver below.
#define EPSILON 1e-5

using namespace shogun;

/* Reads dense training data and multiclass labels from ASCII files, trains a
 * one-vs-one multiclass machine built from LibLinear base learners, and
 * prints the predictions on the training set. */
void test()
{
	// Prepare to read a file for the training data
	char fname_feats[] = "../data/fm_train_real.dat";
	char fname_labels[] = "../data/label_train_multiclass.dat";
	CStreamingAsciiFile* ffeats_train = new CStreamingAsciiFile(fname_feats);
	CStreamingAsciiFile* flabels_train = new CStreamingAsciiFile(fname_labels);
	SG_REF(ffeats_train);
	SG_REF(flabels_train);
	// Feature stream is unlabelled (false); label stream is "labelled" (true)
	// so get_label() is valid on it. 1024 is the parser ring-buffer size.
	CStreamingDenseFeatures< float64_t >* stream_features =
			new CStreamingDenseFeatures< float64_t >(ffeats_train, false, 1024);
	CStreamingDenseFeatures< float64_t >* stream_labels =
			new CStreamingDenseFeatures< float64_t >(flabels_train, true, 1024);
	SG_REF(stream_features);
	SG_REF(stream_labels);

	// Parser must be started before streaming and ended afterwards.
	stream_features->start_parser();

	// Read the values from the file and store them in features
	// (at most 1000 vectors are materialized into an in-memory matrix)
	CDenseFeatures< float64_t >* features =
			(CDenseFeatures< float64_t >*)
			stream_features->get_streamed_features(1000);
	features->get_feature_matrix().display_matrix("FM");
	SG_REF(features);
	stream_features->end_parser();

	CMulticlassLabels* labels =
			new CMulticlassLabels(features->get_num_vectors());
	SG_REF(labels);

	// Read the labels from the file, one example at a time
	int32_t idx = 0;
	stream_labels->start_parser();
	while ( stream_labels->get_next_example() )
	{
		labels->set_int_label( idx++, (int32_t)stream_labels->get_label() );
		stream_labels->release_example();
	}
	stream_labels->end_parser();

	// Create liblinear svm classifier with L2-regularized L2-loss
	CLibLinear* svm = new CLibLinear(L2R_L2LOSS_SVC);
	SG_REF(svm);

	// Add some configuration to the svm
	svm->set_epsilon(EPSILON);
	svm->set_bias_enabled(true);

	// Create a multiclass svm classifier that consists of several of the
	// previous one (one binary machine per class pair, one-vs-one)
	CLinearMulticlassMachine* mc_svm = new CLinearMulticlassMachine(
			new CMulticlassOneVsOneStrategy(),
			(CDotFeatures*) features, svm, labels);
	SG_REF(mc_svm);

	// Train the multiclass machine using the data passed in the constructor
	mc_svm->train();

	// Classify the training examples and show the results
	CMulticlassLabels* output =
			CMulticlassLabels::obtain_from_generic(mc_svm->apply());
	SGVector< int32_t > out_labels = output->get_int_labels();
	SGVector<int32_t>::display_vector(out_labels.vector, out_labels.vlen);

	// Free resources (drop the local references taken above)
	SG_UNREF(mc_svm);
	SG_UNREF(svm);
	SG_UNREF(output);
	SG_UNREF(features);
	SG_UNREF(labels);
	SG_UNREF(ffeats_train);
	SG_UNREF(flabels_train);
	SG_UNREF(stream_features);
	SG_UNREF(stream_labels);
}

int main(int argc, char** argv)
{
	init_shogun_with_defaults();

	// Verbose logging so the streaming/parsing steps are visible.
	sg_io->set_loglevel(MSG_DEBUG);

	test();

	exit_shogun();
	return 0;
}
#include <shogun/features/Labels.h>
#include <shogun/features/DenseFeatures.h>
#include <shogun/distance/EuclideanDistance.h>
#include <shogun/classifier/NearestCentroid.h>
#include <shogun/base/init.h>

using namespace shogun;

// Route shogun log messages to the given stream.
void print_message(FILE* target, const char* str)
{
	fprintf(target, "%s", str);
}

/* Trains a nearest-centroid classifier on a tiny toy problem and prints
 * its predictions on the training examples. */
int main(){
	init_shogun(&print_message);

	const index_t n_vectors=7;
	const index_t n_features=2;
	const index_t n_classes=2;

	/* toy features: consecutive numbers, column-major */
	SGMatrix<float64_t> data(n_features, n_vectors);
	CMath::range_fill_vector(data.matrix, n_features*n_vectors);

	/* dense features take ownership of the matrix */
	CDenseFeatures<float64_t>* feats=new CDenseFeatures<float64_t>(data);
	CMath::display_matrix(data.matrix, n_features, n_vectors);

	/* alternating 0/1 class labels */
	CLabels* labs=new CLabels(n_vectors);
	for (index_t v=0; v<n_vectors; ++v)
		labs->set_label(v, v%n_classes);

	/* euclidean distance between all pairs of training points */
	CEuclideanDistance* dist = new CEuclideanDistance(feats, feats);

	/* build and train the classifier */
	CNearestCentroid* centroid_clf = new CNearestCentroid(dist, labs);
	centroid_clf->train();

	/* classify the training examples */
	CLabels* predictions=centroid_clf->apply();
	CMath::display_vector(predictions->get_labels().vector,
		predictions->get_num_labels(), "batch output");
	SG_UNREF(predictions);

	/* releasing the classifier releases distance/labels it holds */
	SG_UNREF(centroid_clf);

	exit_shogun();
	return 0;
}
#include <shogun/base/init.h>
#include <shogun/features/Labels.h>
#include <shogun/features/DenseFeatures.h>
#include <shogun/mathematics/Math.h>
#include <shogun/classifier/svm/NewtonSVM.h>

using namespace shogun;

// Forward shogun's log output to the given stream.
void print_message(FILE* target, const char* str)
{
	fprintf(target, "%s", str);
}

/* Demonstrates NewtonSVM on two tiny dense problems; the second test reuses
 * the same feature object with a new, larger matrix. */
int main(int argc,char *argv[])
{
	// initialising shogun; without message functions shogun won't be able to print
	init_shogun(&print_message,&print_message,&print_message);

	// x_n: number of data points, x_d: dimensionality of the data
	int32_t x_n=4,x_d=2;

	SGMatrix<float64_t> fmatrix(x_d,x_n);
	SG_SPRINT("\nTEST 1:\n\n");

	/* feature matrix filled with 1..x_n*x_d (column-major) */
	for (int i=0; i<x_n*x_d; i++)
		fmatrix.matrix[i] = i+1;
	SG_SPRINT("FEATURE MATRIX :\n");
	CMath::display_matrix(fmatrix.matrix,x_d,x_n);

	CDenseFeatures<float64_t>* features = new CDenseFeatures<float64_t>(fmatrix);
	SG_REF(features);

	/* two-class labels (+1/-1) */
	CLabels* labels=new CLabels(x_n);
	labels->set_label(0,1);
	labels->set_label(1,-1);
	labels->set_label(2,1);
	labels->set_label(3,1);
	SG_REF(labels);

	/* train NewtonSVM with regularization lambda, at most iter iterations */
	float64_t lambda=1.0;
	int32_t iter=20;
	CNewtonSVM *nsvm = new CNewtonSVM(lambda,features,labels,iter);
	SG_REF(nsvm);
	nsvm->train();
	SG_UNREF(labels);
	SG_UNREF(nsvm);

	SG_SPRINT("TEST 2:\n\n");
	x_n=5;
	x_d=3;
	SGMatrix<float64_t> fmatrix2(x_d,x_n);
	for (int i=0; i<x_n*x_d; i++)
		fmatrix2.matrix[i] = i+1;
	SG_SPRINT("FEATURE MATRIX :\n");
	CMath::display_matrix(fmatrix2.matrix,x_d,x_n);

	/* Reuse the existing feature object with the new matrix. NOTE: the
	 * original example called SG_REF(features) a second time here, which
	 * leaked one reference — only a single SG_UNREF(features) follows at
	 * the end, so the object was never freed. The duplicate ref is removed. */
	features->set_feature_matrix(fmatrix2);

	/* fresh two-class labels for the 5-point problem */
	CLabels* labels2=new CLabels(x_n);
	labels2->set_label(0,1);
	labels2->set_label(1,-1);
	labels2->set_label(2,1);
	labels2->set_label(3,1);
	labels2->set_label(4,-1);
	SG_REF(labels2);

	/* second NewtonSVM run on the reloaded features */
	lambda=1.0;
	iter=20;
	CNewtonSVM *nsvm2 = new CNewtonSVM(lambda,features,labels2,iter);
	SG_REF(nsvm2);
	nsvm2->train();
	SG_UNREF(labels2);
	SG_UNREF(nsvm2);

	SG_UNREF(features);

	exit_shogun();
	return 0;
}
/*
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 3 of the License, or
 * (at your option) any later version.
 *
 * Written (W) 2012 Fernando José Iglesias García
 * Copyright (C) 2012 Fernando José Iglesias García
 */

#include <shogun/base/init.h>
#include <shogun/labels/MulticlassLabels.h>
#include <shogun/multiclass/QDA.h>
#include <shogun/features/DenseFeatures.h>
#include <shogun/io/SGIO.h>
#include <shogun/lib/common.h>
#include <shogun/mathematics/Math.h>

using namespace shogun;

#define NUM  100
#define DIMS 2
#define DIST 0.5

/* Fill lab/feat with two uniform blobs: the first half of the points get
 * label 0 and are shifted by +DIST, the second half get label 1 and are
 * shifted by -DIST. */
void gen_rand_data(SGVector< float64_t > lab, SGMatrix< float64_t > feat)
{
	for (int32_t i = 0; i < NUM; i++)
	{
		const bool first_class = i < NUM/2;
		lab[i] = first_class ? 0.0 : 1.0;
		const float64_t shift = first_class ? DIST : -DIST;
		for (int32_t j = 0; j < DIMS; j++)
			feat[i*DIMS + j] = CMath::random(0.0, 1.0) + shift;
	}
}

/* Trains a QDA classifier on the random two-blob data and classifies the
 * training set. */
void test()
{
	SGVector< float64_t > lab(NUM);
	SGMatrix< float64_t > feat(DIMS, NUM);
	gen_rand_data(lab, feat);

	/* wrap the generated data as labels/features */
	CMulticlassLabels* labels = new CMulticlassLabels(lab);
	CDenseFeatures< float64_t >* features =
		new CDenseFeatures< float64_t >(feat);

	/* build and train the classifier */
	CQDA* qda = new CQDA(features, labels);
	SG_REF(qda);
	qda->train();

	/* classify the training examples */
	CMulticlassLabels* out_labels =
		CMulticlassLabels::obtain_from_generic(qda->apply());
	SG_REF(out_labels);

	/* release local references */
	SG_UNREF(out_labels);
	SG_UNREF(qda);
}

int main(int argc, char ** argv)
{
	init_shogun_with_defaults();
	test();
	exit_shogun();
	return 0;
}
/*
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 3 of the License, or
 * (at your option) any later version.
 *
 * Written (W) 2011 Heiko Strathmann
 * Copyright (C) 2011 Berlin Institute of Technology and Max-Planck-Society
 */

#include <shogun/base/init.h>
#include <shogun/evaluation/CrossValidation.h>
#include <shogun/evaluation/ContingencyTableEvaluation.h>
#include <shogun/evaluation/StratifiedCrossValidationSplitting.h>
#include <shogun/modelselection/GridSearchModelSelection.h>
#include <shogun/modelselection/ModelSelectionParameters.h>
#include <shogun/modelselection/ParameterCombination.h>
#include <shogun/labels/MulticlassLabels.h>
#include <shogun/features/DenseFeatures.h>
#include <shogun/clustering/KMeans.h>
#include <shogun/distance/EuclideanDistance.h>
#include <shogun/distance/MinkowskiMetric.h>

using namespace shogun;

// Route shogun log messages to the given stream.
void print_message(FILE* target, const char* str)
{
	fprintf(target, "%s", str);
}

/* Generates gaussian clusters around random centers, runs KMeans on them,
 * and prints the learned cluster assignments and centers next to the true
 * centers. */
int main(int argc, char **argv)
{
	init_shogun(&print_message, &print_message, &print_message);

	int32_t num_clusters=4;
	int32_t num_features=11;
	int32_t dim_features=3;
	int32_t num_vectors_per_cluster=5;
	float64_t cluster_std_dev=2.0;

	/* build random cluster centers: dim_features x num_clusters matrix with
	 * entries uniform in [-10, 10] */
	SGMatrix<float64_t> cluster_centers(dim_features, num_clusters);
	SGVector<float64_t>::random_vector(cluster_centers.matrix,
			dim_features*num_clusters, -10.0, 10.0);
	SGMatrix<float64_t>::display_matrix(cluster_centers.matrix,
			cluster_centers.num_rows, cluster_centers.num_cols,
			"cluster centers");

	/* create data around clusters: for cluster i, column k, dimension j,
	 * draw a normal sample around the corresponding center entry; idx is
	 * the flat (column-major) index of element (j, i*per_cluster + k) */
	SGMatrix<float64_t> data(dim_features,
			num_clusters*num_vectors_per_cluster);
	for (index_t i=0; i<num_clusters; ++i)
	{
		for (index_t j=0; j<dim_features; ++j)
		{
			for (index_t k=0; k<num_vectors_per_cluster; ++k)
			{
				index_t idx=i*dim_features*num_vectors_per_cluster;
				idx+=j;
				idx+=k*dim_features;
				float64_t entry=cluster_centers.matrix[i*dim_features+j];
				data.matrix[idx]=CMath::normal_random(entry, cluster_std_dev);
			}
		}
	}

	/* create features, SG_REF to avoid deletion */
	CDenseFeatures<float64_t>* features=new CDenseFeatures<float64_t> ();
	features->set_feature_matrix(data);
	SG_REF(features);

	/* create labels for cluster centers
	 * NOTE(review): labels is sized with num_features (11), not
	 * num_clusters, and is never used for training below — it is only
	 * created and released. Looks like leftover/demo code; confirm before
	 * relying on it. */
	CMulticlassLabels* labels=new CMulticlassLabels(num_features);
	for (index_t i=0; i<num_features; ++i)
		labels->set_label(i, i%2==0 ? 0 : 1);

	/* create distance (euclidean, training data on both sides) */
	CEuclideanDistance* distance=new CEuclideanDistance(features, features);

	/* create distance machine and train it */
	CKMeans* clustering=new CKMeans(num_clusters, distance);
	clustering->train(features);

	/* build clusters: apply() yields one cluster index per vector */
	CMulticlassLabels* result=
			CMulticlassLabels::obtain_from_generic(clustering->apply());
	for (index_t i=0; i<result->get_num_labels(); ++i)
		SG_SPRINT("cluster index of vector %i: %f\n", i, result->get_label(i));

	/* print learned cluster centers (stored as the distance's lhs features)
	 * next to the true centers for visual comparison */
	CDenseFeatures<float64_t>* centers=
			(CDenseFeatures<float64_t>*)distance->get_lhs();
	SGMatrix<float64_t> centers_matrix=centers->get_feature_matrix();
	SGMatrix<float64_t>::display_matrix(centers_matrix.matrix,
			centers_matrix.num_rows, centers_matrix.num_cols,
			"learned centers");
	SGMatrix<float64_t>::display_matrix(cluster_centers.matrix,
			cluster_centers.num_rows, cluster_centers.num_cols,
			"real centers");

	/* clean up */
	SG_UNREF(result);
	SG_UNREF(centers);
	SG_UNREF(clustering);
	SG_UNREF(labels);
	SG_UNREF(features);

	exit_shogun();
	return 0;
}
/*
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 3 of the License, or
 * (at your option) any later version.
 *
 * Written (W) 2011 Sergey Lisitsyn
 * Copyright (C) 2011 Berlin Institute of Technology and Max-Planck-Society
 */

#include <shogun/base/init.h>
#include <shogun/features/DenseFeatures.h>
#include <shogun/converter/DiffusionMaps.h>
#include <shogun/mathematics/Math.h>

using namespace shogun;

/* Embeds a toy 3D sine-wave dataset into 2D using Diffusion Maps. */
int main(int argc, char** argv)
{
	init_shogun();

	int N = 100;  /* number of vectors */
	int dim = 3;  /* input dimensionality */

	/* Let SGMatrix allocate and own its buffer (same idiom as the SPE
	 * example in this file). The original code allocated with new double[]
	 * and handed the raw pointer to SGMatrix, which releases it with
	 * SG_FREE()/free() — a mismatched allocator pair (undefined behavior). */
	SGMatrix<float64_t> matrix(dim, N);
	for (int i=0; i<N*dim; i++)
		matrix[i] = CMath::sin((i/float64_t(N*dim))*3.14);

	CDenseFeatures<float64_t>* features = new CDenseFeatures<float64_t>(matrix);
	SG_REF(features);

	/* configure the converter: 2D target, t=10 diffusion steps, 4 threads */
	CDiffusionMaps* dmaps = new CDiffusionMaps();
	dmaps->set_target_dim(2);
	dmaps->set_t(10);
	dmaps->parallel->set_num_threads(4);

	/* compute the embedding */
	CDenseFeatures<float64_t>* embedding = dmaps->embed(features);

	SG_UNREF(embedding);
	SG_UNREF(dmaps);
	SG_UNREF(features);
	exit_shogun();
	return 0;
}
/*
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 3 of the License, or
 * (at your option) any later version.
 *
 * Written (W) 2011 Sergey Lisitsyn
 * Copyright (C) 2011 Berlin Institute of Technology and Max-Planck-Society
 */

#include <shogun/base/init.h>
#include <shogun/features/DenseFeatures.h>
#include <shogun/converter/HessianLocallyLinearEmbedding.h>
#include <shogun/mathematics/Math.h>

using namespace shogun;

/* Embeds a toy 3D sine-wave dataset into 2D using Hessian LLE. */
int main(int argc, char** argv)
{
	init_shogun();

	int N = 100;  /* number of vectors */
	int dim = 3;  /* input dimensionality */

	/* Let SGMatrix allocate and own its buffer (same idiom as the SPE
	 * example in this file). The original code allocated with new double[]
	 * and handed the raw pointer to SGMatrix, which releases it with
	 * SG_FREE()/free() — a mismatched allocator pair (undefined behavior). */
	SGMatrix<float64_t> matrix(dim, N);
	for (int i=0; i<N*dim; i++)
		matrix[i] = CMath::sin((i/float64_t(N*dim))*3.14);

	CDenseFeatures<float64_t>* features = new CDenseFeatures<float64_t>(matrix);
	SG_REF(features);

	/* configure the converter: 2D target, k=8 neighbors, 4 threads */
	CHessianLocallyLinearEmbedding* hlle = new CHessianLocallyLinearEmbedding();
	hlle->set_target_dim(2);
	hlle->set_k(8);
	hlle->parallel->set_num_threads(4);

	/* compute the embedding */
	CDenseFeatures<float64_t>* embedding = hlle->embed(features);

	SG_UNREF(embedding);
	SG_UNREF(hlle);
	SG_UNREF(features);
	exit_shogun();
	return 0;
}
/*
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 3 of the License, or
 * (at your option) any later version.
 *
 * Written (W) 2011 Sergey Lisitsyn
 * Copyright (C) 2011 Berlin Institute of Technology and Max-Planck-Society
 */

#include <shogun/base/init.h>
#include <shogun/features/DenseFeatures.h>
#include <shogun/converter/Isomap.h>
#include <shogun/mathematics/Math.h>

using namespace shogun;

/* Embeds a toy 3D sine-wave dataset into 2D using (landmark) Isomap. */
int main(int argc, char** argv)
{
	init_shogun();

	int N = 100;  /* number of vectors */
	int dim = 3;  /* input dimensionality */

	/* Let SGMatrix allocate and own its buffer (same idiom as the SPE
	 * example in this file). The original code allocated with new double[]
	 * and handed the raw pointer to SGMatrix, which releases it with
	 * SG_FREE()/free() — a mismatched allocator pair (undefined behavior). */
	SGMatrix<float64_t> matrix(dim, N);
	for (int i=0; i<N*dim; i++)
		matrix[i] = CMath::sin((i/float64_t(N*dim))*3.14);

	CDenseFeatures<float64_t>* features = new CDenseFeatures<float64_t>(matrix);
	SG_REF(features);

	/* configure the converter: 2D target, landmark variant, k=4, 4 threads */
	CIsomap* isomap = new CIsomap();
	isomap->set_target_dim(2);
	isomap->set_landmark(true);
	isomap->set_k(4);
	isomap->parallel->set_num_threads(4);

	/* compute the embedding */
	CDenseFeatures<float64_t>* embedding = isomap->embed(features);

	SG_UNREF(embedding);
	SG_UNREF(isomap);
	SG_UNREF(features);
	exit_shogun();
	return 0;
}
/*
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 3 of the License, or
 * (at your option) any later version.
 *
 * Written (W) 2011 Sergey Lisitsyn
 * Copyright (C) 2011 Berlin Institute of Technology and Max-Planck-Society
 */

#include <shogun/base/init.h>
#include <shogun/features/DenseFeatures.h>
#include <shogun/converter/KernelLocallyLinearEmbedding.h>
#include <shogun/kernel/LinearKernel.h>
#include <shogun/mathematics/Math.h>

using namespace shogun;

/* Embeds a toy 3D sine-wave dataset into 2D using kernel LLE with a
 * linear kernel. */
int main(int argc, char** argv)
{
	init_shogun();

	int N = 100;  /* number of vectors */
	int dim = 3;  /* input dimensionality */

	/* Let SGMatrix allocate and own its buffer (same idiom as the SPE
	 * example in this file). The original code allocated with new double[]
	 * and handed the raw pointer to SGMatrix, which releases it with
	 * SG_FREE()/free() — a mismatched allocator pair (undefined behavior). */
	SGMatrix<float64_t> matrix(dim, N);
	for (int i=0; i<N*dim; i++)
		matrix[i] = CMath::sin((i/float64_t(N*dim))*3.14);

	CDenseFeatures<float64_t>* features = new CDenseFeatures<float64_t>(matrix);
	SG_REF(features);

	/* configure the converter: linear kernel, 2D target, k=4, 4 threads */
	CKernelLocallyLinearEmbedding* klle = new CKernelLocallyLinearEmbedding();
	CKernel* kernel = new CLinearKernel();
	klle->set_target_dim(2);
	klle->set_k(4);
	klle->set_kernel(kernel);
	klle->parallel->set_num_threads(4);

	/* compute the embedding */
	CDenseFeatures<float64_t>* embedding = klle->embed(features);

	SG_UNREF(embedding);
	SG_UNREF(klle);
	SG_UNREF(features);
	exit_shogun();
	return 0;
}
/*
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 3 of the License, or
 * (at your option) any later version.
 *
 * Written (W) 2011 Sergey Lisitsyn
 * Copyright (C) 2011 Berlin Institute of Technology and Max-Planck-Society
 */

#include <shogun/base/init.h>
#include <shogun/features/DenseFeatures.h>
#include <shogun/converter/KernelLocalTangentSpaceAlignment.h>
#include <shogun/kernel/LinearKernel.h>
#include <shogun/mathematics/Math.h>

using namespace shogun;

/* Embeds a toy 3D sine-wave dataset into 2D using kernel LTSA with a
 * linear kernel. */
int main(int argc, char** argv)
{
	init_shogun();

	int N = 100;  /* number of vectors */
	int dim = 3;  /* input dimensionality */

	/* Let SGMatrix allocate and own its buffer (same idiom as the SPE
	 * example in this file). The original code allocated with new double[]
	 * and handed the raw pointer to SGMatrix, which releases it with
	 * SG_FREE()/free() — a mismatched allocator pair (undefined behavior). */
	SGMatrix<float64_t> matrix(dim, N);
	for (int i=0; i<N*dim; i++)
		matrix[i] = CMath::sin((i/float64_t(N*dim))*3.14);

	CDenseFeatures<float64_t>* features = new CDenseFeatures<float64_t>(matrix);
	SG_REF(features);

	/* configure the converter: linear kernel, 2D target, k=4, 4 threads */
	CKernelLocalTangentSpaceAlignment* kltsa =
		new CKernelLocalTangentSpaceAlignment();
	CKernel* kernel = new CLinearKernel();
	kltsa->set_target_dim(2);
	kltsa->set_k(4);
	kltsa->set_kernel(kernel);
	kltsa->parallel->set_num_threads(4);

	/* compute the embedding */
	CDenseFeatures<float64_t>* embedding = kltsa->embed(features);

	SG_UNREF(embedding);
	SG_UNREF(kltsa);
	SG_UNREF(features);
	exit_shogun();
	return 0;
}
/*
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 3 of the License, or
 * (at your option) any later version.
 *
 * Written (W) 2011 Sergey Lisitsyn
 * Copyright (C) 2011 Berlin Institute of Technology and Max-Planck-Society
 */

#include <shogun/base/init.h>
#include <shogun/features/DenseFeatures.h>
#include <shogun/converter/LaplacianEigenmaps.h>
#include <shogun/mathematics/Math.h>

using namespace shogun;

/* Embeds a toy 3D sine-wave dataset into 2D using Laplacian Eigenmaps. */
int main(int argc, char** argv)
{
	init_shogun();

	int N = 100;  /* number of vectors */
	int dim = 3;  /* input dimensionality */

	/* Let SGMatrix allocate and own its buffer (same idiom as the SPE
	 * example in this file). The original code allocated with new double[]
	 * and handed the raw pointer to SGMatrix, which releases it with
	 * SG_FREE()/free() — a mismatched allocator pair (undefined behavior). */
	SGMatrix<float64_t> matrix(dim, N);
	for (int i=0; i<N*dim; i++)
		matrix[i] = CMath::sin((i/float64_t(N*dim))*3.14);

	CDenseFeatures<float64_t>* features = new CDenseFeatures<float64_t>(matrix);
	SG_REF(features);

	/* configure the converter: 2D target, k=10 neighbors, 4 threads */
	CLaplacianEigenmaps* lem = new CLaplacianEigenmaps();
	lem->set_target_dim(2);
	lem->set_k(10);
	lem->parallel->set_num_threads(4);

	/* compute the embedding */
	CDenseFeatures<float64_t>* embedding = lem->embed(features);

	SG_UNREF(embedding);
	SG_UNREF(lem);
	SG_UNREF(features);
	exit_shogun();
	return 0;
}
/*
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 3 of the License, or
 * (at your option) any later version.
 *
 * Written (W) 2011 Sergey Lisitsyn
 * Copyright (C) 2011 Berlin Institute of Technology and Max-Planck-Society
 */

#include <shogun/base/init.h>
#include <shogun/features/DenseFeatures.h>
#include <shogun/converter/LinearLocalTangentSpaceAlignment.h>
#include <shogun/mathematics/Math.h>

using namespace shogun;

/* Embeds a toy 3D sine-wave dataset into 2D using linear LTSA. */
int main(int argc, char** argv)
{
	init_shogun();

	int N = 100;  /* number of vectors */
	int dim = 3;  /* input dimensionality */

	/* Let SGMatrix allocate and own its buffer (same idiom as the SPE
	 * example in this file). The original code allocated with new double[]
	 * and handed the raw pointer to SGMatrix, which releases it with
	 * SG_FREE()/free() — a mismatched allocator pair (undefined behavior). */
	SGMatrix<float64_t> matrix(dim, N);
	for (int i=0; i<N*dim; i++)
		matrix[i] = CMath::sin((i/float64_t(N*dim))*3.14);

	CDenseFeatures<float64_t>* features = new CDenseFeatures<float64_t>(matrix);
	SG_REF(features);

	/* configure the converter: 2D target, k=4 neighbors, 4 threads */
	CLinearLocalTangentSpaceAlignment* lltsa =
		new CLinearLocalTangentSpaceAlignment();
	lltsa->set_target_dim(2);
	lltsa->set_k(4);
	lltsa->parallel->set_num_threads(4);

	/* compute the embedding */
	CDenseFeatures<float64_t>* embedding = lltsa->embed(features);

	SG_UNREF(embedding);
	SG_UNREF(lltsa);
	SG_UNREF(features);
	exit_shogun();
	return 0;
}
/*
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 3 of the License, or
 * (at your option) any later version.
 *
 * Written (W) 2011 Sergey Lisitsyn
 * Copyright (C) 2011 Berlin Institute of Technology and Max-Planck-Society
 */

#include <shogun/base/init.h>
#include <shogun/features/DenseFeatures.h>
#include <shogun/converter/LocalityPreservingProjections.h>
#include <shogun/mathematics/Math.h>

using namespace shogun;

/* Embeds a toy 3D sine-wave dataset into 2D using Locality Preserving
 * Projections. */
int main(int argc, char** argv)
{
	init_shogun_with_defaults();

	int N = 100;  /* number of vectors */
	int dim = 3;  /* input dimensionality */

	/* Let SGMatrix allocate and own its buffer (same idiom as the SPE
	 * example in this file). The original code allocated with new double[]
	 * and handed the raw pointer to SGMatrix, which releases it with
	 * SG_FREE()/free() — a mismatched allocator pair (undefined behavior). */
	SGMatrix<float64_t> matrix(dim, N);
	for (int i=0; i<N*dim; i++)
		matrix[i] = CMath::sin((i/float64_t(N*dim))*3.14);

	CDenseFeatures<float64_t>* features = new CDenseFeatures<float64_t>(matrix);
	SG_REF(features);

	/* configure the converter: 2D target, k=10 neighbors, 4 threads */
	CLocalityPreservingProjections* lpp = new CLocalityPreservingProjections();
	lpp->set_target_dim(2);
	lpp->set_k(10);
	lpp->parallel->set_num_threads(4);

	/* compute the embedding */
	CDenseFeatures<float64_t>* embedding = lpp->embed(features);

	SG_UNREF(embedding);
	SG_UNREF(lpp);
	SG_UNREF(features);
	exit_shogun();
	return 0;
}
/*
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 3 of the License, or
 * (at your option) any later version.
 *
 * Written (W) 2011 Sergey Lisitsyn
 * Copyright (C) 2011 Berlin Institute of Technology and Max-Planck-Society
 */

#include <shogun/base/init.h>
#include <shogun/features/DenseFeatures.h>
#include <shogun/converter/LocallyLinearEmbedding.h>
#include <shogun/mathematics/Math.h>

using namespace shogun;

/* Embeds a toy 3D sine-wave dataset into 2D using Locally Linear
 * Embedding. */
int main(int argc, char** argv)
{
	init_shogun_with_defaults();

	int N = 100;  /* number of vectors */
	int dim = 3;  /* input dimensionality */

	/* Let SGMatrix allocate and own its buffer (same idiom as the SPE
	 * example in this file). The original code allocated with new double[]
	 * and handed the raw pointer to SGMatrix, which releases it with
	 * SG_FREE()/free() — a mismatched allocator pair (undefined behavior). */
	SGMatrix<float64_t> matrix(dim, N);
	for (int i=0; i<N*dim; i++)
		matrix[i] = CMath::sin((i/float64_t(N*dim))*3.14);

	CDenseFeatures<float64_t>* features = new CDenseFeatures<float64_t>(matrix);
	SG_REF(features);

	/* configure the converter: 2D target, k=4 neighbors, 4 threads */
	CLocallyLinearEmbedding* lle = new CLocallyLinearEmbedding();
	lle->set_target_dim(2);
	lle->set_k(4);
	lle->parallel->set_num_threads(4);

	/* compute the embedding */
	CDenseFeatures<float64_t>* embedding = lle->embed(features);

	SG_UNREF(embedding);
	SG_UNREF(lle);
	SG_UNREF(features);
	exit_shogun();
	return 0;
}
/*
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 3 of the License, or
 * (at your option) any later version.
 *
 * Written (W) 2011 Sergey Lisitsyn
 * Copyright (C) 2011 Berlin Institute of Technology and Max-Planck-Society
 */

#include <shogun/base/init.h>
#include <shogun/features/DenseFeatures.h>
#include <shogun/converter/LocalTangentSpaceAlignment.h>
#include <shogun/mathematics/Math.h>

using namespace shogun;

/* Embeds a toy 3D sine-wave dataset into 2D using Local Tangent Space
 * Alignment. */
int main(int argc, char** argv)
{
	init_shogun();

	int N = 100;  /* number of vectors */
	int dim = 3;  /* input dimensionality */

	/* Let SGMatrix allocate and own its buffer (same idiom as the SPE
	 * example in this file). The original code allocated with new double[]
	 * and handed the raw pointer to SGMatrix, which releases it with
	 * SG_FREE()/free() — a mismatched allocator pair (undefined behavior). */
	SGMatrix<float64_t> matrix(dim, N);
	for (int i=0; i<N*dim; i++)
		matrix[i] = CMath::sin((i/float64_t(N*dim))*3.14);

	CDenseFeatures<float64_t>* features = new CDenseFeatures<float64_t>(matrix);
	SG_REF(features);

	/* configure the converter: 2D target, k=4 neighbors, 4 threads */
	CLocalTangentSpaceAlignment* ltsa = new CLocalTangentSpaceAlignment();
	ltsa->set_target_dim(2);
	ltsa->set_k(4);
	ltsa->parallel->set_num_threads(4);

	/* compute the embedding */
	CDenseFeatures<float64_t>* embedding = ltsa->embed(features);

	SG_UNREF(embedding);
	SG_UNREF(ltsa);
	SG_UNREF(features);
	exit_shogun();
	return 0;
}
/*
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 3 of the License, or
 * (at your option) any later version.
 *
 * Written (W) 2011 Sergey Lisitsyn
 * Copyright (C) 2011 Berlin Institute of Technology and Max-Planck-Society
 */

#include <shogun/base/init.h>
#include <shogun/features/DenseFeatures.h>
#include <shogun/converter/MultidimensionalScaling.h>
#include <shogun/mathematics/Math.h>

using namespace shogun;

/* Embeds a toy 3D sine-wave dataset into 2D using landmark
 * Multidimensional Scaling. */
int main(int argc, char** argv)
{
	init_shogun();

	int N = 100;  /* number of vectors */
	int dim = 3;  /* input dimensionality */

	/* Let SGMatrix allocate and own its buffer (same idiom as the SPE
	 * example in this file). The original code allocated with new double[]
	 * and handed the raw pointer to SGMatrix, which releases it with
	 * SG_FREE()/free() — a mismatched allocator pair (undefined behavior). */
	SGMatrix<float64_t> matrix(dim, N);
	for (int i=0; i<N*dim; i++)
		matrix[i] = CMath::sin((i/float64_t(N*dim))*3.14);

	CDenseFeatures<float64_t>* features = new CDenseFeatures<float64_t>(matrix);
	SG_REF(features);

	/* configure the converter: 2D target, landmark variant, 4 threads */
	CMultidimensionalScaling* mds = new CMultidimensionalScaling();
	mds->set_target_dim(2);
	mds->set_landmark(true);
	mds->parallel->set_num_threads(4);

	/* compute the embedding */
	CDenseFeatures<float64_t>* embedding = mds->embed(features);

	SG_UNREF(embedding);
	SG_UNREF(mds);
	SG_UNREF(features);
	exit_shogun();
	return 0;
}
/*
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 3 of the License, or
 * (at your option) any later version.
 *
 * Written (W) 2011 Sergey Lisitsyn
 * Copyright (C) 2011 Berlin Institute of Technology and Max-Planck-Society
 */

#include <shogun/base/init.h>
#include <shogun/features/DenseFeatures.h>
#include <shogun/converter/NeighborhoodPreservingEmbedding.h>
#include <shogun/mathematics/Math.h>

using namespace shogun;

/* Embeds a toy 3D sine-wave dataset into 2D using Neighborhood Preserving
 * Embedding. */
int main(int argc, char** argv)
{
	init_shogun();

	int N = 100;  /* number of vectors */
	int dim = 3;  /* input dimensionality */

	/* Let SGMatrix allocate and own its buffer (same idiom as the SPE
	 * example in this file). The original code allocated with new double[]
	 * and handed the raw pointer to SGMatrix, which releases it with
	 * SG_FREE()/free() — a mismatched allocator pair (undefined behavior). */
	SGMatrix<float64_t> matrix(dim, N);
	for (int i=0; i<N*dim; i++)
		matrix[i] = CMath::sin((i/float64_t(N*dim))*3.14);

	CDenseFeatures<float64_t>* features = new CDenseFeatures<float64_t>(matrix);
	SG_REF(features);

	/* configure the converter: 2D target, k=15 neighbors, 4 threads */
	CNeighborhoodPreservingEmbedding* npe =
		new CNeighborhoodPreservingEmbedding();
	npe->set_target_dim(2);
	npe->set_k(15);
	npe->parallel->set_num_threads(4);

	/* compute the embedding */
	CDenseFeatures<float64_t>* embedding = npe->embed(features);

	SG_UNREF(embedding);
	SG_UNREF(npe);
	SG_UNREF(features);
	exit_shogun();
	return 0;
}
/*
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 3 of the License, or
 * (at your option) any later version.
 *
 * Written (W) 2012 Fernando José Iglesias García
 * Copyright (C) 2012 Fernando José Iglesias García
 */

#include <shogun/base/init.h>
#include <shogun/features/DenseFeatures.h>
#include <shogun/converter/StochasticProximityEmbedding.h>
#include <shogun/mathematics/Math.h>

using namespace shogun;

/* Runs Stochastic Proximity Embedding twice on a toy sine-wave dataset:
 * first with the global strategy, then with the local (k-neighborhood)
 * strategy. */
int main()
{
	init_shogun_with_defaults();

	const int n_points = 100;  /* number of vectors */
	const int n_dims = 3;      /* input dimensionality */

	/* toy data: a sampled sine wave, owned by the SGMatrix */
	SGMatrix< float64_t > data(n_dims, n_points);
	for (int i=0; i<n_points*n_dims; i++)
		data[i] = CMath::sin((i/float64_t(n_points*n_dims))*3.14);

	CDenseFeatures< float64_t >* features =
		new CDenseFeatures<float64_t>(data);
	SG_REF(features);

	/* converter configured for the global strategy first */
	CStochasticProximityEmbedding* spe = new CStochasticProximityEmbedding();
	spe->set_target_dim(2);
	spe->set_strategy(SPE_GLOBAL);
	spe->set_nupdates(40);
	SG_REF(spe);

	/* first embedding: global strategy */
	CDenseFeatures< float64_t >* embedding = spe->embed(features);
	SG_REF(embedding);

	/* switch to the local strategy with 12 neighbors */
	spe->set_strategy(SPE_LOCAL);
	spe->set_k(12);

	/* second embedding: drop the first result, embed again */
	SG_UNREF(embedding);
	embedding = spe->embed(features);
	SG_REF(embedding);

	/* release everything */
	SG_UNREF(embedding);
	SG_UNREF(spe);
	SG_UNREF(features);
	exit_shogun();
	return 0;
}
/*
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 3 of the License, or
 * (at your option) any later version.
 *
 * Written (W) 2011 Heiko Strathmann
 * Copyright (C) 2011 Berlin Institute of Technology and Max-Planck-Society
 */

#include <shogun/base/init.h>
#include <shogun/features/DenseFeatures.h>
#include <shogun/labels/BinaryLabels.h>
#include <shogun/kernel/GaussianKernel.h>
#include <shogun/classifier/svm/LibSVM.h>
#include <shogun/evaluation/CrossValidation.h>
#include <shogun/evaluation/StratifiedCrossValidationSplitting.h>
#include <shogun/evaluation/ContingencyTableEvaluation.h>

using namespace shogun;

// Route shogun log messages to the given stream.
void print_message(FILE* target, const char* str)
{
	fprintf(target, "%s", str);
}

/* Trains a gaussian-kernel LibSVM on two gaussian clusters, checks the
 * training accuracy, then estimates accuracy via stratified 5-fold
 * cross-validation repeated 10 times. */
void test_cross_validation()
{
	/* data matrix dimensions */
	index_t num_vectors=40;
	index_t num_features=5;

	/* data means -1, 1 in all components, std deviation of 3 */
	SGVector<float64_t> mean_1(num_features);
	SGVector<float64_t> mean_2(num_features);
	SGVector<float64_t>::fill_vector(mean_1.vector, mean_1.vlen, -1.0);
	SGVector<float64_t>::fill_vector(mean_2.vector, mean_2.vlen, 1.0);
	float64_t sigma=3;
	SGVector<float64_t>::display_vector(mean_1.vector, mean_1.vlen, "mean 1");
	SGVector<float64_t>::display_vector(mean_2.vector, mean_2.vlen, "mean 2");

	/* fill data matrix around mean: first half of the vectors around
	 * mean_1, second half around mean_2 (all components share the mean) */
	SGMatrix<float64_t> train_dat(num_features, num_vectors);
	for (index_t i=0; i<num_vectors; ++i)
	{
		for (index_t j=0; j<num_features; ++j)
		{
			float64_t mean=i<num_vectors/2 ? mean_1.vector[0] : mean_2.vector[0];
			train_dat.matrix[i*num_features+j]=CMath::normal_random(mean, sigma);
		}
	}

	/* training features */
	CDenseFeatures<float64_t>* features=
			new CDenseFeatures<float64_t>(train_dat);
	SG_REF(features);

	/* training labels +/- 1 for each cluster */
	SGVector<float64_t> lab(num_vectors);
	for (index_t i=0; i<num_vectors; ++i)
		lab.vector[i]=i<num_vectors/2 ? -1.0 : 1.0;
	CBinaryLabels* labels=new CBinaryLabels(lab);

	/* gaussian kernel with 100MB cache and width 10 */
	int32_t kernel_cache=100;
	int32_t width=10;
	CGaussianKernel* kernel=new CGaussianKernel(kernel_cache, width);
	kernel->init(features, features);

	/* create svm via libsvm */
	float64_t svm_C=10;
	float64_t svm_eps=0.0001;
	CLibSVM* svm=new CLibSVM(svm_C, kernel, labels);
	svm->set_epsilon(svm_eps);

	/* train and output predictions on the training set */
	svm->train(features);
	CBinaryLabels* output=
			CBinaryLabels::obtain_from_generic(svm->apply(features));
	for (index_t i=0; i<num_vectors; ++i)
		SG_SPRINT("i=%d, class=%f,\n", i, output->get_label(i));

	/* evaluation criterion: classification accuracy */
	CContingencyTableEvaluation* eval_crit=
			new CContingencyTableEvaluation(ACCURACY);

	/* evaluate training error */
	float64_t eval_result=eval_crit->evaluate(output, labels);
	SG_SPRINT("training error: %f\n", eval_result);
	SG_UNREF(output);

	/* assert that regression "works". this is not guaranteed to always work
	 * but should be a really coarse check to see if everything is going
	 * approx. right */
	ASSERT(eval_result<2);

	/* splitting strategy: stratified 5-fold */
	index_t n_folds=5;
	CStratifiedCrossValidationSplitting* splitting=
			new CStratifiedCrossValidationSplitting(labels, n_folds);

	/* cross validation instance, 10 runs, 95% confidence interval */
	CCrossValidation* cross=new CCrossValidation(svm, features, labels,
			splitting, eval_crit);
	cross->set_num_runs(10);
	cross->set_conf_int_alpha(0.05);

	/* actual evaluation */
	CCrossValidationResult* result=(CCrossValidationResult*)cross->evaluate();
	if (result->get_result_type() != CROSSVALIDATION_RESULT)
		SG_SERROR("Evaluation result is not of type CrossValidationResult!");
	result->print_result();

	/* clean up (cross holds its own references to svm/labels etc.) */
	SG_UNREF(result);
	SG_UNREF(cross);
	SG_UNREF(features);
}

int main(int argc, char **argv)
{
	init_shogun(&print_message, &print_message, &print_message);

	sg_io->set_loglevel(MSG_DEBUG);

	test_cross_validation();

	exit_shogun();
	return 0;
}
/*
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 3 of the License, or
 * (at your option) any later version.
 *
 * Written (W) 2012 Heiko Strathmann
 */

#include <shogun/base/init.h>
#include <shogun/features/DenseFeatures.h>
#include <shogun/labels/BinaryLabels.h>
#include <shogun/kernel/GaussianKernel.h>
#include <shogun/classifier/svm/LibSVM.h>
#include <shogun/classifier/svm/SVMLight.h>
#include <shogun/evaluation/CrossValidation.h>
#include <shogun/evaluation/StratifiedCrossValidationSplitting.h>
#include <shogun/evaluation/ContingencyTableEvaluation.h>
#include <shogun/lib/Time.h>

using namespace shogun;

/* Forwards shogun messages to the given output stream. */
void print_message(FILE* target, const char* str)
{
	fprintf(target, "%s", str);
}

/* Trains an SVM on two noisy Gaussian clusters, then benchmarks repeated
 * cross-validation in three locking modes: unlocked, auto-locked in every
 * run, and locked once up-front via data_lock(). */
void test_cross_validation()
{
	/* data matrix dimensions */
	index_t num_vectors=50;
	index_t num_features=5;

	/* cluster centres at -1 and +1 in every component, noise level sigma */
	SGVector<float64_t> mean_1(num_features);
	SGVector<float64_t> mean_2(num_features);
	SGVector<float64_t>::fill_vector(mean_1.vector, mean_1.vlen, -1.0);
	SGVector<float64_t>::fill_vector(mean_2.vector, mean_2.vlen, 1.0);
	float64_t sigma=1.5;

	/* sample the training matrix around the two means (column-major,
	 * one column per example) */
	SGMatrix<float64_t> train_dat(num_features, num_vectors);
	for (index_t vec=0; vec<num_vectors; ++vec)
	{
		for (index_t dim=0; dim<num_features; ++dim)
		{
			float64_t mean=vec<num_vectors/2 ? mean_1.vector[0] : mean_2.vector[0];
			train_dat.matrix[vec*num_features+dim]=CMath::normal_random(mean, sigma);
		}
	}

	/* training features */
	CDenseFeatures<float64_t>* features=
			new CDenseFeatures<float64_t>(train_dat);
	SG_REF(features);

	/* binary labels: -1 for the first cluster, +1 for the second */
	SGVector<float64_t> lab(num_vectors);
	for (index_t vec=0; vec<num_vectors; ++vec)
		lab.vector[vec]=vec<num_vectors/2 ? -1.0 : 1.0;
	CBinaryLabels* labels=new CBinaryLabels(lab);

	/* gaussian kernel */
	CGaussianKernel* kernel=new CGaussianKernel();
	kernel->set_width(10);
	kernel->init(features, features);

	/* create svm via libsvm */
	float64_t svm_C=1;
	float64_t svm_eps=0.0001;
	CSVM* svm=new CLibSVM(svm_C, kernel, labels);
	svm->set_epsilon(svm_eps);

	/* train once the normal way and report the training accuracy */
	SG_SPRINT("starting normal training\n");
	svm->train(features);
	CBinaryLabels* output=
			CBinaryLabels::obtain_from_generic(svm->apply(features));

	/* evaluation criterion */
	CContingencyTableEvaluation* eval_crit=
			new CContingencyTableEvaluation(ACCURACY);

	/* evaluate training error */
	float64_t eval_result=eval_crit->evaluate(output, labels);
	SG_SPRINT("training accuracy: %f\n", eval_result);
	SG_UNREF(output);

	/* very coarse sanity check only -- not guaranteed to always hold */
	ASSERT(eval_result<2);

	/* stratified splitting into 3 folds */
	index_t n_folds=3;
	CStratifiedCrossValidationSplitting* splitting=
			new CStratifiedCrossValidationSplitting(labels, n_folds);

	/* cross validation instance, 5 runs, 95% confidence interval */
	CCrossValidation* cross=new CCrossValidation(svm, features, labels,
			splitting, eval_crit);
	cross->set_num_runs(5);
	cross->set_conf_int_alpha(0.05);

	CCrossValidationResult* fold_result;
	index_t repetitions=5;

	/* variant 1: no locking at all */
	SG_SPRINT("unlocked x-val\n");
	kernel->init(features, features);
	cross->set_autolock(false);
	CTime timer;
	timer.start();
	for (index_t rep=0; rep<repetitions; ++rep)
	{
		fold_result=(CCrossValidationResult*)cross->evaluate();
		SG_UNREF(fold_result);
	}
	timer.stop();
	SG_SPRINT("%f sec\n", timer.cur_time_diff());

	/* variant 2: auto-locking in every iteration (better, not so nice) */
	SG_SPRINT("locked in every iteration x-val\n");
	cross->set_autolock(true);
	timer.start();
	for (index_t rep=0; rep<repetitions; ++rep)
	{
		fold_result=(CCrossValidationResult*)cross->evaluate();
		SG_UNREF(fold_result);
	}
	timer.stop();
	SG_SPRINT("%f sec\n", timer.cur_time_diff());

	/* variant 3: lock once before, no locking/unlocking inside the loop */
	svm->data_lock(labels, features);
	SG_SPRINT("locked x-val\n");
	timer.start();
	for (index_t rep=0; rep<repetitions; ++rep)
	{
		fold_result=(CCrossValidationResult*)cross->evaluate();
		SG_UNREF(fold_result);
	}
	timer.stop();
	SG_SPRINT("%f sec\n", timer.cur_time_diff());

	/* clean up */
	SG_UNREF(cross);
	SG_UNREF(features);
}

int main(int argc, char **argv)
{
	init_shogun(&print_message, &print_message, &print_message);

	test_cross_validation();

	exit_shogun();
	return 0;
}
/*
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 3 of the License, or
 * (at your option) any later version.
 *
 * Written (W) 2012 Heiko Strathmann
 */

#include <shogun/kernel/GaussianKernel.h>
#include <shogun/kernel/CombinedKernel.h>
#include <shogun/labels/BinaryLabels.h>
#include <shogun/features/DenseFeatures.h>
#include <shogun/classifier/mkl/MKLClassification.h>
#include <shogun/classifier/svm/LibSVM.h>
#include <shogun/evaluation/CrossValidation.h>
#include <shogun/evaluation/CrossValidationPrintOutput.h>
#include <shogun/evaluation/CrossValidationMKLStorage.h>
#include <shogun/evaluation/StratifiedCrossValidationSplitting.h>
#include <shogun/evaluation/ContingencyTableEvaluation.h>
#include <shogun/mathematics/Statistics.h>

using namespace shogun;

/* Fills lab with -1/+1 class labels and feat with two uniform clusters
 * whose centres are pushed apart by +/- dist; prints both afterwards. */
void gen_rand_data(SGVector<float64_t> lab, SGMatrix<float64_t> feat,
		float64_t dist)
{
	index_t dims=feat.num_rows;
	index_t num=lab.vlen;

	for (int32_t idx=0; idx<num; idx++)
	{
		if (idx<num/2)
		{
			/* first half: negative class, shifted by +dist */
			lab[idx]=-1.0;
			for (int32_t d=0; d<dims; d++)
				feat(d, idx)=CMath::random(0.0, 1.0)+dist;
		}
		else
		{
			/* second half: positive class, shifted by -dist */
			lab[idx]=1.0;
			for (int32_t d=0; d<dims; d++)
				feat(d, idx)=CMath::random(0.0, 1.0)-dist;
		}
	}

	lab.display_vector("lab");
	feat.display_matrix("feat");
}

/* Runs cross-validation on an MKL classifier with three gaussian kernels
 * and prints the learnt kernel weights plus their per-kernel statistics. */
void test_mkl_cross_validation()
{
	/* generate random data */
	index_t num=10;
	index_t dims=2;
	float64_t dist=0.5;
	SGVector<float64_t> lab(num);
	SGMatrix<float64_t> feat(dims, num);
	gen_rand_data(lab, feat, dist);

	/*create train labels */
	CLabels* labels=new CBinaryLabels(lab);

	/* create train features */
	CDenseFeatures<float64_t>* features=new CDenseFeatures<float64_t>();
	features->set_feature_matrix(feat);
	SG_REF(features);

	/* create combined features: same features, once per kernel */
	CCombinedFeatures* comb_features=new CCombinedFeatures();
	comb_features->append_feature_obj(features);
	comb_features->append_feature_obj(features);
	comb_features->append_feature_obj(features);
	SG_REF(comb_features);

	/* create multiple gaussian kernels */
	CCombinedKernel* kernel=new CCombinedKernel();
	kernel->append_kernel(new CGaussianKernel(10, 0.1));
	kernel->append_kernel(new CGaussianKernel(10, 1));
	kernel->append_kernel(new CGaussianKernel(10, 2));
	kernel->init(comb_features, comb_features);
	SG_REF(kernel);

	/* create mkl using libsvm, due to a mem-bug, interleaved is not possible */
	CMKLClassification* svm=new CMKLClassification(new CLibSVM());
	svm->set_interleaved_optimization_enabled(false);
	svm->set_kernel(kernel);
	SG_REF(svm);

	/* create cross-validation instance */
	index_t num_folds=3;
	CSplittingStrategy* split=
			new CStratifiedCrossValidationSplitting(labels, num_folds);
	CEvaluation* eval=new CContingencyTableEvaluation(ACCURACY);
	CCrossValidation* cross=new CCrossValidation(svm, comb_features, labels,
			split, eval, false);

	/* add print output listener and mkl storage listener */
	cross->add_cross_validation_output(new CCrossValidationPrintOutput());
	CCrossValidationMKLStorage* mkl_storage=new CCrossValidationMKLStorage();
	cross->add_cross_validation_output(mkl_storage);

	/* perform cross-validation; this prints loads of information via the
	 * CCrossValidationPrintOutput instance attached above */
	CEvaluationResult* result=cross->evaluate();

	/* print mkl weights */
	SGMatrix<float64_t> weights=mkl_storage->get_mkl_weights();
	weights.display_matrix("mkl weights");

	/* print mean and variance of each kernel weight; these could for
	 * example be used to compute confidence intervals */
	CStatistics::matrix_mean(weights, false).display_vector("mean per kernel");
	CStatistics::matrix_variance(weights, false).display_vector("variance per kernel");
	CStatistics::matrix_std_deviation(weights, false).display_vector("std-dev per kernel");
	SG_UNREF(result);

	/* again for two runs */
	cross->set_num_runs(2);
	result=cross->evaluate();

	/* print mkl weights */
	weights=mkl_storage->get_mkl_weights();
	weights.display_matrix("mkl weights");

	/* print mean and variance of each kernel weight; these could for
	 * example be used to compute confidence intervals */
	CStatistics::matrix_mean(weights, false).display_vector("mean per kernel");
	CStatistics::matrix_variance(weights, false).display_vector("variance per kernel");
	CStatistics::matrix_std_deviation(weights, false).display_vector("std-dev per kernel");

	/* clean up */
	SG_UNREF(result);
	SG_UNREF(cross);
	SG_UNREF(kernel);
	SG_UNREF(features);
	SG_UNREF(comb_features);
	SG_UNREF(svm);
}

int main()
{
	init_shogun_with_defaults();

//	sg_io->set_loglevel(MSG_DEBUG);

	test_mkl_cross_validation();

	exit_shogun();
	return 0;
}
/*
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 3 of the License, or
 * (at your option) any later version.
 *
 * Written (W) 2012 Heiko Strathmann
 * Copyright (C) 2012 Berlin Institute of Technology and Max-Planck-Society
 */

#include <shogun/base/init.h>
#include <shogun/features/DenseFeatures.h>
#include <shogun/labels/MulticlassLabels.h>
#include <shogun/multiclass/MulticlassLibLinear.h>
#include <shogun/io/streaming/StreamingAsciiFile.h>
#include <shogun/io/SGIO.h>
#include <shogun/features/streaming/StreamingDenseFeatures.h>
#include <shogun/evaluation/CrossValidation.h>
#include <shogun/evaluation/StratifiedCrossValidationSplitting.h>
#include <shogun/evaluation/MulticlassAccuracy.h>

using namespace shogun;

/* Forwards shogun messages to the given output stream. */
void print_message(FILE* target, const char* str)
{
	fprintf(target, "%s", str);
}

/* Streams multiclass data from ascii files, trains a LibLinear multiclass
 * machine on it and evaluates it via stratified cross-validation. */
void test_cross_validation()
{
	/* prepare to read a file for the training data */
	char fname_feats[] = "../data/fm_train_real.dat";
	char fname_labels[] = "../data/label_train_multiclass.dat";
	CStreamingAsciiFile* ffeats_train = new CStreamingAsciiFile(fname_feats);
	CStreamingAsciiFile* flabels_train = new CStreamingAsciiFile(fname_labels);
	SG_REF(ffeats_train);
	SG_REF(flabels_train);

	CStreamingDenseFeatures< float64_t >* stream_features =
			new CStreamingDenseFeatures< float64_t >(ffeats_train, false, 1024);
	CStreamingDenseFeatures< float64_t >* stream_labels =
			new CStreamingDenseFeatures< float64_t >(flabels_train, true, 1024);
	SG_REF(stream_features);
	SG_REF(stream_labels);

	/* materialise up to 1000 streamed examples as dense features */
	stream_features->start_parser();
	CDenseFeatures< float64_t >* features=
			(CDenseFeatures< float64_t >*)
			stream_features->get_streamed_features(1000);
	stream_features->end_parser();

	CMulticlassLabels* labels =
			new CMulticlassLabels(features->get_num_vectors());
	SG_REF(features);
	SG_REF(labels);

	/* read the labels from the file */
	int32_t idx = 0;
	stream_labels->start_parser();
	while ( stream_labels->get_next_example() )
	{
		labels->set_int_label( idx++, (int32_t)stream_labels->get_label() );
		stream_labels->release_example();
	}
	stream_labels->end_parser();

	/* create svm via libsvm */
	float64_t svm_C=10;
	float64_t svm_eps=0.0001;
	CMulticlassLibLinear* svm=new CMulticlassLibLinear(svm_C, features, labels);
	svm->set_epsilon(svm_eps);

	/* train and output */
	svm->train(features);
	CMulticlassLabels* output=
			CMulticlassLabels::obtain_from_generic(svm->apply(features));
	for (index_t i=0; i<features->get_num_vectors(); ++i)
		SG_SPRINT("i=%d, class=%f,\n", i, output->get_label(i));

	/* evaluation criterion */
	CMulticlassAccuracy* eval_crit = new CMulticlassAccuracy ();

	/* evaluate training error */
	float64_t eval_result=eval_crit->evaluate(output, labels);
	SG_SPRINT("training accuracy: %f\n", eval_result);
	SG_UNREF(output);

	/* very coarse sanity check only -- not guaranteed to always hold */
	ASSERT(eval_result<2);

	/* splitting strategy */
	index_t n_folds=5;
	CStratifiedCrossValidationSplitting* splitting=
			new CStratifiedCrossValidationSplitting(labels, n_folds);

	/* cross validation instance, 95% confidence interval */
	CCrossValidation* cross=new CCrossValidation(svm, features, labels,
			splitting, eval_crit);
	cross->set_num_runs(1);
	cross->set_conf_int_alpha(0.05);

	/* actual evaluation */
	CCrossValidationResult* result=(CCrossValidationResult*)cross->evaluate();

	if (result->get_result_type() != CROSSVALIDATION_RESULT)
		SG_SERROR("Evaluation result is not of type CCrossValidationResult!");

	result->print_result();

	/* clean up */
	SG_UNREF(result);
	SG_UNREF(cross);
	SG_UNREF(features);
	SG_UNREF(labels);
	SG_UNREF(ffeats_train);
	SG_UNREF(flabels_train);
	SG_UNREF(stream_features);
	SG_UNREF(stream_labels);
}

int main(int argc, char **argv)
{
	init_shogun(&print_message, &print_message, &print_message);

	sg_io->set_loglevel(MSG_DEBUG);

	test_cross_validation();

	exit_shogun();
	return 0;
}
/*
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 3 of the License, or
 * (at your option) any later version.
 *
 * Written (W) 2012 yoo, thereisnoknife@gmail.com
 * Written (W) 2012 Heiko Strathmann
 */

#include <shogun/features/streaming/StreamingDenseFeatures.h>
#include <shogun/io/streaming/StreamingAsciiFile.h>
#include <shogun/labels/MulticlassLabels.h>
#include <shogun/kernel/GaussianKernel.h>
#include <shogun/kernel/LinearKernel.h>
#include <shogun/kernel/PolyKernel.h>
#include <shogun/kernel/CombinedKernel.h>
#include <shogun/classifier/mkl/MKLMulticlass.h>
#include <shogun/evaluation/StratifiedCrossValidationSplitting.h>
#include <shogun/evaluation/CrossValidation.h>
#include <shogun/evaluation/MulticlassAccuracy.h>

using namespace shogun;

/* Streams data from ascii files, builds a multi-class MKL machine with a
 * gaussian, a linear and a polynomial kernel, trains it, and evaluates it
 * with stratified cross-validation. */
void test_multiclass_mkl_cv()
{
	/* stream data from a file */
	int32_t num_vectors=50;
	int32_t num_feats=2;

	/* file data */
	char fname_feats[]="../data/fm_train_real.dat";
	char fname_labels[]="../data/label_train_multiclass.dat";
	CStreamingAsciiFile* ffeats_train=new CStreamingAsciiFile(fname_feats);
	CStreamingAsciiFile* flabels_train=new CStreamingAsciiFile(fname_labels);
	SG_REF(ffeats_train);
	SG_REF(flabels_train);

	/* streaming data */
	CStreamingDenseFeatures<float64_t>* stream_features=
			new CStreamingDenseFeatures<float64_t>(ffeats_train, false, 1024);
	CStreamingDenseFeatures<float64_t>* stream_labels=
			new CStreamingDenseFeatures<float64_t>(flabels_train, true, 1024);
	SG_REF(stream_features);
	SG_REF(stream_labels);

	/* copy the first num_vectors streamed examples into a matrix */
	SGMatrix<float64_t> mat=SGMatrix<float64_t>(num_feats, num_vectors);
	SGVector<float64_t> vec;
	stream_features->start_parser();

	index_t count=0;
	while (stream_features->get_next_example() && count<num_vectors)
	{
		vec=stream_features->get_vector();
		for (int32_t dim=0; dim<num_feats; ++dim)
			mat(dim,count)=vec[dim];
		stream_features->release_example();
		count++;
	}
	stream_features->end_parser();
	mat.num_cols=num_vectors;

	/* dense features from streamed matrix */
	CDenseFeatures<float64_t>* features=new CDenseFeatures<float64_t>(mat);
	CMulticlassLabels* labels=new CMulticlassLabels(num_vectors);
	SG_REF(features);
	SG_REF(labels);

	/* read labels from file */
	int32_t idx=0;
	stream_labels->start_parser();
	while (stream_labels->get_next_example())
	{
		labels->set_int_label(idx++, (int32_t)stream_labels->get_label());
		stream_labels->release_example();
	}
	stream_labels->end_parser();

	/* combined features and kernel */
	CCombinedFeatures *cfeats=new CCombinedFeatures();
	CCombinedKernel *cker=new CCombinedKernel();
	SG_REF(cfeats);
	SG_REF(cker);

	/** 1st kernel: gaussian */
	cfeats->append_feature_obj(features);
	cker->append_kernel(new CGaussianKernel(features, features, 1.2, 10));

	/** 2nd kernel: linear */
	cfeats->append_feature_obj(features);
	cker->append_kernel(new CLinearKernel(features, features));

	/** 3rd kernel: poly */
	cfeats->append_feature_obj(features);
	cker->append_kernel(new CPolyKernel(features, features, 2, true, 10));

	cker->init(cfeats, cfeats);

	/* create mkl instance */
	CMKLMulticlass* mkl=new CMKLMulticlass(1.2, cker, labels);
	SG_REF(mkl);
	mkl->set_epsilon(0.00001);
	mkl->parallel->set_num_threads(1);
	mkl->set_mkl_epsilon(0.001);
	mkl->set_mkl_norm(1.5);

	/* train to see weights */
	mkl->train();
	cker->get_subkernel_weights().display_vector("weights");

	/* cross-validation instances */
	index_t n_folds=3;
	index_t n_runs=5;
	CMulticlassAccuracy* eval_crit=new CMulticlassAccuracy();
	CStratifiedCrossValidationSplitting* splitting=
			new CStratifiedCrossValidationSplitting(labels, n_folds);
	CCrossValidation *cross=new CCrossValidation(mkl, cfeats, labels,
			splitting, eval_crit);
	cross->set_autolock(false);
	cross->set_num_runs(n_runs);
	cross->set_conf_int_alpha(0.05);

	/* perform x-val and print result */
	CCrossValidationResult* result=(CCrossValidationResult*)cross->evaluate();
	SG_SPRINT("mean of %d %d-fold x-val runs: %f\n", n_runs, n_folds,
			result->mean);

	/* assert high accuracy */
	ASSERT(result->mean>0.9);

	/* clean up */
	SG_UNREF(ffeats_train);
	SG_UNREF(flabels_train);
	SG_UNREF(stream_features);
	SG_UNREF(stream_labels);
	SG_UNREF(features);
	SG_UNREF(labels);
	SG_UNREF(cfeats);
	SG_UNREF(cker);
	SG_UNREF(mkl);
	SG_UNREF(cross);
	SG_UNREF(result);
}

int main(int argc, char** argv){
	shogun::init_shogun_with_defaults();

//	sg_io->set_loglevel(MSG_DEBUG);

	/* performs cross-validation on a multi-class mkl machine */
	test_multiclass_mkl_cv();

	exit_shogun();
}
/*
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 3 of the License, or
 * (at your option) any later version.
 *
 * Written (W) 2012 Heiko Strathmann
 * Copyright (C) 2012 Berlin Institute of Technology and Max-Planck-Society
 */

#include <shogun/base/init.h>
#include <shogun/features/DenseFeatures.h>
#include <shogun/labels/RegressionLabels.h>
#include <shogun/kernel/LinearKernel.h>
#include <shogun/regression/KernelRidgeRegression.h>
#include <shogun/evaluation/CrossValidation.h>
#include <shogun/evaluation/CrossValidationSplitting.h>
#include <shogun/evaluation/MeanSquaredError.h>

using namespace shogun;

/* Forwards shogun messages to the given output stream. */
void print_message(FILE* target, const char* str)
{
	fprintf(target, "%s", str);
}

/* Fits kernel ridge regression to a noisy linear 1-d target and evaluates
 * the mean squared error via (non-stratified) cross-validation. */
void test_cross_validation()
{
	/* data matrix dimensions */
	index_t num_vectors=100;
	index_t num_features=1;

	/* training label data */
	SGVector<float64_t> lab(num_vectors);

	/* fill data matrix and labels */
	SGMatrix<float64_t> train_dat(num_features, num_vectors);
	SGVector<float64_t>::range_fill_vector(train_dat.matrix, num_vectors);
	for (index_t i=0; i<num_vectors; ++i)
	{
		/* labels are linear plus noise */
		lab.vector[i]=i+CMath::normal_random(0, 1.0);
	}

	/* training features */
	CDenseFeatures<float64_t>* features=
			new CDenseFeatures<float64_t>(train_dat);
	SG_REF(features);

	/* training labels */
	CRegressionLabels* labels=new CRegressionLabels(lab);

	/* kernel */
	CLinearKernel* kernel=new CLinearKernel();
	kernel->init(features, features);

	/* kernel ridge regression*/
	float64_t tau=0.0001;
	CKernelRidgeRegression* krr=new CKernelRidgeRegression(tau, kernel, labels);

	/* evaluation criterion */
	CMeanSquaredError* eval_crit=
			new CMeanSquaredError();

	/* train and output */
	krr->train(features);
	CRegressionLabels* output=
			CRegressionLabels::obtain_from_generic(krr->apply());
	for (index_t i=0; i<num_vectors; ++i)
	{
		SG_SPRINT("x=%f, train=%f, predict=%f\n", train_dat.matrix[i],
				labels->get_label(i), output->get_label(i));
	}

	/* evaluate training error */
	float64_t eval_result=eval_crit->evaluate(output, labels);
	SG_SPRINT("training error: %f\n", eval_result);
	SG_UNREF(output);

	/* very coarse sanity check only -- not guaranteed to always hold */
	ASSERT(eval_result<2);

	/* splitting strategy */
	index_t n_folds=5;
	CCrossValidationSplitting* splitting=
			new CCrossValidationSplitting(labels, n_folds);

	/* cross validation instance, 100 runs, 95% confidence interval */
	CCrossValidation* cross=new CCrossValidation(krr, features, labels,
			splitting, eval_crit);
	cross->set_num_runs(100);
	cross->set_conf_int_alpha(0.05);

	/* actual evaluation */
	CCrossValidationResult* result=(CCrossValidationResult*)cross->evaluate();

	if (result->get_result_type() != CROSSVALIDATION_RESULT)
		SG_SERROR("Evaluation result is not of type CCrossValidationResult!");

	SG_SPRINT("cross_validation estimate:\n");
	result->print_result();

	/* same crude assertion as for above evaluation */
	ASSERT(result->mean<2);

	/* clean up */
	SG_UNREF(result);
	SG_UNREF(cross);
	SG_UNREF(features);
}

int main(int argc, char **argv)
{
	init_shogun(&print_message, &print_message, &print_message);

	test_cross_validation();

	exit_shogun();
	return 0;
}
/*
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 3 of the License, or
 * (at your option) any later version.
 *
 * Written (W) 2011-2012 Heiko Strathmann
 * Copyright (C) 2011 Berlin Institute of Technology and Max-Planck-Society
 */

#include <shogun/base/init.h>
#include <shogun/features/DenseFeatures.h>
#include <shogun/features/Subset.h>

using namespace shogun;

/* Adds a subset to dense features, copies a sub-subset of it via
 * copy_subset(), and asserts the copy matches the original data. */
void test()
{
	SGMatrix<float64_t> data(3, 10);
	CDenseFeatures<float64_t>* f=new CDenseFeatures<float64_t>(data);

	/* fill the (shared) matrix with 1, 2, 3, ... and print it */
	SGVector<float64_t>::range_fill_vector(data.matrix,
			data.num_cols*data.num_rows, 1.0);
	SGMatrix<float64_t>::display_matrix(data.matrix, data.num_rows,
			data.num_cols, "original feature data");

	/* subset indices: 8 consecutive vectors starting at offset_subset */
	index_t offset_subset=1;
	SGVector<index_t> feature_subset(8);
	SGVector<index_t>::range_fill_vector(feature_subset.vector,
			feature_subset.vlen, offset_subset);
	SGVector<index_t>::display_vector(feature_subset.vector,
			feature_subset.vlen, "feature subset");

	f->add_subset(feature_subset);
	SG_SPRINT("feature vectors after setting subset on original data:\n");
	for (index_t vec=0; vec<f->get_num_vectors(); ++vec)
	{
		SGVector<float64_t> current=f->get_feature_vector(vec);
		SG_SPRINT("%i: ", vec);
		SGVector<float64_t>::display_vector(current.vector, current.vlen);
		f->free_feature_vector(current, vec);
	}

	/* indices (relative to the active subset) that are to be copied */
	index_t offset_copy=2;
	SGVector<index_t> feature_copy_subset(4);
	SGVector<index_t>::range_fill_vector(feature_copy_subset.vector,
			feature_copy_subset.vlen, offset_copy);
	SGVector<index_t>::display_vector(feature_copy_subset.vector,
			feature_copy_subset.vlen, "indices that are to be copied");

	CDenseFeatures<float64_t>* subset_copy=
			(CDenseFeatures<float64_t>*)f->copy_subset(feature_copy_subset);

	SGMatrix<float64_t> subset_copy_matrix=subset_copy->get_feature_matrix();
	SGMatrix<float64_t>::display_matrix(subset_copy_matrix.matrix,
			subset_copy_matrix.num_rows, subset_copy_matrix.num_cols,
			"copy matrix");

	/* copy must equal original data shifted by both offsets */
	index_t num_its=subset_copy_matrix.num_rows*subset_copy_matrix.num_cols;
	for (index_t i=0; i<num_its; ++i)
	{
		index_t idx=i+(offset_copy+offset_subset)*subset_copy_matrix.num_rows;
		ASSERT(subset_copy_matrix.matrix[i]==data.matrix[idx]);
	}

	SG_UNREF(f);
	SG_UNREF(subset_copy);
}

int main(int argc, char **argv)
{
	init_shogun_with_defaults();

	test();

	exit_shogun();
	return 0;
}
/*
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 3 of the License, or
 * (at your option) any later version.
 *
 * Written (W) 2011-2012 Heiko Strathmann
 * Copyright (C) 2011 Berlin Institute of Technology and Max-Planck-Society
 */

#include <shogun/base/init.h>
#include <shogun/features/SparseFeatures.h>
#include <shogun/features/Subset.h>

using namespace shogun;

/* Adds a subset to sparse features, copies a sub-subset via copy_subset(),
 * and asserts that entries and feature indices survive the copy. */
void test()
{
	index_t num_vectors=10;
	index_t num_features=3;

	/* create some sparse data */
	SGSparseMatrix<float64_t> data=
			SGSparseMatrix<float64_t>(num_vectors, num_features);
	for (index_t vec=0; vec<num_vectors; ++vec)
	{
		data.sparse_matrix[vec]=SGSparseVector<float64_t>(num_features);

		/* fill; non-zero entries sit at every third feature index only */
		for (index_t feat=0; feat<num_features; ++feat)
		{
			data.sparse_matrix[vec].features[feat].entry=vec+feat;
			data.sparse_matrix[vec].features[feat].feat_index=3*feat;
		}
	}

	CSparseFeatures<float64_t>* f=new CSparseFeatures<float64_t>(data);

	/* display sparse matrix */
	SG_SPRINT("original data\n");
	for (index_t vec=0; vec<num_vectors; ++vec)
	{
		SG_SPRINT("sparse vector at %i: [", vec);
		for (index_t feat=0; feat<num_features; ++feat)
			SG_SPRINT("%f, ", data.sparse_matrix[vec].features[feat].entry);
		SG_SPRINT("]\n");
	}

	/* indices for a subset */
	index_t offset_subset=1;
	SGVector<index_t> feature_subset(8);
	SGVector<index_t>::range_fill_vector(feature_subset.vector,
			feature_subset.vlen, offset_subset);
	SGVector<index_t>::display_vector(feature_subset.vector,
			feature_subset.vlen, "feature subset");

	/* set subset and print data */
	f->add_subset(feature_subset);
	SG_SPRINT("feature vectors after setting subset on original data:\n");
	for (index_t vec=0; vec<f->get_num_vectors(); ++vec)
	{
		SGSparseVector<float64_t> sv=f->get_sparse_feature_vector(vec);
		SG_SPRINT("sparse vector at %i: ", vec);
		for (index_t feat=0; feat<num_features; ++feat)
			SG_SPRINT("%f, ", sv.features[feat].entry);
		SG_SPRINT("]\n");
		f->free_sparse_feature_vector(vec);
	}

	/* indices that are to copy */
	index_t offset_copy=2;
	SGVector<index_t> feature_copy_subset(4);
	SGVector<index_t>::range_fill_vector(feature_copy_subset.vector,
			feature_copy_subset.vlen, offset_copy);
	SGVector<index_t>::display_vector(feature_copy_subset.vector,
			feature_copy_subset.vlen, "indices that are to be copied");

	/* copy a subset of features */
	CSparseFeatures<float64_t>* subset_copy=
			(CSparseFeatures<float64_t>*)f->copy_subset(feature_copy_subset);

	/* print copied subset */
	SG_SPRINT("copied features:\n");
	for (index_t vec=0; vec<subset_copy->get_num_vectors(); ++vec)
	{
		SGSparseVector<float64_t> sv=
				subset_copy->get_sparse_feature_vector(vec);
		SG_SPRINT("sparse vector at %i: ", vec);
		for (index_t feat=0; feat<num_features; ++feat)
			SG_SPRINT("%f, ", sv.features[feat].entry);
		SG_SPRINT("]\n");
		subset_copy->free_sparse_feature_vector(vec);
	}

	/* test if all elements are copied correctly */
	for (index_t vec=0; vec<subset_copy->get_num_vectors(); ++vec)
	{
		SGSparseVector<float64_t> sv=
				subset_copy->get_sparse_feature_vector(vec);

		/* copied vector vec corresponds to original vector shifted by
		 * both offsets (+1 for the index arithmetic of the subsets) */
		index_t ind=vec+offset_copy+offset_subset+1;
		for (index_t feat=0; feat<sv.num_feat_entries; ++feat)
		{
			float64_t a_entry=sv.features[feat].entry;
			float64_t b_entry=data.sparse_matrix[ind].features[feat].entry;
			index_t a_idx=sv.features[feat].feat_index;
			index_t b_idx=data.sparse_matrix[ind].features[feat].feat_index;

			ASSERT(a_entry==b_entry);
			ASSERT(a_idx==b_idx);
		}

		subset_copy->free_sparse_feature_vector(vec);
	}

	SG_UNREF(f);
	SG_UNREF(subset_copy);
}

int main(int argc, char **argv)
{
	init_shogun_with_defaults();

	test();

	exit_shogun();
	return 0;
}
/*
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 3 of the License, or
 * (at your option) any later version.
 *
 * Written (W) 2011-2012 Heiko Strathmann
 * Copyright (C) 2011 Berlin Institute of Technology and Max-Planck-Society
 */

#include <shogun/base/init.h>
#include <shogun/features/DenseFeatures.h>
#include <shogun/features/Subset.h>

using namespace shogun;

/* Builds random uppercase strings, sets a subset on the string features,
 * copies a sub-subset via copy_subset() and asserts character equality. */
void test()
{
	index_t num_strings=10;
	index_t max_string_length=20;
	index_t min_string_length=max_string_length/2;

	SGStringList<char> strings(num_strings, max_string_length);

	SG_SPRINT("original string data:\n");
	for (index_t s=0; s<num_strings; ++s)
	{
		index_t len=CMath::random(min_string_length, max_string_length);
		SGString<char> current(len);
		SG_SPRINT("[%i]: \"", s);

		/* fill with random uppercase letters (ASCII) */
		for (index_t pos=0; pos<len; ++pos)
		{
			current.string[pos]=(char)CMath::random('A', 'Z');

			/* attach \0 to print letter */
			char* string=SG_MALLOC(char, 2);
			string[0]=current.string[pos];
			string[1]='\0';
			SG_SPRINT("%s", string);
			SG_FREE(string);
		}
		SG_SPRINT("\"\n");
		strings.strings[s]=current;
	}

	/* create num_feautres 2-dimensional vectors */
	CStringFeatures<char>* f=new CStringFeatures<char>(strings, ALPHANUM);

	/* subset of 8 strings starting at offset_subset */
	index_t offset_subset=1;
	SGVector<index_t> feature_subset(8);
	SGVector<index_t>::range_fill_vector(feature_subset.vector,
			feature_subset.vlen, offset_subset);
	SGVector<index_t>::display_vector(feature_subset.vector,
			feature_subset.vlen, "feature subset");

	f->add_subset(feature_subset);
	SG_SPRINT("feature vectors after setting subset on original data:\n");
	for (index_t s=0; s<f->get_num_vectors(); ++s)
	{
		SGVector<char> vec=f->get_feature_vector(s);
		SG_SPRINT("%i: ", s);
		for (index_t pos=0; pos<vec.vlen; ++pos)
			SG_SPRINT("%c", vec.vector[pos]);
		SG_SPRINT("\n");

		f->free_feature_vector(vec, s);
	}

	/* indices (relative to the active subset) that are to be copied */
	index_t offset_copy=2;
	SGVector<index_t> feature_copy_subset(4);
	SGVector<index_t>::range_fill_vector(feature_copy_subset.vector,
			feature_copy_subset.vlen, offset_copy);
	SGVector<index_t>::display_vector(feature_copy_subset.vector,
			feature_copy_subset.vlen, "indices that are to be copied");

	CStringFeatures<char>* subset_copy=(CStringFeatures<char>*)f->copy_subset(
			feature_copy_subset);

	/* print the copied strings */
	for (index_t s=0; s<subset_copy->get_num_vectors(); ++s)
	{
		SGVector<char> vec=subset_copy->get_feature_vector(s);
		SG_SPRINT("%i: ", s);
		for (index_t pos=0; pos<vec.vlen; ++pos)
			SG_SPRINT("%c", vec.vector[pos]);
		SG_SPRINT("\n");

		subset_copy->free_feature_vector(vec, s);
	}

	/* copy must equal the original strings shifted by both offsets */
	for (index_t s=0; s<subset_copy->get_num_vectors(); ++s)
	{
		SGVector<char> vec=subset_copy->get_feature_vector(s);

		for (index_t pos=0; pos<vec.vlen; ++pos)
		{
			index_t offset_idx=s+(offset_copy+offset_subset);
			ASSERT(vec.vector[pos]==strings.strings[offset_idx].string[pos]);
		}

		subset_copy->free_feature_vector(vec, s);
	}

	SG_UNREF(f);
	SG_UNREF(subset_copy);
}

int main(int argc, char **argv)
{
	init_shogun_with_defaults();

	test();

	exit_shogun();
	return 0;
}
/*
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 3 of the License, or
 * (at your option) any later version.
 *
 * Written (W) 2012 Heiko Strathmann
 */

#include <shogun/base/init.h>
#include <shogun/features/DenseFeatures.h>
#include <shogun/features/CombinedFeatures.h>

using namespace shogun;

/** Creates two matrices and feature objects for them, calls
 * create_merged_copy(), and checks that the concatenation contains
 * data_1 followed by data_2. */
void test_dense_features()
{
	index_t n_1=3;
	index_t n_2=4;
	index_t dim=2;

	/* first matrix: deterministic ascending values */
	SGMatrix<float64_t> data_1(dim,n_1);
	for (index_t i=0; i<dim*n_1; ++i)
		data_1.matrix[i]=i;
	data_1.display_matrix("data_1");

	/* second matrix: standard-normal random values */
	SGMatrix<float64_t> data_2(dim,n_2);
	for (index_t i=0; i<dim*n_2; ++i)
		data_2.matrix[i]=CMath::randn_double();
	/* BUGFIX: used to print data_1 under the label "data_2" */
	data_2.display_matrix("data_2");

	CDenseFeatures<float64_t>* features_1=new CDenseFeatures<float64_t>(data_1);
	CDenseFeatures<float64_t>* features_2=new CDenseFeatures<float64_t>(data_2);

	CFeatures* concatenation=features_1->create_merged_copy(features_2);

	SGMatrix<float64_t> concat_data=
			((CDenseFeatures<float64_t>*)concatenation)->get_feature_matrix();
	concat_data.display_matrix("concat_data");

	/* check for equality with data_1 */
	for (index_t i=0; i<dim*n_1; ++i)
		ASSERT(data_1.matrix[i]==concat_data.matrix[i]);

	/* check for equality with data_2 */
	for (index_t i=0; i<dim*n_2; ++i)
		ASSERT(data_2.matrix[i]==concat_data.matrix[n_1*dim+i]);

	SG_UNREF(concatenation);
	SG_UNREF(features_1);
	SG_UNREF(features_2);
}

/** Same check as test_dense_features(), but with the dense features wrapped
 * inside combined features on both sides of create_merged_copy(). */
void test_combined_features()
{
	index_t n_1=3;
	index_t n_2=4;
	index_t dim=2;

	/* first matrix: deterministic ascending values */
	SGMatrix<float64_t> data_1(dim,n_1);
	for (index_t i=0; i<dim*n_1; ++i)
		data_1.matrix[i]=i;
	data_1.display_matrix("data_1");

	/* second matrix: standard-normal random values */
	SGMatrix<float64_t> data_2(dim,n_2);
	for (index_t i=0; i<dim*n_2; ++i)
		data_2.matrix[i]=CMath::randn_double();
	/* BUGFIX: used to print data_1 under the label "data_2" */
	data_2.display_matrix("data_2");

	CCombinedFeatures* features_1=new CCombinedFeatures();
	CCombinedFeatures* features_2=new CCombinedFeatures();
	features_1->append_feature_obj(new CDenseFeatures<float64_t>(data_1));
	features_2->append_feature_obj(new CDenseFeatures<float64_t>(data_2));

	CFeatures* concatenation=features_1->create_merged_copy(features_2);

	CFeatures* sub=
			((CCombinedFeatures*)concatenation)->get_first_feature_obj();
	CDenseFeatures<float64_t>* casted_sub=
			dynamic_cast<CDenseFeatures<float64_t>*>(sub);
	ASSERT(casted_sub);
	SGMatrix<float64_t> concat_data=casted_sub->get_feature_matrix();
	SG_UNREF(sub);
	concat_data.display_matrix("concat_data");

	/* check for equality with data_1 */
	for (index_t i=0; i<dim*n_1; ++i)
		ASSERT(data_1.matrix[i]==concat_data.matrix[i]);

	/* check for equality with data_2 */
	for (index_t i=0; i<dim*n_2; ++i)
		ASSERT(data_2.matrix[i]==concat_data.matrix[n_1*dim+i]);

	SG_UNREF(concatenation);
	SG_UNREF(features_1);
	SG_UNREF(features_2);
}

int main(int argc, char **argv)
{
	init_shogun_with_defaults();

//	sg_io->set_loglevel(MSG_DEBUG);

	test_dense_features();
	test_combined_features();

	exit_shogun();
	return 0;
}
/*
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 3 of the License, or
 * (at your option) any later version.
 *
 * Written (W) 2011-2012 Heiko Strathmann
 * Copyright (C) 2011 Berlin Institute of Technology and Max-Planck-Society
 */

#include <shogun/base/init.h>
#include <shogun/labels/MulticlassLabels.h>
#include <shogun/mathematics/Math.h>

using namespace shogun;

/* Forwards shogun messages to the given output stream. */
void print_message(FILE* target, const char* str)
{
	fprintf(target, "%s", str);
}

const int32_t num_labels=10;
const int32_t num_classes=3;

/* Applies a random-permutation subset to multiclass labels, checks that
 * subset access maps through the indices, then removes the subset again
 * and checks that the original labels are back. */
void test()
{
	const int32_t num_subset_idx=CMath::random(1, num_labels);

	/* create labels */
	CMulticlassLabels* labels=new CMulticlassLabels(num_labels);
	for (index_t i=0; i<num_labels; ++i)
		labels->set_label(i, i%num_classes);
	SG_REF(labels);

	/* print labels */
	SGVector<float64_t> labels_data=labels->get_labels();
	SGVector<float64_t>::display_vector(labels_data.vector, labels_data.vlen,
			"labels");

	/* create subset indices */
	SGVector<index_t> subset_idx(CMath::randperm(num_subset_idx),
			num_subset_idx);

	/* print subset indices */
	SGVector<index_t>::display_vector(subset_idx.vector, subset_idx.vlen,
			"subset indices");

	/* apply subset to features */
	SG_SPRINT("\n\n-------------------\n"
			"applying subset to features\n"
			"-------------------\n");
	labels->add_subset(subset_idx);

	/* do some stuff do check and output */
	ASSERT(labels->get_num_labels()==num_subset_idx);
	SG_SPRINT("labels->get_num_labels(): %d\n", labels->get_num_labels());

	for (index_t i=0; i<labels->get_num_labels(); ++i)
	{
		float64_t label=labels->get_label(i);
		SG_SPRINT("label %f:\n", label);
		/* subset access must be identical to direct access via the index */
		ASSERT(label==labels_data.vector[subset_idx.vector[i]]);
	}

	/* remove features subset */
	SG_SPRINT("\n\n-------------------\n"
			"removing subset from features\n"
			"-------------------\n");
	labels->remove_all_subsets();

	ASSERT(labels->get_num_labels()==num_labels);
	SG_SPRINT("labels->get_num_labels(): %d\n", labels->get_num_labels());

	for (index_t i=0; i<labels->get_num_labels(); ++i)
	{
		float64_t label=labels->get_label(i);
		SG_SPRINT("label %f:\n", label);
		ASSERT(label==labels_data.vector[i]);
	}

	SG_UNREF(labels);
}

int main(int argc, char **argv)
{
	init_shogun(&print_message, &print_message, &print_message);

	test();

	exit_shogun();
	return 0;
}
/*
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 3 of the License, or
 * (at your option) any later version.
 *
 * Written (W) 2011-2012 Heiko Strathmann
 * Copyright (C) 2011 Berlin Institute of Technology and Max-Planck-Society
 */

#include <shogun/base/init.h>
#include <shogun/features/DenseFeatures.h>
#include <shogun/features/Subset.h>
#include <shogun/mathematics/Math.h>

using namespace shogun;

void print_message(FILE* target, const char* str)
{
	fprintf(target, "%s", str);
}

/* Transposing twice must reproduce the original feature vectors. */
void check_transposed(CDenseFeatures<int32_t>* features)
{
	CDenseFeatures<int32_t>* transposed=features->get_transposed();
	CDenseFeatures<int32_t>* double_transposed=transposed->get_transposed();

	for (index_t i=0; i<features->get_num_vectors(); ++i)
	{
		SGVector<int32_t> orig_vec=features->get_feature_vector(i);
		SGVector<int32_t> new_vec=double_transposed->get_feature_vector(i);

		ASSERT(orig_vec.vlen==new_vec.vlen);
		for (index_t j=0; j<orig_vec.vlen; j++)
			ASSERT(orig_vec.vector[j]==new_vec.vector[j]);

		/* not necessary since feature matrix is in memory. for documentation */
		features->free_feature_vector(orig_vec, i);
		double_transposed->free_feature_vector(new_vec, i);
	}

	SG_UNREF(transposed);
	SG_UNREF(double_transposed);
}

const int32_t num_vectors=6;
const int32_t dim_features=6;

/* Demonstrates subsets on CDenseFeatures: applies a random index subset,
 * verifies vector access maps through the subset, removes it, and verifies
 * the original view is back. Also exercises get_transposed() both ways. */
void test()
{
	const int32_t num_subset_idx=CMath::random(1, num_vectors);

	/* create feature data matrix (column-major: column i is vector i) */
	SGMatrix<int32_t> data(dim_features, num_vectors);

	/* fill matrix with random data; entry (j, i) lives at i*dim_features+j */
	for (index_t i=0; i<num_vectors; ++i)
	{
		for (index_t j=0; j<dim_features; ++j)
			data.matrix[i*dim_features+j]=CMath::random(-5, 5);
	}

	/* create simple features */
	CDenseFeatures<int32_t>* features=new CDenseFeatures<int32_t>(data);
	SG_REF(features);

	/* print feature matrix */
	SGMatrix<int32_t>::display_matrix(data.matrix, data.num_rows,
			data.num_cols, "feature matrix");

	/* create subset indices */
	SGVector<index_t> subset_idx(CMath::randperm(num_subset_idx),
			num_subset_idx);

	/* print subset indices */
	SGVector<index_t>::display_vector(subset_idx.vector, subset_idx.vlen,
			"subset indices");

	/* apply subset to features */
	SG_SPRINT("\n\n-------------------\n"
			"applying subset to features\n"
			"-------------------\n");
	features->add_subset(subset_idx);

	/* do some stuff do check and output */
	ASSERT(features->get_num_vectors()==num_subset_idx);

	/* check get_Transposed method */
	SG_SPRINT("checking transpose...");
	check_transposed(features);
	SG_SPRINT("does work\n");

	SG_SPRINT("features->get_num_vectors(): %d\n",
			features->get_num_vectors());

	for (index_t i=0; i<features->get_num_vectors(); ++i)
	{
		SGVector<int32_t> vec=features->get_feature_vector(i);
		SG_SPRINT("vector %d: ", i);
		SGVector<int32_t>::display_vector(vec.vector, vec.vlen);

		for (index_t j=0; j<dim_features; ++j)
		{
			/* BUGFIX: the column stride of the column-major matrix is
			 * dim_features, not num_vectors; the original only worked
			 * because both constants happen to be 6 */
			ASSERT(vec.vector[j]==
					data.matrix[subset_idx.vector[i]*dim_features+j]);
		}

		/* not necessary since feature matrix is in memory. for documentation */
		features->free_feature_vector(vec, i);
	}

	/* remove features subset */
	SG_SPRINT("\n\n-------------------\n"
			"removing subset from features\n"
			"-------------------\n");
	features->remove_all_subsets();

	/* do some stuff do check and output */
	ASSERT(features->get_num_vectors()==num_vectors);
	SG_SPRINT("features->get_num_vectors(): %d\n",
			features->get_num_vectors());

	/* check get_Transposed method */
	SG_SPRINT("checking transpose...");
	check_transposed(features);
	SG_SPRINT("does work\n");

	for (index_t i=0; i<features->get_num_vectors(); ++i)
	{
		SGVector<int32_t> vec=features->get_feature_vector(i);
		SG_SPRINT("vector %d: ", i);
		SGVector<int32_t>::display_vector(vec.vector, vec.vlen);

		/* BUGFIX: same stride fix as above */
		for (index_t j=0; j<dim_features; ++j)
			ASSERT(vec.vector[j]==data.matrix[i*dim_features+j]);

		/* not necessary since feature matrix is in memory. for documentation */
		features->free_feature_vector(vec, i);
	}

	SG_UNREF(features);
}

int main(int argc, char **argv)
{
	init_shogun_with_defaults();

	test();

	exit_shogun();

	return 0;
}
/*
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 3 of the License, or
 * (at your option) any later version.
 *
 * Written (W) 2011-2012 Heiko Strathmann
 * Copyright (C) 2011 Berlin Institute of Technology and Max-Planck-Society
 */

#include <shogun/base/init.h>
#include <shogun/features/SparseFeatures.h>
#include <shogun/features/Subset.h>
#include <shogun/mathematics/Math.h>

using namespace shogun;

void print_message(FILE* target, const char* str)
{
	fprintf(target, "%s", str);
}

const int32_t num_vectors=6;
const int32_t dim_features=6;

/* Transposing twice must reproduce the original sparse vectors. Entries are
 * compared position-wise; the test matrix below contains no zeros, so every
 * sparse vector has exactly dim_features entries. */
void check_transposed(CSparseFeatures<int32_t>* features)
{
	CSparseFeatures<int32_t>* transposed=features->get_transposed();
	CSparseFeatures<int32_t>* double_transposed=transposed->get_transposed();

	for (index_t i=0; i<features->get_num_vectors(); ++i)
	{
		SGSparseVector<int32_t> orig_vec=
				features->get_sparse_feature_vector(i);
		SGSparseVector<int32_t> new_vec=
				double_transposed->get_sparse_feature_vector(i);

		for (index_t j=0; j<dim_features; j++)
			ASSERT(orig_vec.features[j].entry==new_vec.features[j].entry);

		/* not necessary since feature matrix is in memory. for documentation */
		features->free_sparse_feature_vector(i);
		double_transposed->free_sparse_feature_vector(i);
	}

	SG_UNREF(transposed);
	SG_UNREF(double_transposed);
}

/* Demonstrates subsets on CSparseFeatures, mirroring the dense version:
 * apply a random subset, verify access maps through it, remove it, verify
 * the full view is restored. */
void test()
{
	const int32_t num_subset_idx=CMath::random(1, num_vectors);

	/* create feature data matrix (column-major: column i is vector i) */
	SGMatrix<int32_t> data(dim_features, num_vectors);

	/* fill matrix with random data in [1, 9] -- deliberately no zeros, so
	 * the sparse representation is fully populated */
	for (index_t i=0; i<num_vectors*dim_features; ++i)
		data.matrix[i]=CMath::random(1, 9);

	/* create sparse features */
	CSparseFeatures<int32_t>* features=new CSparseFeatures<int32_t>(data);

	/* print dense feature matrix */
	SGMatrix<int32_t>::display_matrix(data.matrix, data.num_rows,
			data.num_cols, "dense feature matrix");

	/* create subset indices */
	SGVector<index_t> subset_idx(CMath::randperm(num_subset_idx),
			num_subset_idx);

	/* print subset indices */
	SGVector<index_t>::display_vector(subset_idx.vector, subset_idx.vlen,
			"subset indices");

	/* apply subset to features */
	SG_SPRINT("\n-------------------\n"
			"applying subset to features\n"
			"-------------------\n");
	features->add_subset(subset_idx);

	/* do some stuff do check and output */
	ASSERT(features->get_num_vectors()==num_subset_idx);
	SG_SPRINT("features->get_num_vectors(): %d\n",
			features->get_num_vectors());

	/* check get_Transposed method */
	SG_SPRINT("checking transpose...");
	check_transposed(features);
	SG_SPRINT("does work\n");

	for (index_t i=0; i<features->get_num_vectors(); ++i)
	{
		SGSparseVector<int32_t> vec=features->get_sparse_feature_vector(i);
		SG_SPRINT("sparse_vector[%d]=", i);

		for (index_t j=0; j<vec.num_feat_entries; ++j)
		{
			SG_SPRINT("%d", vec.features[j].entry);
			if (j<vec.num_feat_entries-1)
				SG_SPRINT(",");
		}
		SG_SPRINT("\n");

		for (index_t j=0; j<vec.num_feat_entries; ++j)
		{
			int32_t a=vec.features[j].entry;

			/* BUGFIX: the column stride of the column-major matrix is
			 * dim_features, not num_vectors; the original only worked
			 * because both constants happen to be 6 */
			index_t ind=subset_idx.vector[i]*dim_features+j;
			int32_t b=data.matrix[ind];
			ASSERT(a==b);
		}

		features->free_sparse_feature_vector(i);
	}

	/* remove features subset */
	SG_SPRINT("\n-------------------\n"
			"removing subset from features\n"
			"-------------------\n");
	features->remove_all_subsets();

	/* do some stuff do check and output */
	ASSERT(features->get_num_vectors()==num_vectors);
	SG_SPRINT("features->get_num_vectors(): %d\n",
			features->get_num_vectors());

	/* check get_Transposed method */
	SG_SPRINT("checking transpose...");
	check_transposed(features);
	SG_SPRINT("does work\n");

	for (index_t i=0; i<features->get_num_vectors(); ++i)
	{
		SGSparseVector<int32_t> vec=features->get_sparse_feature_vector(i);
		SG_SPRINT("sparse_vector[%d]=", i);

		for (index_t j=0; j<vec.num_feat_entries; ++j)
		{
			SG_SPRINT("%d", vec.features[j].entry);
			if (j<vec.num_feat_entries-1)
				SG_SPRINT(",");
		}
		SG_SPRINT("\n");

		/* BUGFIX: same stride fix as above */
		for (index_t j=0; j<vec.num_feat_entries; ++j)
			ASSERT(vec.features[j].entry==data.matrix[i*dim_features+j]);

		features->free_sparse_feature_vector(i);
	}

	SG_UNREF(features);
}

int main(int argc, char **argv)
{
	init_shogun_with_defaults();

	test();

	exit_shogun();

	return 0;
}
/* * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 3 of the License, or * (at your option) any later version. * * Written (W) 2012 Heiko Strathmann */ #include <shogun/base/init.h> #include <shogun/features/SubsetStack.h> using namespace shogun; void test() { CSubsetStack* stack=new CSubsetStack(); SG_REF(stack); /* subset indices, each set is shifted by one */ SGVector<index_t> subset_a(10); SGVector<index_t> subset_b(4); subset_a.range_fill(1); subset_b.range_fill(1); /* add and remove subsets a couple of times */ stack->add_subset(subset_a); stack->remove_subset(); stack->add_subset(subset_b); stack->remove_subset(); /* add and remove subsets a couple of times, different order */ stack->add_subset(subset_a); stack->add_subset(subset_b); stack->remove_subset(); stack->remove_subset(); /** add two subsets and check if index mapping works */ stack->add_subset(subset_a); stack->add_subset(subset_b); /* remember, offset of one for each index set */ for (index_t i=0; i<subset_b.vlen; ++i) ASSERT(stack->subset_idx_conversion(i)==i+2); stack->remove_subset(); stack->remove_subset(); /* clean up */ SG_UNREF(stack); } int main(int argc, char **argv) { init_shogun_with_defaults(); test(); exit_shogun(); return 0; }
/* * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 3 of the License, or * (at your option) any later version. * * Written (W) 2012 Heiko Strathmann */ #include <shogun/base/init.h> #include <shogun/kernel/GaussianKernel.h> #include <shogun/kernel/CustomKernel.h> #include <shogun/features/DenseFeatures.h> #include <shogun/features/DataGenerator.h> using namespace shogun; void test_custom_kernel_subsets() { /* create some data */ index_t m=10; CFeatures* features= new CDenseFeatures<float64_t>(CDataGenerator::generate_mean_data( m, 2, 1)); SG_REF(features); /* create a custom kernel */ CKernel* k=new CGaussianKernel(); k->init(features, features); CCustomKernel* l=new CCustomKernel(k); /* create a random permutation */ SGVector<index_t> subset(m); for (index_t run=0; run<100; ++run) { subset.range_fill(); subset.permute(); // subset.display_vector("permutation"); features->add_subset(subset); k->init(features, features); l->add_row_subset(subset); l->add_col_subset(subset); // k->get_kernel_matrix().display_matrix("K"); // l->get_kernel_matrix().display_matrix("L"); for (index_t i=0; i<m; ++i) { for (index_t j=0; j<m; ++j) { SG_SDEBUG("K(%d,%d)=%f, L(%d,%d)=%f\n", i, j, k->kernel(i, j), i, j, l->kernel(i, j)); ASSERT(CMath::abs(k->kernel(i, j)-l->kernel(i, j))<10E-8); } } features->remove_subset(); l->remove_row_subset(); l->remove_col_subset(); } SG_UNREF(k); SG_UNREF(l); SG_UNREF(features); } int main(int argc, char** argv) { init_shogun_with_defaults(); // sg_io->set_loglevel(MSG_DEBUG); test_custom_kernel_subsets(); exit_shogun(); return 0; }
/* * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 3 of the License, or * (at your option) any later version. * * Written (W) 2012 Heiko Strathmann */ #include <shogun/base/init.h> #include <shogun/kernel/GaussianKernel.h> #include <shogun/kernel/CustomKernel.h> #include <shogun/features/DenseFeatures.h> #include <shogun/features/DataGenerator.h> using namespace shogun; void test_custom_kernel_subsets() { /* create some data */ index_t m=10; CFeatures* features= new CDenseFeatures<float64_t>(CDataGenerator::generate_mean_data( m, 2, 1)); SG_REF(features); /* create a custom kernel */ CKernel* k=new CGaussianKernel(); k->init(features, features); CCustomKernel* l=new CCustomKernel(k); /* create a random permutation */ SGVector<index_t> subset(m); for (index_t run=0; run<100; ++run) { subset.range_fill(); subset.permute(); // subset.display_vector("permutation"); features->add_subset(subset); k->init(features, features); l->add_row_subset(subset); l->add_col_subset(subset); // k->get_kernel_matrix().display_matrix("K"); // l->get_kernel_matrix().display_matrix("L"); for (index_t i=0; i<m; ++i) { for (index_t j=0; j<m; ++j) { SG_SDEBUG("K(%d,%d)=%f, L(%d,%d)=%f\n", i, j, k->kernel(i, j), i, j, l->kernel(i, j)); ASSERT(CMath::abs(k->kernel(i, j)-l->kernel(i, j))<10E-8); } } features->remove_subset(); l->remove_row_subset(); l->remove_col_subset(); } SG_UNREF(k); SG_UNREF(l); SG_UNREF(features); } int main(int argc, char** argv) { init_shogun_with_defaults(); // sg_io->set_loglevel(MSG_DEBUG); test_custom_kernel_subsets(); exit_shogun(); return 0; }
#include <shogun/features/DenseFeatures.h> #include <shogun/kernel/GaussianKernel.h> #include <shogun/base/init.h> #include <shogun/lib/common.h> #include <shogun/io/SGIO.h> #include <stdio.h> using namespace shogun; void print_message(FILE* target, const char* str) { fprintf(target, "%s", str); } int main(int argc, char** argv) { init_shogun(&print_message); // create some data SGMatrix<float64_t> matrix(2,3); for (int32_t i=0; i<6; i++) matrix.matrix[i]=i; // create three 2-dimensional vectors // shogun will now own the matrix created CDenseFeatures<float64_t>* features= new CDenseFeatures<float64_t>(matrix); // create gaussian kernel with cache 10MB, width 0.5 CGaussianKernel* kernel = new CGaussianKernel(features, features, 10, 0.5); // print kernel matrix for (int32_t i=0; i<3; i++) { for (int32_t j=0; j<3; j++) { SG_SPRINT("%f ", kernel->kernel(i,j)); } SG_SPRINT("\n"); } // free up memory SG_UNREF(kernel); exit_shogun(); return 0; }
/*
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 3 of the License, or
 * (at your option) any later version.
 *
 * Written (W) 2012 Heiko Strathmann
 * Copyright (C) 2012 Berlin Institute of Technology and Max-Planck-Society
 */

#include <shogun/base/init.h>
#include <shogun/features/DenseFeatures.h>
#include <shogun/labels/BinaryLabels.h>
#include <shogun/kernel/LinearKernel.h>
#include <shogun/classifier/svm/LibSVM.h>
#include <shogun/evaluation/ContingencyTableEvaluation.h>

using namespace shogun;

void print_message(FILE* target, const char* str)
{
	fprintf(target, "%s", str);
}

/* Trains a LibSVM on locked data with different index subsets via
 * train_locked(), then once normally, asserting perfect training accuracy
 * each time (the two clusters are trivially linearly separable). */
void test()
{
	/* data matrix dimensions */
	index_t num_vectors=6;
	index_t num_features=2;

	/* data means -10, 10 in all components, small std deviation */
	SGVector<float64_t> mean_1(num_features);
	SGVector<float64_t> mean_2(num_features);
	SGVector<float64_t>::fill_vector(mean_1.vector, mean_1.vlen, -10.0);
	SGVector<float64_t>::fill_vector(mean_2.vector, mean_2.vlen, 10.0);
	float64_t sigma=0.5;

	SGVector<float64_t>::display_vector(mean_1.vector, mean_1.vlen, "mean 1");
	SGVector<float64_t>::display_vector(mean_2.vector, mean_2.vlen, "mean 2");

	/* fill data matrix around the means: first half of the vectors around
	 * mean_1, the rest around mean_2 (column-major, one vector per column) */
	SGMatrix<float64_t> train_dat(num_features, num_vectors);
	for (index_t i=0; i<num_vectors; ++i)
	{
		for (index_t j=0; j<num_features; ++j)
		{
			float64_t mean=i<num_vectors/2 ? mean_1.vector[0] : mean_2.vector[0];
			train_dat.matrix[i*num_features+j]=CMath::normal_random(mean, sigma);
		}
	}

	SGMatrix<float64_t>::display_matrix(train_dat.matrix, train_dat.num_rows,
			train_dat.num_cols, "training data");

	/* training features */
	CDenseFeatures<float64_t>* features=
			new CDenseFeatures<float64_t>(train_dat);
	SG_REF(features);

	/* training labels +/- 1 for each cluster */
	SGVector<float64_t> lab(num_vectors);
	for (index_t i=0; i<num_vectors; ++i)
		lab.vector[i]=i<num_vectors/2 ? -1.0 : 1.0;

	SGVector<float64_t>::display_vector(lab.vector, lab.vlen, "training labels");

	CBinaryLabels* labels=new CBinaryLabels(lab);
	SG_REF(labels);

	/* evaluation instance */
	CContingencyTableEvaluation* eval=
			new CContingencyTableEvaluation(ACCURACY);

	/* kernel */
	CKernel* kernel=new CLinearKernel();
	kernel->init(features, features);

	/* create svm via libsvm */
	float64_t svm_C=10;
	float64_t svm_eps=0.0001;
	CLibSVM* svm=new CLibSVM(svm_C, kernel, labels);
	svm->set_epsilon(svm_eps);

	/* now train a few times on different subsets on data and assert that
	 * results are correct (data linear separable) */
	svm->data_lock(labels, features);

	/* first round: train on indices 1..5 */
	SGVector<index_t> indices(5);
	indices.vector[0]=1;
	indices.vector[1]=2;
	indices.vector[2]=3;
	indices.vector[3]=4;
	indices.vector[4]=5;
	SGVector<index_t>::display_vector(indices.vector, indices.vlen,
			"training indices");
	svm->train_locked(indices);
	CBinaryLabels* output=CBinaryLabels::obtain_from_generic(svm->apply());
	SGVector<float64_t>::display_vector(output->get_labels().vector,
			output->get_num_labels(), "apply() output");
	SGVector<float64_t>::display_vector(labels->get_labels().vector,
			labels->get_labels().vlen, "training labels");

	/* evaluate once and reuse the value for printing and the assertion */
	float64_t accuracy=eval->evaluate(output, labels);
	SG_SPRINT("accuracy: %f\n", accuracy);
	ASSERT(accuracy==1);
	SG_UNREF(output);

	SG_SPRINT("\n\n");

	/* second round: train on indices 1..3 */
	indices=SGVector<index_t>(3);
	indices.vector[0]=1;
	indices.vector[1]=2;
	indices.vector[2]=3;
	SGVector<index_t>::display_vector(indices.vector, indices.vlen,
			"training indices");

	/* BUGFIX: the original displayed these indices but never retrained, so
	 * this round silently evaluated the model from the previous round */
	svm->train_locked(indices);
	output=CBinaryLabels::obtain_from_generic(svm->apply());
	SGVector<float64_t>::display_vector(output->get_labels().vector,
			output->get_num_labels(), "apply() output");
	SGVector<float64_t>::display_vector(labels->get_labels().vector,
			labels->get_labels().vlen, "training labels");
	accuracy=eval->evaluate(output, labels);
	SG_SPRINT("accuracy: %f\n", accuracy);
	ASSERT(accuracy==1);
	SG_UNREF(output);

	SG_SPRINT("\n\n");

	/* third round: train on indices 0..3 */
	indices=SGVector<index_t>(4);
	indices.range_fill();
	SGVector<index_t>::display_vector(indices.vector, indices.vlen,
			"training indices");
	svm->train_locked(indices);
	output=CBinaryLabels::obtain_from_generic(svm->apply());
	SGVector<float64_t>::display_vector(output->get_labels().vector,
			output->get_num_labels(), "apply() output");
	SGVector<float64_t>::display_vector(labels->get_labels().vector,
			labels->get_labels().vlen, "training labels");
	accuracy=eval->evaluate(output, labels);
	SG_SPRINT("accuracy: %f\n", accuracy);
	ASSERT(accuracy==1);
	SG_UNREF(output);

	/* finally, unlock the data and train normally on everything */
	SG_SPRINT("normal train\n");
	svm->data_unlock();
	svm->train();
	output=CBinaryLabels::obtain_from_generic(svm->apply());
	ASSERT(eval->evaluate(output, labels)==1);
	SGVector<float64_t>::display_vector(output->get_labels().vector,
			output->get_num_labels(), "output");
	SGVector<float64_t>::display_vector(labels->get_labels().vector,
			labels->get_labels().vlen, "training labels");
	SG_UNREF(output);

	/* clean up */
	SG_UNREF(svm);
	SG_UNREF(features);
	SG_UNREF(eval);
	SG_UNREF(labels);
}

int main(int argc, char **argv)
{
	init_shogun(&print_message, &print_message, &print_message);

	test();

	exit_shogun();

	return 0;
}
#include <shogun/features/DenseFeatures.h>
#include <shogun/kernel/DotKernel.h>
#include <shogun/base/init.h>
#include <shogun/lib/common.h>
#include <shogun/io/SGIO.h>
#include <stdio.h>

using namespace shogun;

/** Example of a custom kernel: a "reversed" linear kernel that dots each
 * left-hand vector with the right-hand vector read back-to-front. */
class CReverseLinearKernel : public CDotKernel
{
public:
	/** default constructor */
	CReverseLinearKernel() : CDotKernel(0)
	{
	}

	/** destructor */
	virtual ~CReverseLinearKernel()
	{
	}

	/** initialize kernel
	 *
	 * @param l features of left-hand side
	 * @param r features of right-hand side
	 * @return if initializing was successful
	 */
	virtual bool init(CFeatures* l, CFeatures* r)
	{
		CDotKernel::init(l, r);
		return init_normalizer();
	}

	/** load kernel init_data
	 *
	 * @param src file to load from
	 * @return if loading was successful (not supported here)
	 */
	virtual bool load_init(FILE* src)
	{
		return false;
	}

	/** save kernel init_data
	 *
	 * @param dest file to save to
	 * @return if saving was successful (not supported here)
	 */
	virtual bool save_init(FILE* dest)
	{
		return false;
	}

	/** return what type of kernel we are
	 *
	 * @return kernel type UNKNOWN (as it is not part
	 * officially part of shogun)
	 */
	virtual EKernelType get_kernel_type()
	{
		return K_UNKNOWN;
	}

	/** return the kernel's name
	 *
	 * @return name "ReverseLinear"
	 */
	inline virtual const char* get_name() const
	{
		return "ReverseLinear";
	}

protected:
	/** compute kernel function for features a and b
	 * idx_{a,b} denote the index of the feature vectors
	 * in the corresponding feature object
	 *
	 * @param idx_a index a
	 * @param idx_b index b
	 * @return computed kernel function at indices a,b
	 */
	virtual float64_t compute(int32_t idx_a, int32_t idx_b)
	{
		int32_t alen, blen;
		bool afree, bfree;

		/* fetch raw vectors; the *free flags record whether the feature
		 * object allocated temporaries that must be given back below */
		float64_t* avec=((CDenseFeatures<float64_t>*) lhs)->
				get_feature_vector(idx_a, alen, afree);
		float64_t* bvec=((CDenseFeatures<float64_t>*) rhs)->
				get_feature_vector(idx_b, blen, bfree);

		ASSERT(alen==blen);

		/* dot product with the right-hand vector reversed */
		float64_t result=0;
		for (int32_t i=0; i<alen; i++)
			result+=avec[i]*bvec[alen-i-1];

		((CDenseFeatures<float64_t>*) lhs)->
				free_feature_vector(avec, idx_a, afree);
		((CDenseFeatures<float64_t>*) rhs)->
				free_feature_vector(bvec, idx_b, bfree);

		return result;
	}
};

/* message sink handed to init_shogun() */
void print_message(FILE* target, const char* str)
{
	fprintf(target, "%s", str);
}

int main(int argc, char** argv)
{
	init_shogun(&print_message);

	// create some data
	SGMatrix<float64_t> matrix(2,3);
	for (int32_t i=0; i<6; i++)
		matrix.matrix[i]=i;

	// create three 2-dimensional vectors
	// shogun will now own the matrix created
	CDenseFeatures<float64_t>* features= new CDenseFeatures<float64_t>();
	features->set_feature_matrix(matrix);

	// create reverse linear kernel
	CReverseLinearKernel* kernel = new CReverseLinearKernel();
	kernel->init(features,features);

	// print kernel matrix
	for (int32_t i=0; i<3; i++)
	{
		for (int32_t j=0; j<3; j++)
			SG_SPRINT("%f ", kernel->kernel(i,j));
		SG_SPRINT("\n");
	}

	// free up memory
	SG_UNREF(kernel);

	exit_shogun();
	return 0;
}
/* * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 3 of the License, or * (at your option) any later version. * * Written (W) 2012 Heiko Strathmann */ #include <shogun/labels/BinaryLabels.h> using namespace shogun; void test_sigmoid_fitting() { CBinaryLabels* labels=new CBinaryLabels(10); labels->set_values(SGVector<float64_t>(labels->get_num_labels())); for (index_t i=0; i<labels->get_num_labels(); ++i) labels->set_value(i%2==0 ? 1 : -1, i); labels->get_values().display_vector("scores"); labels->scores_to_probabilities(); /* only two probabilities will be the result, repeatedly, * assert against reference implementation */ ASSERT(CMath::abs(labels->get_value(0)-0.8571428439385661)<10E-15); ASSERT(CMath::abs(labels->get_value(1)-0.14285715606143384)<10E-15); SG_UNREF(labels); } int main() { init_shogun_with_defaults(); // sg_io->set_loglevel(MSG_DEBUG); test_sigmoid_fitting(); exit_shogun(); return 0; }
/* * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 3 of the License, or * (at your option) any later version. * * Written (W) 2011 Sergey Lisitsyn * Copyright (C) 2011 Berlin Institute of Technology and Max-Planck-Society */ #include <shogun/base/init.h> #include <shogun/mathematics/Math.h> #include <shogun/lib/CoverTree.h> using namespace shogun; class TEST_COVERTREE_POINT { public: TEST_COVERTREE_POINT(int32_t index, double value) { point_index = index; point_value = value; } inline double distance(const TEST_COVERTREE_POINT& p) const { return CMath::abs(p.point_value-point_value); } inline bool operator==(const TEST_COVERTREE_POINT& p) const { return (p.point_index==point_index); } int point_index; double point_value; }; int main(int argc, char** argv) { init_shogun(); int N = 100; CoverTree<TEST_COVERTREE_POINT> coverTree(N); for (int i=0; i<N; i++) coverTree.insert(TEST_COVERTREE_POINT(i,i*i)); std::vector<TEST_COVERTREE_POINT> neighbors = coverTree.kNearestNeighbors(TEST_COVERTREE_POINT(0,0.0),N-1); exit_shogun(); return 0; }
/*
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 3 of the License, or
 * (at your option) any later version.
 *
 * Written (W) 2009 Soeren Sonnenburg
 * Copyright (C) 2009 Fraunhofer Institute FIRST and Max-Planck-Society
 */
#include <shogun/io/SGIO.h>
#include <shogun/lib/Time.h>
#include <shogun/lib/ShogunException.h>
#include <shogun/mathematics/Math.h>
#include <shogun/lib/DynInt.h>

using namespace shogun;

/* message sinks handed to init_shogun() for the three channels */
void print_message(FILE* target, const char* str)
{
	fprintf(target, "%s", str);
}

void print_warning(FILE* target, const char* str)
{
	fprintf(target, "%s", str);
}

void print_error(FILE* target, const char* str)
{
	fprintf(target, "%s", str);
}

/* Allocates and fills two parallel arrays: a[] with random 256-bit numbers
 * (each built from four random 64-bit limbs) and b[] with the generation
 * index. Filled back-to-front so b ends up in descending order, giving the
 * subsequent qsort_index() something to do. Caller owns both arrays. */
void gen_ints(uint256_t* &a, uint32_t* &b, uint32_t len)
{
	a=SG_MALLOC(uint256_t, len);
	b=SG_MALLOC(uint32_t, len);

	/* fixed seed for reproducible output */
	CMath::init_random(17);

	for (uint32_t i=0; i<len; i++)
	{
		/* each limb combines two 32-bit draws into one 64-bit value */
		uint64_t r[4]={(uint64_t) CMath::random() << 32 | CMath::random(),
			(uint64_t) CMath::random() << 32 | CMath::random(),
			(uint64_t) CMath::random() << 32 | CMath::random(),
			(uint64_t) CMath::random() << 32 | CMath::random()};

		/* uint256_t supports assignment from a 4-limb uint64_t array */
		a[len-i-1]=r;
		b[len-i-1]=i;
	}
}

const int LEN = 5*1024;

/* Demonstrates CDynInt (uint256_t): generation, key-value sorting with
 * qsort_index, hex printing, and the full set of comparison operators. */
int main()
{
	init_shogun(&print_message, &print_warning, &print_error);

	try
	{
		uint256_t* a;
		uint32_t* b;
		CTime t;
		t.io->set_loglevel(MSG_DEBUG);

		/* generate and sort LEN random 256-bit integers, timing both steps */
		SG_SPRINT("gen data..");
		t.start();
		gen_ints(a,b, LEN);
		t.cur_time_diff(true);

		SG_SPRINT("qsort..");
		t.start();
		CMath::qsort_index(a, b, LEN);
		t.cur_time_diff(true);

		/* print the ten smallest values after sorting */
		SG_SPRINT("\n\n");
		for (uint32_t i=0; i<10; i++)
		{
			SG_SPRINT("a[%d]=", i);
			a[i].print_hex();
			SG_SPRINT("\n");
		}
		SG_SPRINT("\n\n");

		/* exercise assignment, copy, swap and comparison operators */
		uint64_t val1[4]={1,2,3,4};
		uint64_t val2[4]={5,6,7,8};
		a[0]=val1;
		a[1]=val2;
		a[2]=a[0];
		CMath::swap(a[0],a[1]);

		printf("a[0]==a[1] %d\n", (int) (a[0] == a[1]));
		printf("a[0]<a[1] %d\n", (int) (a[0] < a[1]));
		printf("a[0]<=a[1] %d\n", (int) (a[0] <= a[1]));
		printf("a[0]>a[1] %d\n", (int) (a[0] > a[1]));
		printf("a[0]>=a[1] %d\n", (int) (a[0] >= a[1]));
		printf("a[0]==a[0] %d\n", (int) (a[0] == a[0]));
		printf("a[0]<a[0] %d\n", (int) (a[0] < a[0]));
		printf("a[0]<=a[0] %d\n", (int) (a[0] <= a[0]));
		printf("a[0]>a[0] %d\n", (int) (a[0] > a[0]));
		printf("a[0]>=a[0] %d\n", (int) (a[0] >= a[0]));

		/* print the first ten entries again after the manual edits */
		SG_SPRINT("\n\n");
		for (uint32_t i=0; i<10 ; i++)
		{
			SG_SPRINT("a[%d]=", i);
			a[i].print_hex();
			printf("\n");
		}

		SG_FREE(a);
		SG_FREE(b);
	}
	catch(ShogunException & sh)
	{
		SG_SPRINT("%s",sh.get_exception_string());
	}

	exit_shogun();

	return 0;
}
#include <shogun/lib/FibonacciHeap.h> #include <stdio.h> using namespace shogun; int main(int argc, char** argv) { init_shogun(); double v[8] = {0.0,0.1,0.2,0.3,0.4,0.5,0.6,0.7}; int k[8] = {0,1,2,3,4,5,6,7}; CFibonacciHeap* heap = new CFibonacciHeap(8); for (int i=0; i<8; i++) heap->insert(k[i],v[i]); int k_extract; double v_extract; for (int i=0; i<8; i++) { k_extract = heap->extract_min(v_extract); if (v[k_extract]!=v_extract) { printf("Fibonacci heap goes wrong.\n"); } } delete heap; exit_shogun(); return 0; }
#include <shogun/base/init.h> #include <shogun/lib/common.h> #include <shogun/lib/GCArray.h> #include <shogun/kernel/Kernel.h> #include <shogun/kernel/GaussianKernel.h> #include <stdio.h> using namespace shogun; const int l=10; int main(int argc, char** argv) { init_shogun(); // create array of kernels CGCArray<CKernel*> kernels(l); // fill array with kernels for (int i=0; i<l; i++) kernels.set(new CGaussianKernel(10, 1.0), i); // print kernels for (int i=0; i<l; i++) { CKernel* kernel = kernels.get(i); printf("kernels[%d]=%p\n", i, kernel); SG_UNREF(kernel); } exit_shogun(); return 0; }
#include <shogun/lib/Hash.h> #include <stdio.h> using namespace shogun; int main(int argc, char** argv) { init_shogun(); uint8_t array[4]={0,1,2,3}; printf("hash(0)=%0x\n", CHash::MurmurHash3(&array[0], 1, 0xDEADBEAF)); printf("hash(1)=%0x\n", CHash::MurmurHash3(&array[1], 1, 0xDEADBEAF)); printf("hash(2)=%0x\n", CHash::MurmurHash3(&array[0], 2, 0xDEADBEAF)); printf("hash(3)=%0x\n", CHash::MurmurHash3(&array[0], 4, 0xDEADBEAF)); uint32_t h = 0xDEADBEAF; uint32_t carry = 0; CHash::IncrementalMurmurHash3(&h, &carry, &array[0], 1); printf("inc_hash(0)=%0x\n", h); CHash::IncrementalMurmurHash3(&h, &carry, &array[1], 1); printf("inc_hash(1)=%0x\n", h); CHash::IncrementalMurmurHash3(&h, &carry, &array[2], 1); printf("inc_hash(2)=%0x\n", h); CHash::IncrementalMurmurHash3(&h, &carry, &array[3], 1); printf("inc_hash(3)=%0x\n", h); h = CHash::FinalizeIncrementalMurmurHash3(h, carry, 4); printf("Final inc_hash(3)=%0x\n", h); exit_shogun(); return 0; }
#include <shogun/base/init.h> #include <shogun/lib/common.h> #include <shogun/lib/memory.h> #include <shogun/lib/IndirectObject.h> #include <shogun/mathematics/Math.h> #include <shogun/base/SGObject.h> #include <stdio.h> using namespace shogun; const int l=10; int main(int argc, char** argv) { init_shogun(); // create array a int32_t* a=SG_MALLOC(int32_t, l); for (int i=0; i<l; i++) a[i]=l-i; typedef CIndirectObject<int32_t, int32_t**> INDIRECT; // create array of indirect objects pointing to array a INDIRECT::set_array(&a); INDIRECT* x = SG_MALLOC(INDIRECT, l); INDIRECT::init_slice(x, l); printf("created array a and indirect object array x pointing to a.\n\n"); for (int i=0; i<l; i++) printf("a[%d]=%d x[%d]=%d\n", i, a[i], i, int32_t(x[i])); //sort the array CMath::qsort(x, l); printf("\n\nvoila! sorted indirect object array x, keeping a const.\n\n"); for (int i=0; i<l; i++) printf("a[%d]=%d x[%d]=%d\n", i, a[i], i, int32_t(x[i])); SG_FREE(x); SG_FREE(a); exit_shogun(); return 0; }
#include <shogun/lib/Map.h> #include <shogun/io/SGIO.h> #include <shogun/base/init.h> #include <shogun/lib/common.h> using namespace shogun; #define SIZE 6 void print_message(FILE* target, const char* str) { fprintf(target, "%s", str); } int main(int argc, char** argv) { init_shogun(&print_message, &print_message, &print_message); const char* v[SIZE] = {"Russia", "England", "Germany", "USA", "France", "Spain"}; CMap<int32_t, const char*>* map = new CMap<int32_t, const char*>(SIZE/2, SIZE/2); for (int i=0; i<SIZE; i++) map->add(i, v[i]); map->remove(0); //SG_SPRINT("Num of elements: %d\n", map->get_num_elements()); for (int i=0; i<SIZE; i++) { if (map->contains(i)); //SG_SPRINT("key %d contains in map with index %d and data=%s\n", // i, map->index_of(i), map->get_element(i)); } SG_UNREF(map); exit_shogun(); return 0; }
#include <shogun/lib/Set.h>
#include <shogun/io/SGIO.h>
#include <shogun/base/init.h>
#include <shogun/lib/common.h>

using namespace shogun;

#define SIZE 8

void print_message(FILE* target, const char* str)
{
	fprintf(target, "%s", str);
}

/* Demonstrates CSet: insertion (with duplicates), removal and
 * containment queries on a set of doubles. */
int main(int argc, char** argv)
{
	init_shogun(&print_message, &print_message, &print_message);

	/* note the duplicates (0.2, 0.5) — a set stores each value once */
	double v[SIZE] = {0.0,0.1,0.2,0.2,0.3,0.4,0.5,0.5};

	CSet<double>* set = new CSet<double>(SIZE/2, SIZE/2);

	for (int i=0; i<SIZE; i++)
		set->add(v[i]);

	set->remove(0.2);

	//SG_SPRINT("Num of elements: %d\n", set->get_num_elements());

	for (int i=0; i<SIZE; i++)
	{
		/* fixed: the original had a stray ';' directly after the if
		 * condition, turning the check into an empty statement */
		if (set->contains(v[i]))
		{
			//SG_SPRINT("%lg contains in set with index %d\n", v[i], set->index_of(v[i]));
		}
	}

	SG_UNREF(set);
	exit_shogun();
	return 0;
}
/*
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 3 of the License, or
 * (at your option) any later version.
 *
 * Written (W) 2011 Sergey Lisitsyn
 * Copyright (C) 2011 Berlin Institute of Technology and Max-Planck-Society
 */

#include <shogun/base/init.h>
#include <shogun/lib/config.h>
#include <shogun/mathematics/arpack.h>

using namespace shogun;

/* Demonstrates the ARPACK wrapper arpack_dsxupd, computing a few
 * eigenpairs of a symmetric matrix in several modes ("LM", "BE", "SM";
 * modes 1 and 3 — presumably regular and shift-invert, see arpack.h).
 * Returns -1 if any ARPACK call reports a non-zero status. */
int main(int argc, char** argv)
{
	init_shogun();

	int ret = 0;
#ifdef HAVE_ARPACK
	int N = 100;
	int nev = 2;
	double* double_matrix = new double[N*N];
	double* rhs_double_diag = new double[N];
	double* double_eigenvalues = new double[nev];
	double* double_eigenvectors = new double[nev*N];

	/* symmetric test matrix A[i][j] = i^2 + j^2; rhs diagonal all ones */
	for (int i=0; i<N; i++)
	{
		rhs_double_diag[i] = 1.0;
		for (int j=0; j<N; j++)
		{
			double_matrix[i*N+j] = i*i+j*j;
		}
	}

	int status = 0;

	arpack_dsxupd(double_matrix, NULL, false, N, 2, "LM", false, 1, false, false, 0.0, 0.0,
	              double_eigenvalues, double_eigenvectors, status);
	if (status!=0)
		ret = -1;

	if (ret==0)
	{
		arpack_dsxupd(double_matrix, NULL, false, N, 2, "BE", false, 3, false, false, 1.0, 0.0,
		              double_eigenvalues, double_eigenvectors, status);
		if (status!=0)
			ret = -1;
	}

	if (ret==0)
	{
		arpack_dsxupd(double_matrix, rhs_double_diag, true, N, 2, "SM", false, 3, false, false, 0.0, 0.0,
		              double_eigenvalues, double_eigenvectors, status);
		if (status!=0)
			ret = -1;
	}

	/* fixed: the original returned -1 directly on failure, leaking all
	 * four heap arrays and skipping exit_shogun() */
	delete[] double_eigenvalues;
	delete[] double_eigenvectors;
	delete[] double_matrix;
	delete[] rhs_double_diag;
#endif // HAVE_ARPACK
	exit_shogun();
	return ret;
}
/*
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 3 of the License, or
 * (at your option) any later version.
 *
 * Written (W) 2011 Heiko Strathmann
 * Copyright (C) 2011 Berlin Institute of Technology and Max-Planck-Society
 */

#include <shogun/base/init.h>
#include <shogun/mathematics/Statistics.h>
#include <shogun/mathematics/Math.h>

using namespace shogun;

/* Computes and prints a 95% confidence interval for the sample mean of
 * the values 1.0, 2.0, ..., 10.0 via CStatistics. */
void test()
{
	/* sample data: the range 1..10 */
	SGVector<float64_t> data(10);
	SGVector<float64_t>::range_fill_vector(data.vector, data.vlen, 1.0);

	/* error probability 0.05 => 95% confidence */
	float64_t err_p=0.05;
	float64_t conf_lower;
	float64_t conf_upper;

	float64_t sample_mean=
			CStatistics::confidence_intervals_mean(data, err_p, conf_lower, conf_upper);

	SG_SPRINT("sample mean: %f. True mean lies in [%f,%f] with %f%%\n",
			sample_mean, conf_lower, conf_upper, 100*(1-err_p));
}

int main(int argc, char **argv)
{
	init_shogun_with_defaults();

	test();

	exit_shogun();
	return 0;
}
/*
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 3 of the License, or
 * (at your option) any later version.
 *
 * Written (W) 2011 Sergey Lisitsyn
 * Written (W) 2012 Heiko Strathmann
 * Copyright (C) 2011 Berlin Institute of Technology and Max-Planck-Society
 */

#include <shogun/base/init.h>
#include <shogun/lib/config.h>
#include <shogun/lib/SGMatrix.h>
#include <shogun/mathematics/Math.h>
#include <shogun/mathematics/lapack.h>

using namespace shogun;

/* helper: |a-b| <= eps */
bool is_equal(float64_t a, float64_t b, float64_t eps)
{
	return CMath::abs(a-b)<=eps;
}

/* Eigendecomposition of the symmetric tridiagonal 3x3 matrix
 * [0 1 0; 1 0 1; 0 1 0], checked against its known eigenpairs. */
void test_ev()
{
	SGMatrix<float64_t> A(3,3);
	A(0,0)=0;
	A(0,1)=1;
	A(0,2)=0;
	A(1,0)=1;
	A(1,1)=0;
	A(1,2)=1;
	/* fixed: the original wrote A(1,0)=0 here, which clobbered the
	 * A(1,0)=1 entry above and left A(2,0) uninitialized */
	A(2,0)=0;
	A(2,1)=1;
	A(2,2)=0;

	SGVector<float64_t> ev=SGMatrix<float64_t>::compute_eigenvectors(A);

	SGMatrix<float64_t>::display_matrix(A.matrix, A.num_rows, A.num_cols, "A");
	SGVector<float64_t>::display_vector(ev.vector, ev.vlen, "eigenvalues");

	float64_t sqrt22=CMath::sqrt(2.0)/2.0;
	float64_t eps=10E-16;

	/* check for correct eigenvectors */
	ASSERT(is_equal(A(0,0), 0.5, eps));
	ASSERT(is_equal(A(0,1), -sqrt22, eps));
	ASSERT(is_equal(A(0,2), 0.5, eps));
	ASSERT(is_equal(A(1,0), -sqrt22, eps));
	ASSERT(is_equal(A(1,1), 0, eps));
	ASSERT(is_equal(A(1,2), sqrt22, eps));
	ASSERT(is_equal(A(2,0), 0.5, eps));
	ASSERT(is_equal(A(2,1), sqrt22, eps));
	ASSERT(is_equal(A(2,2), 0.5, eps));

	/* check for correct eigenvalues */
	ASSERT(is_equal(ev[0], -sqrt22*2, eps));
	ASSERT(is_equal(ev[1], 0, eps));
	ASSERT(is_equal(ev[2], sqrt22*2, eps));
}

/* Multiplies a 10x4 range-filled matrix A with (scaled/transposed)
 * identity matrices and checks the products element-wise. */
void test_matrix_multiply()
{
	index_t n=10;
	SGMatrix<float64_t> I=SGMatrix<float64_t>::create_identity_matrix(n,1.0);

	index_t m=4;
	SGMatrix<float64_t> A(n, m);
	SGVector<float64_t>::range_fill_vector(A.matrix, m*n);
	SGMatrix<float64_t>::display_matrix(I, "I");
	SGMatrix<float64_t>::transpose_matrix(A.matrix, A.num_rows, A.num_cols);
	SGMatrix<float64_t>::display_matrix(A, "A transposed");
	SGMatrix<float64_t>::transpose_matrix(A.matrix, A.num_rows, A.num_cols);
	SGMatrix<float64_t>::display_matrix(A, "A");

	SG_SPRINT("multiply A by I and check result\n");
	SGMatrix<float64_t> A2=SGMatrix<float64_t>::matrix_multiply(I, A);
	ASSERT(A2.num_rows==A.num_rows);
	ASSERT(A2.num_cols==A.num_cols);
	SGMatrix<float64_t>::display_matrix(A2);
	for (index_t i=0; i<A2.num_rows; ++i)
	{
		for (index_t j=0; j<A2.num_cols; ++j)
			ASSERT(A(i,j)==A2(i,j));
	}

	SG_SPRINT("multiply A by transposed I and check result\n");
	SGMatrix<float64_t> A3=SGMatrix<float64_t>::matrix_multiply(I, A, true);
	ASSERT(A3.num_rows==I.num_rows);
	ASSERT(A3.num_cols==A.num_cols);
	SGMatrix<float64_t>::display_matrix(A3);
	for (index_t i=0; i<A2.num_rows; ++i)
	{
		for (index_t j=0; j<A2.num_cols; ++j)
			ASSERT(A(i,j)==A3(i,j));
	}

	SG_SPRINT("multiply transposed A by I and check result\n");
	SGMatrix<float64_t> A4=SGMatrix<float64_t>::matrix_multiply(A, I, true, false);
	ASSERT(A4.num_rows==A.num_cols);
	ASSERT(A4.num_cols==I.num_cols);
	SGMatrix<float64_t>::display_matrix(A4);
	for (index_t i=0; i<A.num_rows; ++i)
	{
		for (index_t j=0; j<A.num_cols; ++j)
			ASSERT(A(i,j)==A4(j,i));
	}

	SG_SPRINT("multiply A by scaled I and check result\n");
	SGMatrix<float64_t> A5=SGMatrix<float64_t>::matrix_multiply(I, A, false, false, n);
	ASSERT(A5.num_rows==I.num_rows);
	ASSERT(A5.num_cols==A.num_cols);
	SGMatrix<float64_t>::display_matrix(A5);
	for (index_t i=0; i<A2.num_rows; ++i)
	{
		for (index_t j=0; j<A2.num_cols; ++j)
			ASSERT(n*A(i,j)==A5(i,j));
	}
}

/* Exercises the raw LAPACK wrappers: generalized symmetric eigenproblem
 * (DSYGVX), QR factorization (DGEQRF+DORGQR), SVD (DGESVD) and symmetric
 * eigendecomposition (DSYEV). Each call must report status 0. */
void test_lapack()
{
	// size of square matrix
	int N = 100;

	// square matrix
	double* double_matrix = new double[N*N];
	// for storing eigenpairs
	double* double_eigenvalues = new double[N];
	double* double_eigenvectors = new double[N*N];
	// for SVD
	double* double_U = new double[N*N];
	double* double_s = new double[N];
	double* double_Vt = new double[N*N];
	// status (should be zero)
	int status;

	// DSYGVX: diagonally dominant symmetric matrix, eigenvalues 1..3
	for (int i=0; i<N; i++)
	{
		for (int j=0; j<N; j++)
			double_matrix[i*N+j] = ((double)(i-j))/(i+j+1);
		double_matrix[i*N+i] += 100;
	}
	status = 0;
	wrap_dsygvx(1,'V','U',N,double_matrix,N,double_matrix,N,1,3,double_eigenvalues,double_eigenvectors,&status);
	if (status!=0)
		SG_SERROR("DSYGVX/SSYGVX failed with code %d\n",status);
	delete[] double_eigenvectors;

	// DGEQRF+DORGQR
	status = 0;
	double* double_tau = new double[N];
	wrap_dgeqrf(N,N,double_matrix,N,double_tau,&status);
	wrap_dorgqr(N,N,N,double_matrix,N,double_tau,&status);
	if (status!=0)
		SG_SERROR("DGEQRF/DORGQR failed with code %d\n",status);
	delete[] double_tau;

	// DGESVD
	for (int i=0; i<N; i++)
	{
		for (int j=0; j<N; j++)
			double_matrix[i*N+j] = i*i+j*j;
	}
	status = 0;
	wrap_dgesvd('A','A',N,N,double_matrix,N,double_s,double_U,N,double_Vt,N,&status);
	if (status!=0)
		SG_SERROR("DGESVD failed with code %d\n",status);
	delete[] double_s;
	delete[] double_U;
	delete[] double_Vt;

	// DSYEV
	status = 0;
	wrap_dsyev('V','U',N,double_matrix,N,double_eigenvalues,&status);
	if (status!=0)
		SG_SERROR("DSYEV failed with code %d\n",status);
	delete[] double_eigenvalues;
	delete[] double_matrix;
}

int main(int argc, char** argv)
{
	init_shogun_with_defaults();

#ifdef HAVE_LAPACK
	SG_SPRINT("checking lapack\n");
	test_lapack();

	SG_SPRINT("compute_eigenvectors\n");
	test_ev();

	SG_SPRINT("matrix_multiply\n");
	test_matrix_multiply();
#endif

	exit_shogun();
	return 0;
}
/*
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 3 of the License, or
 * (at your option) any later version.
 *
 * Written (W) 2011-2012 Heiko Strathmann
 * Copyright (C) 2011 Berlin Institute of Technology and Max-Planck-Society
 */

#include <shogun/base/init.h>
#include <shogun/modelselection/ModelSelectionParameters.h>
#include <shogun/modelselection/ParameterCombination.h>
#include <shogun/kernel/GaussianKernel.h>
#include <shogun/labels/BinaryLabels.h>
#include <shogun/features/DenseFeatures.h>
#include <shogun/classifier/svm/LibSVM.h>

using namespace shogun;

void print_message(FILE* target, const char* str)
{
	fprintf(target, "%s", str);
}

/* Builds a model-selection parameter tree: SVM parameter C1 over an
 * exponential range, plus a Gaussian kernel whose width is also varied
 * over an exponential range. Caller takes ownership of the returned root. */
CModelSelectionParameters* create_param_tree()
{
	CModelSelectionParameters* root=new CModelSelectionParameters();

	CModelSelectionParameters* c=new CModelSelectionParameters("C1");
	root->append_child(c);
	c->build_values(1.0, 2.0, R_EXP);

	CGaussianKernel* gaussian_kernel=new CGaussianKernel();

	/* print all parameter available for modelselection
	 * Dont worry if yours is not included, simply write to the mailing list */
	gaussian_kernel->print_modsel_params();

	CModelSelectionParameters* param_gaussian_kernel=
			new CModelSelectionParameters("kernel", gaussian_kernel);
	root->append_child(param_gaussian_kernel);

	CModelSelectionParameters* param_gaussian_kernel_width=
			new CModelSelectionParameters("width");
	param_gaussian_kernel_width->build_values(1.0, 2.0, R_EXP);
	param_gaussian_kernel->append_child(param_gaussian_kernel_width);

	return root;
}

/* Trains and evaluates a LibSVM once per parameter combination, applying
 * each combination to the SVM's parameters before training. Consumes the
 * references held by the elements of 'combinations'. */
void apply_parameter_tree(CDynamicObjectArray* combinations)
{
	/* create some data */
	SGMatrix<float64_t> matrix(2,3);
	for (index_t i=0; i<6; i++)
		matrix.matrix[i]=i;

	/* create three 2-dimensional vectors
	 * to avoid deleting these, REF now and UNREF when finished */
	CDenseFeatures<float64_t>* features=new CDenseFeatures<float64_t>(matrix);
	SG_REF(features);

	/* create three labels, will be handed to svm and automaticall deleted */
	CBinaryLabels* labels=new CBinaryLabels(3);
	SG_REF(labels);
	labels->set_label(0, -1);
	labels->set_label(1, +1);
	labels->set_label(2, -1);

	/* create libsvm with C=10 and train */
	CLibSVM* svm=new CLibSVM();
	SG_REF(svm);
	svm->set_labels(labels);

	for (index_t i=0; i<combinations->get_num_elements(); ++i)
	{
		SG_SPRINT("applying:\n");
		CParameterCombination* current_combination=(CParameterCombination*)
				combinations->get_element(i);
		current_combination->print_tree();

		/* write the combination's values into the SVM's parameter table */
		Parameter* current_parameters=svm->m_parameters;
		current_combination->apply_to_modsel_parameter(current_parameters);
		SG_UNREF(current_combination);

		/* get kernel to set features, get_kernel SG_REF's the kernel */
		CKernel* kernel=svm->get_kernel();
		kernel->init(features, features);

		svm->train();

		/* classify on training examples
		 * (note: inner loop variable shadows the outer 'i') */
		for (index_t i=0; i<3; i++)
			SG_SPRINT("output[%d]=%f\n", i, svm->apply_one(i));

		/* unset features and SG_UNREF kernel */
		kernel->cleanup();
		SG_UNREF(kernel);

		SG_SPRINT("----------------\n\n");
	}

	/* free up memory */
	SG_UNREF(features);
	SG_UNREF(labels);
	SG_UNREF(svm);
}

int main(int argc, char **argv)
{
	init_shogun(&print_message, &print_message, &print_message);

	/* create example tree */
	CModelSelectionParameters* tree=create_param_tree();
	tree->print_tree();
	SG_SPRINT("----------------------------------\n");

	/* build combinations of parameter trees */
	CDynamicObjectArray* combinations=tree->get_combinations();

	apply_parameter_tree(combinations);

	/* print and directly delete them all */
	for (index_t i=0; i<combinations->get_num_elements(); ++i)
	{
		CParameterCombination* combination=(CParameterCombination*)
				combinations->get_element(i);
		SG_UNREF(combination);
	}

	SG_UNREF(combinations);

	/* delete example tree (after processing of combinations because CSGObject
	 * (namely the kernel) of the tree is SG_UNREF'ed (and not REF'ed anywhere
	 * else) */
	SG_UNREF(tree);

	exit_shogun();
	return 0;
}
/*
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 3 of the License, or
 * (at your option) any later version.
 *
 * Written (W) 2011 Heiko Strathmann
 * Copyright (C) 2011 Berlin Institute of Technology and Max-Planck-Society
 */

#include <shogun/base/init.h>
#include <shogun/evaluation/CrossValidation.h>
#include <shogun/evaluation/ContingencyTableEvaluation.h>
#include <shogun/evaluation/StratifiedCrossValidationSplitting.h>
#include <shogun/modelselection/GridSearchModelSelection.h>
#include <shogun/modelselection/ModelSelectionParameters.h>
#include <shogun/modelselection/ParameterCombination.h>
#include <shogun/labels/BinaryLabels.h>
#include <shogun/features/DenseFeatures.h>
#include <shogun/classifier/svm/LibSVM.h>
#include <shogun/kernel/GaussianKernel.h>
#include <shogun/kernel/PowerKernel.h>
#include <shogun/distance/MinkowskiMetric.h>

using namespace shogun;

void print_message(FILE* target, const char* str)
{
	fprintf(target, "%s", str);
}

/* Builds a parameter tree over SVM costs C1/C2 and two alternative
 * kernels: a Gaussian kernel (width varied) and a power kernel (degree
 * and Minkowski-metric k varied). Caller owns the returned root. */
CModelSelectionParameters* create_param_tree()
{
	CModelSelectionParameters* root=new CModelSelectionParameters();

	CModelSelectionParameters* c1=new CModelSelectionParameters("C1");
	root->append_child(c1);
	c1->build_values(-1.0, 1.0, R_EXP);

	CModelSelectionParameters* c2=new CModelSelectionParameters("C2");
	root->append_child(c2);
	c2->build_values(-1.0, 1.0, R_EXP);

	CGaussianKernel* gaussian_kernel=new CGaussianKernel();

	/* print all parameter available for modelselection
	 * Dont worry if yours is not included, simply write to the mailing list */
	gaussian_kernel->print_modsel_params();

	CModelSelectionParameters* param_gaussian_kernel=
			new CModelSelectionParameters("kernel", gaussian_kernel);
	CModelSelectionParameters* gaussian_kernel_width=
			new CModelSelectionParameters("width");
	gaussian_kernel_width->build_values(-1.0, 1.0, R_EXP, 1.0, 2.0);
	param_gaussian_kernel->append_child(gaussian_kernel_width);
	root->append_child(param_gaussian_kernel);

	CPowerKernel* power_kernel=new CPowerKernel();

	/* print all parameter available for modelselection
	 * Dont worry if yours is not included, simply write to the mailing list */
	power_kernel->print_modsel_params();

	CModelSelectionParameters* param_power_kernel=
			new CModelSelectionParameters("kernel", power_kernel);
	root->append_child(param_power_kernel);

	CModelSelectionParameters* param_power_kernel_degree=
			new CModelSelectionParameters("degree");
	param_power_kernel_degree->build_values(1.0, 2.0, R_LINEAR);
	param_power_kernel->append_child(param_power_kernel_degree);

	CMinkowskiMetric* m_metric=new CMinkowskiMetric(10);

	/* print all parameter available for modelselection
	 * Dont worry if yours is not included, simply write to the mailing list */
	m_metric->print_modsel_params();

	CModelSelectionParameters* param_power_kernel_metric1=
			new CModelSelectionParameters("distance", m_metric);
	param_power_kernel->append_child(param_power_kernel_metric1);

	CModelSelectionParameters* param_power_kernel_metric1_k=
			new CModelSelectionParameters("k");
	param_power_kernel_metric1_k->build_values(1.0, 2.0, R_LINEAR);
	param_power_kernel_metric1->append_child(param_power_kernel_metric1_k);

	return root;
}

int main(int argc, char **argv)
{
	init_shogun(&print_message, &print_message, &print_message);

	int32_t num_subsets=3;
	int32_t num_vectors=20;
	int32_t dim_vectors=3;

	/* create some data and labels */
	SGMatrix<float64_t> matrix(dim_vectors, num_vectors);
	CBinaryLabels* labels=new CBinaryLabels(num_vectors);

	for (int32_t i=0; i<num_vectors*dim_vectors; i++)
		matrix.matrix[i]=CMath::randn_double();

	/* create num_vectors 3-dimensional feature vectors */
	CDenseFeatures<float64_t>* features=new CDenseFeatures<float64_t>(matrix);

	/* create labels, two classes (alternating +1/-1) */
	for (index_t i=0; i<num_vectors; ++i)
		labels->set_label(i, i%2==0 ? 1 : -1);

	/* create svm */
	CLibSVM* classifier=new CLibSVM();

	/* splitting strategy */
	CStratifiedCrossValidationSplitting* splitting_strategy=
			new CStratifiedCrossValidationSplitting(labels, num_subsets);

	/* accuracy evaluation */
	CContingencyTableEvaluation* evaluation_criterium=
			new CContingencyTableEvaluation(ACCURACY);

	/* cross validation class for evaluation in model selection */
	CCrossValidation* cross=new CCrossValidation(classifier, features, labels,
			splitting_strategy, evaluation_criterium);
	cross->set_num_runs(1);
	/* note that this is not strictly necessary since it is done automatically */
	cross->set_autolock(true);

	/* print all parameter available for modelselection
	 * Dont worry if yours is not included, simply write to the mailing list */
	classifier->print_modsel_params();

	/* model parameter selection, deletion is handled by modsel class (SG_UNREF) */
	CModelSelectionParameters* param_tree=create_param_tree();
	param_tree->print_tree();

	/* handles all of the above structures in memory */
	CGridSearchModelSelection* grid_search=new CGridSearchModelSelection(
			param_tree, cross);

	bool print_state=true;
	CParameterCombination* best_combination=grid_search->select_model(
			print_state);
	best_combination->print_tree();
	best_combination->apply_to_machine(classifier);

	/* larger number of runs to have tighter confidence intervals */
	cross->set_num_runs(10);
	cross->set_conf_int_alpha(0.01);
	CCrossValidationResult* result=(CCrossValidationResult*)cross->evaluate();

	if (result->get_result_type() != CROSSVALIDATION_RESULT)
		SG_SERROR("Evaluation result is not of type CCrossValidationResult!");

	SG_SPRINT("result: ");
	result->print_result();

	/* now again but unlocked
	 * NOTE(review): autolock is set to true here despite the "unlocked"
	 * comment — possibly intended to be false; confirm against upstream */
	SG_UNREF(best_combination);
	cross->set_autolock(true);
	best_combination=grid_search->select_model(print_state);
	best_combination->apply_to_machine(classifier);
	SG_UNREF(result);
	result=(CCrossValidationResult*)cross->evaluate();

	if (result->get_result_type() != CROSSVALIDATION_RESULT)
		SG_SERROR("Evaluation result is not of type CCrossValidationResult!");

	SG_SPRINT("result (unlocked): ");

	/* clean up destroy result parameter */
	SG_UNREF(result);
	SG_UNREF(best_combination);
	SG_UNREF(grid_search);

	exit_shogun();
	return 0;
}
/*
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 3 of the License, or
 * (at your option) any later version.
 *
 * Written (W) 2012 Heiko Strathmann
 * Copyright (C) 2012 Berlin Institute of Technology and Max-Planck-Society
 */

#include <shogun/base/init.h>
#include <shogun/features/DenseFeatures.h>
#include <shogun/features/Labels.h>
#include <shogun/kernel/GaussianKernel.h>
#include <shogun/kernel/PolyKernel.h>
#include <shogun/regression/KernelRidgeRegression.h>
#include <shogun/evaluation/CrossValidation.h>
#include <shogun/evaluation/CrossValidationSplitting.h>
#include <shogun/evaluation/MeanSquaredError.h>
#include <shogun/modelselection/ModelSelectionParameters.h>
#include <shogun/modelselection/GridSearchModelSelection.h>
#include <shogun/modelselection/ParameterCombination.h>

using namespace shogun;

void print_message(FILE* target, const char* str)
{
	fprintf(target, "%s", str);
}

/* Builds a parameter tree for kernel ridge regression: regularizer tau
 * over an exponential range, plus two alternative kernels — Gaussian
 * (width varied) and polynomial (degree varied). Caller owns the root. */
CModelSelectionParameters* create_param_tree()
{
	CModelSelectionParameters* root=new CModelSelectionParameters();

	CModelSelectionParameters* tau=new CModelSelectionParameters("tau");
	root->append_child(tau);
	tau->build_values(-1.0, 1.0, R_EXP);

	CGaussianKernel* gaussian_kernel=new CGaussianKernel();

	/* print all parameter available for modelselection
	 * Dont worry if yours is not included, simply write to the mailing list */
	gaussian_kernel->print_modsel_params();

	CModelSelectionParameters* param_gaussian_kernel=
			new CModelSelectionParameters("kernel", gaussian_kernel);
	CModelSelectionParameters* gaussian_kernel_width=
			new CModelSelectionParameters("width");
	gaussian_kernel_width->build_values(5.0, 8.0, R_EXP, 1.0, 2.0);
	param_gaussian_kernel->append_child(gaussian_kernel_width);
	root->append_child(param_gaussian_kernel);

	CPolyKernel* poly_kernel=new CPolyKernel();

	/* print all parameter available for modelselection
	 * Dont worry if yours is not included, simply write to the mailing list */
	poly_kernel->print_modsel_params();

	CModelSelectionParameters* param_poly_kernel=
			new CModelSelectionParameters("kernel", poly_kernel);
	root->append_child(param_poly_kernel);

	CModelSelectionParameters* param_poly_kernel_degree=
			new CModelSelectionParameters("degree");
	param_poly_kernel_degree->build_values(2, 3, R_LINEAR);
	param_poly_kernel->append_child(param_poly_kernel_degree);

	return root;
}

/* Runs grid-search model selection for kernel ridge regression on a
 * noisy linear 1-D dataset, then cross-validates the best combination. */
void test_cross_validation()
{
	/* data matrix dimensions */
	index_t num_vectors=30;
	index_t num_features=1;

	/* training label data */
	SGVector<float64_t> lab(num_vectors);

	/* fill data matrix and labels */
	SGMatrix<float64_t> train_dat(num_features, num_vectors);
	CMath::range_fill_vector(train_dat.matrix, num_vectors);
	for (index_t i=0; i<num_vectors; ++i)
	{
		/* labels are linear plus noise */
		lab.vector[i]=i+CMath::normal_random(0, 1.0);
	}

	/* training features */
	CDenseFeatures<float64_t>* features=
			new CDenseFeatures<float64_t>(train_dat);
	SG_REF(features);

	/* training labels */
	CLabels* labels=new CLabels(lab);

	/* kernel ridge regression, only set labels for now, rest does not matter */
	CKernelRidgeRegression* krr=new CKernelRidgeRegression(0, NULL, labels);

	/* evaluation criterion */
	CMeanSquaredError* eval_crit=
			new CMeanSquaredError();

	/* splitting strategy */
	index_t n_folds=5;
	CCrossValidationSplitting* splitting=
			new CCrossValidationSplitting(labels, n_folds);

	/* cross validation instance, 10 runs, 95% confidence interval */
	CCrossValidation* cross=new CCrossValidation(krr, features, labels,
			splitting, eval_crit);
	cross->set_num_runs(3);
	cross->set_conf_int_alpha(0.05);

	/* print all parameter available for modelselection
	 * Dont worry if yours is not included, simply write to the mailing list */
	krr->print_modsel_params();

	/* model parameter selection, deletion is handled by modsel class (SG_UNREF) */
	CModelSelectionParameters* param_tree=create_param_tree();
	param_tree->print_tree();

	/* handles all of the above structures in memory */
	CGridSearchModelSelection* grid_search=new CGridSearchModelSelection(
			param_tree, cross);

	/* print current combination */
	bool print_state=true;
	CParameterCombination* best_combination=grid_search->select_model(
			print_state);
	SG_SPRINT("best parameter(s):\n");
	best_combination->print_tree();

	best_combination->apply_to_machine(krr);

	/* larger number of runs to have tighter confidence intervals */
	cross->set_num_runs(10);
	cross->set_conf_int_alpha(0.01);
	CCrossValidationResult* result=(CCrossValidationResult*)cross->evaluate();

	if (result->get_result_type() != CROSSVALIDATION_RESULT)
		SG_SERROR("Evaluation result is not of type CCrossValidationResult!");

	SG_SPRINT("result: ");
	result->print_result();

	/* clean up */
	SG_UNREF(features);
	SG_UNREF(best_combination);
	SG_UNREF(result);
	SG_UNREF(grid_search);
}

int main(int argc, char **argv)
{
	init_shogun(&print_message, &print_message, &print_message);

	test_cross_validation();

	exit_shogun();
	return 0;
}
/*
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 3 of the License, or
 * (at your option) any later version.
 *
 * Written (W) 2011 Heiko Strathmann
 * Copyright (C) 2011 Berlin Institute of Technology and Max-Planck-Society
 */

#include <shogun/base/init.h>
#include <shogun/lib/config.h>
#include <shogun/evaluation/CrossValidation.h>
#include <shogun/evaluation/ContingencyTableEvaluation.h>
#include <shogun/evaluation/StratifiedCrossValidationSplitting.h>
#include <shogun/modelselection/GridSearchModelSelection.h>
#include <shogun/modelselection/ModelSelectionParameters.h>
#include <shogun/modelselection/ParameterCombination.h>
#include <shogun/labels/BinaryLabels.h>
#include <shogun/features/DenseFeatures.h>
#include <shogun/classifier/svm/LibLinear.h>

using namespace shogun;

void print_message(FILE* target, const char* str)
{
	fprintf(target, "%s", str);
}

/* Builds a parameter tree varying only the LibLinear costs C1 and C2,
 * each over an exponential range. Caller owns the returned root. */
CModelSelectionParameters* create_param_tree()
{
	CModelSelectionParameters* root=new CModelSelectionParameters();

	CModelSelectionParameters* c1=new CModelSelectionParameters("C1");
	root->append_child(c1);
	c1->build_values(-2.0, 2.0, R_EXP);

	CModelSelectionParameters* c2=new CModelSelectionParameters("C2");
	root->append_child(c2);
	c2->build_values(-2.0, 2.0, R_EXP);

	return root;
}

int main(int argc, char **argv)
{
	init_shogun(&print_message, &print_message, &print_message);

#ifdef HAVE_LAPACK
	int32_t num_subsets=5;
	int32_t num_vectors=11;

	/* create some data */
	SGMatrix<float64_t> matrix(2, num_vectors);
	for (int32_t i=0; i<num_vectors*2; i++)
		matrix.matrix[i]=i;

	/* create num_vectors 2-dimensional feature vectors */
	CDenseFeatures<float64_t>* features=new CDenseFeatures<float64_t>(matrix);

	/* create binary labels, alternating +1/-1 */
	CBinaryLabels* labels=new CBinaryLabels(num_vectors);
	for (index_t i=0; i<num_vectors; ++i)
		labels->set_label(i, i%2==0 ? 1 : -1);

	/* create linear classifier (use -s 2 option to avoid warnings) */
	CLibLinear* classifier=new CLibLinear(L2R_L2LOSS_SVC);

	/* splitting strategy */
	CStratifiedCrossValidationSplitting* splitting_strategy=
			new CStratifiedCrossValidationSplitting(labels, num_subsets);

	/* accuracy evaluation */
	CContingencyTableEvaluation* evaluation_criterium=
			new CContingencyTableEvaluation(ACCURACY);

	/* cross validation class for evaluation in model selection */
	CCrossValidation* cross=new CCrossValidation(classifier, features, labels,
			splitting_strategy, evaluation_criterium);

	/* print all parameter available for modelselection
	 * Dont worry if yours is not included, simply write to the mailing list */
	classifier->print_modsel_params();

	/* model parameter selection, deletion is handled by modsel class (SG_UNREF) */
	CModelSelectionParameters* param_tree=create_param_tree();
	param_tree->print_tree();

	/* handles all of the above structures in memory */
	CGridSearchModelSelection* grid_search=new CGridSearchModelSelection(
			param_tree, cross);

	/* set autolocking to false to get rid of warnings */
	cross->set_autolock(false);

	CParameterCombination* best_combination=grid_search->select_model();
	SG_SPRINT("best parameter(s):\n");
	best_combination->print_tree();

	best_combination->apply_to_machine(classifier);

	CCrossValidationResult* result=(CCrossValidationResult*)cross->evaluate();

	if (result->get_result_type() != CROSSVALIDATION_RESULT)
		SG_SERROR("Evaluation result is not of type CCrossValidationResult!");

	result->print_result();

	/* clean up */
	SG_UNREF(result);
	SG_UNREF(best_combination);
	SG_UNREF(grid_search);
#endif // HAVE_LAPACK

	exit_shogun();
	return 0;
}
/*
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 3 of the License, or
 * (at your option) any later version.
 *
 * Written (W) 2012 Heiko Strathmann
 */

#include <shogun/base/init.h>
#include <shogun/evaluation/CrossValidation.h>
#include <shogun/evaluation/ContingencyTableEvaluation.h>
#include <shogun/evaluation/StratifiedCrossValidationSplitting.h>
#include <shogun/modelselection/GridSearchModelSelection.h>
#include <shogun/modelselection/ModelSelectionParameters.h>
#include <shogun/modelselection/ParameterCombination.h>
#include <shogun/features/Labels.h>
#include <shogun/features/DenseFeatures.h>
/* added: CCombinedFeatures and CMath are used below but were only
 * reachable through transitive includes */
#include <shogun/features/CombinedFeatures.h>
#include <shogun/mathematics/Math.h>
#include <shogun/classifier/mkl/MKLClassification.h>
#include <shogun/classifier/svm/LibSVM.h>
#include <shogun/kernel/GaussianKernel.h>
#include <shogun/kernel/CombinedKernel.h>

using namespace shogun;

void print_message(FILE* target, const char* str)
{
	fprintf(target, "%s", str);
}

/* Builds a parameter tree over MKL costs C1/C2 and two alternative
 * combined kernels, each consisting of three Gaussian kernels with
 * different widths. Caller owns the returned root. */
CModelSelectionParameters* create_param_tree()
{
	CModelSelectionParameters* root=new CModelSelectionParameters();

	CModelSelectionParameters* c1=new CModelSelectionParameters("C1");
	root->append_child(c1);
	c1->build_values(-1.0, 1.0, R_EXP);

	CModelSelectionParameters* c2=new CModelSelectionParameters("C2");
	root->append_child(c2);
	c2->build_values(-1.0, 1.0, R_EXP);

	CCombinedKernel* kernel1=new CCombinedKernel();
	kernel1->append_kernel(new CGaussianKernel(10, 2));
	kernel1->append_kernel(new CGaussianKernel(10, 3));
	kernel1->append_kernel(new CGaussianKernel(10, 4));

	CModelSelectionParameters* param_kernel1=
			new CModelSelectionParameters("kernel", kernel1);
	root->append_child(param_kernel1);

	CCombinedKernel* kernel2=new CCombinedKernel();
	kernel2->append_kernel(new CGaussianKernel(10, 20));
	kernel2->append_kernel(new CGaussianKernel(10, 30));
	kernel2->append_kernel(new CGaussianKernel(10, 40));

	CModelSelectionParameters* param_kernel2=
			new CModelSelectionParameters("kernel", kernel2);
	root->append_child(param_kernel2);

	return root;
}

int main(int argc, char **argv)
{
	init_shogun(&print_message, &print_message, &print_message);
	sg_io->set_loglevel(MSG_INFO);

	int32_t num_subsets=3;
	int32_t num_vectors=20;
	int32_t dim_vectors=3;

	/* create some data and labels */
	float64_t* matrix=SG_MALLOC(float64_t, num_vectors*dim_vectors);
	CLabels* labels=new CLabels(num_vectors);

	for (int32_t i=0; i<num_vectors*dim_vectors; i++)
		matrix[i]=CMath::randn_double();

	/* create num_vectors 3-dimensional feature vectors */
	CDenseFeatures<float64_t>* features=new CDenseFeatures<float64_t> ();
	features->set_feature_matrix(matrix, dim_vectors, num_vectors);

	/* create combined features (same features for each sub-kernel) */
	CCombinedFeatures* comb_features=new CCombinedFeatures();
	comb_features->append_feature_obj(features);
	comb_features->append_feature_obj(features);
	comb_features->append_feature_obj(features);

	/* create labels, two classes */
	for (index_t i=0; i<num_vectors; ++i)
		labels->set_label(i, i%2==0 ? 1 : -1);

	/* works */
//	/* create svm */
//	CMKLClassification* classifier=new CMKLClassification(new CLibSVM());
//	classifier->set_interleaved_optimization_enabled(false);

	/* create svm */
	CMKLClassification* classifier=new CMKLClassification();

	// both fail:
	//classifier->set_interleaved_optimization_enabled(false);
	classifier->set_interleaved_optimization_enabled(true);

	/* splitting strategy */
	CStratifiedCrossValidationSplitting* splitting_strategy=
			new CStratifiedCrossValidationSplitting(labels, num_subsets);

	/* accuracy evaluation */
	CContingencyTableEvaluation* evaluation_criterium=
			new CContingencyTableEvaluation(ACCURACY);

	/* cross validation class for evaluation in model selection */
	CCrossValidation* cross=new CCrossValidation(classifier, comb_features,
			labels, splitting_strategy, evaluation_criterium);
	cross->set_num_runs(1);

	/* print all parameter available for modelselection
	 * Dont worry if yours is not included, simply write to the mailing list */
	classifier->print_modsel_params();

	/* model parameter selection, deletion is handled by modsel class (SG_UNREF) */
	CModelSelectionParameters* param_tree=create_param_tree();
	param_tree->print_tree();

	/* handles all of the above structures in memory */
	CGridSearchModelSelection* grid_search=new CGridSearchModelSelection(
			param_tree, cross);

	bool print_state=true;
	CParameterCombination* best_combination=grid_search->select_model(
			print_state);
	SG_SPRINT("best parameter(s):\n");
	best_combination->print_tree();

	best_combination->apply_to_machine(classifier);

	/* larger number of runs to have tighter confidence intervals */
	cross->set_num_runs(10);
	cross->set_conf_int_alpha(0.01);
	CCrossValidationResult* result=(CCrossValidationResult*)cross->evaluate();

	/* fixed: was SG_ERROR, which sibling examples use only inside
	 * CSGObject methods — SG_SERROR is the static-context variant */
	if (result->get_result_type() != CROSSVALIDATION_RESULT)
		SG_SERROR("Evaluation result is not of type CCrossValidationResult!");

	SG_SPRINT("result: ");
	/* fixed: was 'result.print_result()' — result is a pointer */
	result->print_result();

	/* clean up destroy result parameter */
	SG_UNREF(result); /* fixed: result was never unref'ed (leak) */
	SG_UNREF(best_combination);
	SG_UNREF(grid_search);

	exit_shogun();
	return 0;
}
/*
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 3 of the License, or
 * (at your option) any later version.
 *
 * Written (W) 2011 Heiko Strathmann
 * Copyright (C) 2011 Berlin Institute of Technology and Max-Planck-Society
 */

#include <shogun/base/init.h>
#include <shogun/evaluation/CrossValidation.h>
#include <shogun/evaluation/ContingencyTableEvaluation.h>
#include <shogun/evaluation/StratifiedCrossValidationSplitting.h>
#include <shogun/modelselection/GridSearchModelSelection.h>
#include <shogun/modelselection/ModelSelectionParameters.h>
#include <shogun/modelselection/ParameterCombination.h>
#include <shogun/labels/BinaryLabels.h>
#include <shogun/features/StringFeatures.h>
#include <shogun/classifier/svm/LibSVM.h>
#include <shogun/kernel/string/DistantSegmentsKernel.h>

using namespace shogun;

/* message callback passed to init_shogun(): forwards all shogun output
 * unchanged to the given FILE stream */
void print_message(FILE* target, const char* str)
{
	fprintf(target, "%s", str);
}

/* Builds the model-selection tree for the grid search: two scalar SVM
 * parameters C1 and C2 (each on an exponential grid 1.0..2.0) and a
 * CDistantSegmentsKernel whose "delta" and "theta" parameters vary on a
 * linear grid 1..2. */
CModelSelectionParameters* create_param_tree()
{
	CModelSelectionParameters* root=new CModelSelectionParameters();

	CModelSelectionParameters* c1=new CModelSelectionParameters("C1");
	root->append_child(c1);
	c1->build_values(1.0, 2.0, R_EXP);

	CModelSelectionParameters* c2=new CModelSelectionParameters("C2");
	root->append_child(c2);
	c2->build_values(1.0, 2.0, R_EXP);

	CDistantSegmentsKernel* ds_kernel=new CDistantSegmentsKernel();

	/* print all parameter available for modelselection
	 * Dont worry if yours is not included, simply write to the mailing list */
	ds_kernel->print_modsel_params();

	CModelSelectionParameters* param_ds_kernel=
			new CModelSelectionParameters("kernel", ds_kernel);
	root->append_child(param_ds_kernel);

	CModelSelectionParameters* ds_kernel_delta=
			new CModelSelectionParameters("delta");
	ds_kernel_delta->build_values(1, 2, R_LINEAR);
	param_ds_kernel->append_child(ds_kernel_delta);

	CModelSelectionParameters* ds_kernel_theta=
			new CModelSelectionParameters("theta");
	ds_kernel_theta->build_values(1, 2, R_LINEAR);
	param_ds_kernel->append_child(ds_kernel_theta);

	return root;
}

/* Generates random uppercase strings, cross-validates a LibSVM with a
 * distant-segments string kernel over the tree above, then re-evaluates
 * the best parameter combination with more runs. */
int main(int argc, char **argv)
{
	init_shogun(&print_message, &print_message, &print_message);

	index_t num_strings=10;
	index_t max_string_length=20;
	index_t min_string_length=max_string_length/2;
	index_t num_subsets=num_strings/3;

	/* generate num_strings random strings of random length */
	SGStringList<char> strings(num_strings, max_string_length);

	for (index_t i=0; i<num_strings; ++i)
	{
		index_t len=CMath::random(min_string_length, max_string_length);
		SGString<char> current(len);
		SG_SPRINT("string %i: \"", i);

		/* fill with random uppercase letters (ASCII) */
		for (index_t j=0; j<len; ++j)
		{
			current.string[j]=(char)CMath::random('A', 'Z');

			/* echo each character as a NUL-terminated one-char string */
			char* string=new char[2];
			string[0]=current.string[j];
			string[1]='\0';
			SG_SPRINT("%s", string);
			delete[] string;
		}
		SG_SPRINT("\"\n");
		strings.strings[i]=current;
	}

	/* create string features over the ALPHANUM alphabet */
	CStringFeatures<char>* features=new CStringFeatures<char>(strings,
			ALPHANUM);

	/* create labels, two classes */
	CBinaryLabels* labels=new CBinaryLabels(num_strings);
	for (index_t i=0; i<num_strings; ++i)
		labels->set_label(i, i%2==0 ? 1 : -1);

	/* create svm classifier */
	CLibSVM* classifier=new CLibSVM();

	/* splitting strategy */
	CStratifiedCrossValidationSplitting* splitting_strategy=
			new CStratifiedCrossValidationSplitting(labels, num_subsets);

	/* accuracy evaluation */
	CContingencyTableEvaluation* evaluation_criterium=
			new CContingencyTableEvaluation(ACCURACY);

	/* cross validation class for evaluation in model selection */
	CCrossValidation* cross=new CCrossValidation(classifier, features, labels,
			splitting_strategy, evaluation_criterium);
	cross->set_num_runs(2);

	/* print all parameter available for modelselection
	 * Dont worry if yours is not included, simply write to the mailing list */
	classifier->print_modsel_params();

	/* model parameter selection, deletion is handled by modsel class (SG_UNREF) */
	CModelSelectionParameters* param_tree=create_param_tree();
	param_tree->print_tree();

	/* handles all of the above structures in memory */
	CGridSearchModelSelection* grid_search=new CGridSearchModelSelection(
			param_tree, cross);

	bool print_state=true;
	CParameterCombination* best_combination=grid_search->select_model(
			print_state);
	SG_SPRINT("best parameter(s):\n");
	best_combination->print_tree();

	best_combination->apply_to_machine(classifier);

	/* larger number of runs to have tighter confidence intervals */
	cross->set_num_runs(10);
	cross->set_conf_int_alpha(0.01);
	/* lock data before repeated evaluation */
	classifier->data_lock(labels, features);
	CCrossValidationResult* result=(CCrossValidationResult*)cross->evaluate();

	if (result->get_result_type() != CROSSVALIDATION_RESULT)
		SG_SERROR("Evaluation result is not of type CCrossValidationResult!");

	SG_SPRINT("result: ");
	result->print_result();

	/* clean up */
	SG_UNREF(result);
	SG_UNREF(best_combination);
	SG_UNREF(grid_search);

	exit_shogun();

	return 0;
}
/*
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 3 of the License, or
 * (at your option) any later version.
 *
 * Written (W) 2011-2012 Heiko Strathmann
 * Copyright (C) 2011 Berlin Institute of Technology and Max-Planck-Society
 */

#include <shogun/base/init.h>
#include <shogun/modelselection/ModelSelectionParameters.h>
#include <shogun/modelselection/ParameterCombination.h>
#include <shogun/kernel/GaussianKernel.h>
#include <shogun/kernel/PowerKernel.h>
#include <shogun/distance/MinkowskiMetric.h>
#include <shogun/kernel/string/DistantSegmentsKernel.h>

using namespace shogun;

/* message callback passed to init_shogun(): forwards all shogun output
 * unchanged to the given FILE stream */
void print_message(FILE* target, const char* str)
{
	fprintf(target, "%s", str);
}

/* Full-featured tree: scalar "C", a power kernel with degree and a nested
 * Minkowski metric (with its own "k" range), a Gaussian kernel with width,
 * and a distant-segments kernel with delta/theta ranges. */
CModelSelectionParameters* build_complex_example_tree()
{
	CModelSelectionParameters* root=new CModelSelectionParameters();

	CModelSelectionParameters* c=new CModelSelectionParameters("C");
	root->append_child(c);
	c->build_values(1.0, 1.0, R_EXP);

	CPowerKernel* power_kernel=new CPowerKernel();

	/* print all parameter available for modelselection
	 * Dont worry if yours is not included, simply write to the mailing list */
	power_kernel->print_modsel_params();

	CModelSelectionParameters* param_power_kernel=
			new CModelSelectionParameters("kernel", power_kernel);
	root->append_child(param_power_kernel);

	CModelSelectionParameters* param_power_kernel_degree=
			new CModelSelectionParameters("degree");
	param_power_kernel_degree->build_values(1.0, 1.0, R_EXP);
	param_power_kernel->append_child(param_power_kernel_degree);

	CMinkowskiMetric* m_metric=new CMinkowskiMetric(10);

	CModelSelectionParameters* param_power_kernel_metric1=
			new CModelSelectionParameters("distance", m_metric);
	param_power_kernel->append_child(param_power_kernel_metric1);

	CModelSelectionParameters* param_power_kernel_metric1_k=
			new CModelSelectionParameters("k");
	param_power_kernel_metric1_k->build_values(1.0, 12.0, R_LINEAR);
	param_power_kernel_metric1->append_child(param_power_kernel_metric1_k);

	CGaussianKernel* gaussian_kernel=new CGaussianKernel();

	/* print all parameter available for modelselection
	 * Dont worry if yours is not included, simply write to the mailing list */
	gaussian_kernel->print_modsel_params();

	CModelSelectionParameters* param_gaussian_kernel=
			new CModelSelectionParameters("kernel", gaussian_kernel);
	root->append_child(param_gaussian_kernel);

	CModelSelectionParameters* param_gaussian_kernel_width=
			new CModelSelectionParameters("width");
	param_gaussian_kernel_width->build_values(1.0, 2.0, R_EXP);
	param_gaussian_kernel->append_child(param_gaussian_kernel_width);

	CDistantSegmentsKernel* ds_kernel=new CDistantSegmentsKernel();

	/* print all parameter available for modelselection
	 * Dont worry if yours is not included, simply write to the mailing list */
	ds_kernel->print_modsel_params();

	CModelSelectionParameters* param_ds_kernel=
			new CModelSelectionParameters("kernel", ds_kernel);
	root->append_child(param_ds_kernel);

	CModelSelectionParameters* param_ds_kernel_delta=
			new CModelSelectionParameters("delta");
	param_ds_kernel_delta->build_values(1.0, 2.0, R_EXP);
	param_ds_kernel->append_child(param_ds_kernel_delta);

	CModelSelectionParameters* param_ds_kernel_theta=
			new CModelSelectionParameters("theta");
	param_ds_kernel_theta->build_values(1.0, 2.0, R_EXP);
	param_ds_kernel->append_child(param_ds_kernel_theta);

	return root;
}

/* Degenerate case: a lone SGObject node with no children. */
CModelSelectionParameters* build_sgobject_no_childs_tree()
{
	CPowerKernel* power_kernel=new CPowerKernel();
	CModelSelectionParameters* param_power_kernel=
			new CModelSelectionParameters("kernel", power_kernel);

	return param_power_kernel;
}

/* Degenerate case: a single value leaf. */
CModelSelectionParameters* build_leaf_node_tree()
{
	CModelSelectionParameters* c_1=new CModelSelectionParameters("C1");
	c_1->build_values(1.0, 1.0, R_EXP);

	return c_1;
}

/* Degenerate case: an empty root. */
CModelSelectionParameters* build_root_no_childs_tree()
{
	return new CModelSelectionParameters();
}

/* Root with two scalar value children. */
CModelSelectionParameters* build_root_value_childs_tree()
{
	CModelSelectionParameters* root=new CModelSelectionParameters();

	CModelSelectionParameters* c_1=new CModelSelectionParameters("C1");
	root->append_child(c_1);
	c_1->build_values(1.0, 1.0, R_EXP);

	CModelSelectionParameters* c_2=new CModelSelectionParameters("C2");
	root->append_child(c_2);
	c_2->build_values(1.0, 1.0, R_EXP);

	return root;
}

/* Root with a single SGObject child. */
CModelSelectionParameters* build_root_sg_object_child_tree()
{
	CModelSelectionParameters* root=new CModelSelectionParameters();

	CPowerKernel* power_kernel=new CPowerKernel();

	/* print all parameter available for modelselection
	 * Dont worry if yours is not included, simply write to the mailing list */
	power_kernel->print_modsel_params();

	CModelSelectionParameters* param_power_kernel=
			new CModelSelectionParameters("kernel", power_kernel);
	root->append_child(param_power_kernel);

	return root;
}

/* Root with one SGObject child and one value child. */
CModelSelectionParameters* build_root_sg_object_child_value_child_tree()
{
	CModelSelectionParameters* root=new CModelSelectionParameters();

	CPowerKernel* power_kernel=new CPowerKernel();

	/* print all parameter available for modelselection
	 * Dont worry if yours is not included, simply write to the mailing list */
	power_kernel->print_modsel_params();

	CModelSelectionParameters* param_power_kernel=
			new CModelSelectionParameters("kernel", power_kernel);

	CModelSelectionParameters* c=new CModelSelectionParameters("C");
	root->append_child(c);
	c->build_values(1.0, 1.0, R_EXP);

	root->append_child(param_power_kernel);

	return root;
}

/* Prints the tree, enumerates all parameter combinations it generates,
 * prints each one and releases all references. */
void test_get_combinations(CModelSelectionParameters* tree)
{
	tree->print_tree();

	/* build combinations of parameter trees */
	CDynamicObjectArray* combinations=tree->get_combinations();

	/* print and directly delete them all */
	SG_SPRINT("----------------------------------\n");
	for (index_t i=0; i<combinations->get_num_elements(); ++i)
	{
		CParameterCombination* combination=(CParameterCombination*)
				combinations->get_element(i);
		combination->print_tree();
		SG_UNREF(combination);
	}

	SG_UNREF(combinations);
}

/* Runs the combination enumeration on every tree shape above, from the
 * degenerate cases up to the complex example. */
int main(int argc, char **argv)
{
	init_shogun(&print_message, &print_message, &print_message);

	CModelSelectionParameters* tree;

	tree=build_root_no_childs_tree();
	SG_REF(tree);
	test_get_combinations(tree);
	SG_UNREF(tree);

	tree=build_leaf_node_tree();
	SG_REF(tree);
	test_get_combinations(tree);
	SG_UNREF(tree);

	tree=build_sgobject_no_childs_tree();
	SG_REF(tree);
	test_get_combinations(tree);
	SG_UNREF(tree);

	tree=build_root_value_childs_tree();
	SG_REF(tree);
	test_get_combinations(tree);
	SG_UNREF(tree);

	tree=build_root_sg_object_child_tree();
	SG_REF(tree);
	test_get_combinations(tree);
	SG_UNREF(tree);

	tree=build_root_sg_object_child_value_child_tree();
	SG_REF(tree);
	test_get_combinations(tree);
	SG_UNREF(tree);

	tree=build_complex_example_tree();
	SG_REF(tree);
	test_get_combinations(tree);
	SG_UNREF(tree);

	exit_shogun();

	return 0;
}
/* * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 3 of the License, or * (at your option) any later version. * * Written (W) 2011-2012 Heiko Strathmann * Copyright (C) 2011 Berlin Institute of Technology and Max-Planck-Society */ #include <shogun/base/init.h> #include <shogun/modelselection/ParameterCombination.h> #include <shogun/lib/DynamicObjectArray.h> #include <stdlib.h> using namespace std; using namespace shogun; void print_message(FILE* target, const char* str) { fprintf(target, "%s", str); } void test_parameter_set_multiplication() { SG_SPRINT("\ntest_parameter_set_multiplication()\n"); DynArray<Parameter*> set1; DynArray<Parameter*> set2; SGVector<float64_t> param_vector(8); SGVector<float64_t>::range_fill_vector(param_vector.vector, param_vector.vlen); Parameter parameters[4]; parameters[0].add(¶m_vector.vector[0], "0"); parameters[0].add(¶m_vector.vector[1], "1"); set1.append_element(¶meters[0]); parameters[1].add(¶m_vector.vector[2], "2"); parameters[1].add(¶m_vector.vector[3], "3"); set1.append_element(¶meters[1]); parameters[2].add(¶m_vector.vector[4], "4"); parameters[2].add(¶m_vector.vector[5], "5"); set2.append_element(¶meters[2]); parameters[3].add(¶m_vector.vector[6], "6"); parameters[3].add(¶m_vector.vector[7], "7"); set2.append_element(¶meters[3]); DynArray<Parameter*>* result=new DynArray<Parameter*>();//CParameterCombination::parameter_set_multiplication(set1, set2); for (index_t i=0; i<result->get_num_elements(); ++i) { Parameter* p=result->get_element(i); for (index_t j=0; j<p->get_num_parameters(); ++j) SG_SPRINT("%s ", p->get_parameter(j)->m_name); SG_SPRINT("\n"); delete p; } delete result; } void test_leaf_sets_multiplication() { SG_SPRINT("\ntest_leaf_sets_multiplication()\n"); SGVector<float64_t> param_vector(6); SGVector<float64_t>::range_fill_vector(param_vector.vector, param_vector.vlen); 
CDynamicObjectArray sets; CParameterCombination* new_root=new CParameterCombination(); SG_REF(new_root); CDynamicObjectArray* current=new CDynamicObjectArray(); sets.append_element(current); Parameter* p=new Parameter(); p->add(¶m_vector.vector[0], "0"); CParameterCombination* pc=new CParameterCombination(p); current->append_element(pc); p=new Parameter(); p->add(¶m_vector.vector[1], "1"); pc=new CParameterCombination(p); current->append_element(pc); /* first case: one element */ CDynamicObjectArray* result_simple= CParameterCombination::leaf_sets_multiplication(sets, new_root); SG_SPRINT("one set\n"); for (index_t i=0; i<result_simple->get_num_elements(); ++i) { CParameterCombination* current=(CParameterCombination*) result_simple->get_element(i); current->print_tree(); SG_UNREF(current); } SG_UNREF(result_simple); /* now more elements are created */ current=new CDynamicObjectArray(); sets.append_element(current); p=new Parameter(); p->add(¶m_vector.vector[2], "2"); pc=new CParameterCombination(p); current->append_element(pc); p=new Parameter(); p->add(¶m_vector.vector[3], "3"); pc=new CParameterCombination(p); current->append_element(pc); current=new CDynamicObjectArray(); sets.append_element(current); p=new Parameter(); p->add(¶m_vector.vector[4], "4"); pc=new CParameterCombination(p); current->append_element(pc); p=new Parameter(); p->add(¶m_vector.vector[5], "5"); pc=new CParameterCombination(p); current->append_element(pc); /* second case: more element */ CDynamicObjectArray* result_complex= CParameterCombination::leaf_sets_multiplication(sets, new_root); SG_SPRINT("more sets\n"); for (index_t i=0; i<result_complex->get_num_elements(); ++i) { CParameterCombination* current=(CParameterCombination*) result_complex->get_element(i); current->print_tree(); SG_UNREF(current); } SG_UNREF(result_complex); SG_UNREF(new_root); } int main(int argc, char **argv) { init_shogun(&print_message, &print_message, &print_message); test_parameter_set_multiplication(); 
test_leaf_sets_multiplication(); exit_shogun(); return 0; }
/*
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 3 of the License, or
 * (at your option) any later version.
 *
 * Written (W) 2011-2012 Heiko Strathmann
 * Copyright (C) 2011 Berlin Institute of Technology and Max-Planck-Society
 */

#include <shogun/base/init.h>
#include <shogun/modelselection/ModelSelectionParameters.h>
#include <shogun/modelselection/ParameterCombination.h>
#include <shogun/classifier/svm/LibSVM.h>
#include <shogun/kernel/GaussianKernel.h>
#include <shogun/kernel/PowerKernel.h>
#include <shogun/kernel/LinearKernel.h>
#include <shogun/distance/MinkowskiMetric.h>
#include <shogun/distance/EuclideanDistance.h>
#include <shogun/kernel/string/DistantSegmentsKernel.h>
#include <shogun/regression/gp/ExactInferenceMethod.h>
#include <shogun/regression/gp/GaussianLikelihood.h>
#include <shogun/regression/gp/ZeroMean.h>
#include <shogun/regression/GaussianProcessRegression.h>
#include <shogun/modelselection/ModelSelectionParameters.h>
#include <shogun/modelselection/ParameterCombination.h>

using namespace shogun;

/* Prints the given parameter tree, enumerates every parameter
 * combination it produces, prints each and releases all references. */
void test_tree(CModelSelectionParameters* tree)
{
	SG_SPRINT("\n\ntree to process:\n");
	tree->print_tree();

	/* build combinations of parameter trees */
	CDynamicObjectArray* combinations=tree->get_combinations();

	/* print and directly delete them all */
	SG_SPRINT("----------------------------------\n");
	for (index_t i=0; i<combinations->get_num_elements(); ++i)
	{
		CParameterCombination* combination=
				(CParameterCombination*)combinations->get_element(i);
		combination->print_tree();
		SG_UNREF(combination);
	}

	SG_UNREF(combinations);
}

/* Tree 1: scalar "C" plus three alternative kernels (power kernel with
 * degree and nested Minkowski metric, Gaussian kernel with width,
 * distant-segments kernel with delta/theta). */
CModelSelectionParameters* create_param_tree_1()
{
	CModelSelectionParameters* root=new CModelSelectionParameters();

	CModelSelectionParameters* c=new CModelSelectionParameters("C");
	root->append_child(c);
	c->build_values(1, 2, R_EXP);

	CPowerKernel* power_kernel=new CPowerKernel();

	/* print all parameter available for modelselection
	 * Dont worry if yours is not included, simply write to the mailing list */
	power_kernel->print_modsel_params();

	CModelSelectionParameters* param_power_kernel=new CModelSelectionParameters(
			"kernel", power_kernel);
	root->append_child(param_power_kernel);

	CModelSelectionParameters* param_power_kernel_degree=
			new CModelSelectionParameters("degree");
	param_power_kernel_degree->build_values(1, 2, R_EXP);
	param_power_kernel->append_child(param_power_kernel_degree);

	CMinkowskiMetric* m_metric=new CMinkowskiMetric(10);

	/* print all parameter available for modelselection
	 * Dont worry if yours is not included, simply write to the mailing list */
	m_metric->print_modsel_params();

	CModelSelectionParameters* param_power_kernel_metrikernel_width_sigma_param=
			new CModelSelectionParameters("distance", m_metric);
	param_power_kernel->append_child(
			param_power_kernel_metrikernel_width_sigma_param);

	CModelSelectionParameters* param_power_kernel_metrikernel_width_sigma_param_k=
			new CModelSelectionParameters("k");
	param_power_kernel_metrikernel_width_sigma_param_k->build_values(1, 2,
			R_LINEAR);
	param_power_kernel_metrikernel_width_sigma_param->append_child(
			param_power_kernel_metrikernel_width_sigma_param_k);

	CGaussianKernel* gaussian_kernel=new CGaussianKernel();

	/* print all parameter available for modelselection
	 * Dont worry if yours is not included, simply write to the mailing list */
	gaussian_kernel->print_modsel_params();

	CModelSelectionParameters* param_gaussian_kernel=
			new CModelSelectionParameters("kernel", gaussian_kernel);
	root->append_child(param_gaussian_kernel);

	CModelSelectionParameters* param_gaussian_kernel_width=
			new CModelSelectionParameters("width");
	param_gaussian_kernel_width->build_values(1, 2, R_EXP);
	param_gaussian_kernel->append_child(param_gaussian_kernel_width);

	CDistantSegmentsKernel* ds_kernel=new CDistantSegmentsKernel();

	/* print all parameter available for modelselection
	 * Dont worry if yours is not included, simply write to the mailing list */
	ds_kernel->print_modsel_params();

	CModelSelectionParameters* param_ds_kernel=new CModelSelectionParameters(
			"kernel", ds_kernel);
	root->append_child(param_ds_kernel);

	CModelSelectionParameters* param_ds_kernel_delta=
			new CModelSelectionParameters("delta");
	param_ds_kernel_delta->build_values(1, 2, R_EXP);
	param_ds_kernel->append_child(param_ds_kernel_delta);

	CModelSelectionParameters* param_ds_kernel_theta=
			new CModelSelectionParameters("theta");
	param_ds_kernel_theta->build_values(1, 2, R_EXP);
	param_ds_kernel->append_child(param_ds_kernel_theta);

	return root;
}

/* Tree 2: power kernel with a Minkowski metric child (k on a linear
 * grid) and a distant-segments kernel with no value children. */
CModelSelectionParameters* create_param_tree_2()
{
	CModelSelectionParameters* root=new CModelSelectionParameters();

	CPowerKernel* power_kernel=new CPowerKernel();
	CModelSelectionParameters* param_power_kernel=new CModelSelectionParameters(
			"kernel", power_kernel);
	root->append_child(param_power_kernel);

	CMinkowskiMetric* metric=new CMinkowskiMetric();
	CModelSelectionParameters* param_power_kernel_metric=
			new CModelSelectionParameters("distance", metric);
	param_power_kernel->append_child(param_power_kernel_metric);

	CModelSelectionParameters* param_metric_k=new CModelSelectionParameters(
			"k");
	param_metric_k->build_values(2, 3, R_LINEAR);
	param_power_kernel_metric->append_child(param_metric_k);

	CDistantSegmentsKernel* ds_kernel=new CDistantSegmentsKernel();
	CModelSelectionParameters* param_ds_kernel=new CModelSelectionParameters(
			"kernel", ds_kernel);
	root->append_child(param_ds_kernel);

	return root;
}

/* Tree 3: power kernel with two alternative distance children
 * (continues past this point). */
CModelSelectionParameters* create_param_tree_3()
{
	CModelSelectionParameters* root=new CModelSelectionParameters();

	CPowerKernel* power_kernel=new CPowerKernel();
	CModelSelectionParameters* param_power_kernel=new CModelSelectionParameters(
			"kernel", power_kernel);
	root->append_child(param_power_kernel);

	CMinkowskiMetric* metric=new CMinkowskiMetric();
	CModelSelectionParameters* param_power_kernel_metric=
			new CModelSelectionParameters("distance", metric);
	param_power_kernel->append_child(param_power_kernel_metric);

	CEuclideanDistance* euclidean=new CEuclideanDistance();
	CModelSelectionParameters* param_power_kernel_distance=
			new CModelSelectionParameters("distance", euclidean);
	param_power_kernel->append_child(param_power_kernel_distance);

	CDistantSegmentsKernel* ds_kernel=new CDistantSegmentsKernel();
	CModelSelectionParameters* param_ds_kernel=new CModelSelectionParameters(
			"kernel", ds_kernel);
	root->append_child(param_ds_kernel);

	return root;
}

#ifdef HAVE_EIGEN3
/* Tree 4a: a GP inference method (with likelihood and two alternative
 * kernels) next to an SVM (with two alternative kernels). */
CModelSelectionParameters* create_param_tree_4a()
{
	CModelSelectionParameters* root=new CModelSelectionParameters();

	CDenseFeatures<float64_t>* features=new CDenseFeatures<float64_t>();
	CRegressionLabels* labels=new CRegressionLabels();
	CGaussianKernel* gaussian_kernel=new CGaussianKernel(10, 2);
	CPowerKernel* power_kernel=new CPowerKernel();

	CZeroMean* mean=new CZeroMean();
	CGaussianLikelihood* lik=new CGaussianLikelihood();
	CExactInferenceMethod* inf=new CExactInferenceMethod(gaussian_kernel,
			features, mean, labels, lik);

	CLibSVM* svm=new CLibSVM();
	CPowerKernel* power_kernel_svm=new CPowerKernel();
	CGaussianKernel* gaussian_kernel_svm=new CGaussianKernel(10, 2);

	CModelSelectionParameters* param_inf=new CModelSelectionParameters(
			"inference_method", inf);
	root->append_child(param_inf);

	CModelSelectionParameters* param_inf_gaussian=new CModelSelectionParameters(
			"likelihood_model", lik);
	param_inf->append_child(param_inf_gaussian);

	CModelSelectionParameters* param_inf_kernel_1=new CModelSelectionParameters(
			"kernel", gaussian_kernel);
	param_inf->append_child(param_inf_kernel_1);

	CModelSelectionParameters* param_inf_kernel_2=new CModelSelectionParameters(
			"kernel", power_kernel);
	param_inf->append_child(param_inf_kernel_2);

	CModelSelectionParameters* param_svm=new CModelSelectionParameters(
			"SVM", svm);
	root->append_child(param_svm);

	CModelSelectionParameters* param_svm_kernel_1=new CModelSelectionParameters(
			"kernel", power_kernel_svm);
	param_svm->append_child(param_svm_kernel_1);

	CModelSelectionParameters* param_svm_kernel_2=new CModelSelectionParameters(
			"kernel", gaussian_kernel_svm);
	param_svm->append_child(param_svm_kernel_2);

	return root;
}

/* Tree 4b: same as 4a plus an additional scalar "C1" value child on the
 * root. */
CModelSelectionParameters* create_param_tree_4b()
{
	CModelSelectionParameters* root=new CModelSelectionParameters();

	CDenseFeatures<float64_t>* features=new CDenseFeatures<float64_t>();
	CRegressionLabels* labels=new CRegressionLabels();
	CGaussianKernel* gaussian_kernel=new CGaussianKernel(10, 2);
	CPowerKernel* power_kernel=new CPowerKernel();

	CZeroMean* mean=new CZeroMean();
	CGaussianLikelihood* lik=new CGaussianLikelihood();
	CExactInferenceMethod* inf=new CExactInferenceMethod(gaussian_kernel,
			features, mean, labels, lik);

	CLibSVM* svm=new CLibSVM();
	CPowerKernel* power_kernel_svm=new CPowerKernel();
	CGaussianKernel* gaussian_kernel_svm=new CGaussianKernel(10, 2);

	CModelSelectionParameters* param_c=new CModelSelectionParameters("C1");
	root->append_child(param_c);
	param_c->build_values(1,2,R_EXP);

	CModelSelectionParameters* param_inf=new CModelSelectionParameters(
			"inference_method", inf);
	root->append_child(param_inf);

	CModelSelectionParameters* param_inf_gaussian=new CModelSelectionParameters(
			"likelihood_model", lik);
	param_inf->append_child(param_inf_gaussian);

	CModelSelectionParameters* param_inf_kernel_1=new CModelSelectionParameters(
			"kernel", gaussian_kernel);
	param_inf->append_child(param_inf_kernel_1);

	CModelSelectionParameters* param_inf_kernel_2=new CModelSelectionParameters(
			"kernel", power_kernel);
	param_inf->append_child(param_inf_kernel_2);

	CModelSelectionParameters* param_svm=new CModelSelectionParameters(
			"SVM", svm);
	root->append_child(param_svm);

	CModelSelectionParameters* param_svm_kernel_1=new CModelSelectionParameters(
			"kernel", power_kernel_svm);
	param_svm->append_child(param_svm_kernel_1);

	CModelSelectionParameters* param_svm_kernel_2=new CModelSelectionParameters(
			"kernel", gaussian_kernel_svm);
	param_svm->append_child(param_svm_kernel_2);

	return root;
}

/* Tree 5: GP inference method whose likelihood has a "sigma" range and
 * whose kernel alternatives include a Gaussian kernel with a "width"
 * range, a linear kernel and a power kernel. */
CModelSelectionParameters* create_param_tree_5()
{
	CModelSelectionParameters* root=new CModelSelectionParameters();

	CDenseFeatures<float64_t>* features=new CDenseFeatures<float64_t>();
	CRegressionLabels* labels=new CRegressionLabels();
	CGaussianKernel* gaussian_kernel=new CGaussianKernel(10, 2);
	CLinearKernel* linear_kernel=new CLinearKernel();
	CPowerKernel* power_kernel=new CPowerKernel();

	CZeroMean* mean=new CZeroMean();
	CGaussianLikelihood* lik=new CGaussianLikelihood();
	CExactInferenceMethod* inf=new CExactInferenceMethod(gaussian_kernel,
			features, mean, labels, lik);

	CModelSelectionParameters* param_inf=new CModelSelectionParameters(
			"inference_method", inf);
	root->append_child(param_inf);

	CModelSelectionParameters* param_inf_gaussian=new CModelSelectionParameters(
			"likelihood_model", lik);
	param_inf->append_child(param_inf_gaussian);

	CModelSelectionParameters* param_inf_gaussian_sigma=
			new CModelSelectionParameters("sigma");
	param_inf_gaussian->append_child(param_inf_gaussian_sigma);
	param_inf_gaussian_sigma->build_values(2.0, 3.0, R_EXP);

	CModelSelectionParameters* param_inf_kernel_1=new CModelSelectionParameters(
			"kernel", gaussian_kernel);
	param_inf->append_child(param_inf_kernel_1);

	CModelSelectionParameters* param_inf_kernel_width=
			new CModelSelectionParameters("width");
	param_inf_kernel_1->append_child(param_inf_kernel_width);
	param_inf_kernel_width->build_values(1.0, 2.0, R_EXP);

	CModelSelectionParameters* param_inf_kernel_2=new CModelSelectionParameters(
			"kernel", linear_kernel);
	param_inf->append_child(param_inf_kernel_2);

	CModelSelectionParameters* param_inf_kernel_3=new CModelSelectionParameters(
			"kernel", power_kernel);
	param_inf->append_child(param_inf_kernel_3);

	return root;
}
#endif

/* Enumerates combinations for every example tree (continues below). */
int main(int argc, char **argv)
{
	init_shogun_with_defaults();

//	sg_io->set_loglevel(MSG_DEBUG);

	CModelSelectionParameters* tree=NULL;

	tree=create_param_tree_1();
	SG_REF(tree);
	test_tree(tree);
SG_UNREF(tree); tree=create_param_tree_2(); SG_REF(tree); test_tree(tree); SG_UNREF(tree); tree=create_param_tree_3(); SG_REF(tree); test_tree(tree); SG_UNREF(tree); #ifdef HAVE_EIGEN3 tree=create_param_tree_4a(); SG_REF(tree); test_tree(tree); SG_UNREF(tree); tree=create_param_tree_4b(); SG_REF(tree); test_tree(tree); SG_UNREF(tree); tree=create_param_tree_5(); SG_REF(tree); test_tree(tree); SG_UNREF(tree); #endif exit_shogun(); return 0; }
#include <cstdio> #include <shogun/optimization/lbfgs/lbfgs.h> static lbfgsfloatval_t evaluate( void *instance, const lbfgsfloatval_t *x, lbfgsfloatval_t *g, const int n, const lbfgsfloatval_t step ) { int i; lbfgsfloatval_t fx = 0.0; for (i = 0;i < n;i += 2) { lbfgsfloatval_t t1 = 1.0 - x[i]; lbfgsfloatval_t t2 = 10.0 * (x[i+1] - x[i] * x[i]); g[i+1] = 20.0 * t2; g[i] = -2.0 * (x[i] * g[i+1] + t1); fx += t1 * t1 + t2 * t2; } return fx; } static int progress( void *instance, const lbfgsfloatval_t *x, const lbfgsfloatval_t *g, const lbfgsfloatval_t fx, const lbfgsfloatval_t xnorm, const lbfgsfloatval_t gnorm, const lbfgsfloatval_t step, int n, int k, int ls ) { printf("Iteration %d:\n", k); printf(" fx = %f, x[0] = %f, x[1] = %f\n", fx, x[0], x[1]); printf(" xnorm = %f, gnorm = %f, step = %f\n", xnorm, gnorm, step); printf("\n"); return 0; } #define N 100 int main(int argc, char *argv[]) { int i, ret = 0; lbfgsfloatval_t fx; lbfgsfloatval_t *x = lbfgs_malloc(N); lbfgs_parameter_t param; if (x == NULL) { printf("ERROR: Failed to allocate a memory block for variables.\n"); return 1; } /* Initialize the variables. */ for (i = 0;i < N;i += 2) { x[i] = -1.2; x[i+1] = 1.0; } /* Initialize the parameters for the L-BFGS optimization. */ lbfgs_parameter_init(¶m); /*param.linesearch = LBFGS_LINESEARCH_BACKTRACKING;*/ /* Start the L-BFGS optimization; this will invoke the callback functions evaluate() and progress() when necessary. */ ret = lbfgs(N, x, &fx, evaluate, progress, NULL, ¶m); /* Report the result. */ printf("L-BFGS optimization terminated with status code = %d\n", ret); printf(" fx = %f, x[0] = %f, x[1] = %f\n", fx, x[0], x[1]); lbfgs_free(x); return 0; }
/*
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 3 of the License, or
 * (at your option) any later version.
 *
 * Written (W) 2011 Heiko Strathmann
 * Copyright (C) 2011 Berlin Institute of Technology and Max-Planck-Society
 */

#include <shogun/labels/BinaryLabels.h>
#include <shogun/features/DenseFeatures.h>
#include <shogun/kernel/GaussianKernel.h>
#include <shogun/classifier/svm/LibSVM.h>
#include <shogun/base/init.h>
#include <shogun/lib/common.h>
#include <shogun/io/SGIO.h>

using namespace shogun;

/* forwards shogun log output to the given stream */
void print_message(FILE* target, const char* str)
{
	fprintf(target, "%s", str);
}

/* number of features and their dimension */
int32_t n=6;

/** Demonstrates changing a kernel's registered "width" parameter via the
 * Parameter framework: an SVM is retrained and re-evaluated for a sequence
 * of Gaussian-kernel widths 2^0..2^9 on random data. */
int main(int argc, char** argv)
{
	init_shogun(&print_message);

	/* create some random data: n x n matrix with entries in [-n, n] */
	SGMatrix<float64_t> matrix(n,n);
	for(int32_t i=0; i<n*n; ++i)
		matrix.matrix[i]=CMath::random((float64_t)-n,(float64_t)n);

	SGMatrix<float64_t>::display_matrix(matrix.matrix, n, n);

	/* create n n-dimensional feature vectors */
	CDenseFeatures<float64_t>* features=
			new CDenseFeatures<float64_t>(matrix);

	/* create gaussian kernel with cache 10MB, width will be changed later */
	CGaussianKernel* kernel = new CGaussianKernel(10, 0);
	kernel->init(features, features);

	/* create n labels (+1,-1,+1,-1,...) */
	CBinaryLabels* labels=new CBinaryLabels(n);
	for (int32_t i=0; i<n; ++i)
		labels->set_label(i, i%2==0 ? +1 : -1);

	/* create libsvm with C=10 and produced labels */
	CLibSVM* svm=new CLibSVM(10, kernel, labels);

	/* iterate over different width parameters */
	for (int32_t i=0; i<10; ++i)
	{
		SG_SPRINT("\n\ncurrent kernel width: 2^%d=%f\n", i, CMath::pow(2.0,i));

		float64_t width=CMath::pow(2.0,i);

		/* create parameter to change current kernel width; the Parameter
		 * instance only references the local 'width' variable */
		Parameter* param=new Parameter();
		param->add(&width, "width", "");

		/* tell kernel to use the newly produced parameter — copies the value
		 * of "width" into the kernel's registered parameter of the same name
		 * (NOTE(review): presumably by-name matching; confirm against
		 * Parameter::set_from_parameters) */
		kernel->m_parameters->set_from_parameters(param);

		/* print kernel matrix (note: inner 'i' shadows the width index) */
		for (int32_t i=0; i<n; i++)
		{
			for (int32_t j=0; j<n; j++)
				SG_SPRINT("%f ", kernel->kernel(i,j));
			SG_SPRINT("\n");
		}

		/* train and classify */
		svm->train();
		for (int32_t i=0; i<n; ++i)
			SG_SPRINT("output[%d]=%f\treal[%d]=%f\n", i,
					svm->apply_one(i), i, labels->get_label(i));

		delete param;
	}

	/* free up memory */
	SG_UNREF(svm);

	exit_shogun();
	return 0;
}
/*
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 3 of the License, or
 * (at your option) any later version.
 *
 * Written (W) 2011 Heiko Strathmann
 * Copyright (C) 2011 Berlin Institute of Technology and Max-Planck-Society
 */

#include <shogun/labels/BinaryLabels.h>
#include <shogun/features/DenseFeatures.h>
#include <shogun/kernel/GaussianKernel.h>
#include <shogun/classifier/svm/LibSVM.h>
#include <shogun/base/init.h>
#include <shogun/lib/common.h>
#include <shogun/io/SGIO.h>

using namespace shogun;

/* forwards shogun log output to the given stream */
void print_message(FILE* target, const char* str)
{
	fprintf(target, "%s", str);
}

/* number of features and their dimension, number of kernels */
const int32_t n=7;

/** Demonstrates swapping an SVM's entire kernel object via the Parameter
 * framework: n Gaussian kernels with random widths are created up-front,
 * then each is installed into the SVM in turn and the SVM retrained. */
int main(int argc, char** argv)
{
	init_shogun(&print_message);

	/* create some random data and hand it to each kernel */
	SGMatrix<float64_t> matrix(n,n);
	for (int32_t k=0; k<n*n; ++k)
		matrix.matrix[k]=CMath::random((float64_t) -n, (float64_t) n);

	SG_SPRINT("feature data:\n");
	SGMatrix<float64_t>::display_matrix(matrix.matrix, n, n);

	CDenseFeatures<float64_t>* features=new CDenseFeatures<float64_t>(matrix);

	/* create n kernels with n features each, widths random in [0, n*n] */
	CGaussianKernel** kernels=SG_MALLOC(CGaussianKernel*, n);
	for (int32_t i=0; i<n; ++i)
	{
		kernels[i]=new CGaussianKernel(10, CMath::random(0.0, (float64_t)n*n));

		/* hand data to kernel */
		kernels[i]->init(features, features);
	}

	/* create n parameter instances, each with one kernel */
	Parameter** parameters=SG_MALLOC(Parameter*, n);
	for (int32_t i=0; i<n; ++i)
	{
		parameters[i]=new Parameter();
		parameters[i]->add((CSGObject**)&kernels[i], "kernel", "");
	}

	/* create n labels (+1,-1,+1,-1,...) */
	CBinaryLabels* labels=new CBinaryLabels(n);
	for (int32_t i=0; i<n; ++i)
		labels->set_label(i, i%2==0 ? +1 : -1);

	/* create libsvm with C=10 and produced labels; kernel installed below */
	CLibSVM* svm=new CLibSVM(10, NULL, labels);

	/* iterate over all parameter instances and set them as subkernel */
	for (int32_t i=0; i<n; ++i)
	{
		SG_SPRINT("\nkernel %d has width %f\n", i, kernels[i]->get_width());

		/* change kernel, old one is UNREF'ed, new one is REF'ed */
		svm->m_parameters->set_from_parameters(parameters[i]);

		/* train and classify with the different kernels
		 * (note: inner 'i' shadows the kernel index) */
		svm->train();
		for (int32_t i=0; i<n; ++i)
			SG_SPRINT("output[%d]=%f\treal[%d]=%f\n", i,
					svm->apply_one(i), i, labels->get_label(i));
	}

	/* free up memory: delete all Parameter instances */
	for (int32_t i=0; i<n; ++i)
		delete parameters[i];

	/* delete created arrays */
	SG_FREE(kernels);
	SG_FREE(parameters);

	/* this also handles features, labels, and last kernel in kernels[n-1] */
	SG_UNREF(svm);

	exit_shogun();
	return 0;
}
/*
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 3 of the License, or
 * (at your option) any later version.
 *
 * Written (W) 2011 Heiko Strathmann
 * Copyright (C) 2011 Berlin Institute of Technology and Max-Planck-Society
 */

#include <shogun/lib/config.h>
#include <shogun/classifier/svm/LibSVM.h>
#include <shogun/classifier/svm/LibLinear.h>
#include <shogun/kernel/string/DistantSegmentsKernel.h>
#include <shogun/kernel/GaussianKernel.h>
#include <shogun/kernel/PowerKernel.h>
#include <shogun/distance/MinkowskiMetric.h>

using namespace shogun;

/* forwards shogun log output to the given stream */
void print_message(FILE* target, const char* str)
{
	fprintf(target, "%s", str);
}

/** Prints name, description and datatype of every parameter the given object
 * exposes for model selection.
 * @param object shogun object whose model-selection parameters are listed
 */
void print_modsel_parameters(CSGObject* object)
{
	SGStringList<char> modsel_params=object->get_modelsel_names();

	SG_SPRINT("Parameters of %s available for model selection:\n",
			object->get_name());

	/* scratch buffer for the textual datatype representation */
	char* type_string=SG_MALLOC(char, 100);
	for (index_t i=0; i<modsel_params.num_strings; ++i)
	{
		/* extract current name, description and type, and print them */
		const char* name=modsel_params.strings[i].string;
		index_t index=object->get_modsel_param_index(name);
		TSGDataType type=object->m_model_selection_parameters->get_parameter(
				index)->m_datatype;

		type.to_string(type_string, 100);
		SG_SPRINT("\"%s\": \"%s\", %s\n", name,
				object->get_modsel_param_descr(name), type_string);
	}
	SG_FREE(type_string);

	SG_SPRINT("\n");
}

/** Instantiates a few classifiers/kernels/distances and lists their
 * model-selection parameters. */
int main(int argc, char** argv)
{
	init_shogun(&print_message);

	/* NOTE(review): guard reads '#ifndef HAVE_LAPACK', i.e. the body runs
	 * only WITHOUT LAPACK — looks inverted; confirm intended condition */
#ifndef HAVE_LAPACK
	CSGObject* object;

	object=new CLibSVM();
	print_modsel_parameters(object);
	SG_UNREF(object);

	object=new CLibLinear();
	print_modsel_parameters(object);
	SG_UNREF(object);

	object=new CDistantSegmentsKernel();
	print_modsel_parameters(object);
	SG_UNREF(object);

	object=new CGaussianKernel();
	print_modsel_parameters(object);
	SG_UNREF(object);

	object=new CPowerKernel();
	print_modsel_parameters(object);
	SG_UNREF(object);

	object=new CMinkowskiMetric();
	print_modsel_parameters(object);
	SG_UNREF(object);
#endif // HAVE_LAPACK

	exit_shogun();
	return 0;
}
/*
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 3 of the License, or
 * (at your option) any later version.
 *
 * Written (W) 2011 Heiko Strathmann
 * DS-Kernel implementation Written (W) 2008 Sébastien Boisvert under GPLv3
 * Copyright (C) 2011 Berlin Institute of Technology and Max-Planck-Society
 */

#include <shogun/io/SGIO.h>
#include <shogun/mathematics/Math.h>
#include <shogun/base/Parameter.h>
#include <shogun/kernel/string/DistantSegmentsKernel.h>
#include <shogun/kernel/GaussianKernel.h>

using namespace shogun;

/* array/matrix edge length used by the vector/matrix tests */
int32_t max=3;
const float64_t initial_value=1;
const float64_t another_value=2;

/* forwards shogun log output to the given stream */
void print_message(FILE* target, const char* str)
{
	fprintf(target, "%s", str);
}

/** Checks that Parameter::set_from_parameters copies a scalar float value
 * from one parameter list into another. @return true on success */
bool test_float_scalar()
{
	bool result=true;

	Parameter* original_parameter_list=new Parameter();
	float64_t original_parameter=initial_value;
	original_parameter_list->add(&original_parameter, "param", "");

	float64_t new_parameter=another_value;
	Parameter* new_parameter_list=new Parameter();
	new_parameter_list->add(&new_parameter, "param", "");

	original_parameter_list->set_from_parameters(new_parameter_list);

	result&=original_parameter==another_value;

	delete original_parameter_list;
	delete new_parameter_list;

	return result;
}

/** Checks that set_from_parameters copies a float vector.
 * @return true on success */
bool test_float_vector()
{
	bool result=true;

	Parameter* original_parameter_list=new Parameter();
	float64_t* original_parameter=SG_MALLOC(float64_t, max);
	SGVector<float64_t>::fill_vector(original_parameter, max, initial_value);
	original_parameter_list->add_vector(&original_parameter, &max, "param", "");

	float64_t* new_parameter=SG_MALLOC(float64_t, max);
	SGVector<float64_t>::fill_vector(new_parameter, max, another_value);
	Parameter* new_parameter_list=new Parameter();
	new_parameter_list->add_vector(&new_parameter, &max, "param", "");

	original_parameter_list->set_from_parameters(new_parameter_list);

	for (int32_t i=0; i<max; ++i)
		result&=original_parameter[i]==another_value;

	/* FIX: arrays come from SG_MALLOC (malloc-based) — releasing them with
	 * 'delete' is undefined behavior; use SG_FREE instead */
	SG_FREE(original_parameter);
	SG_FREE(new_parameter);
	delete original_parameter_list;
	delete new_parameter_list;

	return result;
}

/** Checks that set_from_parameters copies a float matrix.
 * @return true on success */
bool test_float_matrix()
{
	bool result=true;

	Parameter* original_parameter_list=new Parameter();
	float64_t* original_parameter=SG_MALLOC(float64_t, max*max);
	SGVector<float64_t>::fill_vector(original_parameter, max*max, initial_value);
	original_parameter_list->add_matrix(&original_parameter, &max, &max, "param", "");

	float64_t* new_parameter=SG_MALLOC(float64_t, max*max);
	SGVector<float64_t>::fill_vector(new_parameter, max*max, another_value);
	Parameter* new_parameter_list=new Parameter();
	new_parameter_list->add_matrix(&new_parameter, &max, &max, "param", "");

	original_parameter_list->set_from_parameters(new_parameter_list);

	for (int32_t i=0; i<max*max; ++i)
		result&=original_parameter[i]==another_value;

	/* FIX: SG_MALLOC'd buffers must be released with SG_FREE, not delete */
	SG_FREE(original_parameter);
	SG_FREE(new_parameter);
	delete original_parameter_list;
	delete new_parameter_list;

	return result;
}

/** Checks that set_from_parameters swaps a CSGObject pointer; the old object
 * is SG_UNREF'ed, the new one SG_REF'ed by the framework.
 * @return true on success */
bool test_sgobject_scalar()
{
	bool result=true;

	Parameter* original_parameter_list=new Parameter();
	CSGObject* original_parameter=new CGaussianKernel(10, 10);
	SG_REF(original_parameter);
	original_parameter_list->add(&original_parameter, "kernel", "");

	CSGObject* new_parameter=new CDistantSegmentsKernel(10, 10, 10);
	Parameter* new_parameter_list=new Parameter();
	new_parameter_list->add(&new_parameter, "kernel", "");

	/* note: old_parameter is SG_UNREF'ed, new one SG_REF'ed */
	original_parameter_list->set_from_parameters(new_parameter_list);

	result&=original_parameter==new_parameter;

	/* old original kernel was deleted by shogun's SG_UNREF */
	SG_UNREF(new_parameter);
	delete original_parameter_list;
	delete new_parameter_list;

	return result;
}

/** Checks that set_from_parameters replaces every element of a CSGObject*
 * vector. @return true on success */
bool test_sgobject_vector()
{
	bool result=true;

	Parameter* original_parameter_list=new Parameter();
	CSGObject** original_parameter=SG_MALLOC(CSGObject*, max);
	for (int32_t i=0; i<max; ++i)
	{
		original_parameter[i]=new CDistantSegmentsKernel(1, 1, 1);
		SG_REF(original_parameter[i]);
	}

	original_parameter_list->add_vector(&original_parameter, &max, "param", "");

	CSGObject** new_parameter=SG_MALLOC(CSGObject*, max);
	for (int32_t i=0; i<max; ++i)
		new_parameter[i]=new CDistantSegmentsKernel(2, 2, 2);

	Parameter* new_parameter_list=new Parameter();
	new_parameter_list->add_vector(&new_parameter, &max, "param", "");

	/* note: old_parameters are SG_UNREF'ed, new ones SG_REF'ed */
	original_parameter_list->set_from_parameters(new_parameter_list);

	for (int32_t i=0; i<max; ++i)
		result&=original_parameter[i]==new_parameter[i];

	/* old original kernels were deleted by shogun's SG_UNREF */

	/* FIX: pointer arrays come from SG_MALLOC — release with SG_FREE,
	 * not delete */
	SG_FREE(original_parameter);

	for (int32_t i=0; i<max; ++i)
		SG_UNREF(new_parameter[i]);
	SG_FREE(new_parameter);

	delete original_parameter_list;
	delete new_parameter_list;

	return result;
}

/** Checks that set_from_parameters replaces every element of a CSGObject*
 * matrix (stored column-major as j*max+i). @return true on success */
bool test_sgobject_matrix()
{
	bool result=true;

	Parameter* original_parameter_list=new Parameter();
	CSGObject** original_parameter=SG_MALLOC(CSGObject*, max*max);
	for (int32_t i=0; i<max; ++i)
	{
		for (int32_t j=0; j<max; ++j)
		{
			original_parameter[j*max+i]=new CDistantSegmentsKernel(1, 1, 1);
			SG_REF(original_parameter[j*max+i]);
		}
	}

	original_parameter_list->add_matrix(&original_parameter, &max, &max,
			"param", "");

	CSGObject** new_parameter=SG_MALLOC(CSGObject*, max*max);
	for (int32_t i=0; i<max; ++i)
	{
		for (int32_t j=0; j<max; ++j)
			new_parameter[j*max+i]=new CDistantSegmentsKernel(1, 1, 1);
	}

	Parameter* new_parameter_list=new Parameter();
	new_parameter_list->add_matrix(&new_parameter, &max, &max, "param", "");

	/* note: old_parameters are SG_UNREF'ed, new ones SG_REF'ed */
	original_parameter_list->set_from_parameters(new_parameter_list);

	for (int32_t i=0; i<max; ++i)
	{
		for (int32_t j=0; j<max; ++j)
			result&=original_parameter[j*max+i]==new_parameter[j*max+i];
	}

	/* old original kernels were deleted by shogun's SG_UNREF */

	/* FIX: SG_MALLOC'd pointer arrays released with SG_FREE, not delete */
	SG_FREE(original_parameter);

	for (int32_t i=0; i<max*max; ++i)
		SG_UNREF(new_parameter[i]);
	SG_FREE(new_parameter);

	delete original_parameter_list;
	delete new_parameter_list;

	return result;
}

/** Runs all set_from_parameters tests and prints SUCCESS/FAILURE. */
int main(int argc, char **argv)
{
	init_shogun(&print_message, &print_message, &print_message);

	bool result=true;

	/* test whether set_from_parameters works for these types */
	result&=test_float_scalar();
	result&=test_sgobject_scalar();
	result&=test_sgobject_vector();
	result&=test_sgobject_matrix();
	result&=test_float_matrix();
	result&=test_float_vector();

	if (result)
		SG_SPRINT("SUCCESS!\n");
	else
		SG_SPRINT("FAILURE!\n");

	exit_shogun();
	return 0;
}
/*
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 3 of the License, or
 * (at your option) any later version.
 *
 * Written (W) 2008-2010 Soeren Sonnenburg, Alexander Binder
 * Copyright (C) 2008-2009 Fraunhofer Institute FIRST and Max Planck Society
 * Copyright (C) 2010 Berlin Institute of Technology
 */

#include <shogun/kernel/GaussianKernel.h>
#include <shogun/kernel/LinearKernel.h>
#include <shogun/preproc/RandomFourierGaussPreproc.h>
#include <shogun/features/DenseFeatures.h>
#include <shogun/classifier/svm/LibSVM.h>
#include <shogun/lib/Mathematics.h>
#include <shogun/lib/common.h>
#include <shogun/base/init.h>

#include <stdlib.h>
#include <stdio.h>
#include <vector>
#include <iostream>
#include <algorithm>
#include <ctime>

using namespace shogun;

/** Generates a two-class toy data set.
 * Allocates (via SG_MALLOC) and fills 'lab' (num labels) and 'feat'
 * (num x dims, vector-major): first half gets label -1 with features in
 * [0,1]+dist, second half label +1 with features in [0,1]-dist.
 * Caller owns the returned buffers.
 */
void gen_rand_data(float64_t* & feat, float64_t* & lab, const int32_t num,
		const int32_t dims, const float64_t dist)
{
	lab=SG_MALLOC(float64_t, num);
	feat=SG_MALLOC(float64_t, num*dims);

	for (int32_t i=0; i<num; i++)
	{
		if (i<num/2)
		{
			lab[i]=-1.0;
			for (int32_t j=0; j<dims; j++)
				feat[i*dims+j]=CMath::random(0.0,1.0)+dist;
		}
		else
		{
			lab[i]=1.0;
			for (int32_t j=0; j<dims; j++)
				feat[i*dims+j]=CMath::random(0.0,1.0)-dist;
		}
	}
	CMath::display_vector(lab,num);
	CMath::display_matrix(feat,dims, num);
}

/** Compares an exact Gaussian-kernel SVM against a linear SVM on random
 * Fourier features (CRandomFourierGaussPreproc), then repeats the test with
 * a second preprocessor initialized via set_randomcoefficients(). */
int main()
{
	time_t a,b;
	int32_t dims=6000;
	float64_t dist=0.5;

	// the typical application of the below preprocessor are cases with high
	// input dimensionalities of some thousands
	int32_t randomfourier_featurespace_dim=500;

	int32_t numtr=3000;
	int32_t numte=3000;

	const int32_t feature_cache=0;
	const int32_t kernel_cache=0;
	// important trick for RFgauss to work: kernel width is set such that
	// average inner kernel distance is close one.
	// the rfgauss approximation breaks down if average inner kernel distances
	// (~~ kernel width to small compared to variance of data) are too large.
	// try rbf_width=0.1 to see how it fails! - you will see the problem in
	// the large number of negative kernel entries (numnegratio) for the
	// rfgauss linear kernel
	const float64_t rbf_width=4000;
	const float64_t svm_C=10;
	const float64_t svm_eps=0.001;

	init_shogun();

	/* ---- generate train data (and a copy for the preprocessed run) ---- */
	float64_t* feattr(NULL);
	float64_t* labtr(NULL);

	a=time(NULL);
	std::cout << "generating train data"<<std::endl;
	gen_rand_data(feattr,labtr,numtr,dims,dist);
	float64_t* feattr2=SG_MALLOC(float64_t, numtr*dims);
	std::copy(feattr,feattr+numtr*dims,feattr2);
	std::cout << "finished"<<std::endl;
	b=time(NULL);
	std::cout<< "elapsed time in seconds "<<b-a <<std::endl;

	/* ---- generate test data (and two copies for the later runs) ---- */
	float64_t* featte(NULL);
	float64_t* labte(NULL);

	a=time(NULL);
	std::cout << "generating test data"<<std::endl;
	gen_rand_data(featte,labte,numte,dims,dist);
	/* FIX: test buffers hold numte vectors — was sized/copied with numtr,
	 * which only worked because numtr==numte here */
	float64_t* featte2=SG_MALLOC(float64_t, numte*dims);
	std::copy(featte,featte+numte*dims,featte2);
	float64_t* featte3=SG_MALLOC(float64_t, numte*dims);
	std::copy(featte,featte+numte*dims,featte3);
	std::cout << "finished"<<std::endl;
	b=time(NULL);
	std::cout<< "elapsed time in seconds "<<b-a <<std::endl;

	// create train labels
	CLabels* labelstr=new CLabels();
	labelstr->set_labels(labtr, numtr);
	SG_REF(labelstr);

	// create train features
	a=time(NULL);
	std::cout << "initializing shogun train feature"<<std::endl;

	CDenseFeatures<float64_t>* featurestr1 =
			new CDenseFeatures<float64_t>(feature_cache);
	SG_REF(featurestr1);
	featurestr1->set_feature_matrix(feattr, dims, numtr);
	std::cout << "finished"<<std::endl;
	//b=time(NULL);
	//std::cout<< "elapsed time in seconds "<<b-a <<std::endl;

	// create gaussian kernel
	//std::cout << "computing gaussian train kernel"<<std::endl;
	CGaussianKernel* kerneltr1 = new CGaussianKernel(kernel_cache, rbf_width);
	SG_REF(kerneltr1);
	kerneltr1->init(featurestr1, featurestr1);

	// create svm via libsvm and train
	CLibSVM* svm1 = new CLibSVM(svm_C, kerneltr1, labelstr);
	SG_REF(svm1);
	svm1->set_epsilon(svm_eps);
	a=time(NULL);
	std::cout << "training SVM over gaussian kernel"<<std::endl;
	svm1->train();
	std::cout << "finished"<<std::endl;
	b=time(NULL);
	std::cout<< "elapsed time in seconds "<<b-a <<std::endl;

	printf("num_sv:%d b:%f\n", svm1->get_num_support_vectors(),
			svm1->get_bias());

	/* ---- score the exact Gaussian kernel on the test set ---- */
	a=time(NULL);
	std::cout << "initializing shogun test feature"<<std::endl;
	CDenseFeatures<float64_t>* featureste1 =
			new CDenseFeatures<float64_t>(feature_cache);
	SG_REF(featureste1);
	featureste1->set_feature_matrix(featte, dims, numte);
	std::cout << "finished"<<std::endl;
	//b=time(NULL);
	//std::cout<< "elapsed time in seconds "<<b-a <<std::endl;

	//std::cout << "computing gaussian test kernel"<<std::endl;
	CGaussianKernel* kernelte1 = new CGaussianKernel(kernel_cache, rbf_width);
	SG_REF(kernelte1);
	kernelte1->init(featurestr1, featureste1);

	svm1->set_kernel(kernelte1);

	a=time(NULL);
	std::cout << "scoring gaussian test kernel"<<std::endl;
	std::vector<float64_t> scoreste1(numte);
	float64_t err1=0;
	for(int32_t i=0; i< numte ;++i)
	{
		scoreste1[i]=svm1->classify_example(i);
		if(scoreste1[i]*labte[i]<0)
		{
			err1+=1.0/numte;
		}
	}
	std::cout << "finished"<<std::endl;
	b=time(NULL);
	std::cout<< "elapsed time in seconds "<<b-a <<std::endl;

	// ***************************************
	// now WITH the preprocessor
	a=time(NULL);
	std::cout << "initializing preprocessor"<<std::endl;
	CRandomFourierGaussPreproc *rfgauss=new CRandomFourierGaussPreproc;
	SG_REF(rfgauss);
	rfgauss->get_io()->set_loglevel(MSG_DEBUG);

	// set parameters of the preprocessor
	// !!!!!!!!!!!!!!!!! the rfgauss kernel width must be
	// CMath::sqrt(rbf_width/2.0) to match the Gaussian kernel above
	rfgauss->set_kernelwidth( CMath::sqrt(rbf_width/2.0) );
	rfgauss->set_dim_input_space(dims);
	rfgauss->set_dim_feature_space(randomfourier_featurespace_dim);
	std::cout << "finished"<<std::endl;
	//b=time(NULL);
	//std::cout<< "elapsed time in seconds "<<b-a <<std::endl;

	// create train features
	a=time(NULL);
	std::cout << "initializing shogun train feature again"<<std::endl;
	CDenseFeatures<float64_t>* featurestr2 =
			new CDenseFeatures<float64_t>(feature_cache);
	SG_REF(featurestr2);
	featurestr2->set_feature_matrix(feattr2, dims, numtr);
	std::cout << "finished"<<std::endl;
	//b=time(NULL);
	//std::cout<< "elapsed time in seconds "<<b-a <<std::endl;

	// add and apply preprocessor to the train features
	featurestr2->add_preproc(rfgauss);
	a=time(NULL);
	std::cout << "applying preprocessor to train feature"<<std::endl;
	featurestr2->apply_preproc();
	std::cout << "finished"<<std::endl;
	b=time(NULL);
	std::cout<< "elapsed time in seconds "<<b-a <<std::endl;

	// save random coefficients and state data of preprocessor for use with a
	// new preprocessor object (see lines following "// now the same with a
	// new preprocessor to show the usage of set_randomcoefficients")
	// Alternative: use built-in serialization to load and save state data
	// from/to a file!!!
	float64_t *randomcoeff_additive2, * randomcoeff_multiplicative2;
	int32_t dim_feature_space2,dim_input_space2;
	float64_t kernelwidth2;

	rfgauss->get_randomcoefficients(&randomcoeff_additive2,
			&randomcoeff_multiplicative2, &dim_feature_space2,
			&dim_input_space2, &kernelwidth2);

	// create linear kernel over the random Fourier features
	//std::cout << "computing linear train kernel over preprocessed features"<<std::endl;
	CLinearKernel* kerneltr2 = new CLinearKernel();
	SG_REF(kerneltr2);
	kerneltr2->init(featurestr2, featurestr2);

	// create svm via libsvm and train
	CLibSVM* svm2 = new CLibSVM(svm_C, kerneltr2, labelstr);
	SG_REF(svm2);
	svm2->set_epsilon(svm_eps);
	a=time(NULL);
	std::cout << "training SVM over linear kernel over preprocessed features"<<std::endl;
	svm2->train();
	std::cout << "finished"<<std::endl;
	b=time(NULL);
	std::cout<< "elapsed time in seconds "<<b-a <<std::endl;

	printf("num_sv:%d b:%f\n", svm2->get_num_support_vectors(),
			svm2->get_bias());

	a=time(NULL);
	std::cout << "initializing shogun test feature again"<<std::endl;
	CDenseFeatures<float64_t>* featureste2 =
			new CDenseFeatures<float64_t>(feature_cache);
	SG_REF(featureste2);
	featureste2->set_feature_matrix(featte2, dims, numte);
	std::cout << "finished"<<std::endl;
	//b=time(NULL);
	//std::cout<< "elapsed time in seconds "<<b-a <<std::endl;

	/* second preprocessor object; initialized from the saved coefficients
	 * further below for the set_randomcoefficients demonstration */
	CRandomFourierGaussPreproc *rfgauss2=new CRandomFourierGaussPreproc;
	SG_REF(rfgauss2);
	rfgauss2->get_io()->set_loglevel(MSG_DEBUG);

	// add and apply the SAME preprocessor (rfgauss) to the test features so
	// train and test live in the same random feature space
	featureste2->add_preproc(rfgauss);
	a=time(NULL);
	std::cout << "applying same preprocessor to test feature"<<std::endl;
	featureste2->apply_preproc();
	std::cout << "finished"<<std::endl;
	b=time(NULL);
	std::cout<< "elapsed time in seconds "<<b-a <<std::endl;

	//std::cout << "computing linear test kernel over preprocessed features"<<std::endl;
	CLinearKernel* kernelte2 = new CLinearKernel();
	SG_REF(kernelte2);
	kernelte2->init(featurestr2, featureste2);
	//std::cout << "finished"<<std::endl;
	//b=time(NULL);
	//std::cout<< "elapsed time in seconds "<<b-a <<std::endl;

	svm2->set_kernel(kernelte2);

	a=time(NULL);
	std::cout << "scoring linear test kernel over preprocessed features"<<std::endl;
	std::vector<float64_t> scoreste2(numte);
	float64_t err2=0;
	for(int32_t i=0; i< numte ;++i)
	{
		scoreste2[i]=svm2->classify_example(i);
		if(scoreste2[i]*labte[i]<0)
		{
			err2+=1.0/numte;
		}
	}
	std::cout << "finished"<<std::endl;
	b=time(NULL);
	std::cout<< "elapsed time in seconds "<<b-a <<std::endl;

	std::cout << "pausing 12 seconds"<<std::endl;
	sleep(12);

	// ************************************************************
	// compare results
	// **************************************************************
	int32_t num_labeldiffs=0;
	float64_t avg_scorediff=0;
	for(int32_t i=0; i< numte ;++i)
	{
		if( (int32_t)CMath::sign(scoreste1[i])
				!= (int32_t)CMath::sign(scoreste2[i]))
		{
			++num_labeldiffs;
		}
		avg_scorediff+=CMath::abs(scoreste1[i]-scoreste2[i])/numte;
		std::cout<< "at sample i"<< i <<" label 1= "
				<< CMath::sign(scoreste1[i]) <<" label 2= "
				<< CMath::sign(scoreste2[i])<< " scorediff "
				<< scoreste1[i] << " - " <<scoreste2[i] <<" = "
				<< CMath::abs(scoreste1[i]-scoreste2[i])<<std::endl;
	}

	std::cout << "usedwidth for rbf kernel"<< kerneltr1->get_width()
			<< " " << kernelte1->get_width()<<std::endl;
	std::cout<< "number of different labels between gaussian kernel and rfgauss "<< num_labeldiffs<< " out of "<< numte << " labels "<<std::endl;
	std::cout<< "average test sample SVM output score difference between gaussian kernel and rfgauss "<< avg_scorediff<<std::endl;
	std::cout<< "classification errors gaussian kernel and rfgauss "<< err1 << " " <<err2<<std::endl;

	/* ---- effective kernel widths: mean of inner (log-)distances ---- */
	a=time(NULL);
	std::cout << "computing effective kernel widths (means of inner distances)"<<std::endl;

	int32_t m, n;
	float64_t * kertr1;
	kerneltr1->get_kernel_matrix ( &kertr1, &m, &n);
	std::cout << "kernel size "<< m << " "<< n <<std::endl;

	float64_t avgdist1=0;
	for(int i=0; i<m ;++i)
	{
		for(int l=0; l<i ;++l)
		{
			/* -log(k) recovers the squared distance scale for a Gaussian
			 * kernel; average over the strict lower triangle */
			avgdist1+= -CMath::log(kertr1[i+l*m])*2.0/m/(m+1.0);
		}
	}

	float64_t * kertr2;
	kerneltr2->get_kernel_matrix (&kertr2,&m, &n);

	float64_t avgdist2=0;
	float64_t numnegratio=0;
	for(int i=0; i<m ;++i)
	{
		for(int l=0; l<i ;++l)
		{
			/* negative entries cannot occur for a true Gaussian kernel;
			 * count them as a diagnostic of the rfgauss approximation */
			if(kertr2[i+l*m]<=0)
			{
				numnegratio+=2.0/m/(m+1.0);
			}
			else
			{
				avgdist2+= -CMath::log(std::max(kertr2[i+l*m],1e-10))*2.0/m/(m+1.0);
			}
		}
	}
	std::cout << "finished"<<std::endl;
	b=time(NULL);
	std::cout<< "elapsed time in seconds "<<b-a <<std::endl;

	std::cout << "effective kernel width for gaussian kernel and RFgauss "
			<< avgdist1 << " " <<avgdist2/(1.0-numnegratio) << std::endl
			<< " numnegratio (negative entries in RFgauss approx kernel)"
			<< numnegratio<<std::endl;

	// **********************************************
	// now the same with a new preprocessor to show the usage of
	// set_randomcoefficients
	// **********************************************
	CDenseFeatures<float64_t>* featureste3 =
			new CDenseFeatures<float64_t>(feature_cache);
	SG_REF(featureste3);
	featureste3->set_feature_matrix(featte3, dims, numte);
	std::cout << "finished"<<std::endl;
	//b=time(NULL);
	//std::cout<< "elapsed time in seconds "<<b-a <<std::endl;

	// initialize the second preprocessor from the saved state so it maps
	// into the identical random feature space
	rfgauss2->set_randomcoefficients( randomcoeff_additive2,
			randomcoeff_multiplicative2, dim_feature_space2,
			dim_input_space2, kernelwidth2);

	// add and apply preprocessor
	featureste3->add_preproc(rfgauss2);
	a=time(NULL);
	std::cout << "applying same preprocessor to test feature"<<std::endl;
	featureste3->apply_preproc();
	std::cout << "finished"<<std::endl;
	b=time(NULL);
	std::cout<< "elapsed time in seconds "<<b-a <<std::endl;

	//std::cout << "computing linear test kernel over preprocessed features"<<std::endl;
	CLinearKernel* kernelte3 = new CLinearKernel();
	SG_REF(kernelte3);
	/* FIX: was 'kernelte2->init(...)' — kernelte3 was handed to svm2 below
	 * without ever being initialized, while kernelte2 was clobbered */
	kernelte3->init(featurestr2, featureste3);
	//std::cout << "finished"<<std::endl;
	//b=time(NULL);
	//std::cout<< "elapsed time in seconds "<<b-a <<std::endl;

	svm2->set_kernel(kernelte3);

	a=time(NULL);
	std::cout << "scoring linear test kernel over preprocessed features"<<std::endl;
	std::vector<float64_t> scoreste3(numte);
	float64_t err3=0;
	for(int32_t i=0; i< numte ;++i)
	{
		scoreste3[i]=svm2->classify_example(i);
		if(scoreste3[i]*labte[i]<0)
		{
			err3+=1.0/numte;
		}
	}
	std::cout << "finished"<<std::endl;
	b=time(NULL);
	std::cout<< "elapsed time in seconds "<<b-a <<std::endl;

	std::cout << "pausing 12 seconds"<<std::endl;
	sleep(12);

	// ************************************************************
	// compare results
	// **************************************************************
	num_labeldiffs=0;
	avg_scorediff=0;
	for(int32_t i=0; i< numte ;++i)
	{
		if( (int32_t)CMath::sign(scoreste1[i])
				!= (int32_t)CMath::sign(scoreste3[i]))
		{
			++num_labeldiffs;
		}
		avg_scorediff+=CMath::abs(scoreste1[i]-scoreste3[i])/numte;
		std::cout<< "at sample i"<< i <<" label 1= "
				<< CMath::sign(scoreste1[i]) <<" label 2= "
				<< CMath::sign(scoreste3[i])<< " scorediff "
				<< scoreste1[i] << " - " <<scoreste3[i] <<" = "
				<< CMath::abs(scoreste1[i]-scoreste3[i])<<std::endl;
	}

	std::cout<< "number of different labels between gaussian kernel and rfgauss "<< num_labeldiffs<< " out of "<< numte << " labels "<<std::endl;
	std::cout<< "average test sample SVM output score difference between gaussian kernel and rfgauss "<< avg_scorediff<<std::endl;
	std::cout<< "classification errors gaussian kernel and rfgauss "<< err1 << " " <<err3<<std::endl;

	/* ---- clean up: malloc'ed buffers, then ref-counted objects ---- */
	SG_FREE(randomcoeff_additive2);
	SG_FREE(randomcoeff_multiplicative2);
	SG_FREE(labtr);
	SG_FREE(labte);
	SG_FREE(kertr1);
	SG_FREE(kertr2);

	SG_UNREF(labelstr);
	SG_UNREF(kerneltr1);
	SG_UNREF(kerneltr2);
	SG_UNREF(kernelte1);
	SG_UNREF(kernelte2);
	SG_UNREF(kernelte3);
	SG_UNREF(featurestr1);
	SG_UNREF(featurestr2);
	SG_UNREF(featureste1);
	SG_UNREF(featureste2);
	SG_UNREF(featureste3);
	SG_UNREF(svm1);
	SG_UNREF(svm2);
	SG_UNREF(rfgauss);
	SG_UNREF(rfgauss2);

	exit_shogun();
	return 0;
}
/*
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 3 of the License, or
 * (at your option) any later version.
 *
 * Written (W) 2011 Shashwat Lal Das
 * Copyright (C) 2011 Berlin Institute of Technology and Max-Planck-Society
 *
 * This example demonstrates training and evaluation of a random
 * conditional probability tree for multiclass classification on
 * streaming dense features read from an ASCII file.
 */

#include <shogun/lib/common.h>
#include <shogun/io/StreamingAsciiFile.h>
#include <shogun/features/StreamingDenseFeatures.h>
#include <shogun/multiclass/tree/RandomConditionalProbabilityTree.h>

using namespace shogun;

int main()
{
	init_shogun_with_defaults();

	const char* train_file_name = "../data/7class_example4_train.dense";
	const char* test_file_name = "../data/7class_example4_test.dense";

	/* stream the training data (page size 1024, with labels) */
	CStreamingAsciiFile* train_file = new CStreamingAsciiFile(train_file_name);
	SG_REF(train_file);

	CStreamingDenseFeatures<float32_t>* train_features =
		new CStreamingDenseFeatures<float32_t>(train_file, true, 1024);
	SG_REF(train_features);

	/* train the tree with a single pass over the stream */
	CRandomConditionalProbabilityTree *cpt = new CRandomConditionalProbabilityTree();
	cpt->set_num_passes(1);
	cpt->set_features(train_features);
	cpt->train();
	cpt->print_tree();

	/* first pass over the test stream: predict labels */
	CStreamingAsciiFile* test_file = new CStreamingAsciiFile(test_file_name);
	SG_REF(test_file);
	CStreamingDenseFeatures<float32_t>* test_features =
		new CStreamingDenseFeatures<float32_t>(test_file, true, 1024);
	SG_REF(test_features);

	CMulticlassLabels *pred = cpt->apply_multiclass(test_features);
	test_features->reset_stream();
	SG_SPRINT("num_labels = %d\n", pred->get_num_labels());

	SG_UNREF(test_features);
	SG_UNREF(test_file);

	/* second pass over the test stream: collect ground-truth labels */
	test_file = new CStreamingAsciiFile(test_file_name);
	SG_REF(test_file);
	test_features =
		new CStreamingDenseFeatures<float32_t>(test_file, true, 1024);
	SG_REF(test_features);

	CMulticlassLabels *gnd = new CMulticlassLabels(pred->get_num_labels());
	SG_REF(gnd); /* balanced by SG_UNREF(gnd) below */

	test_features->start_parser();
	for (int32_t i=0; i < pred->get_num_labels(); ++i)
	{
		test_features->get_next_example();
		gnd->set_int_label(i, test_features->get_label());
		test_features->release_example();
	}
	test_features->end_parser();

	/* compare predictions against ground truth */
	int32_t n_correct = 0;
	for (index_t i=0; i < pred->get_num_labels(); ++i)
	{
		if (pred->get_int_label(i) == gnd->get_int_label(i))
			n_correct++;
		//SG_SPRINT("%d-%d ", pred->get_int_label(i), gnd->get_int_label(i));
	}
	SG_SPRINT("\n");
	SG_SPRINT("Multiclass Accuracy = %.2f%%\n",
			100.0*n_correct / gnd->get_num_labels());

	SG_UNREF(train_features);
	SG_UNREF(test_features);
	SG_UNREF(train_file);
	SG_UNREF(test_file);
	SG_UNREF(cpt);
	SG_UNREF(pred);
	SG_UNREF(gnd); /* fix: gnd was leaked in the original */

	exit_shogun();
	return 0;
}
/*
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 3 of the License, or
 * (at your option) any later version.
 *
 * Written (W) 2012 Jacob Walker
 *
 * Gaussian process regression example: exact inference with a linear
 * ARD (automatic relevance determination) kernel. Likelihood sigma,
 * inference scale and the per-dimension kernel weights are tuned by
 * gradient-based model selection.
 */

#include <shogun/lib/config.h>

/* Requires Eigen3 (linear algebra) and NLOPT (the optimizer used by
 * CGradientModelSelection); otherwise main() below is a no-op stub. */
#if defined(HAVE_EIGEN3) && defined(HAVE_NLOPT)

#include <shogun/base/init.h>
#include <shogun/labels/RegressionLabels.h>
#include <shogun/features/DenseFeatures.h>
#include <shogun/kernel/LinearARDKernel.h>
#include <shogun/mathematics/Math.h>
#include <shogun/regression/gp/ExactInferenceMethod.h>
#include <shogun/regression/gp/GaussianLikelihood.h>
#include <shogun/regression/gp/ZeroMean.h>
#include <shogun/regression/GaussianProcessRegression.h>
#include <shogun/evaluation/GradientEvaluation.h>
#include <shogun/modelselection/GradientModelSelection.h>
#include <shogun/modelselection/ModelSelectionParameters.h>
#include <shogun/modelselection/ParameterCombination.h>
#include <shogun/evaluation/GradientCriterion.h>

using namespace shogun;

int32_t num_vectors=4;	/* number of training/testing vectors */
int32_t dim_vectors=3;	/* dimension of each feature vector */

/* Fill `train` with fixed toy values, `test` with deterministic
 * pseudo-random values, and give `labels` alternating +1/-1 targets
 * (used here as regression targets). */
void build_matrices(SGMatrix<float64_t>& test, SGMatrix<float64_t>& train,
		CRegressionLabels* labels)
{
	/*Fill Matrices with random nonsense*/
	train[0] = -1;
	train[1] = -1;
	train[2] = -1;
	train[3] = 1;
	train[4] = 1;
	train[5] = 1;
	train[6] = -10;
	train[7] = -10;
	train[8] = -10;
	train[9] = 3;
	train[10] = 2;
	train[11] = 1;

	for (int32_t i=0; i<num_vectors*dim_vectors; i++)
		test[i]=i*sin(i)*.96;

	/* create labels, two classes */
	for (index_t i=0; i<num_vectors; ++i)
	{
		if(i%2 == 0)
			labels->set_label(i, 1);
		else
			labels->set_label(i, -1);
	}
}

/* Build the parameter tree searched by CGradientModelSelection:
 * likelihood sigma in [1,4], fixed scale of 1, and one ARD weight per
 * input dimension in [0.001,4]. Caller takes ownership of the root. */
CModelSelectionParameters* build_tree(CInferenceMethod* inf,
		CLikelihoodModel* lik, CKernel* kernel, SGVector<float64_t>& weights)
{
	CModelSelectionParameters* root=new CModelSelectionParameters();

	CModelSelectionParameters* c1 =
		new CModelSelectionParameters("inference_method", inf);
	root->append_child(c1);

	CModelSelectionParameters* c2 =
		new CModelSelectionParameters("likelihood_model", lik);
	c1->append_child(c2);

	CModelSelectionParameters* c3=new CModelSelectionParameters("sigma");
	c2->append_child(c3);
	c3->build_values(1.0, 4.0, R_LINEAR);

	CModelSelectionParameters* c4=new CModelSelectionParameters("scale");
	c1->append_child(c4);
	c4->build_values(1.0, 1.0, R_LINEAR); /* degenerate range: scale stays 1 */

	CModelSelectionParameters* c5 =
		new CModelSelectionParameters("kernel", kernel);
	c1->append_child(c5);

	CModelSelectionParameters* c6 = new CModelSelectionParameters("weights");
	c5->append_child(c6);
	/* one searched value per entry of `weights` (the ARD weight vector) */
	c6->build_values_sgvector(0.001, 4.0, R_LINEAR, &weights);

	return root;
}

int main(int argc, char **argv)
{
	init_shogun_with_defaults();

	/* create some data and labels */
	SGMatrix<float64_t> matrix =
		SGMatrix<float64_t>(dim_vectors, num_vectors);
	SGVector<float64_t> weights(dim_vectors);
	SGMatrix<float64_t> matrix2 =
		SGMatrix<float64_t>(dim_vectors, num_vectors);
	CRegressionLabels* labels=new CRegressionLabels(num_vectors);

	build_matrices(matrix2, matrix, labels);

	/* create training features */
	CDenseFeatures<float64_t>* features=new CDenseFeatures<float64_t> ();
	features->set_feature_matrix(matrix);

	/* create testing features */
	CDenseFeatures<float64_t>* features2=new CDenseFeatures<float64_t> ();
	features2->set_feature_matrix(matrix2);

	/* local references, released in the cleanup section below */
	SG_REF(features);
	SG_REF(features2);
	SG_REF(labels);

	/*Allocate our Kernel*/
	CLinearARDKernel* test_kernel = new CLinearARDKernel(10);
	test_kernel->init(features, features);

	/*Allocate our mean function*/
	CZeroMean* mean = new CZeroMean();

	/*Allocate our likelihood function*/
	CGaussianLikelihood* lik = new CGaussianLikelihood();

	/*Allocate our inference method*/
	CExactInferenceMethod* inf =
		new CExactInferenceMethod(test_kernel, features, mean, labels, lik);
	SG_REF(inf);

	/*Finally use these to allocate the Gaussian Process Object*/
	CGaussianProcessRegression* gp =
		new CGaussianProcessRegression(inf, features, labels);
	SG_REF(gp);

	/*Build the parameter tree for model selection*/
	CModelSelectionParameters* root =
		build_tree(inf, lik, test_kernel, weights);

	/*Criterion for gradient search*/
	CGradientCriterion* crit = new CGradientCriterion();

	/*This will evaluate our inference method for its derivatives*/
	CGradientEvaluation* grad=new CGradientEvaluation(gp, features, labels,
		crit);
	grad->set_function(inf);

	gp->print_modsel_params();
	root->print_tree();

	/* handles all of the above structures in memory */
	CGradientModelSelection* grad_search=new CGradientModelSelection(
		root, grad);

	/* set autolocking to false to get rid of warnings */
	grad->set_autolock(false);

	/*Search for best parameters*/
	CParameterCombination* best_combination=grad_search->select_model(true);

	/*Output all the results and information*/
	if (best_combination)
	{
		SG_SPRINT("best parameter(s):\n");
		best_combination->print_tree();
		best_combination->apply_to_machine(gp);
	}

	CGradientResult* result=(CGradientResult*)grad->evaluate();

	if(result->get_result_type() != GRADIENTEVALUATION_RESULT)
		SG_SERROR("Evaluation result not a GradientEvaluationResult!");

	result->print_result();

	/* inspect the state of the tuned inference method */
	SGVector<float64_t> alpha = inf->get_alpha();
	SGVector<float64_t> labe = labels->get_labels();
	SGVector<float64_t> diagonal = inf->get_diagonal_vector();
	SGMatrix<float64_t> cholesky = inf->get_cholesky();

	/* predict variances, then means, on the training inputs */
	gp->set_return_type(CGaussianProcessRegression::GP_RETURN_COV);
	CRegressionLabels* covariance = gp->apply_regression(features);
	gp->set_return_type(CGaussianProcessRegression::GP_RETURN_MEANS);
	CRegressionLabels* predictions = gp->apply_regression();

	alpha.display_vector("Alpha Vector");
	labe.display_vector("Labels");
	diagonal.display_vector("sW Matrix");
	covariance->get_labels().display_vector("Predicted Variances");
	predictions->get_labels().display_vector("Mean Predictions");
	cholesky.display_matrix("Cholesky Matrix L");
	matrix.display_matrix("Training Features");
	matrix2.display_matrix("Testing Features");

	/*free memory*/
	SG_UNREF(features);
	SG_UNREF(features2);
	SG_UNREF(predictions);
	SG_UNREF(covariance);
	SG_UNREF(labels);
	SG_UNREF(inf);
	SG_UNREF(gp);
	SG_UNREF(grad_search);
	SG_UNREF(best_combination);
	SG_UNREF(result);

	exit_shogun();
	return 0;
}

#else /* !(HAVE_EIGEN3 && HAVE_NLOPT) */

int main(int argc, char **argv)
{
	return 0;
}

#endif
/*
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 3 of the License, or
 * (at your option) any later version.
 *
 * Written (W) 2012 Jacob Walker
 *
 * Gaussian process regression example: FITC (sparse) inference with a
 * Gaussian kernel and a Gaussian likelihood, with kernel width, scale
 * and sigma tuned by gradient-based model selection.
 */

#include <shogun/lib/config.h>

/* Requires Eigen3 (linear algebra) and NLOPT (the optimizer used by
 * CGradientModelSelection); otherwise main() below is a no-op stub. */
#if defined(HAVE_EIGEN3) && defined(HAVE_NLOPT)

#include <shogun/base/init.h>
#include <shogun/labels/RegressionLabels.h>
#include <shogun/features/DenseFeatures.h>
#include <shogun/kernel/GaussianKernel.h>
#include <shogun/mathematics/Math.h>
#include <shogun/regression/gp/FITCInferenceMethod.h>
#include <shogun/regression/gp/GaussianLikelihood.h>
#include <shogun/regression/gp/ZeroMean.h>
#include <shogun/regression/GaussianProcessRegression.h>
#include <shogun/evaluation/GradientEvaluation.h>
#include <shogun/modelselection/GradientModelSelection.h>
#include <shogun/modelselection/ModelSelectionParameters.h>
#include <shogun/modelselection/ParameterCombination.h>
#include <shogun/evaluation/GradientCriterion.h>

using namespace shogun;

int32_t num_vectors=4;	/* number of training/testing vectors */
int32_t dim_vectors=3;	/* dimension of each feature vector */

/* Fill `train` with fixed toy values, `test` with deterministic
 * pseudo-random values, and give `labels` alternating +1/-1 targets
 * (used here as regression targets). */
void build_matrices(SGMatrix<float64_t>& test, SGMatrix<float64_t>& train,
		CRegressionLabels* labels)
{
	/*Fill Matrices with random nonsense*/
	train[0] = -1;
	train[1] = -1;
	train[2] = -1;
	train[3] = 1;
	train[4] = 1;
	train[5] = 1;
	train[6] = -10;
	train[7] = -10;
	train[8] = -10;
	train[9] = 3;
	train[10] = 2;
	train[11] = 1;

	for (int32_t i=0; i<num_vectors*dim_vectors; i++)
		test[i]=i*sin(i)*.96;

	/* create labels, two classes */
	for (index_t i=0; i<num_vectors; ++i)
	{
		if(i%2 == 0)
			labels->set_label(i, 1);
		else
			labels->set_label(i, -1);
	}
}

/* Build the parameter tree searched by CGradientModelSelection:
 * inference scale in [0.01,4], likelihood sigma in [0.01,4] and
 * Gaussian kernel width in [0.01,4]. Caller takes ownership of root. */
CModelSelectionParameters* build_tree(CInferenceMethod* inf,
		CLikelihoodModel* lik, CKernel* kernel)
{
	CModelSelectionParameters* root=new CModelSelectionParameters();

	CModelSelectionParameters* c1 =
		new CModelSelectionParameters("inference_method", inf);
	root->append_child(c1);

	CModelSelectionParameters* c2 =
		new CModelSelectionParameters("scale");
	c1 ->append_child(c2);
	c2->build_values(0.01, 4.0, R_LINEAR);

	CModelSelectionParameters* c3 =
		new CModelSelectionParameters("likelihood_model", lik);
	c1->append_child(c3);

	CModelSelectionParameters* c4=new CModelSelectionParameters("sigma");
	c3->append_child(c4);
	c4->build_values(0.01, 4.0, R_LINEAR);

	CModelSelectionParameters* c5 =
		new CModelSelectionParameters("kernel", kernel);
	c1->append_child(c5);

	CModelSelectionParameters* c6 =
		new CModelSelectionParameters("width");
	c5->append_child(c6);
	c6->build_values(0.01, 4.0, R_LINEAR);

	return root;
}

int main(int argc, char **argv)
{
	init_shogun_with_defaults();

	/* create some data and labels */
	SGMatrix<float64_t> matrix =
		SGMatrix<float64_t>(dim_vectors, num_vectors);
	SGMatrix<float64_t> matrix2 =
		SGMatrix<float64_t>(dim_vectors, num_vectors);
	CRegressionLabels* labels=new CRegressionLabels(num_vectors);

	build_matrices(matrix2, matrix, labels);

	/* create training features */
	CDenseFeatures<float64_t>* features=new CDenseFeatures<float64_t> ();
	features->set_feature_matrix(matrix);

	/* create testing features (also used as FITC latent features below) */
	CDenseFeatures<float64_t>* features2=new CDenseFeatures<float64_t> ();
	features2->set_feature_matrix(matrix2);

	/* NOTE(review): unlike the sibling examples, features/features2 are
	 * not SG_REF'd (and not SG_UNREF'd) here — presumably their lifetime
	 * is carried entirely by the inference method; verify no leak/double
	 * free if this example is modified. */
	SG_REF(labels);

	/*Allocate our Kernel*/
	CGaussianKernel* test_kernel = new CGaussianKernel(10, 2);
	test_kernel->init(features, features);

	/*Allocate our mean function*/
	CZeroMean* mean = new CZeroMean();

	/*Allocate our likelihood function*/
	CGaussianLikelihood* lik = new CGaussianLikelihood();

	//SG_SPRINT("features2 bef inf rc= %d\n",features2->ref_count());

	/*Allocate our inference method*/
	CFITCInferenceMethod* inf =
		new CFITCInferenceMethod(test_kernel, features, mean, labels, lik,
			features2);

	//SG_SPRINT("features2 aft inf rc= %d\n",features2->ref_count());

	SG_REF(inf);

	/*Finally use these to allocate the Gaussian Process Object*/
	CGaussianProcessRegression* gp =
		new CGaussianProcessRegression(inf, features, labels);
	SG_REF(gp);

	/*Build the parameter tree for model selection*/
	CModelSelectionParameters* root = build_tree(inf, lik, test_kernel);

	/*Criterion for gradient search*/
	CGradientCriterion* crit = new CGradientCriterion();

	/*This will evaluate our inference method for its derivatives*/
	CGradientEvaluation* grad=new CGradientEvaluation(gp, features, labels,
		crit);
	grad->set_function(inf);

	gp->print_modsel_params();
	root->print_tree();

	/* handles all of the above structures in memory */
	CGradientModelSelection* grad_search=new CGradientModelSelection(
		root, grad);

	/* set autolocking to false to get rid of warnings */
	grad->set_autolock(false);

	/*Search for best parameters*/
	CParameterCombination* best_combination=grad_search->select_model(true);

	/*Output all the results and information*/
	if (best_combination)
	{
		SG_SPRINT("best parameter(s):\n");
		best_combination->print_tree();
		best_combination->apply_to_machine(gp);
	}

	CGradientResult* result=(CGradientResult*)grad->evaluate();

	if(result->get_result_type() != GRADIENTEVALUATION_RESULT)
		SG_SERROR("Evaluation result not a GradientEvaluationResult!");

	result->print_result();

	/* inspect the state of the tuned inference method */
	SGVector<float64_t> alpha = inf->get_alpha();
	SGVector<float64_t> labe = labels->get_labels();
	SGVector<float64_t> diagonal = inf->get_diagonal_vector();
	SGMatrix<float64_t> cholesky = inf->get_cholesky();

	/* predict variances, then means, on the training inputs */
	gp->set_return_type(CGaussianProcessRegression::GP_RETURN_COV);
	CRegressionLabels* covariance = gp->apply_regression(features);
	gp->set_return_type(CGaussianProcessRegression::GP_RETURN_MEANS);
	CRegressionLabels* predictions = gp->apply_regression();

	alpha.display_vector("Alpha Vector");
	labe.display_vector("Labels");
	diagonal.display_vector("sW Matrix");
	covariance->get_labels().display_vector("Predicted Variances");
	predictions->get_labels().display_vector("Mean Predictions");
	cholesky.display_matrix("Cholesky Matrix L");
	matrix.display_matrix("Training Features");
	matrix2.display_matrix("Testing Features");

	/*free memory*/
	SG_UNREF(predictions);
	SG_UNREF(covariance);
	SG_UNREF(labels);
	SG_UNREF(inf);
	SG_UNREF(gp);
	SG_UNREF(grad_search);
	SG_UNREF(best_combination);
	SG_UNREF(result);

	exit_shogun();
	return 0;
}

#else /* !(HAVE_EIGEN3 && HAVE_NLOPT) */

int main(int argc, char **argv)
{
	return 0;
}

#endif
/*
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 3 of the License, or
 * (at your option) any later version.
 *
 * Written (W) 2012 Jacob Walker
 *
 * Gaussian process regression example: exact inference with a Gaussian
 * kernel and a Gaussian likelihood, with kernel width, scale and sigma
 * tuned by gradient-based model selection.
 */

#include <shogun/lib/config.h>

/* Requires Eigen3 (linear algebra) and NLOPT (the optimizer used by
 * CGradientModelSelection); otherwise main() below is a no-op stub. */
#if defined(HAVE_EIGEN3) && defined(HAVE_NLOPT)

#include <shogun/base/init.h>
#include <shogun/labels/RegressionLabels.h>
#include <shogun/features/DenseFeatures.h>
#include <shogun/kernel/GaussianKernel.h>
#include <shogun/mathematics/Math.h>
#include <shogun/regression/gp/ExactInferenceMethod.h>
#include <shogun/regression/gp/GaussianLikelihood.h>
#include <shogun/regression/gp/ZeroMean.h>
#include <shogun/regression/GaussianProcessRegression.h>
#include <shogun/evaluation/GradientEvaluation.h>
#include <shogun/modelselection/GradientModelSelection.h>
#include <shogun/modelselection/ModelSelectionParameters.h>
#include <shogun/modelselection/ParameterCombination.h>
#include <shogun/evaluation/GradientCriterion.h>

using namespace shogun;

int32_t num_vectors=4;	/* number of training/testing vectors */
int32_t dim_vectors=3;	/* dimension of each feature vector */

/* Fill `train` with fixed toy values, `test` with deterministic
 * pseudo-random values, and give `labels` alternating +1/-1 targets
 * (used here as regression targets). */
void build_matrices(SGMatrix<float64_t>& test, SGMatrix<float64_t>& train,
		CRegressionLabels* labels)
{
	/*Fill Matrices with random nonsense*/
	train[0] = -1;
	train[1] = -1;
	train[2] = -1;
	train[3] = 1;
	train[4] = 1;
	train[5] = 1;
	train[6] = -10;
	train[7] = -10;
	train[8] = -10;
	train[9] = 3;
	train[10] = 2;
	train[11] = 1;

	for (int32_t i=0; i<num_vectors*dim_vectors; i++)
		test[i]=i*sin(i)*.96;

	/* create labels, two classes */
	for (index_t i=0; i<num_vectors; ++i)
	{
		if(i%2 == 0)
			labels->set_label(i, 1);
		else
			labels->set_label(i, -1);
	}
}

/* Build the parameter tree searched by CGradientModelSelection:
 * inference scale in [0.01,4], likelihood sigma in [0.001,4] and
 * Gaussian kernel width in [0.001,4]. Caller takes ownership of root. */
CModelSelectionParameters* build_tree(CInferenceMethod* inf,
		CLikelihoodModel* lik, CKernel* kernel)
{
	CModelSelectionParameters* root=new CModelSelectionParameters();

	CModelSelectionParameters* c1 =
		new CModelSelectionParameters("inference_method", inf);
	root->append_child(c1);

	CModelSelectionParameters* c2 =
		new CModelSelectionParameters("scale");
	c1 ->append_child(c2);
	c2->build_values(0.01, 4.0, R_LINEAR);

	CModelSelectionParameters* c3 =
		new CModelSelectionParameters("likelihood_model", lik);
	c1->append_child(c3);

	CModelSelectionParameters* c4=new CModelSelectionParameters("sigma");
	c3->append_child(c4);
	c4->build_values(0.001, 4.0, R_LINEAR);

	CModelSelectionParameters* c5 =
		new CModelSelectionParameters("kernel", kernel);
	c1->append_child(c5);

	CModelSelectionParameters* c6 =
		new CModelSelectionParameters("width");
	c5->append_child(c6);
	c6->build_values(0.001, 4.0, R_LINEAR);

	return root;
}

int main(int argc, char **argv)
{
	init_shogun_with_defaults();

	/* create some data and labels */
	SGMatrix<float64_t> matrix =
		SGMatrix<float64_t>(dim_vectors, num_vectors);
	SGMatrix<float64_t> matrix2 =
		SGMatrix<float64_t>(dim_vectors, num_vectors);
	CRegressionLabels* labels=new CRegressionLabels(num_vectors);

	build_matrices(matrix2, matrix, labels);

	/* create training features */
	CDenseFeatures<float64_t>* features=new CDenseFeatures<float64_t> ();
	features->set_feature_matrix(matrix);

	/* create testing features */
	CDenseFeatures<float64_t>* features2=new CDenseFeatures<float64_t> ();
	features2->set_feature_matrix(matrix2);

	/* local references, released in the cleanup section below */
	SG_REF(features);
	SG_REF(features2);
	SG_REF(labels);

	/*Allocate our Kernel*/
	CGaussianKernel* test_kernel = new CGaussianKernel(10, 2);
	test_kernel->init(features, features);

	/*Allocate our mean function*/
	CZeroMean* mean = new CZeroMean();

	/*Allocate our likelihood function*/
	CGaussianLikelihood* lik = new CGaussianLikelihood();

	/*Allocate our inference method*/
	CExactInferenceMethod* inf =
		new CExactInferenceMethod(test_kernel, features, mean, labels, lik);
	SG_REF(inf);

	/*Finally use these to allocate the Gaussian Process Object*/
	CGaussianProcessRegression* gp =
		new CGaussianProcessRegression(inf, features, labels);
	SG_REF(gp);

	/*Build the parameter tree for model selection*/
	CModelSelectionParameters* root = build_tree(inf, lik, test_kernel);

	/*Criterion for gradient search*/
	CGradientCriterion* crit = new CGradientCriterion();

	/*This will evaluate our inference method for its derivatives*/
	CGradientEvaluation* grad=new CGradientEvaluation(gp, features, labels,
		crit);
	grad->set_function(inf);

	gp->print_modsel_params();
	root->print_tree();

	/* handles all of the above structures in memory */
	CGradientModelSelection* grad_search=new CGradientModelSelection(
		root, grad);

	/* set autolocking to false to get rid of warnings */
	grad->set_autolock(false);

	/*Search for best parameters*/
	CParameterCombination* best_combination=grad_search->select_model(true);

	/*Output all the results and information*/
	if (best_combination)
	{
		SG_SPRINT("best parameter(s):\n");
		best_combination->print_tree();
		best_combination->apply_to_machine(gp);
	}

	CGradientResult* result=(CGradientResult*)grad->evaluate();

	if(result->get_result_type() != GRADIENTEVALUATION_RESULT)
		SG_SERROR("Evaluation result not a GradientEvaluationResult!");

	result->print_result();

	/* inspect the state of the tuned inference method */
	SGVector<float64_t> alpha = inf->get_alpha();
	SGVector<float64_t> labe = labels->get_labels();
	SGVector<float64_t> diagonal = inf->get_diagonal_vector();
	SGMatrix<float64_t> cholesky = inf->get_cholesky();

	/* predict variances, then means, on the training inputs */
	gp->set_return_type(CGaussianProcessRegression::GP_RETURN_COV);
	CRegressionLabels* covariance = gp->apply_regression(features);
	gp->set_return_type(CGaussianProcessRegression::GP_RETURN_MEANS);
	CRegressionLabels* predictions = gp->apply_regression();

	alpha.display_vector("Alpha Vector");
	labe.display_vector("Labels");
	diagonal.display_vector("sW Matrix");
	covariance->get_labels().display_vector("Predicted Variances");
	predictions->get_labels().display_vector("Mean Predictions");
	cholesky.display_matrix("Cholesky Matrix L");
	matrix.display_matrix("Training Features");
	matrix2.display_matrix("Testing Features");

	/*free memory*/
	SG_UNREF(features);
	SG_UNREF(features2);
	SG_UNREF(predictions);
	SG_UNREF(covariance);
	SG_UNREF(labels);
	SG_UNREF(inf);
	SG_UNREF(gp);
	SG_UNREF(grad_search);
	SG_UNREF(best_combination);
	SG_UNREF(result);

	exit_shogun();
	return 0;
}

#else /* !(HAVE_EIGEN3 && HAVE_NLOPT) */

int main(int argc, char **argv)
{
	return 0;
}

#endif
/*
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 3 of the License, or
 * (at your option) any later version.
 *
 * Written (W) 2012 Jacob Walker
 *
 * Gaussian process regression example: Laplace approximation inference
 * with a Gaussian kernel and a Student's-t likelihood; kernel width,
 * scale, sigma and the degrees-of-freedom parameter are tuned by
 * gradient-based model selection.
 */

#include <shogun/lib/config.h>

/* Requires Eigen3 (linear algebra) and NLOPT (the optimizer used by
 * CGradientModelSelection); otherwise main() below is a no-op stub. */
#if defined(HAVE_EIGEN3) && defined(HAVE_NLOPT)

#include <shogun/base/init.h>
#include <shogun/labels/RegressionLabels.h>
#include <shogun/features/DenseFeatures.h>
#include <shogun/kernel/GaussianKernel.h>
#include <shogun/mathematics/Math.h>
#include <shogun/regression/gp/LaplacianInferenceMethod.h>
#include <shogun/regression/gp/StudentsTLikelihood.h>
#include <shogun/regression/gp/ZeroMean.h>
#include <shogun/regression/GaussianProcessRegression.h>
#include <shogun/evaluation/GradientEvaluation.h>
#include <shogun/modelselection/GradientModelSelection.h>
#include <shogun/modelselection/ModelSelectionParameters.h>
#include <shogun/modelselection/ParameterCombination.h>
#include <shogun/evaluation/GradientCriterion.h>

using namespace shogun;

int32_t num_vectors=4;	/* number of training/testing vectors */
int32_t dim_vectors=3;	/* dimension of each feature vector */

/* Fill `train` with fixed toy values, `test` with deterministic
 * pseudo-random values, and give `labels` alternating +1/-1 targets
 * (used here as regression targets). */
void build_matrices(SGMatrix<float64_t>& test, SGMatrix<float64_t>& train,
		CRegressionLabels* labels)
{
	/*Fill Matrices with random nonsense*/
	train[0] = -1;
	train[1] = -1;
	train[2] = -1;
	train[3] = 1;
	train[4] = 1;
	train[5] = 1;
	train[6] = -10;
	train[7] = -10;
	train[8] = -10;
	train[9] = 3;
	train[10] = 2;
	train[11] = 1;

	for (int32_t i=0; i<num_vectors*dim_vectors; i++)
		test[i]=i*sin(i)*.96;

	/* create labels, two classes */
	for (index_t i=0; i<num_vectors; ++i)
	{
		if(i%2 == 0)
			labels->set_label(i, 1);
		else
			labels->set_label(i, -1);
	}
}

/* Build the parameter tree searched by CGradientModelSelection:
 * inference scale in [0.5,4], likelihood sigma in [0.01,4], Student's-t
 * degrees of freedom in [500,1000] and Gaussian kernel width in
 * [0.01,4]. Caller takes ownership of the returned root. */
CModelSelectionParameters* build_tree(CInferenceMethod* inf,
		CLikelihoodModel* lik, CKernel* kernel)
{
	CModelSelectionParameters* root=new CModelSelectionParameters();

	CModelSelectionParameters* c1 =
		new CModelSelectionParameters("inference_method", inf);
	root->append_child(c1);

	CModelSelectionParameters* c2 =
		new CModelSelectionParameters("scale");
	c1 ->append_child(c2);
	c2->build_values(0.5, 4.0, R_LINEAR);

	CModelSelectionParameters* c3 =
		new CModelSelectionParameters("likelihood_model", lik);
	c1->append_child(c3);

	CModelSelectionParameters* c4=new CModelSelectionParameters("sigma");
	c3->append_child(c4);
	c4->build_values(0.01, 4.0, R_LINEAR);

	CModelSelectionParameters* c43=new CModelSelectionParameters("df");
	c3->append_child(c43);
	c43->build_values(500.0, 1000.0, R_LINEAR);

	CModelSelectionParameters* c5 =
		new CModelSelectionParameters("kernel", kernel);
	c1->append_child(c5);

	CModelSelectionParameters* c6 =
		new CModelSelectionParameters("width");
	c5->append_child(c6);
	c6->build_values(0.01, 4.0, R_LINEAR);

	return root;
}

int main(int argc, char **argv)
{
	init_shogun_with_defaults();

	/* create some data and labels */
	SGMatrix<float64_t> matrix =
		SGMatrix<float64_t>(dim_vectors, num_vectors);
	SGMatrix<float64_t> matrix2 =
		SGMatrix<float64_t>(dim_vectors, num_vectors);
	CRegressionLabels* labels=new CRegressionLabels(num_vectors);

	build_matrices(matrix2, matrix, labels);

	/* create training features */
	CDenseFeatures<float64_t>* features=new CDenseFeatures<float64_t> ();
	features->set_feature_matrix(matrix);

	/* create testing features */
	CDenseFeatures<float64_t>* features2=new CDenseFeatures<float64_t> ();
	features2->set_feature_matrix(matrix2);

	/* local references, released in the cleanup section below */
	SG_REF(features);
	SG_REF(features2);
	SG_REF(labels);

	/*Allocate our Kernel*/
	CGaussianKernel* test_kernel = new CGaussianKernel(10, 2);
	test_kernel->init(features, features);

	/*Allocate our mean function*/
	CZeroMean* mean = new CZeroMean();

	/*Allocate our likelihood function*/
	CStudentsTLikelihood* lik = new CStudentsTLikelihood();

	/*Allocate our inference method*/
	CLaplacianInferenceMethod* inf =
		new CLaplacianInferenceMethod(test_kernel, features, mean, labels,
			lik);
	SG_REF(inf);

	/*Finally use these to allocate the Gaussian Process Object*/
	CGaussianProcessRegression* gp =
		new CGaussianProcessRegression(inf, features, labels);
	SG_REF(gp);

	/*Build the parameter tree for model selection*/
	CModelSelectionParameters* root = build_tree(inf, lik, test_kernel);

	/*Criterion for gradient search*/
	CGradientCriterion* crit = new CGradientCriterion();

	/*This will evaluate our inference method for its derivatives*/
	CGradientEvaluation* grad=new CGradientEvaluation(gp, features, labels,
		crit);
	grad->set_function(inf);

	gp->print_modsel_params();
	root->print_tree();

	/* handles all of the above structures in memory */
	CGradientModelSelection* grad_search=new CGradientModelSelection(
		root, grad);

	/* set autolocking to false to get rid of warnings */
	grad->set_autolock(false);

	/*Search for best parameters*/
	CParameterCombination* best_combination=grad_search->select_model(true);

	/*Output all the results and information*/
	if (best_combination)
	{
		SG_SPRINT("best parameter(s):\n");
		best_combination->print_tree();
		best_combination->apply_to_machine(gp);
	}

	CGradientResult* result=(CGradientResult*)grad->evaluate();

	if(result->get_result_type() != GRADIENTEVALUATION_RESULT)
		SG_SERROR("Evaluation result not a GradientEvaluationResult!");

	result->print_result();

	/* inspect the state of the tuned inference method */
	SGVector<float64_t> alpha = inf->get_alpha();
	SGVector<float64_t> labe = labels->get_labels();
	SGVector<float64_t> diagonal = inf->get_diagonal_vector();
	SGMatrix<float64_t> cholesky = inf->get_cholesky();

	/* predict variances, then means, on the training inputs */
	gp->set_return_type(CGaussianProcessRegression::GP_RETURN_COV);
	CRegressionLabels* covariance = gp->apply_regression(features);
	gp->set_return_type(CGaussianProcessRegression::GP_RETURN_MEANS);
	CRegressionLabels* predictions = gp->apply_regression();

	alpha.display_vector("Alpha Vector");
	labe.display_vector("Labels");
	diagonal.display_vector("sW Matrix");
	covariance->get_labels().display_vector("Predicted Variances");
	predictions->get_labels().display_vector("Mean Predictions");
	cholesky.display_matrix("Cholesky Matrix L");
	matrix.display_matrix("Training Features");
	matrix2.display_matrix("Testing Features");

	/*free memory*/
	SG_UNREF(features);
	SG_UNREF(features2);
	SG_UNREF(predictions);
	SG_UNREF(covariance);
	SG_UNREF(labels);
	SG_UNREF(inf);
	SG_UNREF(gp);
	SG_UNREF(grad_search);
	SG_UNREF(best_combination);
	SG_UNREF(result);

	exit_shogun();
	return 0;
}

#else /* !(HAVE_EIGEN3 && HAVE_NLOPT) */

int main(int argc, char **argv)
{
	return 0;
}

#endif
// ===========================================================================
// Example: Gaussian-process regression with gradient-based model selection
// over a CProductKernel (compiled only when HAVE_EIGEN3 && HAVE_NLOPT are
// both defined; otherwise main() is a no-op stub).
//
//  * build_matrices(test, train, labels): fills `train` with a fixed 3x4
//    pattern, `test` with deterministic i*sin(i)*.96 values, and assigns
//    alternating +1/-1 labels (despite the "random nonsense" comment, the
//    values are fully deterministic).
//  * build_tree(inf, lik, kernel): builds the CModelSelectionParameters tree:
//    inference "scale" (0.99..1.01), likelihood "sigma" (1.0..4.0), and for
//    each of the product kernel's three sub-kernels a "width" (1.0..4.0) and
//    "combined_kernel_weight" (0.001..1.0) range.  The sub-kernel slots are
//    reached by walking the kernel's internal CList via get_first_element /
//    get_next_element; each CListElement is SG_REF'd because it is handed to
//    a CModelSelectionParameters node, while the temporarily returned list
//    payloads (k, sub_kernelN) are SG_UNREF'd immediately.
//  * main(): appends the same dense features three times into a
//    CCombinedFeatures (one slot per sub-kernel), builds a CProductKernel of
//    three Gaussian kernels, wires zero mean + Gaussian likelihood + exact
//    inference into a CGaussianProcessRegression, runs
//    CGradientModelSelection with a CGradientCriterion, applies the best
//    parameter combination (if any), then prints alpha, labels, sW, the
//    predictive variances/means and the Cholesky factor before releasing
//    all SG_REF'd objects.
//
// NOTE(review): the code below keeps the original collapsed formatting of
// this generated listing; only this documentation header was added.
// ===========================================================================
/* * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 3 of the License, or * (at your option) any later version. * * Written (W) 2012 Jacob Walker */ #include <shogun/lib/config.h> #if defined(HAVE_EIGEN3) && defined(HAVE_NLOPT) #include <shogun/base/init.h> #include <shogun/labels/RegressionLabels.h> #include <shogun/features/DenseFeatures.h> #include <shogun/features/CombinedDotFeatures.h> #include <shogun/kernel/GaussianKernel.h> #include <shogun/mathematics/Math.h> #include <shogun/regression/gp/ExactInferenceMethod.h> #include <shogun/regression/gp/GaussianLikelihood.h> #include <shogun/regression/gp/ZeroMean.h> #include <shogun/regression/GaussianProcessRegression.h> #include <shogun/evaluation/GradientEvaluation.h> #include <shogun/modelselection/GradientModelSelection.h> #include <shogun/modelselection/ModelSelectionParameters.h> #include <shogun/modelselection/ParameterCombination.h> #include <shogun/evaluation/GradientCriterion.h> #include <shogun/kernel/ProductKernel.h> using namespace shogun; int32_t num_vectors=4; int32_t dim_vectors=3; void build_matrices(SGMatrix<float64_t>& test, SGMatrix<float64_t>& train, CRegressionLabels* labels) { /*Fill Matrices with random nonsense*/ train[0] = -1; train[1] = -1; train[2] = -1; train[3] = 1; train[4] = 1; train[5] = 1; train[6] = -10; train[7] = -10; train[8] = -10; train[9] = 3; train[10] = 2; train[11] = 1; for (int32_t i=0; i<num_vectors*dim_vectors; i++) test[i]=i*sin(i)*.96; /* create labels, two classes */ for (index_t i=0; i<num_vectors; ++i) { if(i%2 == 0) labels->set_label(i, 1); else labels->set_label(i, -1); } } CModelSelectionParameters* build_tree(CInferenceMethod* inf, CLikelihoodModel* lik, CProductKernel* kernel) { CModelSelectionParameters* root=new CModelSelectionParameters(); CModelSelectionParameters* c1 = new
CModelSelectionParameters("inference_method", inf); root->append_child(c1); CModelSelectionParameters* c2 = new CModelSelectionParameters("scale"); c1 ->append_child(c2); c2->build_values(0.99, 1.01, R_LINEAR); CModelSelectionParameters* c3 = new CModelSelectionParameters("likelihood_model", lik); c1->append_child(c3); CModelSelectionParameters* c4=new CModelSelectionParameters("sigma"); c3->append_child(c4); c4->build_values(1.0, 4.0, R_LINEAR); CModelSelectionParameters* c5 = new CModelSelectionParameters("kernel", kernel); c1->append_child(c5); CList* list = kernel->get_list(); CModelSelectionParameters* cc1 = new CModelSelectionParameters("kernel_list", list); c5->append_child(cc1); CListElement* first = NULL; CSGObject* k = list->get_first_element(first); SG_UNREF(k); SG_REF(first); CModelSelectionParameters* cc2 = new CModelSelectionParameters("first", first); cc1->append_child(cc2); CKernel* sub_kernel1 = kernel->get_kernel(0); CModelSelectionParameters* cc3 = new CModelSelectionParameters("data", sub_kernel1); cc2->append_child(cc3); SG_UNREF(sub_kernel1); CListElement* second = first; k = list->get_next_element(second); SG_UNREF(k); SG_REF(second); CModelSelectionParameters* cc4 = new CModelSelectionParameters("next", second); cc2->append_child(cc4); CKernel* sub_kernel2 = kernel->get_kernel(1); CModelSelectionParameters* cc5 = new CModelSelectionParameters("data", sub_kernel2); cc4->append_child(cc5); SG_UNREF(sub_kernel2); CListElement* third = second; k = list->get_next_element(third); SG_UNREF(k); SG_REF(third); CModelSelectionParameters* cc6 = new CModelSelectionParameters("next", third); cc4->append_child(cc6); CKernel* sub_kernel3 = kernel->get_kernel(2); CModelSelectionParameters* cc7 = new CModelSelectionParameters("data", sub_kernel3); cc6->append_child(cc7); SG_UNREF(sub_kernel3); CModelSelectionParameters* c6 = new CModelSelectionParameters("width"); cc3->append_child(c6); c6->build_values(1.0, 4.0, R_LINEAR); CModelSelectionParameters* c66 =
new CModelSelectionParameters("combined_kernel_weight"); cc3->append_child(c66); c66->build_values(0.001, 1.0, R_LINEAR); CModelSelectionParameters* c7 = new CModelSelectionParameters("width"); cc5->append_child(c7); c7->build_values(1.0, 4.0, R_LINEAR); CModelSelectionParameters* c77 = new CModelSelectionParameters("combined_kernel_weight"); cc5->append_child(c77); c77->build_values(0.001, 1.0, R_LINEAR); CModelSelectionParameters* c8 = new CModelSelectionParameters("width"); cc7->append_child(c8); c8->build_values(1.0, 4.0, R_LINEAR); CModelSelectionParameters* c88 = new CModelSelectionParameters("combined_kernel_weight"); cc7->append_child(c88); c88->build_values(0.001, 1.0, R_LINEAR); SG_UNREF(list); return root; } int main(int argc, char **argv) { init_shogun_with_defaults(); /* create some data and labels */ SGMatrix<float64_t> matrix = SGMatrix<float64_t>(dim_vectors, num_vectors); SGMatrix<float64_t> matrix2 = SGMatrix<float64_t>(dim_vectors, num_vectors); CRegressionLabels* labels=new CRegressionLabels(num_vectors); build_matrices(matrix2, matrix, labels); /* create training features */ CDenseFeatures<float64_t>* features=new CDenseFeatures<float64_t> (); features->set_feature_matrix(matrix); CCombinedFeatures* comb_features=new CCombinedFeatures(); comb_features->append_feature_obj(features); comb_features->append_feature_obj(features); comb_features->append_feature_obj(features); CProductKernel* test_kernel = new CProductKernel(); CGaussianKernel* sub_kernel1 = new CGaussianKernel(10, 2); CGaussianKernel* sub_kernel2 = new CGaussianKernel(10, 2); CGaussianKernel* sub_kernel3 = new CGaussianKernel(10, 2); test_kernel->append_kernel(sub_kernel1); test_kernel->append_kernel(sub_kernel2); test_kernel->append_kernel(sub_kernel3); SG_REF(comb_features); SG_REF(labels); /*Allocate our Mean Function*/ CZeroMean* mean = new CZeroMean(); /*Allocate our Likelihood Model*/ CGaussianLikelihood* lik = new CGaussianLikelihood(); /*Allocate our inference method*/
CExactInferenceMethod* inf = new CExactInferenceMethod(test_kernel, comb_features, mean, labels, lik); SG_REF(inf); /*Finally use these to allocate the Gaussian Process Object*/ CGaussianProcessRegression* gp = new CGaussianProcessRegression(inf, comb_features, labels); SG_REF(gp); CModelSelectionParameters* root = build_tree(inf, lik, test_kernel); /*Criterion for gradient search*/ CGradientCriterion* crit = new CGradientCriterion(); /*This will evaluate our inference method for its derivatives*/ CGradientEvaluation* grad=new CGradientEvaluation(gp, comb_features, labels, crit); grad->set_function(inf); gp->print_modsel_params(); root->print_tree(); /* handles all of the above structures in memory */ CGradientModelSelection* grad_search=new CGradientModelSelection( root, grad); /* set autolocking to false to get rid of warnings */ grad->set_autolock(false); /*Search for best parameters*/ CParameterCombination* best_combination=grad_search->select_model(true); /*Output all the results and information*/ if (best_combination) { SG_SPRINT("best parameter(s):\n"); best_combination->print_tree(); best_combination->apply_to_machine(gp); } CGradientResult* result=(CGradientResult*)grad->evaluate(); if(result->get_result_type() != GRADIENTEVALUATION_RESULT) SG_SERROR("Evaluation result not a GradientEvaluationResult!"); result->print_result(); SGVector<float64_t> alpha = inf->get_alpha(); SGVector<float64_t> labe = labels->get_labels(); SGVector<float64_t> diagonal = inf->get_diagonal_vector(); SGMatrix<float64_t> cholesky = inf->get_cholesky(); gp->set_return_type(CGaussianProcessRegression::GP_RETURN_COV); CRegressionLabels* covariance = gp->apply_regression(comb_features); gp->set_return_type(CGaussianProcessRegression::GP_RETURN_MEANS); CRegressionLabels* predictions = gp->apply_regression(); alpha.display_vector("Alpha Vector"); labe.display_vector("Labels"); diagonal.display_vector("sW Matrix"); covariance->get_labels().display_vector("Predicted Variances");
predictions->get_labels().display_vector("Mean Predictions"); cholesky.display_matrix("Cholesky Matrix L"); matrix.display_matrix("Training Features"); matrix2.display_matrix("Testing Features"); /*free memory*/ SG_UNREF(predictions); SG_UNREF(covariance); SG_UNREF(labels); SG_UNREF(comb_features); SG_UNREF(inf); SG_UNREF(gp); SG_UNREF(grad_search); SG_UNREF(best_combination); SG_UNREF(result); exit_shogun(); return 0; } #else int main(int argc, char **argv) { return 0; } #endif
// ===========================================================================
// Example: Gaussian-process regression with gradient-based model selection
// over a CCombinedKernel, i.e. a *sum* of three Gaussian kernels (compiled
// only when HAVE_EIGEN3 && HAVE_NLOPT; otherwise main() is a no-op stub).
// This is the CCombinedKernel counterpart of the CProductKernel example
// above; the only substantive differences are the kernel type and the
// likelihood "sigma" search range (0.001..1.0 here vs 1.0..4.0 there).
//
//  * build_matrices(test, train, labels): fixed 3x4 train pattern,
//    deterministic i*sin(i)*.96 test values, alternating +1/-1 labels.
//  * build_tree(inf, lik, kernel): CModelSelectionParameters tree over the
//    inference "scale" (0.99..1.01), likelihood "sigma" (0.001..1.0), and a
//    "width" (1.0..4.0) plus "combined_kernel_weight" (0.001..1.0) range for
//    each of the three sub-kernels.  Sub-kernel slots are reached by walking
//    the kernel's internal CList; each CListElement is SG_REF'd because it
//    is attached to a parameter node, while temporarily fetched payloads
//    (k, sub_kernelN) are released right away.
//  * main(): same pipeline as the product-kernel example — combined
//    features (same dense features appended three times), combined kernel
//    of three Gaussian kernels, zero mean + Gaussian likelihood + exact
//    inference, gradient model selection, then result printing and cleanup.
//
// NOTE(review): the code below keeps the original collapsed formatting of
// this generated listing; only this documentation header was added.
// ===========================================================================
/* * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 3 of the License, or * (at your option) any later version. * * Written (W) 2012 Jacob Walker */ #include <shogun/lib/config.h> #if defined(HAVE_EIGEN3) && defined(HAVE_NLOPT) #include <shogun/base/init.h> #include <shogun/labels/RegressionLabels.h> #include <shogun/features/DenseFeatures.h> #include <shogun/features/CombinedDotFeatures.h> #include <shogun/kernel/GaussianKernel.h> #include <shogun/mathematics/Math.h> #include <shogun/regression/gp/ExactInferenceMethod.h> #include <shogun/regression/gp/GaussianLikelihood.h> #include <shogun/regression/gp/ZeroMean.h> #include <shogun/regression/GaussianProcessRegression.h> #include <shogun/evaluation/GradientEvaluation.h> #include <shogun/modelselection/GradientModelSelection.h> #include <shogun/modelselection/ModelSelectionParameters.h> #include <shogun/modelselection/ParameterCombination.h> #include <shogun/evaluation/GradientCriterion.h> #include <shogun/kernel/CombinedKernel.h> using namespace shogun; int32_t num_vectors=4; int32_t dim_vectors=3; void build_matrices(SGMatrix<float64_t>& test, SGMatrix<float64_t>& train, CRegressionLabels* labels) { /*Fill Matrices with random nonsense*/ train[0] = -1; train[1] = -1; train[2] = -1; train[3] = 1; train[4] = 1; train[5] = 1; train[6] = -10; train[7] = -10; train[8] = -10; train[9] = 3; train[10] = 2; train[11] = 1; for (int32_t i=0; i<num_vectors*dim_vectors; i++) test[i]=i*sin(i)*.96; /* create labels, two classes */ for (index_t i=0; i<num_vectors; ++i) { if(i%2 == 0) labels->set_label(i, 1); else labels->set_label(i, -1); } } CModelSelectionParameters* build_tree(CInferenceMethod* inf, CLikelihoodModel* lik, CCombinedKernel* kernel) { CModelSelectionParameters* root=new CModelSelectionParameters(); CModelSelectionParameters* c1 = new
CModelSelectionParameters("inference_method", inf); root->append_child(c1); CModelSelectionParameters* c2 = new CModelSelectionParameters("scale"); c1 ->append_child(c2); c2->build_values(0.99, 1.01, R_LINEAR); CModelSelectionParameters* c3 = new CModelSelectionParameters("likelihood_model", lik); c1->append_child(c3); CModelSelectionParameters* c4=new CModelSelectionParameters("sigma"); c3->append_child(c4); c4->build_values(0.001, 1.0, R_LINEAR); CModelSelectionParameters* c5 = new CModelSelectionParameters("kernel", kernel); c1->append_child(c5); CList* list = kernel->get_list(); CModelSelectionParameters* cc1 = new CModelSelectionParameters("kernel_list", list); c5->append_child(cc1); CListElement* first = NULL; CSGObject* k = list->get_first_element(first); SG_UNREF(k); SG_REF(first); CModelSelectionParameters* cc2 = new CModelSelectionParameters("first", first); cc1->append_child(cc2); CKernel* sub_kernel1 = kernel->get_kernel(0); CModelSelectionParameters* cc3 = new CModelSelectionParameters("data", sub_kernel1); cc2->append_child(cc3); SG_UNREF(sub_kernel1); CListElement* second = first; k = list->get_next_element(second); SG_UNREF(k); SG_REF(second); CModelSelectionParameters* cc4 = new CModelSelectionParameters("next", second); cc2->append_child(cc4); CKernel* sub_kernel2 = kernel->get_kernel(1); CModelSelectionParameters* cc5 = new CModelSelectionParameters("data", sub_kernel2); cc4->append_child(cc5); SG_UNREF(sub_kernel2); CListElement* third = second; k = list->get_next_element(third); SG_UNREF(k); SG_REF(third); CModelSelectionParameters* cc6 = new CModelSelectionParameters("next", third); cc4->append_child(cc6); CKernel* sub_kernel3 = kernel->get_kernel(2); CModelSelectionParameters* cc7 = new CModelSelectionParameters("data", sub_kernel3); cc6->append_child(cc7); SG_UNREF(sub_kernel3); CModelSelectionParameters* c6 = new CModelSelectionParameters("width"); cc3->append_child(c6); c6->build_values(1.0, 4.0, R_LINEAR); CModelSelectionParameters* c66 =
new CModelSelectionParameters("combined_kernel_weight"); cc3->append_child(c66); c66->build_values(0.001, 1.0, R_LINEAR); CModelSelectionParameters* c7 = new CModelSelectionParameters("width"); cc5->append_child(c7); c7->build_values(1.0, 4.0, R_LINEAR); CModelSelectionParameters* c77 = new CModelSelectionParameters("combined_kernel_weight"); cc5->append_child(c77); c77->build_values(0.001, 1.0, R_LINEAR); CModelSelectionParameters* c8 = new CModelSelectionParameters("width"); cc7->append_child(c8); c8->build_values(1.0, 4.0, R_LINEAR); CModelSelectionParameters* c88 = new CModelSelectionParameters("combined_kernel_weight"); cc7->append_child(c88); c88->build_values(0.001, 1.0, R_LINEAR); SG_UNREF(list); return root; } int main(int argc, char **argv) { init_shogun_with_defaults(); /* create some data and labels */ SGMatrix<float64_t> matrix = SGMatrix<float64_t>(dim_vectors, num_vectors); SGMatrix<float64_t> matrix2 = SGMatrix<float64_t>(dim_vectors, num_vectors); CRegressionLabels* labels=new CRegressionLabels(num_vectors); build_matrices(matrix2, matrix, labels); /* create training features */ CDenseFeatures<float64_t>* features=new CDenseFeatures<float64_t> (); features->set_feature_matrix(matrix); CCombinedFeatures* comb_features=new CCombinedFeatures(); comb_features->append_feature_obj(features); comb_features->append_feature_obj(features); comb_features->append_feature_obj(features); CCombinedKernel* test_kernel = new CCombinedKernel(); CGaussianKernel* sub_kernel1 = new CGaussianKernel(10, 2); CGaussianKernel* sub_kernel2 = new CGaussianKernel(10, 2); CGaussianKernel* sub_kernel3 = new CGaussianKernel(10, 2); test_kernel->append_kernel(sub_kernel1); test_kernel->append_kernel(sub_kernel2); test_kernel->append_kernel(sub_kernel3); SG_REF(comb_features); SG_REF(labels); /*Allocate our Mean Function*/ CZeroMean* mean = new CZeroMean(); /*Allocate our Likelihood Model*/ CGaussianLikelihood* lik = new CGaussianLikelihood(); /*Allocate our inference method*/
CExactInferenceMethod* inf = new CExactInferenceMethod(test_kernel, comb_features, mean, labels, lik); SG_REF(inf); /*Finally use these to allocate the Gaussian Process Object*/ CGaussianProcessRegression* gp = new CGaussianProcessRegression(inf, comb_features, labels); SG_REF(gp); CModelSelectionParameters* root = build_tree(inf, lik, test_kernel); /*Criterion for gradient search*/ CGradientCriterion* crit = new CGradientCriterion(); /*This will evaluate our inference method for its derivatives*/ CGradientEvaluation* grad=new CGradientEvaluation(gp, comb_features, labels, crit); grad->set_function(inf); gp->print_modsel_params(); root->print_tree(); /* handles all of the above structures in memory */ CGradientModelSelection* grad_search=new CGradientModelSelection( root, grad); /* set autolocking to false to get rid of warnings */ grad->set_autolock(false); /*Search for best parameters*/ CParameterCombination* best_combination=grad_search->select_model(true); /*Output all the results and information*/ if (best_combination) { SG_SPRINT("best parameter(s):\n"); best_combination->print_tree(); best_combination->apply_to_machine(gp); } CGradientResult* result=(CGradientResult*)grad->evaluate(); if(result->get_result_type() != GRADIENTEVALUATION_RESULT) SG_SERROR("Evaluation result not a GradientEvaluationResult!"); result->print_result(); SGVector<float64_t> alpha = inf->get_alpha(); SGVector<float64_t> labe = labels->get_labels(); SGVector<float64_t> diagonal = inf->get_diagonal_vector(); SGMatrix<float64_t> cholesky = inf->get_cholesky(); gp->set_return_type(CGaussianProcessRegression::GP_RETURN_COV); CRegressionLabels* covariance = gp->apply_regression(comb_features); gp->set_return_type(CGaussianProcessRegression::GP_RETURN_MEANS); CRegressionLabels* predictions = gp->apply_regression(); alpha.display_vector("Alpha Vector"); labe.display_vector("Labels"); diagonal.display_vector("sW Matrix"); covariance->get_labels().display_vector("Predicted Variances");
predictions->get_labels().display_vector("Mean Predictions"); cholesky.display_matrix("Cholesky Matrix L"); matrix.display_matrix("Training Features"); matrix2.display_matrix("Testing Features"); /*free memory*/ SG_UNREF(predictions); SG_UNREF(covariance); SG_UNREF(labels); SG_UNREF(comb_features); SG_UNREF(inf); SG_UNREF(gp); SG_UNREF(grad_search); SG_UNREF(best_combination); SG_UNREF(result); exit_shogun(); return 0; } #else // HAVE_EIGEN3 && HAVE_NLOPT int main(int argc, char **argv) { return 0; } #endif // HAVE_EIGEN3 && HAVE_NLOPT
// ===========================================================================
// Example: round-trip serialization of a CSGObject subclass (CTestClass)
// through CSerializableAsciiFile.
//
//  * CTestClass registers a scalar, an SGVector, an SGMatrix and a
//    CDenseFeatures object with m_parameters, so the generic
//    save_serializable/load_serializable machinery handles all of them.
//  * test_test_class_serial(): saves one fully-constructed instance to
//    "test.txt", loads it into a second (differently initialized) instance,
//    prints both, and ASSERTs element-wise equality of every member.
//
// FIX(review): the default constructor previously left m_number and
// m_features uninitialized; since ~CTestClass() unconditionally calls
// SG_UNREF(m_features), any default-constructed instance would unref a
// garbage pointer on destruction.  Members are now zero-initialized.
// Note the default ctor still does not register parameters, so only
// instances built via the 3-argument ctor should be (de)serialized.
//
// NOTE(review): apart from the constructor fix, the code below keeps the
// original collapsed formatting of this generated listing.
// ===========================================================================
/* * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 3 of the License, or * (at your option) any later version. * * Written (W) 2011 Heiko Strathmann * Copyright (C) 2011 Berlin Institute of Technology and Max-Planck-Society */ #include <shogun/base/init.h> #include <shogun/base/Parameter.h> #include <shogun/io/SerializableAsciiFile.h> #include <shogun/features/DenseFeatures.h> using namespace shogun; void print_message(FILE* target, const char* str) { fprintf(target, "%s", str); } class CTestClass : public CSGObject { public: /* zero-init members so the destructor's SG_UNREF is safe on a default-constructed object */ CTestClass() : m_number(0), m_features(NULL) {} CTestClass(float64_t number, float64_t vec_start, int32_t features_start) { m_number=number; m_vec=SGVector<float64_t>(10); SGVector<float64_t>::range_fill_vector(m_vec.vector, m_vec.vlen, vec_start); m_mat=SGMatrix<float64_t>(3,3); SGVector<float64_t>::range_fill_vector(m_mat.matrix, m_mat.num_cols*m_mat.num_rows, vec_start); SGMatrix<int32_t> data=SGMatrix<int32_t>(3, 2); SGVector<int32_t>::range_fill_vector(data.matrix, data.num_rows*data.num_cols, features_start); m_features=new CDenseFeatures<int32_t>(data); SG_REF(m_features); m_parameters->add(&m_number, "number", "Test variable"); m_parameters->add(&m_mat, "mat", "Test variable"); m_parameters->add(&m_vec, "vec", "Test variable"); m_parameters->add((CSGObject**)&m_features, "features", "Test variable"); } virtual ~CTestClass() { SG_UNREF(m_features); } void print() { SG_PRINT("m_number=%f\n", m_number); SGVector<float64_t>::display_vector(m_vec.vector, m_vec.vlen, "m_vec"); SGVector<float64_t>::display_vector(m_mat.matrix, m_mat.num_cols*m_mat.num_rows, "m_mat"); SGMatrix<int32_t> features=m_features->get_feature_matrix(); SGMatrix<int32_t>::display_matrix(features.matrix, features.num_rows, features.num_cols, "m_features"); } inline virtual const char* get_name() const { return "TestClass"; } public: float64_t m_number;
SGVector<float64_t> m_vec; SGMatrix<float64_t> m_mat; CDenseFeatures<int32_t>* m_features; }; const char* filename="test.txt"; void test_test_class_serial() { CTestClass* to_save=new CTestClass(10, 0, 0); CTestClass* to_load=new CTestClass(20, 10, 66); SG_SPRINT("original instance 1:\n"); to_save->print(); SG_SPRINT("original instance 2:\n"); to_load->print(); CSerializableAsciiFile* file; file=new CSerializableAsciiFile(filename, 'w'); to_save->save_serializable(file); file->close(); SG_UNREF(file); file=new CSerializableAsciiFile(filename, 'r'); to_load->load_serializable(file); file->close(); SG_UNREF(file); SG_SPRINT("deserialized instance 1 into instance 2: (should be equal to " "first instance)\n"); to_load->print(); /* assert that variable is equal */ ASSERT(to_load->m_number==to_save->m_number); /* assert that vector is equal */ for (index_t i=0; i<to_load->m_vec.vlen; ++i) { ASSERT(to_load->m_vec[i]==to_save->m_vec[i]); } /* assert that matrix is equal */ for (index_t i=0; i<to_load->m_mat.num_cols*to_load->m_mat.num_rows; ++i) { ASSERT(to_load->m_mat[i]==to_save->m_mat[i]); } /* assert that features object is equal */ SGMatrix<int32_t> features_loaded=to_load->m_features->get_feature_matrix(); SGMatrix<int32_t> features_saved=to_save->m_features->get_feature_matrix(); for (index_t i=0; i<features_loaded.num_rows*features_loaded.num_cols; ++i) { ASSERT(features_loaded[i]==features_saved[i]); } SG_UNREF(to_save); SG_UNREF(to_load); } int main(int argc, char **argv) { init_shogun(&print_message, &print_message, &print_message); test_test_class_serial(); exit_shogun(); return 0; }
// ===========================================================================
// Example: (de)serialization of Parameter-registered SGVector/SGMatrix
// objects through the four serializable file backends (ascii, hdf5, json,
// xml), all writing to the same "filename.txt".
//
//  * print(p): displays parameter 0 as a float64 vector and parameter 1 as
//    a float64 matrix.
//  * check_content_equal(save, load): ASSERTs matching sizes and
//    element-wise equality between the saved and re-loaded vector/matrix.
//  * test_ascii/test_hdf5/test_json/test_xml: save `save_param` with the
//    respective backend, load into `load_param`, print before/after and
//    verify equality.  The inline TODO comments about leaks and the xml
//    read error are pre-existing upstream notes, left as-is.
//  * reset_values(save, load): destroys and re-creates the underlying
//    buffers — save side range-filled, load side zero-filled — so each
//    backend test starts from a known mismatched state.  The deliberately
//    different sizes (vector 9 vs 3, matrix 3x3 vs 4x4) exercise
//    reallocation on load.
//  * main(): registers the vectors/matrices with two Parameter instances
//    ("vector"/"matrix") and runs all four backend round-trips, resetting
//    values in between; buffers are destroyed manually at the end.
//
// NOTE(review): the code below keeps the original collapsed formatting of
// this generated listing; only this documentation header was added (and a
// string literal that the listing's line-wrapping had split was rejoined).
// ===========================================================================
/* * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 3 of the License, or * (at your option) any later version. * * Written (W) 2011 Heiko Strathmann * Copyright (C) 2011 Berlin Institute of Technology and Max-Planck-Society */ #include <shogun/base/init.h> #include <shogun/base/Parameter.h> #include <shogun/io/SerializableAsciiFile.h> #include <shogun/io/SerializableJsonFile.h> #include <shogun/io/SerializableXmlFile.h> #include <shogun/io/SerializableHdf5File.h> using namespace shogun; void print_message(FILE* target, const char* str) { fprintf(target, "%s", str); } const char* filename="filename.txt"; void print(Parameter* p) { TParameter* param=p->get_parameter(0); SGVector<float64_t>* v=(SGVector<float64_t>*)param->m_parameter; CMath::display_vector(v->vector, v->vlen, "vector:"); param=p->get_parameter(1); SGMatrix<float64_t>* m=(SGMatrix<float64_t>*)param->m_parameter; CMath::display_matrix(m->matrix, m->num_rows, m->num_cols, "matrix:"); } void check_content_equal(Parameter* save_param, Parameter* load_param) { TParameter* p; p=save_param->get_parameter(0); SGVector<float64_t>* sv=(SGVector<float64_t>*)p->m_parameter; p=save_param->get_parameter(1); SGMatrix<float64_t>* sm=(SGMatrix<float64_t>*)p->m_parameter; p=load_param->get_parameter(0); SGVector<float64_t>* lv=(SGVector<float64_t>*)p->m_parameter; p=load_param->get_parameter(1); SGMatrix<float64_t>* lm=(SGMatrix<float64_t>*)p->m_parameter; ASSERT(sv->vlen==lv->vlen); ASSERT(sm->num_rows==lm->num_rows); ASSERT(sm->num_cols==lm->num_cols); for (index_t i=0; i<sv->vlen; ++i) ASSERT(sv->vector[i]==lv->vector[i]); for (index_t i=0; i<sm->num_cols*sm->num_rows; ++i) ASSERT(sm->matrix[i]==lm->matrix[i]); } void test_ascii(Parameter* save_param, Parameter* load_param) { SG_SPRINT("testing ascii serialization\n"); SG_SPRINT("to save:\n"); print(save_param);
SG_SPRINT("loaded before:\n"); print(load_param); CSerializableAsciiFile* file; file=new CSerializableAsciiFile(filename, 'w'); save_param->save(file); file->close(); SG_UNREF(file); file=new CSerializableAsciiFile(filename, 'r'); load_param->load(file); file->close(); SG_UNREF(file); SG_SPRINT("loaded after:\n"); print(load_param); check_content_equal(save_param, load_param); } void test_hdf5(Parameter* save_param, Parameter* load_param) { /* TODO, HDF5 file leaks memory */ SG_SPRINT("testing hdf5 serialization\n"); SG_SPRINT("to save:\n"); print(save_param); SG_SPRINT("loaded before:\n"); print(load_param); CSerializableHdf5File* file; file=new CSerializableHdf5File(filename, 'w'); save_param->save(file); file->close(); SG_UNREF(file); file=new CSerializableHdf5File(filename, 'r'); load_param->load(file); file->close(); SG_UNREF(file); SG_SPRINT("loaded after:\n"); print(load_param); check_content_equal(save_param, load_param); } void test_json(Parameter* save_param, Parameter* load_param) { /* TODO, json file leaks memory, also save methods */ SG_SPRINT("testing json serialization\n"); SG_SPRINT("to save:\n"); print(save_param); SG_SPRINT("loaded before:\n"); print(load_param); CSerializableJsonFile* file; file=new CSerializableJsonFile(filename, 'w'); save_param->save(file); file->close(); SG_UNREF(file); file=new CSerializableJsonFile(filename, 'r'); load_param->load(file); file->close(); SG_UNREF(file); SG_SPRINT("loaded after:\n"); print(load_param); check_content_equal(save_param, load_param); } void test_xml(Parameter* save_param, Parameter* load_param) { /* TODO, xml file leaks memory and produces a read error */ SG_SPRINT("testing xml serialization\n"); SG_SPRINT("to save:\n"); print(save_param); SG_SPRINT("loaded before:\n"); print(load_param); CSerializableXmlFile* file; file=new CSerializableXmlFile(filename, 'w'); save_param->save(file); file->close(); SG_UNREF(file); file=new CSerializableXmlFile(filename, 'r'); load_param->load(file); file->close(); SG_UNREF(file);
SG_SPRINT("loaded after:\n"); print(load_param); check_content_equal(save_param, load_param); } void reset_values(Parameter* save_param, Parameter* load_param) { TParameter* p; p=save_param->get_parameter(0); SGVector<float64_t>* sv=(SGVector<float64_t>*)p->m_parameter; p=save_param->get_parameter(1); SGMatrix<float64_t>* sm=(SGMatrix<float64_t>*)p->m_parameter; p=load_param->get_parameter(0); SGVector<float64_t>* lv=(SGVector<float64_t>*)p->m_parameter; p=load_param->get_parameter(1); SGMatrix<float64_t>* lm=(SGMatrix<float64_t>*)p->m_parameter; sv->destroy_vector(); lv->destroy_vector(); sm->destroy_matrix(); lm->destroy_matrix(); *sv=SGVector<float64_t>(9); *lv=SGVector<float64_t>(3); *sm=SGMatrix<float64_t>(3, 3); *lm=SGMatrix<float64_t>(4, 4); CMath::range_fill_vector(sv->vector, sv->vlen); CMath::range_fill_vector(sm->matrix, sm->num_rows*sm->num_cols); CMath::fill_vector(lv->vector, lv->vlen, 0.0); CMath::fill_vector(lm->matrix, lm->num_rows*lm->num_cols, 0.0); } int main(int argc, char **argv) { init_shogun(&print_message, &print_message, &print_message); /* for serialization */ SGVector<float64_t> sv; SGMatrix<float64_t> sm; Parameter* sp=new Parameter(); sp->add(&sv, "vector", "description"); sp->add(&sm, "matrix", "description"); /* for deserialization */ SGVector<float64_t> lv; SGMatrix<float64_t> lm; Parameter* lp=new Parameter(); lp->add(&lv, "vector", "description"); lp->add(&lm, "matrix", "description"); /* still leaks memory TODO */ reset_values(sp, lp); test_json(sp, lp); reset_values(sp, lp); test_ascii(sp, lp); /* still leaks memory TODO */ reset_values(sp, lp); test_hdf5(sp, lp); /* still leaks memory TODO */ reset_values(sp, lp); test_xml(sp, lp); /* clean up */ sv.destroy_vector(); sm.destroy_matrix(); lv.destroy_vector(); lm.destroy_matrix(); delete sp; delete lp; exit_shogun(); return 0; }
#include <shogun/labels/StructuredLabels.h>
#include <shogun/loss/HingeLoss.h>
#include <shogun/structure/HMSVMLabels.h>
#include <shogun/structure/HMSVMModel.h>
#include <shogun/structure/PrimalMosekSOSVM.h>
#include <shogun/structure/TwoStateModel.h>

using namespace shogun;

/** Trains a primal Mosek SO-SVM on simulated two-state HM-SVM data,
 * checks that prediction yields one sequence per ground-truth label,
 * and fetches every predicted/true sequence pair once.
 * Requires a build with USE_MOSEK; otherwise only init/exit run. */
int main(int argc, char ** argv)
{
	init_shogun_with_defaults();

#ifdef USE_MOSEK
	// The simulated two-state model carries both features and labels.
	CHMSVMModel* sim_model = CTwoStateModel::simulate_two_state_data();

	CStructuredLabels* true_labels = sim_model->get_labels();
	CFeatures* obs_features = sim_model->get_features();

	CHingeLoss* hinge = new CHingeLoss();
	CPrimalMosekSOSVM* solver = new CPrimalMosekSOSVM(sim_model, hinge, true_labels);
	SG_REF(solver);

	solver->train();
	// solver->get_w().display_vector("w");

	CStructuredLabels* predicted =
		CStructuredLabels::obtain_from_generic(solver->apply());

	ASSERT( predicted->get_num_labels() == true_labels->get_num_labels() );

	for ( int32_t idx = 0 ; idx < predicted->get_num_labels() ; ++idx )
	{
		// Fetch each sequence pair and release it again right away.
		CSequence* seq_pred = CSequence::obtain_from_generic( predicted->get_label(idx) );
		CSequence* seq_true = CSequence::obtain_from_generic( true_labels->get_label(idx) );
		SG_UNREF(seq_pred);
		SG_UNREF(seq_true);
	}

	SG_UNREF(predicted);
	SG_UNREF(obs_features); // because model->get_features() increased the count
	SG_UNREF(true_labels);  // because model->get_labels() increased the count
	SG_UNREF(solver);
#endif /* USE_MOSEK */

	exit_shogun();

	return 0;
}
// ===========================================================================
// Example: HM-SVM (hidden Markov SVM) trained with the primal Mosek SO-SVM
// solver on a small hand-written dataset (requires USE_MOSEK; otherwise
// only init/exit run).
//
//  * Five label sequences of length 4 over two states (CHMSVMLabels(5, 2));
//    the SGVector wrappers are created with ref_counting=false because they
//    point at stack arrays (see the original "No need for ref_counting"
//    comment below).
//  * Five 3x4 observation matrices over three discrete feature values
//    (0/1/2), likewise wrapped without ref counting around stack arrays.
//  * CHMSVMModel(features, labels, SMT_TWO_STATE, 3) ties both together;
//    after training, the learnt weight vector and the slack variables are
//    printed.  model and sosvm are SG_REF'd locally and released at the end.
//
// NOTE(review): the code below keeps the original collapsed formatting of
// this generated listing; only this documentation header was added.
// ===========================================================================
#include <shogun/features/MatrixFeatures.h> #include <shogun/loss/HingeLoss.h> #include <shogun/structure/HMSVMLabels.h> #include <shogun/structure/HMSVMModel.h> #include <shogun/structure/PrimalMosekSOSVM.h> using namespace shogun; int main(int argc, char ** argv) { init_shogun_with_defaults(); #ifdef USE_MOSEK // Create structured labels CHMSVMLabels* labels = new CHMSVMLabels(5, 2); // Label sequences of with two states int32_t lab1[] = {0, 0, 1, 1}; int32_t lab2[] = {1, 1, 1, 0}; int32_t lab3[] = {0, 1, 0, 1}; int32_t lab4[] = {1, 0, 0, 0}; int32_t lab5[] = {0, 1, 1, 0}; // No need for ref_counting in SGVector since the data is allocated // during compilation time labels->add_label(SGVector< int32_t >(lab1, 4, false)); labels->add_label(SGVector< int32_t >(lab2, 4, false)); labels->add_label(SGVector< int32_t >(lab3, 4, false)); labels->add_label(SGVector< int32_t >(lab4, 4, false)); labels->add_label(SGVector< int32_t >(lab5, 4, false)); // Create features CMatrixFeatures< float64_t >* features = new CMatrixFeatures< float64_t >(5, 3); // Observation matrices with three states float64_t mat1[] = { 0., 1., 2., 1., 1., 1., 2., 2., 2., 1., 0., 1. }; float64_t mat2[] = { 1., 2., 2., 0., 2., 1., 1., 1., 0., 0., 2., 1. }; float64_t mat3[] = { 0., 1., 2., 1., 1., 2., 1., 1., 0., 0., 1., 0. }; float64_t mat4[] = { 1., 2., 1., 0., 2., 1., 0., 2., 0., 1., 0., 2. }; float64_t mat5[] = { 2., 2., 0., 1., 2., 1., 0., 1., 2., 0., 2., 0.
}; features->set_feature_vector(SGMatrix< float64_t >(mat1, 3, 4, false), 0); features->set_feature_vector(SGMatrix< float64_t >(mat2, 3, 4, false), 1); features->set_feature_vector(SGMatrix< float64_t >(mat3, 3, 4, false), 2); features->set_feature_vector(SGMatrix< float64_t >(mat4, 3, 4, false), 3); features->set_feature_vector(SGMatrix< float64_t >(mat5, 3, 4, false), 4); CHMSVMModel* model = new CHMSVMModel(features, labels, SMT_TWO_STATE, 3); SG_REF(model); CHingeLoss* loss = new CHingeLoss(); CPrimalMosekSOSVM* sosvm = new CPrimalMosekSOSVM(model, loss, labels); SG_REF(sosvm); sosvm->train(); sosvm->get_w().display_vector("w"); sosvm->get_slacks().display_vector("slacks"); // Free memory SG_UNREF(sosvm); SG_UNREF(model); #endif /* USE_MOSEK */ exit_shogun(); return 0; }
// Example: compare a PrimalMosek SO-SVM, a BMRM bundle SO-SVM and a
// one-vs-rest LibLinear multiclass machine on random Gaussian class data.
#include <shogun/classifier/svm/LibLinear.h>
#include <shogun/evaluation/MulticlassAccuracy.h>
#include <shogun/evaluation/StructuredAccuracy.h>
#include <shogun/features/DenseFeatures.h>
#include <shogun/io/SGIO.h>
#include <shogun/labels/MulticlassLabels.h>
#include <shogun/labels/StructuredLabels.h>
#include <shogun/lib/common.h>
#include <shogun/loss/HingeLoss.h>
#include <shogun/machine/LinearMulticlassMachine.h>
#include <shogun/mathematics/Math.h>
#include <shogun/multiclass/MulticlassOneVsRestStrategy.h>
#include <shogun/structure/MulticlassSOLabels.h>
#include <shogun/structure/MulticlassModel.h>
#include <shogun/structure/PrimalMosekSOSVM.h>
#include <shogun/structure/DualLibQPBMSOSVM.h>
#include <shogun/lib/Time.h>

using namespace shogun;

#define DIMS 2
#define EPSILON 10e-5
#define NUM_SAMPLES 100
#define NUM_CLASSES 10

char FNAME[] = "data.out";

/** Generates NUM_CLASSES Gaussian clusters of NUM_SAMPLES points each and
 * dumps them to FNAME as "label v1 v2 ..." lines (one sample per line).
 *
 * @param labs output vector of labels (length NUM_CLASSES*NUM_SAMPLES)
 * @param feats output matrix of features (DIMS x NUM_CLASSES*NUM_SAMPLES)
 */
void gen_rand_data(SGVector< float64_t > labs, SGMatrix< float64_t > feats)
{
	float64_t means[DIMS];
	float64_t  stds[DIMS];

	FILE* pfile = fopen(FNAME, "w");

	for ( int32_t c = 0 ; c < NUM_CLASSES ; ++c )
	{
		// draw a random mean/std per dimension for this class
		for ( int32_t j = 0 ; j < DIMS ; ++j )
		{
			means[j] = CMath::random(-100, 100);
			 stds[j] = CMath::random(   1,   5);
		}

		for ( int32_t i = 0 ; i < NUM_SAMPLES ; ++i )
		{
			labs[c*NUM_SAMPLES+i] = c;

			fprintf(pfile, "%d", c);

			for ( int32_t j = 0 ; j < DIMS ; ++j )
			{
				feats[(c*NUM_SAMPLES+i)*DIMS + j] =
					CMath::normal_random(means[j], stds[j]);
				fprintf(pfile, " %f", feats[(c*NUM_SAMPLES+i)*DIMS + j]);
			}

			fprintf(pfile, "\n");
		}
	}

	fclose(pfile);
}

/** Reads the data written by gen_rand_data back from FNAME.
 *
 * @param labs output vector of labels
 * @param feats output matrix of features
 */
void read_data(SGVector< float64_t > labs, SGMatrix< float64_t > feats)
{
	FILE* pfile = fopen(FNAME, "r");
	if (pfile == NULL)
		SG_SERROR("Unable to open file: %s\n", FNAME);

	int32_t label;
	float32_t value;

	for ( int32_t i = 0 ; i < NUM_SAMPLES*NUM_CLASSES ; ++i )
	{
		// FIX: gen_rand_data writes plain "label v1 v2 ..." lines, not
		// svmlight "idx:value" pairs, so parse whitespace-separated floats
		// (the old "%d:%f" format could never match). Also check fscanf
		// results instead of silently reading garbage.
		if (fscanf(pfile, "%d", &label) != 1)
			SG_SERROR("Unexpected data in file: %s\n", FNAME);
		labs[i] = label;

		for ( int32_t j = 0 ; j < DIMS ; ++j )
		{
			if (fscanf(pfile, "%f", &value) != 1)
				SG_SERROR("Unexpected data in file: %s\n", FNAME);
			feats[i*DIMS + j] = value;
		}
	}

	fclose(pfile);
}

int main(int argc, char ** argv)
{
	init_shogun_with_defaults();

	SGVector< float64_t > labs(NUM_CLASSES*NUM_SAMPLES);
	SGMatrix< float64_t > feats(DIMS, NUM_CLASSES*NUM_SAMPLES);

	gen_rand_data(labs, feats);
	//read_data(labs, feats);

	// Create train labels
	CMulticlassSOLabels* labels = new CMulticlassSOLabels(labs);
	CMulticlassLabels* mlabels = new CMulticlassLabels(labs);

	// Create train features
	CDenseFeatures< float64_t >* features = new CDenseFeatures< float64_t >(feats);

	// Create structured model
	CMulticlassModel* model = new CMulticlassModel(features, labels);

	// Create loss function
	CHingeLoss* loss = new CHingeLoss();

	// Create SO-SVM
	CPrimalMosekSOSVM* sosvm = new CPrimalMosekSOSVM(model, loss, labels);
	CDualLibQPBMSOSVM* bundle = new CDualLibQPBMSOSVM(model, loss, labels, 1000);
	bundle->set_verbose(false);
	SG_REF(sosvm);
	SG_REF(bundle);

	CTime start;
	float64_t t1;
	sosvm->train();
	SG_SPRINT(">>>> PrimalMosekSOSVM trained in %9.4f\n",
			(t1 = start.cur_time_diff(false)));
	bundle->train();
	SG_SPRINT(">>>> BMRM trained in %9.4f\n", start.cur_time_diff(false)-t1);
	CStructuredLabels* out = CStructuredLabels::obtain_from_generic(sosvm->apply());
	CStructuredLabels* bout = CStructuredLabels::obtain_from_generic(bundle->apply());

	// Create liblinear svm classifier with L2-regularized L2-loss
	CLibLinear* svm = new CLibLinear(L2R_L2LOSS_SVC);

	// Add some configuration to the svm
	svm->set_epsilon(EPSILON);
	svm->set_bias_enabled(false);

	// Create a multiclass svm classifier that consists of several of the previous one
	CLinearMulticlassMachine* mc_svm = new CLinearMulticlassMachine(
			new CMulticlassOneVsRestStrategy(),
			(CDotFeatures*) features, svm, mlabels);
	SG_REF(mc_svm);

	// Train the multiclass machine using the data passed in the constructor
	mc_svm->train();
	CMulticlassLabels* mout = CMulticlassLabels::obtain_from_generic(mc_svm->apply());

	SGVector< float64_t > w = sosvm->get_w();
	for ( int32_t i = 0 ; i < w.vlen ; ++i )
		SG_SPRINT("%10f ", w[i]);
	SG_SPRINT("\n\n");

	for ( int32_t i = 0 ; i < NUM_CLASSES ; ++i )
	{
		CLinearMachine* lm = (CLinearMachine*) mc_svm->get_machine(i);
		SGVector< float64_t > mw = lm->get_w();
		for ( int32_t j = 0 ; j < mw.vlen ; ++j )
			SG_SPRINT("%10f ", mw[j]);
		SG_UNREF(lm); // because of CLinearMulticlassMachine::get_machine()
	}
	SG_SPRINT("\n");

	CStructuredAccuracy* structured_evaluator = new CStructuredAccuracy();
	CMulticlassAccuracy* multiclass_evaluator = new CMulticlassAccuracy();
	SG_REF(structured_evaluator);
	SG_REF(multiclass_evaluator);

	// FIX: a literal percent sign in a printf-style format must be "%%";
	// a lone '%' before '\n' is an invalid conversion (undefined behavior)
	SG_SPRINT("SO-SVM: %5.2f%%\n", 100.0*structured_evaluator->evaluate(out, labels));
	SG_SPRINT("BMRM:   %5.2f%%\n", 100.0*structured_evaluator->evaluate(bout, labels));
	SG_SPRINT("MC:     %5.2f%%\n", 100.0*multiclass_evaluator->evaluate(mout, mlabels));

	// Free memory
	SG_UNREF(multiclass_evaluator);
	SG_UNREF(structured_evaluator);
	SG_UNREF(mout);
	SG_UNREF(mc_svm);
	SG_UNREF(bundle);
	SG_UNREF(sosvm);
	SG_UNREF(bout);
	SG_UNREF(out);

	exit_shogun();

	return 0;
}
/*
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 3 of the License, or
 * (at your option) any later version.
 *
 * Written (W) 2012 Michal Uricar
 * Copyright (C) 2012 Michal Uricar
 */

#include <shogun/classifier/svm/LibLinear.h>
#include <shogun/features/DenseFeatures.h>
#include <shogun/io/SGIO.h>
#include <shogun/labels/MulticlassLabels.h>
#include <shogun/labels/StructuredLabels.h>
#include <shogun/lib/common.h>
#include <shogun/loss/HingeLoss.h>
#include <shogun/machine/LinearMulticlassMachine.h>
#include <shogun/mathematics/Math.h>
#include <shogun/multiclass/MulticlassOneVsRestStrategy.h>
#include <shogun/structure/MulticlassSOLabels.h>
#include <shogun/structure/MulticlassModel.h>
#include <shogun/structure/DualLibQPBMSOSVM.h>
#include <shogun/io/streaming/StreamingAsciiFile.h>
#include <shogun/features/streaming/StreamingSparseFeatures.h>

using namespace shogun;

#define DIMS 2
#define EPSILON 10e-5
#define NUM_SAMPLES 100
#define NUM_CLASSES 10

char FNAME[] = "data.svmlight";

/** Reads multiclass training data stored in svmlight format (i.e. label
 * nz_idx_1:value1 nz_idx_2:value2 ... nz_idx_N:valueN )
 *
 * @param fname path to file with training data
 * @param DIM dimension of features
 * @param N number of feature vectors
 * @param labs vector with labels
 * @param feats matrix with features
 */
void read_data(const char fname[], uint32_t DIM, uint32_t N,
		SGVector<float64_t> labs, SGMatrix<float64_t> feats)
{
	CStreamingAsciiFile* file=new CStreamingAsciiFile(fname);
	SG_REF(file);

	CStreamingSparseFeatures< float64_t >* stream_features=
		new CStreamingSparseFeatures< float64_t >(file, true, 1024);
	SG_REF(stream_features);

	// dense scratch vector; sparse examples are accumulated into it
	SGVector<float64_t > vec(DIM);

	stream_features->start_parser();
	uint32_t num_vectors=0;

	while (stream_features->get_next_example())
	{
		vec.zero();
		stream_features->add_to_dense_vec(1.0, vec, DIM);

		labs[num_vectors]=stream_features->get_label();

		for (uint32_t i=0; i<DIM; ++i)
			feats[num_vectors*DIM+i]=vec[i];

		num_vectors++;
		stream_features->release_example();
	}

	stream_features->end_parser();

	SG_UNREF(stream_features);
	// FIX: drop the reference taken above; it was never released before,
	// leaking the file object on every call
	SG_UNREF(file);
}

/** Generates random multiclass training data and stores them in svmlight format
 *
 * @param labs returned vector with labels
 * @param feats returned matrix with features
 */
void gen_rand_data(SGVector< float64_t > labs, SGMatrix< float64_t > feats)
{
	float64_t means[DIMS];
	float64_t  stds[DIMS];

	FILE* pfile = fopen(FNAME, "w");

	// fixed seed so repeated runs produce the same data set
	CMath::init_random(17);

	for ( int32_t c = 0 ; c < NUM_CLASSES ; ++c )
	{
		for ( int32_t j = 0 ; j < DIMS ; ++j )
		{
			means[j] = CMath::random(-100, 100);
			 stds[j] = CMath::random(   1,   5);
		}

		for ( int32_t i = 0 ; i < NUM_SAMPLES ; ++i )
		{
			labs[c*NUM_SAMPLES+i] = c;

			fprintf(pfile, "%d", c);

			for ( int32_t j = 0 ; j < DIMS ; ++j )
			{
				feats[(c*NUM_SAMPLES+i)*DIMS + j] =
					CMath::normal_random(means[j], stds[j]);
				// 1-based feature indices as svmlight expects
				fprintf(pfile, " %d:%f", j+1, feats[(c*NUM_SAMPLES+i)*DIMS + j]);
			}

			fprintf(pfile, "\n");
		}
	}

	fclose(pfile);
}

int main(int argc, char * argv[])
{
	// initialization
	//-------------------------------------------------------------------------

	float64_t lambda=0.01, eps=0.01;
	bool icp=1;
	uint32_t cp_models=1;
	ESolver solver=BMRM;
	uint32_t feat_dim, num_feat;

	init_shogun_with_defaults();

	if (argc > 1 && argc < 8)
	{
		SG_SERROR("Usage: so_multiclass_BMRM <data.in> <feat_dim> <num_feat> <lambda> <icp> <epsilon> <solver> [<cp_models>]\n");
		// FIX: shut shogun down cleanly even on the usage-error path
		exit_shogun();
		return -1;
	}

	if (argc > 1)
	{
		// parse command line arguments for parameters setting
		SG_SPRINT("arg[1] = %s\n", argv[1]);

		feat_dim=::atoi(argv[2]);
		num_feat=::atoi(argv[3]);
		lambda=::atof(argv[4]);
		icp=::atoi(argv[5]);
		eps=::atof(argv[6]);

		if (strcmp("BMRM", argv[7])==0)
			solver=BMRM;

		if (strcmp("PPBMRM", argv[7])==0)
			solver=PPBMRM;

		if (strcmp("P3BMRM", argv[7])==0)
			solver=P3BMRM;

		if (argc > 8)
		{
			cp_models=::atoi(argv[8]);
		}
	}
	else
	{
		// default parameters
		feat_dim=DIMS;
		num_feat=NUM_SAMPLES*NUM_CLASSES;
		lambda=1e3;
		icp=1;
		eps=0.01;
		solver=BMRM;
	}

	SGVector<float64_t> labs(num_feat);
	SGMatrix<float64_t> feats(feat_dim, num_feat);

	if (argc==1)
	{
		gen_rand_data(labs, feats);
	}
	else
	{
		// read data
		read_data(argv[1], feat_dim, num_feat, labs, feats);
	}

	// Create train labels
	CMulticlassSOLabels* labels = new CMulticlassSOLabels(labs);

	// Create train features
	CDenseFeatures< float64_t >* features =
		new CDenseFeatures< float64_t >(feats);

	// Create structured model
	CMulticlassModel* model = new CMulticlassModel(features, labels);

	// Create loss function
	CHingeLoss* loss = new CHingeLoss();

	// Create SO-SVM
	CDualLibQPBMSOSVM* sosvm =
		new CDualLibQPBMSOSVM(model, loss, labels, lambda);
	SG_REF(sosvm);

	sosvm->set_cleanAfter(10);
	sosvm->set_cleanICP(icp);
	sosvm->set_TolRel(eps);
	sosvm->set_cp_models(cp_models);
	sosvm->set_solver(solver);

	// Train
	//-------------------------------------------------------------------------

	SG_SPRINT("Train using lambda = %lf ICP removal = %d \n",
			sosvm->get_lambda(), sosvm->get_cleanICP());

	sosvm->train();

	bmrm_return_value_T res = sosvm->get_result();

	SG_SPRINT("result = { Fp=%lf, Fd=%lf, nIter=%d, nCP=%d, nzA=%d, exitflag=%d }\n",
			res.Fp, res.Fd, res.nIter, res.nCP, res.nzA, res.exitflag);

	CStructuredLabels* out = CStructuredLabels::obtain_from_generic(sosvm->apply());
	SG_REF(out);

	SG_SPRINT("\n");

	// Compute error
	//-------------------------------------------------------------------------

	float64_t error=0.0;

	for (uint32_t i=0; i<num_feat; ++i)
	{
		CRealNumber* rn = CRealNumber::obtain_from_generic( out->get_label(i) );
		error+=(rn->value==labs.get_element(i)) ? 0.0 : 1.0;
		SG_UNREF(rn); // because of out->get_label(i) above
	}

	SG_SPRINT("Error = %lf %% \n", error/num_feat*100);

	// Free memory
	SG_UNREF(sosvm);
	SG_UNREF(out);

	exit_shogun();

	return 0;
}
/*
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 3 of the License, or
 * (at your option) any later version.
 *
 * Written (W) 2012 Heiko Strathmann
 * Copyright (C) 2012 Berlin Institute of Technology and Max-Planck-Society
 */

#include <shogun/base/init.h>
#include <shogun/evaluation/CrossValidationSplitting.h>
#include <shogun/labels/RegressionLabels.h>

using namespace shogun;

void print_message(FILE* target, const char* str)
{
	fprintf(target, "%s", str);
}

/** Stress-tests k-fold cross-validation splitting on random regression
 * labels: subset sizes must be within one of the desired size and
 * subset + inverse must cover all labels. */
int main(int argc, char **argv)
{
	init_shogun(&print_message, &print_message, &print_message);

	index_t num_labels;
	index_t num_subsets;
	index_t runs=100;

	while (runs-->0)
	{
		num_labels=CMath::random(10, 150);
		num_subsets=CMath::random(1, 5);
		index_t desired_size=CMath::round(
				(float64_t)num_labels/(float64_t)num_subsets);

		/* this would throw an error, so skip such draws */
		if (num_labels<num_subsets)
			continue;

		SG_SPRINT("num_labels=%d\nnum_subsets=%d\n\n", num_labels, num_subsets);

		/* build labels */
		CRegressionLabels* labels=new CRegressionLabels(num_labels);
		for (index_t i=0; i<num_labels; ++i)
		{
			labels->set_label(i, CMath::random(-10.0, 10.0));
			SG_SPRINT("label(%d)=%.18g\n", i, labels->get_label(i));
		}
		SG_SPRINT("\n");

		/* build splitting strategy */
		CCrossValidationSplitting* splitting=
				new CCrossValidationSplitting(labels, num_subsets);

		/* build index sets (twice to ensure memory is not leaking) */
		splitting->build_subsets();
		splitting->build_subsets();

		for (index_t i=0; i<num_subsets; ++i)
		{
			SG_SPRINT("subset %d\n", i);

			SGVector<index_t> subset=splitting->generate_subset_indices(i);
			SGVector<index_t> inverse=splitting->generate_subset_inverse(i);

			SGVector<index_t>::display_vector(subset.vector, subset.vlen,
					"subset indices");
			SGVector<index_t>::display_vector(inverse.vector, inverse.vlen,
					"inverse indices");

			SG_SPRINT("checking subset size: %d vs subset desired size %d\n",
					subset.vlen, desired_size);
			ASSERT(CMath::abs(subset.vlen-desired_size)<=1);
			ASSERT(subset.vlen+inverse.vlen==num_labels);

			/* FIX: print the label that belongs to the printed index
			 * (subset.vector[j]), not the label at loop counter j */
			for (index_t j=0; j<subset.vlen; ++j)
				SG_SPRINT("%d:(%f),", subset.vector[j],
						labels->get_label(subset.vector[j]));
			SG_SPRINT("\n");

			SG_SPRINT("inverse %d\n", i);
			for (index_t j=0; j<inverse.vlen; ++j)
				SG_SPRINT("%d(%d),", inverse.vector[j],
						(int32_t)labels->get_label(inverse.vector[j]));
			SG_SPRINT("\n\n");
		}

		/* clean up */
		SG_UNREF(splitting);
	}

	exit_shogun();

	return 0;
}
/*
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 3 of the License, or
 * (at your option) any later version.
 *
 * Written (W) 2011 Heiko Strathmann
 * Copyright (C) 2011 Berlin Institute of Technology and Max-Planck-Society
 */

#include <shogun/base/init.h>
#include <shogun/evaluation/StratifiedCrossValidationSplitting.h>
#include <shogun/labels/MulticlassLabels.h>

using namespace shogun;

void print_message(FILE* target, const char* str)
{
	fprintf(target, "%s", str);
}

/** Stress-tests stratified cross-validation splitting on random multiclass
 * labels: per-class counts must be nearly equal across all subsets. */
int main(int argc, char **argv)
{
	init_shogun(&print_message, &print_message, &print_message);

	index_t num_labels, num_classes, num_subsets;
	index_t runs=50;

	while (runs-->0)
	{
		num_labels=CMath::random(5, 100);
		num_classes=CMath::random(2, 10);
		num_subsets=CMath::random(1, 10);

		/* this would throw an error, so skip such draws */
		if (num_labels<num_subsets)
			continue;

		SG_SPRINT("num_labels=%d\nnum_classes=%d\nnum_subsets=%d\n\n",
				num_labels, num_classes, num_subsets);

		/* build labels */
		CMulticlassLabels* labels=new CMulticlassLabels(num_labels);
		for (index_t i=0; i<num_labels; ++i)
		{
			labels->set_label(i, CMath::random()%num_classes);
			SG_SPRINT("label(%d)=%.18g\n", i, labels->get_label(i));
		}
		SG_SPRINT("\n");

		/* print classes */
		SGVector<float64_t> classes=labels->get_unique_labels();
		SGVector<float64_t>::display_vector(classes.vector, classes.vlen,
				"classes");

		/* build splitting strategy */
		CStratifiedCrossValidationSplitting* splitting=
				new CStratifiedCrossValidationSplitting(labels, num_subsets);

		/* build index sets (twice to ensure memory is not leaking) */
		splitting->build_subsets();
		splitting->build_subsets();

		for (index_t i=0; i<num_subsets; ++i)
		{
			SGVector<index_t> subset=splitting->generate_subset_indices(i);
			SGVector<index_t> inverse=splitting->generate_subset_inverse(i);

			SG_SPRINT("subset %d\n", i);
			/* FIX: print the label that belongs to the printed index
			 * (subset.vector[j]), not the label at loop counter j */
			for (index_t j=0; j<subset.vlen; ++j)
				SG_SPRINT("%d(%d),", subset.vector[j],
						(int32_t)labels->get_label(subset.vector[j]));
			SG_SPRINT("\n");

			SG_SPRINT("inverse %d\n", i);
			for (index_t j=0; j<inverse.vlen; ++j)
				SG_SPRINT("%d(%d),", inverse.vector[j],
						(int32_t)labels->get_label(inverse.vector[j]));
			SG_SPRINT("\n\n");
		}

		/* check whether number of labels in every subset is nearly equal */
		for (index_t i=0; i<num_classes; ++i)
		{
			SG_SPRINT("checking class %d\n", i);

			/* count number of elements for this class in subset 0 */
			SGVector<index_t> temp=splitting->generate_subset_indices(0);
			int32_t count=0;
			for (index_t j=0; j<temp.vlen; ++j)
			{
				if ((int32_t)labels->get_label(temp.vector[j])==i)
					++count;
			}

			/* check all subsets for same ratio */
			for (index_t j=0; j<num_subsets; ++j)
			{
				SGVector<index_t> subset=splitting->generate_subset_indices(j);
				int32_t temp_count=0;
				for (index_t k=0; k<subset.vlen; ++k)
				{
					if ((int32_t)labels->get_label(subset.vector[k])==i)
						++temp_count;
				}

				/* at most one difference */
				SG_SPRINT("number in subset %d: %d\n", j, temp_count);
				ASSERT(CMath::abs(temp_count-count)<=1);
			}
		}

		/* clean up */
		SG_UNREF(splitting);
	}

	exit_shogun();

	return 0;
}
/*
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 3 of the License, or
 * (at your option) any later version.
 *
 * Written (W) 2012 Heiko Strathmann
 */

// Example/test: HSIC independence test statistic, checked against
// hard-coded values from a MATLAB reference implementation.
#include <shogun/base/init.h>
#include <shogun/statistics/HSIC.h>
#include <shogun/kernel/GaussianKernel.h>
#include <shogun/features/DenseFeatures.h>
#include <shogun/mathematics/Statistics.h>

using namespace shogun;

// Builds a tiny deterministic data set (d=3, 2*m=4 points per side) and
// two Gaussian kernels with widths 2 and 3; outputs via reference params.
void create_fixed_data_kernel_small(CFeatures*& features_p,
		CFeatures*& features_q, CKernel*& kernel_p, CKernel*& kernel_q)
{
	index_t m=2;
	index_t d=3;

	SGMatrix<float64_t> p(d,2*m);
	for (index_t i=0; i<2*d*m; ++i)
		p.matrix[i]=i;

//	p.display_matrix("p");

	SGMatrix<float64_t> q(d,2*m);
	for (index_t i=0; i<2*d*m; ++i)
		q.matrix[i]=i+10;

//	q.display_matrix("q");

	features_p=new CDenseFeatures<float64_t>(p);
	features_q=new CDenseFeatures<float64_t>(q);

	float64_t sigma_x=2;
	float64_t sigma_y=3;
	float64_t sq_sigma_x_twice=sigma_x*sigma_x*2;
	float64_t sq_sigma_y_twice=sigma_y*sigma_y*2;

	/* shoguns kernel width is different (2*sigma^2 parametrization) */
	kernel_p=new CGaussianKernel(10, sq_sigma_x_twice);
	kernel_q=new CGaussianKernel(10, sq_sigma_y_twice);
}

// Larger deterministic data set (d=7, m=10) with patterned integer values;
// same kernel setup as the small variant.
void create_fixed_data_kernel_big(CFeatures*& features_p,
		CFeatures*& features_q, CKernel*& kernel_p, CKernel*& kernel_q)
{
	index_t m=10;
	index_t d=7;

	SGMatrix<float64_t> p(d,m);
	for (index_t i=0; i<d*m; ++i)
		p.matrix[i]=(i+8)%3;

//	p.display_matrix("p");

	SGMatrix<float64_t> q(d,m);
	for (index_t i=0; i<d*m; ++i)
		q.matrix[i]=((i+10)*(i%4+2))%4;

//	q.display_matrix("q");

	features_p=new CDenseFeatures<float64_t>(p);
	features_q=new CDenseFeatures<float64_t>(q);

	float64_t sigma_x=2;
	float64_t sigma_y=3;
	float64_t sq_sigma_x_twice=sigma_x*sigma_x*2;
	float64_t sq_sigma_y_twice=sigma_y*sigma_y*2;

	/* shoguns kernel width is different (2*sigma^2 parametrization) */
	kernel_p=new CGaussianKernel(10, sq_sigma_x_twice);
	kernel_q=new CGaussianKernel(10, sq_sigma_y_twice);
}

/** tests the hsic statistic for a single fixed data case and ensures
 * equality with the MATLAB reference implementation */
void test_hsic_fixed()
{
	CFeatures* features_p=NULL;
	CFeatures* features_q=NULL;
	CKernel* kernel_p=NULL;
	CKernel* kernel_q=NULL;
	create_fixed_data_kernel_small(features_p, features_q, kernel_p, kernel_q);

	index_t m=features_p->get_num_vectors();

	/* unref features since the convenience constructor of HSIC was used
	 * (it takes its own references) */
	CHSIC* hsic=new CHSIC(kernel_p, kernel_q, features_p, features_q);
	SG_UNREF(features_p);
	SG_UNREF(features_q);

	/* assert matlab result; note that compute_statistic computes m*hsic */
	float64_t difference=hsic->compute_statistic();
	SG_SPRINT("hsic fixed: %f\n", difference);
	ASSERT(CMath::abs(difference-m*0.164761446385339)<10E-16);

	SG_UNREF(hsic);
}

// Checks the gamma-approximation p-value on the big fixed data set
// against the MATLAB reference value.
void test_hsic_gamma()
{
	CFeatures* features_p=NULL;
	CFeatures* features_q=NULL;
	CKernel* kernel_p=NULL;
	CKernel* kernel_q=NULL;
	create_fixed_data_kernel_big(features_p, features_q, kernel_p, kernel_q);

	/* unref features since the convenience constructor of HSIC was used */
	CHSIC* hsic=new CHSIC(kernel_p, kernel_q, features_p, features_q);
	SG_UNREF(features_p);
	SG_UNREF(features_q);

	hsic->set_null_approximation_method(HSIC_GAMMA);
	float64_t p=hsic->compute_p_value(0.05);
	SG_SPRINT("p-value: %f\n", p);

	ASSERT(CMath::abs(p-0.172182287884256)<10E-15);

	SG_UNREF(hsic);
}

// Checks that bootstrapping via CHSIC gives the same null-distribution
// statistics as bootstrapping via the base-class implementation when the
// random seed is reset to the same value before each run.
void test_hsic_bootstrap()
{
	CFeatures* features_p=NULL;
	CFeatures* features_q=NULL;
	CKernel* kernel_p=NULL;
	CKernel* kernel_q=NULL;
	create_fixed_data_kernel_big(features_p, features_q, kernel_p, kernel_q);

	/* unref features since the convenience constructor of HSIC was used */
	CHSIC* hsic=new CHSIC(kernel_p, kernel_q, features_p, features_q);
	SG_UNREF(features_p);
	SG_UNREF(features_q);

	/* do bootstrapping */
	hsic->set_null_approximation_method(BOOTSTRAP);
	float64_t p=hsic->compute_p_value(0.05);
	SG_SPRINT("p-value: %f\n", p);

	/* ensure that bootstrapping of hsic leads to same results as using
	 * CKernelIndependenceTestStatistic. NOTE(review): mean and variance are
	 * each computed from a *separate* bootstrap_null() draw; the comparison
	 * relies on both code paths consuming the random stream identically
	 * after the seed reset. */
	CMath::init_random(1);
	float64_t mean1=CStatistics::mean(hsic->bootstrap_null());
	float64_t var1=CStatistics::variance(hsic->bootstrap_null());
	SG_SPRINT("mean1=%f, var1=%f\n", mean1, var1);

	CMath::init_random(1);
	float64_t mean2=CStatistics::mean(
			hsic->CKernelIndependenceTestStatistic::bootstrap_null());
	float64_t var2=CStatistics::variance(hsic->bootstrap_null());
	SG_SPRINT("mean2=%f, var2=%f\n", mean2, var2);

	/* assert that results are the same from both bootstrapping impls. */
	ASSERT(CMath::abs(mean1-mean2)<10E-8);
	ASSERT(CMath::abs(var1-var2)<10E-8);

	SG_UNREF(hsic);
}

int main(int argc, char** argv)
{
	init_shogun_with_defaults();

//	sg_io->set_loglevel(MSG_DEBUG);

	test_hsic_fixed();
	test_hsic_gamma();
	test_hsic_bootstrap();

	exit_shogun();
	return 0;
}
/*
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 3 of the License, or
 * (at your option) any later version.
 *
 * Written (W) 2012 Heiko Strathmann
 */

// Example/test: linear-time MMD two-sample test statistic on streaming
// features, checked against a MATLAB reference implementation.
#include <shogun/base/init.h>
#include <shogun/statistics/LinearTimeMMD.h>
#include <shogun/kernel/GaussianKernel.h>
#include <shogun/features/DenseFeatures.h>
#include <shogun/features/streaming/generators/MeanShiftDataGenerator.h>
#include <shogun/mathematics/Statistics.h>

using namespace shogun;

/** tests the linear mmd statistic for a single data case and ensures
 * equality with matlab implementation. Since data from memory is used,
 * this is rather complicated, i.e. create dense features and then create
 * streaming dense features from them. Normally, just use streaming features
 * directly. */
void test_linear_mmd_fixed()
{
	index_t m=2;
	index_t d=3;
	float64_t sigma=2;
	float64_t sq_sigma_twice=sigma*sigma*2;

	SGMatrix<float64_t> data(d,2*m);
	for (index_t i=0; i<2*d*m; ++i)
		data.matrix[i]=i;

	/* create data matrix for each features (appended is not supported) */
	SGMatrix<float64_t> data_p(d, m);
	memcpy(&(data_p.matrix[0]), &(data.matrix[0]), sizeof(float64_t)*d*m);

	SGMatrix<float64_t> data_q(d, m);
	memcpy(&(data_q.matrix[0]), &(data.matrix[d*m]), sizeof(float64_t)*d*m);

	CDenseFeatures<float64_t>* features_p=new CDenseFeatures<float64_t>(data_p);
	CDenseFeatures<float64_t>* features_q=new CDenseFeatures<float64_t>(data_q);

	/* create streaming features from dense features
	 * NOTE(review): CStreamingDenseFeatures is used without a direct
	 * #include here — presumably pulled in transitively; verify. */
	CStreamingFeatures* streaming_p=
			new CStreamingDenseFeatures<float64_t>(features_p);
	CStreamingFeatures* streaming_q=
			new CStreamingDenseFeatures<float64_t>(features_q);

	/* shoguns kernel width is different (2*sigma^2 parametrization) */
	CGaussianKernel* kernel=new CGaussianKernel(10, sq_sigma_twice);

	/* create MMD instance */
	CLinearTimeMMD* mmd=new CLinearTimeMMD(kernel, streaming_p, streaming_q, m);

	/* start streaming features parser */
	streaming_p->start_parser();
	streaming_q->start_parser();

	/* assert matlab result */
	float64_t statistic=mmd->compute_statistic();
	SG_SPRINT("statistic=%f\n", statistic);
	float64_t difference=statistic-0.034218118311602;
	ASSERT(CMath::abs(difference)<10E-16);

	/* stop streaming features parser */
	streaming_p->end_parser();
	streaming_q->end_parser();

	SG_UNREF(mmd);
}

/** tests the linear mmd statistic for a random data case (fixed distribution)
 * and ensures equality with matlab implementation */
void test_linear_mmd_random()
{
	index_t d=3;
	index_t m=10000;
	float64_t difference=0.5;
	float64_t sigma=2;

	index_t num_runs=100;
	num_runs=3; //speed up
	SGVector<float64_t> mmds(num_runs);

	/* create data generator classes that implement a meanshift in q */
	CMeanShiftDataGenerator<float64_t>* gen_p=
			new CMeanShiftDataGenerator<float64_t>(0, d);
	CMeanShiftDataGenerator<float64_t>* gen_q=
			new CMeanShiftDataGenerator<float64_t>(difference, d);

	/* shoguns kernel width is different */
	CGaussianKernel* kernel=new CGaussianKernel(100, sigma*sigma*2);

	CLinearTimeMMD* mmd=new CLinearTimeMMD(kernel, gen_p, gen_q, m);

	/* start parser of streaming features */
	gen_p->start_parser();
	gen_q->start_parser();

	/* compute statistic; streams new data all the time */
	for (index_t i=0; i<num_runs; ++i)
		mmds[i]=mmd->compute_statistic();

	/* stop parser of streaming features */
	gen_p->end_parser();
	gen_q->end_parser();

	float64_t mean=CStatistics::mean(mmds);
	float64_t var=CStatistics::variance(mmds);

	SG_SPRINT("mean %f\n", mean);
	SG_SPRINT("var %f\n", var);

	SG_UNREF(mmd);
}

// Checks the within-burst variance estimate of the linear-time MMD against
// a MATLAB-derived confidence interval (asserts relaxed for speed).
void test_linear_mmd_variance_estimate()
{
	index_t d=3;
	index_t m=10000;
	float64_t difference=0.5;
	float64_t sigma=2;

	index_t num_runs=100;
	num_runs=10; //speed up
	SGVector<float64_t> vars(num_runs);

	/* create data generator classes that implement a meanshift in q */
	CMeanShiftDataGenerator<float64_t>* gen_p=
			new CMeanShiftDataGenerator<float64_t>(0, d);
	CMeanShiftDataGenerator<float64_t>* gen_q=
			new CMeanShiftDataGenerator<float64_t>(difference, d);

	/* shoguns kernel width is different */
	CGaussianKernel* kernel=new CGaussianKernel(100, sigma*sigma*2);

	CLinearTimeMMD* mmd=new CLinearTimeMMD(kernel, gen_p, gen_q, m);

	/* start parser of streaming features */
	gen_p->start_parser();
	gen_q->start_parser();

	for (index_t i=0; i<num_runs; ++i)
		vars[i]=mmd->compute_variance_estimate();

	/* stop parser of streaming features */
	gen_p->end_parser();
	gen_q->end_parser();

	float64_t mean=CStatistics::mean(vars);
	float64_t var=CStatistics::variance(vars);

	/* MATLAB 100-run 3 sigma interval for mean is
	 * [2.487949168976897e-05, 2.816652377191562e-05] */
	SG_SPRINT("mean variance %f\n", mean);
//	ASSERT(mean>2.487949168976897e-05);
//	ASSERT(mean<2.816652377191562e-05);

	/* MATLAB 100-run variance is 8.321246145460274e-06 quite stable */
	SG_SPRINT("var of variance %f\n", var);
	ASSERT(CMath::abs(var-8.321246145460274e-06)<10E-6);

	SG_UNREF(mmd);
}

// Compares the closed-form variance estimate with the empirical variance
// of bootstrapped null samples.
void test_linear_mmd_variance_estimate_vs_bootstrap()
{
	index_t d=3;
	index_t m=50000;
	m=1000; //speed up
	float64_t difference=0.5;
	float64_t sigma=2;

	/* create data generator classes that implement a meanshift in q */
	CMeanShiftDataGenerator<float64_t>* gen_p=
			new CMeanShiftDataGenerator<float64_t>(0, d);
	CMeanShiftDataGenerator<float64_t>* gen_q=
			new CMeanShiftDataGenerator<float64_t>(difference, d);

	/* shoguns kernel width is different */
	CGaussianKernel* kernel=new CGaussianKernel(100, sigma*sigma*2);

	CLinearTimeMMD* mmd=new CLinearTimeMMD(kernel, gen_p, gen_q, m);

	/* start parser of streaming features */
	gen_p->start_parser();
	gen_q->start_parser();

	/* for checking results, set to 100
	 * NOTE(review): the next line is an exact duplicate of the previous
	 * one — harmless, but one of the two is redundant */
	mmd->set_bootstrap_iterations(100);
	mmd->set_bootstrap_iterations(100); // speed up
	SGVector<float64_t> null_samples=mmd->bootstrap_null();
	float64_t bootstrap_variance=CStatistics::variance(null_samples);
	float64_t statistic, estimated_variance;

	/* it is also possible to compute these separately, but this only requires
	 * one loop and values are connected */
	mmd->compute_statistic_and_variance(statistic, estimated_variance);
	float64_t variance_error=CMath::abs(bootstrap_variance-estimated_variance);

	/* stop parser of streaming features */
	gen_p->end_parser();
	gen_q->end_parser();

	/* assert that variances error is less than 10E-5 of statistic */
	SG_SPRINT("null distribution variance: %f\n", bootstrap_variance);
	SG_SPRINT("estimated variance: %f\n", estimated_variance);
	SG_SPRINT("linear mmd itself: %f\n", statistic);
	SG_SPRINT("variance error: %f\n", variance_error);
	SG_SPRINT("error/statistic: %f\n", variance_error/statistic);
//	ASSERT(variance_error/statistic<10E-5);

	SG_UNREF(mmd);
}

// Estimates the type-II error rate of the Gaussian-approximation test at
// the 5% level over repeated runs.
void test_linear_mmd_type2_error()
{
	index_t d=3;
	index_t m=10000;
	float64_t difference=0.4;
	float64_t sigma=2;

	index_t num_runs=500;
	num_runs=50; // speed up
	index_t num_errors=0;

	/* create data generator classes that implement a meanshift in q */
	CMeanShiftDataGenerator<float64_t>* gen_p=
			new CMeanShiftDataGenerator<float64_t>(0, d);
	CMeanShiftDataGenerator<float64_t>* gen_q=
			new CMeanShiftDataGenerator<float64_t>(difference, d);

	/* shoguns kernel width is different */
	CGaussianKernel* kernel=new CGaussianKernel(100, sigma*sigma*2);

	CLinearTimeMMD* mmd=new CLinearTimeMMD(kernel, gen_p, gen_q, m);
	mmd->set_null_approximation_method(MMD1_GAUSSIAN);

	/* NOTE(review): unlike the other tests, start_parser()/end_parser()
	 * are not called here — confirm whether compute_statistic handles
	 * this itself */
	for (index_t i=0; i<num_runs; ++i)
	{
		float64_t statistic=mmd->compute_statistic();
		float64_t p_value_est=mmd->compute_p_value(statistic);

		/* lets allow a 5% type 1 error */
		num_errors+=p_value_est<0.05 ? 0 : 1;
	}

	float64_t type_2_error=(float64_t)num_errors/(float64_t)num_runs;
	SG_SPRINT("type2 error est: %f\n", type_2_error);

	/* for 100 MATLAB runs, 3*sigma error range lies in
	 * [0.024568646859226, 0.222231353140774] */
//	ASSERT(type_2_error>0.024568646859226);
//	ASSERT(type_2_error<0.222231353140774);

	SG_UNREF(mmd);
}

int main(int argc, char** argv)
{
	init_shogun_with_defaults();

//	sg_io->set_loglevel(MSG_DEBUG);

	/* all tests have been "speed up" by reducing the number of runs/samples.
	 * If you have any doubts in the results, set all num_runs to original
	 * numbers and activate asserts. If they fail, something is likely wrong. */
	test_linear_mmd_fixed();
	test_linear_mmd_random();
	test_linear_mmd_variance_estimate();
	test_linear_mmd_variance_estimate_vs_bootstrap();
	test_linear_mmd_type2_error();

	exit_shogun();
	return 0;
}
/*
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 3 of the License, or
 * (at your option) any later version.
 *
 * Written (W) 2012 Heiko Strathmann
 */

// Skeleton example for linear-time MMD kernel-weight selection over a
// CCombinedKernel. The actual test body is currently disabled (commented
// out below); main() only initializes and shuts shogun down.
#include <shogun/base/init.h>
#include <shogun/statistics/LinearTimeMMD.h>
#include <shogun/kernel/GaussianKernel.h>
#include <shogun/kernel/CombinedKernel.h>
#include <shogun/features/DenseFeatures.h>
#include <shogun/features/CombinedFeatures.h>
#include <shogun/mathematics/Statistics.h>

using namespace shogun;

//SGMatrix<float64_t> create_fixed_data(index_t m, index_t dim)
//{
//	SGMatrix<float64_t> data(dim,2*m);
//	for (index_t i=0; i<2*dim*m; ++i)
//		data.matrix[i]=i*i;
//
//	data.display_matrix("data");
//
//	return data;
//}
//
//void test_linear_mmd_optimize_weights()
//{
//	index_t m=8;
//	index_t dim=2;
//	SGMatrix<float64_t> data=create_fixed_data(m, dim);
//
//	/* create a number of kernels with different widths */
//	SGVector<float64_t> sigmas(3);
//	SGVector<float64_t> shogun_sigmas(sigmas.vlen);
//
//	CCombinedKernel* kernel=new CCombinedKernel();
//	CCombinedFeatures* features=new CCombinedFeatures();
//	for (index_t i=0; i<sigmas.vlen; ++i)
//	{
//		sigmas[i]=CMath::pow(2.0, i-2)*1000;
//		shogun_sigmas[i]=sigmas[i]*sigmas[i]*2;
//		kernel->append_kernel(new CGaussianKernel(10, shogun_sigmas[i]));
//		features->append_feature_obj(new CDenseFeatures<float64_t>(data));
//	}
//
//	sigmas.display_vector("sigmas");
//
//	CLinearTimeMMD* mmd=new CLinearTimeMMD(kernel, features, m);
//	mmd->optimize_kernel_weights();
//
//	SGVector<float64_t> weights=kernel->get_subkernel_weights();
//	weights.display_vector("weights");
//
//	/* MATLAB program returns these weights: */
//	ASSERT(CMath::abs(weights[0]-0.622266982205087)<10E-16);
//	ASSERT(weights[1]==0);
//	ASSERT(CMath::abs(weights[2]-0.377733017794913)<10E-16);
//
//	SG_UNREF(mmd);
//}

int main(int argc, char** argv)
{
	init_shogun_with_defaults();

//	sg_io->set_loglevel(MSG_DEBUG);

//	test_linear_mmd_optimize_weights();

	exit_shogun();
	return 0;
}
/*
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 3 of the License, or
 * (at your option) any later version.
 *
 * Written (W) 2012 Heiko Strathmann
 *
 * Tests for the quadratic-time MMD (Maximum Mean Discrepancy) two-sample
 * test statistic. Each test checks shogun's results against reference
 * values produced by a MATLAB implementation.
 */

#include <shogun/base/init.h>
#include <shogun/statistics/QuadraticTimeMMD.h>
#include <shogun/kernel/GaussianKernel.h>
#include <shogun/kernel/CustomKernel.h>
#include <shogun/features/DenseFeatures.h>
#include <shogun/mathematics/Statistics.h>
#include <shogun/features/DataGenerator.h>

using namespace shogun;

/** tests the quadratic mmd statistic for a single fixed data case and ensures
 * equality with the matlab implementation (both biased and unbiased) */
void test_quadratic_mmd_fixed()
{
	index_t n=2;
	index_t d=3;
	float64_t sigma=2;
	float64_t sq_sigma_twice=sigma*sigma*2;

	/* deterministic data: entries 0..2*d*n-1, first n columns are sample p,
	 * last n columns sample q */
	SGMatrix<float64_t> data(d,2*n);
	for (index_t i=0; i<2*d*n; ++i)
		data.matrix[i]=i;

	CDenseFeatures<float64_t>* features=new CDenseFeatures<float64_t>(data);

	CGaussianKernel* kernel=new CGaussianKernel(10, sq_sigma_twice);
	kernel->init(features, features);

	CQuadraticTimeMMD* mmd=new CQuadraticTimeMMD(kernel, features, n);

	/* unbiased statistic, shogun returns m*MMD, hence the division by n
	 * before comparing against the MATLAB reference value */
	mmd->set_statistic_type(UNBIASED);
	float64_t difference=CMath::abs(mmd->compute_statistic()/n-0.051325806508381);
	ASSERT(difference<=10E-16);

	/* biased statistic */
	mmd->set_statistic_type(BIASED);
	difference=CMath::abs(mmd->compute_statistic()/n-1.017107688196714);
	ASSERT(difference<=10E-16);

	SG_UNREF(mmd);
}

/** tests the quadratic mmd statistic bootstrapping for a random data case and
 * ensures equality with the matlab implementation (unbiased statistic);
 * also verifies that a precomputed (custom) kernel yields the same null
 * distribution as recomputing the kernel in every bootstrap iteration */
void test_quadratic_mmd_bootstrap()
{
	/* reproducible results via fixed RNG seed */
	CMath::init_random(1);

	index_t dimension=3;
	index_t m=100;
	float64_t difference=0.5;
	float64_t sigma=2;
	index_t num_iterations=1000;
	num_iterations=10; //speed up (original reference run used 1000)

	SGMatrix<float64_t> data=CDataGenerator::generate_mean_data(m, dimension,
			difference);
	CDenseFeatures<float64_t>* features=new CDenseFeatures<float64_t>(data);

	/* shogun's kernel width parameterization is different from MATLAB's */
	CGaussianKernel* kernel=new CGaussianKernel(100, sigma*sigma*2);

	CQuadraticTimeMMD* mmd=new CQuadraticTimeMMD(kernel, features, m);
	mmd->set_statistic_type(UNBIASED);
	mmd->set_bootstrap_iterations(num_iterations);

	/* use fixed seed so the precomputed-kernel run below sees the same
	 * permutations */
	CMath::init_random(1);
	SGVector<float64_t> null_samples=mmd->bootstrap_null();
	float64_t mean=CStatistics::mean(null_samples);
	float64_t var=CStatistics::variance(null_samples);

	/* MATLAB mean 2-sigma confidence interval for 1000 repetitions is
	 * [-3.169406734013459e-04, 3.296399498466372e-04] */
	SG_SPRINT("mean %f\n", mean);
	// ASSERT(mean>-3.169406734013459e-04);
	// ASSERT(mean<3.296399498466372e-04);

	/* MATLAB variance 2-sigma confidence interval for 1000 repetitions is
	 * [2.194192869469228e-05,2.936672859339959e-05] */
	SG_SPRINT("var %f\n", var);
	// ASSERT(var>2.194192869469228e-05);
	// ASSERT(var<2.936672859339959e-05);

	/* now again but with a precomputed kernel, same features.
	 * This avoids re-computing the kernel matrix in every bootstrapping
	 * iteration and should be num_iterations times faster */
	SG_REF(features);

	/* re-init kernel before kernel matrix is computed: this is due to a design
	 * error in subsets and should be worked on!
	 */
	kernel->init(features, features);
	CCustomKernel* precomputed_kernel=new CCustomKernel(kernel);
	SG_UNREF(mmd);
	mmd=new CQuadraticTimeMMD(precomputed_kernel, features, m);
	mmd->set_statistic_type(UNBIASED);
	mmd->set_bootstrap_iterations(num_iterations);

	/* same seed as above, so both runs must produce identical permutations */
	CMath::init_random(1);
	null_samples=mmd->bootstrap_null();

	/* assert that results do not change */
	SG_SPRINT("mean %f, var %f\n", CStatistics::mean(null_samples),
			CStatistics::variance(null_samples));
	ASSERT(CMath::abs(mean-CStatistics::mean(null_samples))<10E-5);
	ASSERT(CMath::abs(var-CStatistics::variance(null_samples))<10E-5);

	SG_UNREF(mmd);
	SG_UNREF(features);
}

#ifdef HAVE_LAPACK
/** tests the quadratic mmd statistic threshold method spectrum for a random
 * data case and ensures equality with the matlab implementation */
void test_quadratic_mmd_spectrum()
{
	index_t dimension=3;
	index_t m=100;
	float64_t difference=0.5;
	float64_t sigma=2;

	SGMatrix<float64_t> data=CDataGenerator::generate_mean_data(m, dimension,
			difference);
	CDenseFeatures<float64_t>* features=new CDenseFeatures<float64_t>(data);

	/* shogun's kernel width parameterization is different from MATLAB's */
	CGaussianKernel* kernel=new CGaussianKernel(100, sigma*sigma*2);

	CQuadraticTimeMMD* mmd=new CQuadraticTimeMMD(kernel, features, m);

	/* NOTE: "sepctrum" is a typo in the upstream shogun API name itself */
	mmd->set_num_samples_sepctrum(1000);
	mmd->set_num_samples_sepctrum(10); //speed up (reference run used 1000)
	mmd->set_num_eigenvalues_spectrum(m);
	mmd->set_null_approximation_method(MMD2_SPECTRUM);
	mmd->set_statistic_type(BIASED);

	/* compute p-value for a fixed statistic value */
	float64_t p=mmd->compute_p_value(2);

	/* MATLAB 1000 iterations 3 sigma confidence interval is
	 * [0.021240218376709, 0.060875781623291] */
	SG_SPRINT("p %f\n", p);
	// ASSERT(p>0.021240218376709);
	// ASSERT(p<0.060875781623291);

	SG_UNREF(mmd);
}
#endif // HAVE_LAPACK

/** tests the quadratic mmd statistic threshold method gamma for a fixed data
 * case and ensures equality with the matlab implementation */
void test_quadratic_mmd_gamma()
{
	index_t dimension=3;
	index_t m=100;
	float64_t sigma=4;

	/* note: fixed data this time (entries 0..2*dimension*m-1) */
	SGMatrix<float64_t> data(dimension, 2*m);
	for (index_t i=0; i<2*dimension*m; ++i)
		data.matrix[i]=i;

	CDenseFeatures<float64_t>* features=new CDenseFeatures<float64_t>(data);

	/* shogun's kernel width parameterization is different from MATLAB's */
	CGaussianKernel* kernel=new CGaussianKernel(100, sigma*sigma*2);

	CQuadraticTimeMMD* mmd=new CQuadraticTimeMMD(kernel, features, m);
	mmd->set_null_approximation_method(MMD2_GAMMA);
	mmd->set_statistic_type(BIASED);

	/* compute p-value for a fixed statistic value */
	float64_t p=mmd->compute_p_value(2);
	SG_SPRINT("p: %f\n", p);

	/* MATLAB 1000 iterations mean: 0.511547577996229 with variance 10E-15,
	 * asserting with only 10-12 to avoid problems. Should never fail. */
	ASSERT(CMath::abs(p-0.511547577996229)<10E-12);

	SG_UNREF(mmd);
}

/** tests the quadratic mmd statistic for a random data case (fixed
 * distribution) and ensures equality with the matlab implementation
 * (unbiased case) */
void test_quadratic_mmd_random()
{
	index_t dimension=3;
	index_t m=300;
	float64_t difference=0.5;
	float64_t sigma=2;

	index_t num_runs=100;
	num_runs=10; //speed up (reference run used 100)

	SGVector<float64_t> mmds(num_runs);

	/* pre-allocate data matrix and features, just change elements later */
	SGMatrix<float64_t> data(dimension, 2*m);
	CDenseFeatures<float64_t>* features=new CDenseFeatures<float64_t>(data);

	/* shogun's kernel width parameterization is different from MATLAB's */
	CGaussianKernel* kernel=new CGaussianKernel(100, sigma*sigma*2);

	CQuadraticTimeMMD* mmd=new CQuadraticTimeMMD(kernel, features, m);
	mmd->set_statistic_type(UNBIASED);

	for (index_t i=0; i<num_runs; ++i)
	{
		/* use pre-allocated space for data generation */
		CDataGenerator::generate_mean_data(m, dimension, difference, data);
		/* kernel must be re-initialized since the underlying data changed */
		kernel->init(features, features);
		mmds[i]=mmd->compute_statistic();
	}

	/* MATLAB 95% mean confidence interval 0.007495841715582 0.037960088792417 */
	float64_t mean=CStatistics::mean(mmds);
	SG_SPRINT("mean %f\n", mean);
	// ASSERT((mean>0.007495841715582) && (mean<0.037960088792417));

	/* MATLAB variance is 5.800439687240292e-05 quite stable */
	float64_t variance=CStatistics::variance(mmds);
	SG_SPRINT("variance: %f\n", variance);
	// ASSERT(CMath::abs(variance-5.800439687240292e-05)<10E-5);

	SG_UNREF(mmd);
}

int main(int argc, char** argv)
{
	init_shogun_with_defaults();

	// sg_io->set_loglevel(MSG_DEBUG);

	/* all tests have been "sped up" by reducing the number of runs/samples.
	 * If you have any doubts in the results, set all num_runs to original
	 * numbers and activate asserts. If they fail, something is wrong. */
	test_quadratic_mmd_fixed();
	test_quadratic_mmd_random();
	test_quadratic_mmd_bootstrap();
#ifdef HAVE_LAPACK
	test_quadratic_mmd_spectrum();
#endif
	test_quadratic_mmd_gamma();

	exit_shogun();
	return 0;
}
/*
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 3 of the License, or
 * (at your option) any later version.
 *
 * Written (W) 2012 Heiko Strathmann
 * Written (W) 2011 Shashwat Lal Das
 * Copyright (C) 2011 Berlin Institute of Technology and Max-Planck-Society
 *
 * This file demonstrates how a regular CDenseFeatures object can
 * be used as input for the StreamingFeatures framework, effectively
 * making it suitable for using online learning algorithms.
 */

#include <shogun/features/streaming/StreamingDenseFeatures.h>
#include <shogun/io/streaming/StreamingFileFromDenseFeatures.h>
#include <shogun/mathematics/Math.h>
#include <shogun/lib/common.h>
#include <shogun/io/SGIO.h>
#include <shogun/base/init.h>

#include <stdlib.h>
#include <stdio.h>

using namespace shogun;

#define NUM 10    // number of examples
#define DIMS 2    // dimensions per example
#define DIST 0.5  // mean offset separating the two classes

/** Fills `feat` (DIMS x NUM, column-major) with two Gaussian-ish blobs:
 * the first NUM/2 examples are shifted by +DIST (label 0), the rest by
 * -DIST (label 1). Labels are only written if `lab` has backing storage.
 * Note: feat[i*DIMS+j] addresses dimension j of example (column) i. */
void gen_rand_data(SGMatrix<float64_t> feat, SGVector<float64_t> lab)
{
	for (int32_t i=0; i<NUM; i++)
	{
		if (i<NUM/2)
		{
			for (int32_t j=0; j<DIMS; j++)
				feat[i*DIMS+j]=CMath::random(0.0, 1.0)+DIST;

			if (lab.vector)
				lab[i]=0;
		}
		else
		{
			for (int32_t j=0; j<DIMS; j++)
				feat[i*DIMS+j]=CMath::random(0.0, 1.0)-DIST;

			if (lab.vector)
				lab[i]=1;
		}
	}
	feat.display_matrix("feat");
	lab.display_vector("lab");
}

/** Streams a DenseFeatures object example-by-example and prints each
 * vector, its label, and its dot product with itself. */
void test_general()
{
	SGMatrix<float64_t> feat(DIMS, NUM);
	SGVector<float64_t> lab(NUM);

	// Generate random data, features and labels
	gen_rand_data(feat, lab);

	// Create features
	CDenseFeatures<float64_t>* features=new CDenseFeatures<float64_t>();
	SG_REF(features);
	features->set_feature_matrix(feat);

	// Create a StreamingDenseFeatures object which uses the above as input;
	// labels (float64_t*) are optional
	CStreamingDenseFeatures<float64_t>* streaming=new CStreamingDenseFeatures<
			float64_t>(features, lab);
	SG_REF(streaming);

	// Start parsing of the examples; in this case, it is trivial - returns
	// each vector from the DenseFeatures object
	streaming->start_parser();
	int32_t counter=0;
	SG_SPRINT("Processing examples...\n\n");

	// Run a while loop over all the examples. Note that since
	// features are "streaming", there is no predefined
	// number_of_vectors known to the StreamingFeatures object.
	// Thus, this loop must be used to iterate over all the
	// features
	while (streaming->get_next_example())
	{
		counter++;
		// Get the current vector; no other vector is accessible
		SGVector<float64_t> vec=streaming->get_vector();
		float64_t label=streaming->get_label();

		SG_SPRINT("Vector %d: [\t", counter);
		for (int32_t i=0; i<vec.vlen; i++)
		{
			SG_SPRINT("%f\t", vec.vector[i]);
		}
		SG_SPRINT("Label=%f\t", label);

		// Calculate dot product of the current vector (from
		// the StreamingFeatures object) with itself (the
		// vector passed as argument)
		float64_t dot_prod=streaming->dense_dot(vec.vector, vec.vlen);

		SG_SPRINT("]\nDot product of the vector with itself: %f", dot_prod);
		SG_SPRINT("\n\n");

		// Free the example, since we are done with processing it.
		streaming->release_example();
	}

	// Now that all examples are used, end the parser.
	streaming->end_parser();

	SG_UNREF(streaming);
	SG_UNREF(features);
}

/** Streams exactly NUM examples back into a dense matrix via
 * get_streamed_features() and asserts it equals the original data. */
void test_get_streamed_features()
{
	/* create streaming features from dense features and then make call and
	 * assert that data is equal */
	SGMatrix<float64_t> feat(DIMS, NUM);
	SGVector<float64_t> lab(NUM);

	// Generate random data, features and labels
	gen_rand_data(feat, lab);

	// Create features
	CDenseFeatures<float64_t>* features=new CDenseFeatures<float64_t>();
	SG_REF(features);
	features->set_feature_matrix(feat);

	// Create a StreamingDenseFeatures object which uses the above as input;
	// labels (float64_t*) are optional
	CStreamingDenseFeatures<float64_t>* streaming=new CStreamingDenseFeatures<
			float64_t>(features, lab);
	SG_REF(streaming);

	streaming->start_parser();

	CDenseFeatures<float64_t>* dense=
			(CDenseFeatures<float64_t>*)streaming->get_streamed_features(NUM);

	streaming->end_parser();

	/* assert that matrices are equal */
	ASSERT(dense->get_feature_matrix().equals(feat));

	SG_UNREF(dense);
	SG_UNREF(features);
	SG_UNREF(streaming);
}

/** Same as test_get_streamed_features() but requests more examples than
 * exist; the stream must stop at the available NUM and still match. */
void test_get_streamed_features_too_many()
{
	/* create streaming features from dense features and then make call and
	 * assert that data is equal. Requests more data than available */
	SGMatrix<float64_t> feat(DIMS, NUM);
	SGVector<float64_t> lab(NUM);

	// Generate random data, features and labels
	gen_rand_data(feat, lab);

	// Create features
	CDenseFeatures<float64_t>* features=new CDenseFeatures<float64_t>();
	SG_REF(features);
	features->set_feature_matrix(feat);

	// Create a StreamingDenseFeatures object which uses the above as input;
	// labels (float64_t*) are optional
	CStreamingDenseFeatures<float64_t>* streaming=new CStreamingDenseFeatures<
			float64_t>(features, lab);
	SG_REF(streaming);

	streaming->start_parser();

	/* request more features than available */
	CDenseFeatures<float64_t>* dense=
			(CDenseFeatures<float64_t>*)streaming->get_streamed_features(NUM+10);

	streaming->end_parser();

	/* assert that matrices are equal */
	ASSERT(dense->get_feature_matrix().equals(feat));

	SG_UNREF(dense);
	SG_UNREF(features);
	SG_UNREF(streaming);
}

int main()
{
	init_shogun_with_defaults();

	sg_io->set_loglevel(MSG_DEBUG);

	test_general();
	test_get_streamed_features();
	test_get_streamed_features_too_many();

	// NOTE(review): exit_shogun() is commented out here, unlike the other
	// examples — presumably left over from debugging; confirm before enabling.
	// exit_shogun();
	return 0;
}
/*
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 3 of the License, or
 * (at your option) any later version.
 *
 * Written (W) 2012 Heiko Strathmann
 *
 * This file demonstrates how to use data generators based on the streaming
 * features framework
 */

#include <shogun/base/init.h>
#include <shogun/features/streaming/generators/MeanShiftDataGenerator.h>

using namespace shogun;

/** Draws num_runs examples from a CMeanShiftDataGenerator (first dimension
 * shifted by mean_shift, others zero-mean) both example-by-example and as a
 * whole streamed matrix, and roughly asserts the sample means match the
 * model parameters. */
void test_mean_shift()
{
	index_t dimension=3;
	index_t mean_shift=100;
	index_t num_runs=1000;

	CMeanShiftDataGenerator<float64_t>* gen=
			new CMeanShiftDataGenerator<float64_t>(mean_shift, dimension);

	/* accumulate the sample mean over single streamed examples */
	SGVector<float64_t> avg(dimension);
	avg.zero();
	for (index_t i=0; i<num_runs; ++i)
	{
		gen->get_next_example();
		avg.add(gen->get_vector());
		gen->release_example();
	}

	/* average */
	avg.scale(1.0/num_runs);
	avg.display_vector("mean_shift");

	/* roughly assert correct model parameters.
	 * Fixed: the original check `avg[0]-mean_shift<mean_shift/100` was
	 * one-sided (it passed even when avg[0] undershot arbitrarily) and used
	 * integer division; check both sides with a floating-point tolerance. */
	ASSERT(avg[0]-mean_shift<mean_shift/100.0 &&
			mean_shift-avg[0]<mean_shift/100.0);
	for (index_t i=1; i<dimension; ++i)
		ASSERT(avg[i]<0.5 && avg[i]>-0.5);

	/* draw whole matrix and test that too */
	CDenseFeatures<float64_t>* features=(CDenseFeatures<float64_t>*)
			gen->get_streamed_features(num_runs);

	/* hoist the matrix access out of the accumulation loop */
	SGMatrix<float64_t> streamed=features->get_feature_matrix();
	avg=SGVector<float64_t>(dimension);
	for (index_t i=0; i<dimension; ++i)
	{
		float64_t sum=0;
		for (index_t j=0; j<num_runs; ++j)
			sum+=streamed(i, j);

		avg[i]=sum/num_runs;
	}
	avg.display_vector("mean_shift");

	/* same two-sided tolerance check as above */
	ASSERT(avg[0]-mean_shift<mean_shift/100.0 &&
			mean_shift-avg[0]<mean_shift/100.0);
	for (index_t i=1; i<dimension; ++i)
		ASSERT(avg[i]<0.5 && avg[i]>-0.5);

	SG_UNREF(features);
	SG_UNREF(gen);
}

int main(int argc, char** argv)
{
	init_shogun_with_defaults();

	test_mean_shift();

	exit_shogun();
	return 0;
}
/*
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 3 of the License, or
 * (at your option) any later version.
 *
 * Written (W) 2011 Shashwat Lal Das
 * Copyright (C) 2011 Berlin Institute of Technology and Max-Planck-Society
 *
 * This example demonstrates use of the online variant of SGD which
 * relies on the streaming features framework.
 */

#include <shogun/lib/common.h>
#include <shogun/io/StreamingAsciiFile.h>
#include <shogun/features/StreamingDenseFeatures.h>
#include <shogun/classifier/svm/OnlineLibLinear.h>

using namespace shogun;

int main()
{
	init_shogun_with_defaults();

	// Create a StreamingAsciiFile from the training data
	const char* train_file_name = "../data/train_densereal.light";
	CStreamingAsciiFile* train_file = new CStreamingAsciiFile(train_file_name);
	SG_REF(train_file);

	// The bool value is true if examples are labelled.
	// 1024 is a good standard value for the number of examples for the parser to hold at a time.
	CStreamingDenseFeatures<float64_t>* train_features = new CStreamingDenseFeatures<float64_t>(train_file, true, 1024);
	SG_REF(train_features);

	// Create an OnlineLibLinear object from the features. The first parameter is 'C'.
	COnlineLibLinear* svm = new COnlineLibLinear(1, train_features);

	svm->set_bias_enabled(false); // Enable/disable bias
	svm->train();		// Train
	train_file->close();

	// Now we want to test on other data
	const char* test_file_name = "../data/fm_test_densereal.dat";
	CStreamingAsciiFile* test_file = new CStreamingAsciiFile(test_file_name);
	SG_REF(test_file);

	// Similar, but 'false' since the file contains unlabelled examples
	CStreamingDenseFeatures<float64_t>* test_features = new CStreamingDenseFeatures<float64_t>(test_file, false, 1024);
	SG_REF(test_features);

	// Apply on all examples and return a CRegressionLabels*
	CRegressionLabels* test_labels = svm->apply_regression(test_features);

	for (int32_t i=0; i<test_labels->get_num_labels(); i++)
		SG_SPRINT("For example %d, predicted label is %f.\n", i, test_labels->get_label(i));

	// Fixed: the labels returned by apply_regression() were never
	// unreferenced, leaking them at program exit.
	SG_UNREF(test_labels);
	SG_UNREF(test_features);
	SG_UNREF(test_file);
	SG_UNREF(train_features);
	SG_UNREF(train_file);
	SG_UNREF(svm);

	exit_shogun();
	return 0;
}
/*
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 3 of the License, or
 * (at your option) any later version.
 *
 * Written (W) 2011 Shashwat Lal Das
 * Copyright (C) 2011 Berlin Institute of Technology and Max-Planck-Society
 *
 * This example demonstrates use of the online variant of SGD which
 * relies on the streaming features framework.
 */

#include <shogun/lib/common.h>
#include <shogun/io/StreamingAsciiFile.h>
#include <shogun/features/StreamingSparseFeatures.h>
#include <shogun/classifier/svm/OnlineSVMSGD.h>

using namespace shogun;

int main()
{
	init_shogun_with_defaults();

	// Create a StreamingAsciiFile from the training data.
	// Fixed: string literals must bind to const char*, not char*.
	const char* train_file_name = "../data/train_sparsereal.light";
	CStreamingAsciiFile* train_file = new CStreamingAsciiFile(train_file_name);
	SG_REF(train_file);

	// Create a StreamingSparseFeatures from the StreamingAsciiFile.
	// The bool value is true if examples are labelled.
	// 1024 is a good standard value for the number of examples for the parser to hold at a time.
	CStreamingSparseFeatures<float64_t>* train_features = new CStreamingSparseFeatures<float64_t>(train_file, true, 1024);
	SG_REF(train_features);

	// Create an OnlineSVMSGD object from the features. The first parameter is 'C'.
	COnlineSVMSGD* sgd = new COnlineSVMSGD(1, train_features);

	sgd->set_bias_enabled(false); // Enable/disable bias
	sgd->set_lambda(0.1);	// Choose lambda
	sgd->train();		// Train
	train_file->close();

	// Now we want to test on other data
	const char* test_file_name = "../data/fm_test_sparsereal.dat";
	CStreamingAsciiFile* test_file = new CStreamingAsciiFile(test_file_name);
	SG_REF(test_file);

	// Similar, but 'false' since the file contains unlabelled examples
	CStreamingSparseFeatures<float64_t>* test_features = new CStreamingSparseFeatures<float64_t>(test_file, false, 1024);
	SG_REF(test_features);

	// Apply on all examples and return a CBinaryLabels*
	// (consistent with the other COnlineSVMSGD example in this collection)
	CBinaryLabels* test_labels = sgd->apply_binary(test_features);

	for (int32_t i=0; i<test_labels->get_num_labels(); i++)
		SG_SPRINT("For example %d, predicted label is %f.\n", i, test_labels->get_label(i));

	// Fixed: the returned labels were never unreferenced (leak).
	SG_UNREF(test_labels);
	SG_UNREF(test_features);
	SG_UNREF(test_file);
	SG_UNREF(train_features);
	SG_UNREF(train_file);
	SG_UNREF(sgd);

	exit_shogun();
	return 0;
}
// This example simply demonstrates the use/working of StreamingStringFeatures #include <shogun/io/StreamingAsciiFile.h> #include <shogun/features/StreamingStringFeatures.h> using namespace shogun; void display_vector(const SGString<char> &vec) { printf("\nNew Vector\n------------------\n"); printf("Length=%d.\n", vec.slen); for (int32_t i=0; i<vec.slen; i++) { printf("%c", vec.string[i]); } printf("\n"); } int main(int argc, char **argv) { init_shogun_with_defaults(); // Create a StreamingAsciiFile from our input file CStreamingAsciiFile* file = new CStreamingAsciiFile("../data/fm_train_dna.dat"); // This file contains unlabelled data, so the second arg is `false'. CStreamingStringFeatures<char>* feat = new CStreamingStringFeatures<char>(file, false, 1024); // Alphabet to use is DNA feat->use_alphabet(DNA); // Loop over all examples and simply display each example feat->start_parser(); while (feat->get_next_example()) { SGString<char> vec = feat->get_vector(); display_vector(vec); feat->release_example(); } feat->end_parser(); // Get the alphabet and display the histogram CAlphabet* alpha = feat->get_alphabet(); printf("\nThe histogram is:\n"); alpha->print_histogram(); SG_UNREF(alpha); SG_UNREF(feat); SG_UNREF(file); exit_shogun(); return 0; }
/*
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 3 of the License, or
 * (at your option) any later version.
 *
 * Written (W) 2011 Shashwat Lal Das
 * Copyright (C) 2011 Berlin Institute of Technology and Max-Planck-Society
 *
 * This example demonstrates use of the Vowpal Wabbit learning algorithm.
 */

#include <shogun/lib/common.h>
#include <shogun/io/StreamingVwFile.h>
#include <shogun/features/StreamingVwFeatures.h>
#include <shogun/classifier/vw/VowpalWabbit.h>

using namespace shogun;

int main()
{
	init_shogun_with_defaults();

	// Fixed: string literals must bind to const char*, not char*.
	const char* train_file_name = "../data/train_sparsereal.light";
	CStreamingVwFile* train_file = new CStreamingVwFile(train_file_name);
	train_file->set_parser_type(T_SVMLIGHT); // Treat the file as SVMLight format
	SG_REF(train_file);

	CStreamingVwFeatures* train_features = new CStreamingVwFeatures(train_file, true, 1024);
	SG_REF(train_features);

	CVowpalWabbit* vw = new CVowpalWabbit(train_features);
	vw->set_regressor_out("./vw_regressor_text.dat"); // Save regressor to this file
	// Fixed comment: the original said "Use adaptive learning" next to a call
	// that turns adaptive learning rates OFF.
	vw->set_adaptive(false); // Disable adaptive learning rates
	vw->train_machine();

	SG_SPRINT("Weights have been output in text form to vw_regressor_text.dat.\n");

	train_file->close();

	// Predict on the training file again and print one prediction per line.
	CStreamingVwFile* test_file = new CStreamingVwFile(train_file_name);
	test_file->set_parser_type(T_SVMLIGHT); // Treat the file as SVMLight format

	CStreamingVwFeatures* test_features = new CStreamingVwFeatures(test_file, true, 1024);

	test_features->start_parser();
	while (test_features->get_next_example())
	{
		VwExample *example = test_features->get_example();
		float64_t pred = vw->predict_and_finalize(example);
		printf("%.2lf\n", pred);
		test_features->release_example();
	}
	test_features->end_parser();

	test_file->close();

	SG_UNREF(train_features);
	SG_UNREF(train_file);
	SG_UNREF(vw);
	SG_UNREF(test_features);
	SG_UNREF(test_file);
	exit_shogun();

	return 0;
}
/*
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 3 of the License, or
 * (at your option) any later version.
 *
 * Written (W) 2011 Shashwat Lal Das
 * Copyright (C) 2011 Berlin Institute of Technology and Max-Planck-Society
 *
 * This example demonstrates use of online SGD with CStreamingVwFeatures
 * as the features object.
 */

#include <shogun/lib/common.h>
#include <shogun/io/StreamingVwFile.h>
#include <shogun/features/StreamingVwFeatures.h>
#include <shogun/classifier/svm/OnlineSVMSGD.h>

using namespace shogun;

int main()
{
	init_shogun_with_defaults();

	const char* train_file_name = "../data/train_sparsereal.light";
	CStreamingVwFile* train_file = new CStreamingVwFile(train_file_name);
	train_file->set_parser_type(T_SVMLIGHT); // Treat the file as SVMLight format
	SG_REF(train_file);

	CStreamingVwFeatures* train_features = new CStreamingVwFeatures(train_file, true, 1024);
	SG_REF(train_features);

	COnlineSVMSGD* sgd = new COnlineSVMSGD(1, train_features);

	sgd->set_bias_enabled(false);
	sgd->set_lambda(0.1);
	sgd->train();
	train_file->close();

	// Now we want to test on other data
	const char* test_file_name = "../data/fm_test_sparsereal.dat";
	CStreamingVwFile* test_file = new CStreamingVwFile(test_file_name);
	test_file->set_parser_type(T_SVMLIGHT);
	SG_REF(test_file);

	// Similar, but 'false' since the file contains unlabelled examples
	CStreamingVwFeatures* test_features = new CStreamingVwFeatures(test_file, false, 1024);
	SG_REF(test_features);

	// Apply on all examples and return a CBinaryLabels*
	CBinaryLabels* test_labels = sgd->apply_binary(test_features);

	for (int32_t i=0; i<test_labels->get_num_labels(); i++)
		SG_SPRINT("For example %d, predicted label is %f.\n", i, test_labels->get_label(i));

	// Fixed: the labels returned by apply_binary() were never unreferenced,
	// leaking them at program exit.
	SG_UNREF(test_labels);
	SG_UNREF(test_features);
	SG_UNREF(test_file);
	SG_UNREF(train_features);
	SG_UNREF(train_file);
	SG_UNREF(sgd);

	exit_shogun();
	return 0;
}
// Example: multitask least-squares regression over two tasks that
// partition a toy 4-example, 2-dimensional dataset.

#include <shogun/labels/RegressionLabels.h>
#include <shogun/features/DenseFeatures.h>
#include <shogun/transfer/multitask/MultitaskLeastSquaresRegression.h>
#include <shogun/transfer/multitask/Task.h>
#include <shogun/transfer/multitask/TaskTree.h>
#include <shogun/transfer/multitask/TaskGroup.h>
#include <shogun/base/init.h>
#include <shogun/lib/common.h>
#include <shogun/io/SGIO.h>

using namespace shogun;

// Message sink handed to init_shogun(): forwards shogun output to the
// given stream unchanged.
void print_message(FILE* target, const char* str)
{
	fprintf(target, "%s", str);
}

int main(int argc, char** argv)
{
	init_shogun(&print_message);

	// create some data: a 2x4 matrix (2 dims, 4 examples) filled with 0..7
	SGMatrix<float64_t> matrix(2,4);
	for (int32_t i=0; i<2*4; i++)
		matrix.matrix[i]=i;

	CDenseFeatures<float64_t>* features= new CDenseFeatures<float64_t>(matrix);

	// create four regression labels, one per example
	CRegressionLabels* labels=new CRegressionLabels(4);
	labels->set_label(0, -1.4);
	labels->set_label(1, +1.5);
	labels->set_label(2, -1.2);
	labels->set_label(3, +1.1);

	// two tasks covering examples [0,2) and [2,4), grouped flat
	CTask* first_task = new CTask(0,2);
	CTask* second_task = new CTask(2,4);

	CTaskGroup* task_group = new CTaskGroup();
	task_group->append_task(first_task);
	task_group->append_task(second_task);

	// 0.5 is the regularization constant
	CMultitaskLeastSquaresRegression* regressor =
		new CMultitaskLeastSquaresRegression(0.5,features,labels,task_group);
	regressor->train();

	// inspect the weight vector learned for the first task
	regressor->set_current_task(0);
	regressor->get_w().display_vector();

	SG_UNREF(regressor);
	exit_shogun();
	return 0;
}
// Example: multitask logistic regression over two tasks, first with a flat
// task group, then again with the same tasks arranged as a tree.

#include <shogun/labels/RegressionLabels.h>
#include <shogun/features/DenseFeatures.h>
#include <shogun/transfer/multitask/MultitaskLogisticRegression.h>
#include <shogun/transfer/multitask/Task.h>
#include <shogun/transfer/multitask/TaskTree.h>
#include <shogun/transfer/multitask/TaskGroup.h>
#include <shogun/base/init.h>
#include <shogun/lib/common.h>
#include <shogun/io/SGIO.h>

using namespace shogun;

// Message sink (unused here since init_shogun_with_defaults() is called,
// but kept for parity with the other multitask example).
void print_message(FILE* target, const char* str)
{
	fprintf(target, "%s", str);
}

int main(int argc, char** argv)
{
	init_shogun_with_defaults();

	// create some data: a 2x4 matrix (2 dims, 4 examples) filled with 0..7
	SGMatrix<float64_t> matrix(2,4);
	for (int32_t i=0; i<2*4; i++)
		matrix.matrix[i]=i;

	CDenseFeatures<float64_t>* features= new CDenseFeatures<float64_t>(matrix);

	// create four binary labels, one per example
	CBinaryLabels* labels=new CBinaryLabels(4);
	labels->set_label(0, -1);
	labels->set_label(1, +1);
	labels->set_label(2, -1);
	labels->set_label(3, +1);

	// two tasks covering examples [0,2) and [2,4), grouped flat
	CTask* first_task = new CTask(0,2);
	CTask* second_task = new CTask(2,4);

	CTaskGroup* task_group = new CTaskGroup();
	task_group->append_task(first_task);
	task_group->append_task(second_task);

	// 0.5 is the regularization constant
	CMultitaskLogisticRegression* regressor =
		new CMultitaskLogisticRegression(0.5,features,labels,task_group);
	regressor->train();

	// weight vector learned for the first task under the flat grouping
	regressor->set_current_task(0);
	regressor->get_w().display_vector();

	// now rebuild the task relation as a tree: a root task spanning all
	// examples with the two tasks as its subtasks, and retrain
	CTask* root_task = new CTask(0,4);
	root_task->add_subtask(first_task);
	root_task->add_subtask(second_task);

	CTaskTree* task_tree = new CTaskTree(root_task);

	regressor->set_task_relation(task_tree);
	regressor->train();

	// weight vector learned for the first task under the tree relation
	regressor->set_current_task(0);
	regressor->get_w().display_vector();

	SG_UNREF(regressor);
	exit_shogun();
	return 0;
}