CrossValidationSplitting.cpp

Go to the documentation of this file.
00001 /*
00002  * This program is free software; you can redistribute it and/or modify
00003  * it under the terms of the GNU General Public License as published by
00004  * the Free Software Foundation; either version 3 of the License, or
00005  * (at your option) any later version.
00006  *
00007  * Written (W) 2012 Heiko Strathmann
00008  * Copyright (C) 2012 Berlin Institute of Technology and Max-Planck-Society
00009  */
00010 
00011 #include <shogun/evaluation/CrossValidationSplitting.h>
00012 #include <shogun/labels/Labels.h>
00013 
00014 using namespace shogun;
00015 
00016 CCrossValidationSplitting::CCrossValidationSplitting() :
00017     CSplittingStrategy(0, 0)
00018 {
00019 }
00020 
00021 CCrossValidationSplitting::CCrossValidationSplitting(
00022         CLabels* labels, index_t num_subsets) :
00023     CSplittingStrategy(labels, num_subsets)
00024 {
00025 }
00026 
00027 void CCrossValidationSplitting::build_subsets()
00028 {
00029     /* ensure that subsets are empty and set flag to filled */
00030     reset_subsets();
00031     m_is_filled=true;
00032 
00033     /* permute indices */
00034     SGVector<index_t> indices(m_labels->get_num_labels());
00035     indices.range_fill();
00036     for (index_t i=0; i<indices.vlen; ++i)
00037     {
00038         CMath::swap(indices.vector[i],
00039                 indices.vector[CMath::random(0, indices.vlen-1)]);
00040     }
00041 
00042     index_t num_subsets=m_subset_indices->get_num_elements();
00043 
00044     /* distribute indices to subsets */
00045     index_t current_subset=0;
00046     for (index_t i=0; i<indices.vlen; ++i)
00047     {
00048         /* fill current subset */
00049         CDynamicArray<index_t>* current=(CDynamicArray<index_t>*)
00050                 m_subset_indices->get_element(current_subset);
00051 
00052         /* add element of current index */
00053         current->append_element(indices.vector[i]);
00054 
00055         /* unref */
00056         SG_UNREF(current);
00057 
00058         /* iterate over subsets */
00059         current_subset=(current_subset+1) % num_subsets;
00060     }
00061 
00062     /* finally shuffle to avoid that subsets with low indices have more
00063      * elements, which happens if the number of class labels is not equal to
00064      * the number of subsets */
00065     m_subset_indices->shuffle();
00066 }
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Defines

SHOGUN Machine Learning Toolbox - Documentation