00001 /* 00002 * This program is free software; you can redistribute it and/or modify 00003 * it under the terms of the GNU General Public License as published by 00004 * the Free Software Foundation; either version 3 of the License, or 00005 * (at your option) any later version. 00006 * 00007 * Written (W) 2012 Heiko Strathmann 00008 * Copyright (C) 2012 Berlin Institute of Technology and Max-Planck-Society 00009 */ 00010 00011 #include <shogun/evaluation/CrossValidationSplitting.h> 00012 #include <shogun/labels/Labels.h> 00013 00014 using namespace shogun; 00015 00016 CCrossValidationSplitting::CCrossValidationSplitting() : 00017 CSplittingStrategy(0, 0) 00018 { 00019 } 00020 00021 CCrossValidationSplitting::CCrossValidationSplitting( 00022 CLabels* labels, index_t num_subsets) : 00023 CSplittingStrategy(labels, num_subsets) 00024 { 00025 } 00026 00027 void CCrossValidationSplitting::build_subsets() 00028 { 00029 /* ensure that subsets are empty and set flag to filled */ 00030 reset_subsets(); 00031 m_is_filled=true; 00032 00033 /* permute indices */ 00034 SGVector<index_t> indices(m_labels->get_num_labels()); 00035 indices.range_fill(); 00036 for (index_t i=0; i<indices.vlen; ++i) 00037 { 00038 CMath::swap(indices.vector[i], 00039 indices.vector[CMath::random(0, indices.vlen-1)]); 00040 } 00041 00042 index_t num_subsets=m_subset_indices->get_num_elements(); 00043 00044 /* distribute indices to subsets */ 00045 index_t current_subset=0; 00046 for (index_t i=0; i<indices.vlen; ++i) 00047 { 00048 /* fill current subset */ 00049 CDynamicArray<index_t>* current=(CDynamicArray<index_t>*) 00050 m_subset_indices->get_element(current_subset); 00051 00052 /* add element of current index */ 00053 current->append_element(indices.vector[i]); 00054 00055 /* unref */ 00056 SG_UNREF(current); 00057 00058 /* iterate over subsets */ 00059 current_subset=(current_subset+1) % num_subsets; 00060 } 00061 00062 /* finally shuffle to avoid that subsets with low indices have more 00063 * elements, which happens if the number of class labels is not equal to 00064 * the number of subsets */ 00065 m_subset_indices->shuffle(); 00066 }