Labels.cpp

Go to the documentation of this file.
00001 /*
00002  * This program is free software; you can redistribute it and/or modify
00003  * it under the terms of the GNU General Public License as published by
00004  * the Free Software Foundation; either version 3 of the License, or
00005  * (at your option) any later version.
00006  *
00007  * Written (W) 1999-2009 Soeren Sonnenburg
00008  * Written (W) 1999-2008 Gunnar Raetsch
00009  * Subset support written (W) 2011 Heiko Strathmann
00010  * Copyright (C) 1999-2009 Fraunhofer Institute FIRST and Max-Planck-Society
00011  */
00012 
00013 #include <shogun/features/Labels.h>
00014 #include <shogun/lib/common.h>
00015 #include <shogun/io/File.h>
00016 #include <shogun/io/SGIO.h>
00017 #include <shogun/mathematics/Math.h>
00018 #include <shogun/base/Parameter.h>
00019 #include <shogun/lib/Set.h>
00020 
00021 using namespace shogun;
00022 
00023 CLabels::CLabels()
00024 : CSGObject()
00025 {
00026     init();
00027 }
00028 
00029 CLabels::CLabels(int32_t num_lab)
00030 : CSGObject()
00031 {
00032     init();
00033     labels=SGVector<float64_t>(num_lab);
00034 }
00035 
00036 CLabels::CLabels(SGVector<float64_t> src)
00037 : CSGObject()
00038 {
00039     init();
00040 
00041     set_labels(src);
00042     m_num_classes=get_num_classes();
00043 }
00044 
00045 void CLabels::set_to_one()
00046 {
00047     ASSERT(labels.vector);
00048     index_t subset_size=get_num_labels();
00049     for (int32_t i=0; i<subset_size; i++)
00050         labels.vector[subset_idx_conversion(i)]=+1;
00051 }
00052 
00053 CLabels::CLabels(CFile* loader)
00054 : CSGObject()
00055 {
00056     init();
00057     load(loader);
00058 }
00059 
00060 CLabels::~CLabels()
00061 {
00062     labels.destroy_vector();
00063     delete m_subset;
00064     m_subset=NULL;
00065 
00066     m_num_classes=0;
00067 }
00068 
00069 void CLabels::init()
00070 {
00071     m_parameters->add(&labels, "labels", "The labels.");
00072     m_parameters->add((CSGObject**)&m_subset, "subset", "Subset object");
00073 
00074     labels=SGVector<float64_t>();
00075     m_num_classes=0;
00076     m_subset=NULL;
00077 }
00078 
00079 void CLabels::set_labels(SGVector<float64_t> v)
00080 {
00081     if (m_subset)
00082         SG_ERROR("A subset is set, cannot set labels\n");
00083 
00084     labels.free_vector();
00085     labels=v;
00086     labels.do_free=false;
00087 }
00088 
00089 bool CLabels::is_two_class_labeling()
00090 {
00091     ASSERT(labels.vector);
00092     bool found_plus_one=false;
00093     bool found_minus_one=false;
00094 
00095     int32_t subset_size=get_num_labels();
00096     for (int32_t i=0; i<subset_size; i++)
00097     {
00098         int32_t real_i=subset_idx_conversion(i);
00099         if (labels.vector[real_i]==+1.0)
00100             found_plus_one=true;
00101         else if (labels.vector[real_i]==-1.0)
00102             found_minus_one=true;
00103         else
00104         {
00105             SG_ERROR("Not a two class labeling label[%d]=%f (only +1/-1 "
00106                     "allowed)\n", i, labels.vector[real_i]);
00107         }
00108     }
00109 
00110     if (!found_plus_one)
00111         SG_ERROR("Not a two class labeling - no positively labeled examples found\n");
00112     if (!found_minus_one)
00113         SG_ERROR("Not a two class labeling - no negatively labeled examples found\n");
00114 
00115     return true;
00116 }
00117 
00118 int32_t CLabels::get_num_classes()
00119 {
00120     CSet<float64_t>* classes=new CSet<float64_t>();
00121     for (int32_t i=0; i<get_num_labels(); i++)
00122         classes->add(get_label(i));
00123 
00124     int32_t result=classes->get_num_elements();
00125     SG_UNREF(classes);
00126     return result;
00127 }
00128 
00129 SGVector<float64_t> CLabels::get_classes()
00130 {
00131     CSet<float64_t>* classes=new CSet<float64_t>();
00132 
00133     for (int32_t i=0; i<get_num_labels(); i++)
00134         classes->add(get_label(i));
00135 
00136     SGVector<float64_t> result(classes->get_num_elements());
00137     memcpy(result.vector, classes->get_array(),
00138             sizeof(float64_t)*classes->get_num_elements());
00139 
00140     SG_UNREF(classes);
00141     return result;
00142 }
00143 
00144 SGVector<float64_t> CLabels::get_labels()
00145 {
00146     if (m_subset)
00147         SG_ERROR("get_labels() is not possible on subset");
00148 
00149     return labels;
00150 }
00151 
00152 SGVector<int32_t> CLabels::get_int_labels()
00153 {
00154     SGVector<int32_t> intlab(get_num_labels(), true);
00155 
00156     for (int32_t i=0; i<get_num_labels(); i++)
00157         intlab.vector[i]= get_int_label(i);
00158 
00159     return intlab;
00160 }
00161 
00162 void CLabels::set_int_labels(SGVector<int32_t> lab)
00163 {
00164     if (m_subset)
00165         SG_ERROR("set_int_labels() is not possible on subset");
00166 
00167     labels.free_vector();
00168     labels = SGVector<float64_t>(lab.vlen);
00169 
00170     for (int32_t i=0; i<lab.vlen; i++)
00171         set_int_label(i, labels.vector[i]);
00172 }
00173 
00174 void CLabels::load(CFile* loader)
00175 {
00176     remove_subset();
00177 
00178     SG_SET_LOCALE_C;
00179     labels.free_vector();
00180 
00181     ASSERT(loader);
00182     loader->get_vector(labels.vector, labels.vlen);
00183     m_num_classes=get_num_classes();
00184     SG_RESET_LOCALE;
00185 }
00186 
00187 void CLabels::save(CFile* writer)
00188 {
00189     if (m_subset)
00190         SG_ERROR("save() is not possible on subset");
00191 
00192     SG_SET_LOCALE_C;
00193     ASSERT(writer);
00194     ASSERT(labels.vector && labels.vlen>0);
00195     writer->set_vector(labels.vector, labels.vlen);
00196     SG_RESET_LOCALE;
00197 }
00198 
00199 bool CLabels::set_label(int32_t idx, float64_t label)
00200 {
00201     int32_t real_num=subset_idx_conversion(idx);
00202     if (labels.vector && real_num<get_num_labels())
00203     {
00204         labels.vector[real_num]=label;
00205         return true;
00206     }
00207     else 
00208         return false;
00209 }
00210 
00211 bool CLabels::set_int_label(int32_t idx, int32_t label)
00212 { 
00213     int32_t real_num=subset_idx_conversion(idx);
00214     if (labels.vector && real_num<get_num_labels())
00215     {
00216         labels.vector[real_num]= (float64_t) label;
00217         return true;
00218     }
00219     else 
00220         return false;
00221 }
00222 
00223 float64_t CLabels::get_label(int32_t idx)
00224 {
00225     int32_t real_num=subset_idx_conversion(idx);
00226     ASSERT(labels.vector && idx<get_num_labels());
00227     return labels.vector[real_num];
00228 }
00229 
00230 int32_t CLabels::get_int_label(int32_t idx)
00231 {
00232     int32_t real_num=subset_idx_conversion(idx);
00233     ASSERT(labels.vector && idx<get_num_labels());
00234     if (labels.vector[real_num] != float64_t((int32_t(labels.vector[real_num]))))
00235         SG_ERROR("label[%d]=%g is not an integer\n", labels.vector[real_num]);
00236 
00237     return int32_t(labels.vector[real_num]);
00238 }
00239 
00240 int32_t CLabels::get_num_labels()
00241 {
00242     return m_subset ? m_subset->get_size() : labels.vlen;
00243 }
00244 
00245 void CLabels::set_subset(CSubset* subset)
00246 {
00247     SG_UNREF(m_subset);
00248     m_subset=subset;
00249     SG_REF(subset);
00250 }
00251 
00252 void CLabels::remove_subset()
00253 {
00254     set_subset(NULL);
00255 }
00256 
00257 index_t CLabels::subset_idx_conversion(index_t idx) const
00258 {
00259     return m_subset ? m_subset->subset_idx_conversion(idx) : idx;
00260 }
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Defines

SHOGUN Machine Learning Toolbox - Documentation