Labels.cpp

Go to the documentation of this file.
00001 /*
00002  * This program is free software; you can redistribute it and/or modify
00003  * it under the terms of the GNU General Public License as published by
00004  * the Free Software Foundation; either version 3 of the License, or
00005  * (at your option) any later version.
00006  *
00007  * Written (W) 1999-2009 Soeren Sonnenburg
00008  * Written (W) 1999-2008 Gunnar Raetsch
00009  * Subset support written (W) 2011 Heiko Strathmann
00010  * Copyright (C) 1999-2009 Fraunhofer Institute FIRST and Max-Planck-Society
00011  */
00012 
00013 #include <shogun/features/Labels.h>
00014 #include <shogun/lib/common.h>
00015 #include <shogun/io/File.h>
00016 #include <shogun/io/SGIO.h>
00017 #include <shogun/mathematics/Math.h>
00018 #include <shogun/base/Parameter.h>
00019 #include <shogun/lib/Set.h>
00020 
00021 using namespace shogun;
00022 
00023 CLabels::CLabels()
00024 : CSGObject()
00025 {
00026     init();
00027 }
00028 
00029 CLabels::CLabels(int32_t num_lab)
00030 : CSGObject()
00031 {
00032     init();
00033     labels=SGVector<float64_t>(num_lab);
00034 }
00035 
00036 CLabels::CLabels(SGVector<float64_t> src)
00037 : CSGObject()
00038 {
00039     init();
00040 
00041     set_labels(src);
00042     m_num_classes=get_num_classes();
00043 }
00044 
00045 void CLabels::set_to_one()
00046 {
00047     ASSERT(labels.vector);
00048     index_t subset_size=get_num_labels();
00049     for (int32_t i=0; i<subset_size; i++)
00050         labels.vector[subset_idx_conversion(i)]=+1;
00051 }
00052 
00053 CLabels::CLabels(CFile* loader)
00054 : CSGObject()
00055 {
00056     init();
00057     load(loader);
00058 }
00059 
00060 CLabels::~CLabels()
00061 {
00062     labels.destroy_vector();
00063     delete m_subset;
00064     m_subset=NULL;
00065 
00066     m_num_classes=0;
00067 }
00068 
00069 void CLabels::init()
00070 {
00071     m_parameters->add(&labels, "labels", "The labels.");
00072     m_parameters->add((CSGObject**)&m_subset, "subset", "Subset object");
00073 
00074     labels=SGVector<float64_t>();
00075     m_num_classes=0;
00076     m_subset=NULL;
00077 }
00078 
00079 void CLabels::set_labels(SGVector<float64_t> v)
00080 {
00081     if (m_subset)
00082         SG_ERROR("A subset is set, cannot set labels\n");
00083 
00084     labels.free_vector();
00085     labels=v;
00086     labels.do_free=false;
00087 }
00088 
00089 bool CLabels::is_two_class_labeling()
00090 {
00091     ASSERT(labels.vector);
00092     bool found_plus_one=false;
00093     bool found_minus_one=false;
00094 
00095     int32_t subset_size=get_num_labels();
00096     for (int32_t i=0; i<subset_size; i++)
00097     {
00098         int32_t real_i=subset_idx_conversion(i);
00099         if (labels.vector[real_i]==+1.0)
00100             found_plus_one=true;
00101         else if (labels.vector[real_i]==-1.0)
00102             found_minus_one=true;
00103         else
00104         {
00105             SG_ERROR("Not a two class labeling label[%d]=%f (only +1/-1 "
00106                     "allowed)\n", i, labels.vector[real_i]);
00107         }
00108     }
00109 
00110     if (!found_plus_one)
00111         SG_ERROR("Not a two class labeling - no positively labeled examples found\n");
00112     if (!found_minus_one)
00113         SG_ERROR("Not a two class labeling - no negatively labeled examples found\n");
00114 
00115     return true;
00116 }
00117 
00118 int32_t CLabels::get_num_classes()
00119 {
00120     CSet<float64_t>* classes=new CSet<float64_t>();
00121     for (int32_t i=0; i<get_num_labels(); i++)
00122         classes->add(get_label(i));
00123 
00124     int32_t result=classes->get_num_elements();
00125     SG_UNREF(classes);
00126     return result;
00127 }
00128 
00129 SGVector<float64_t> CLabels::get_classes()
00130 {
00131     CSet<float64_t>* classes=new CSet<float64_t>();
00132 
00133     for (int32_t i=0; i<get_num_labels(); i++)
00134         classes->add(get_label(i));
00135 
00136     SGVector<float64_t> result(classes->get_num_elements());
00137     memcpy(result.vector, classes->get_array(),
00138             sizeof(float64_t)*classes->get_num_elements());
00139 
00140     SG_UNREF(classes);
00141     return result;
00142 }
00143 
00144 SGVector<float64_t> CLabels::get_labels()
00145 {
00146     if (m_subset)
00147         SG_ERROR("get_labels() is not possible on subset");
00148 
00149     return labels;
00150 }
00151 
00152 SGVector<int32_t> CLabels::get_int_labels()
00153 {
00154     SGVector<int32_t> intlab(get_num_labels(), true);
00155 
00156     for (int32_t i=0; i<get_num_labels(); i++)
00157         intlab.vector[i]= get_int_label(i);
00158 
00159     return intlab;
00160 }
00161 
00162 void CLabels::set_int_labels(SGVector<int32_t> lab)
00163 {
00164     if (m_subset)
00165         SG_ERROR("set_int_labels() is not possible on subset");
00166 
00167     labels.free_vector();
00168     labels = SGVector<float64_t>(lab.vlen);
00169 
00170     for (int32_t i=0; i<lab.vlen; i++)
00171         set_int_label(i, labels.vector[i]);
00172 }
00173 
00174 void CLabels::load(CFile* loader)
00175 {
00176     remove_subset();
00177 
00178     SG_SET_LOCALE_C;
00179     labels.free_vector();
00180 
00181     ASSERT(loader);
00182     loader->get_vector(labels.vector, labels.vlen);
00183     m_num_classes=get_num_classes();
00184     SG_RESET_LOCALE;
00185 }
00186 
00187 void CLabels::save(CFile* writer)
00188 {
00189     if (m_subset)
00190         SG_ERROR("save() is not possible on subset");
00191 
00192     SG_SET_LOCALE_C;
00193     ASSERT(writer);
00194     ASSERT(labels.vector && labels.vlen>0);
00195     writer->set_vector(labels.vector, labels.vlen);
00196     SG_RESET_LOCALE;
00197 }
00198 
00199 void CLabels::set_subset(CSubset* subset)
00200 {
00201     SG_UNREF(m_subset);
00202     m_subset=subset;
00203     SG_REF(subset);
00204 }
00205 
00206 void CLabels::remove_subset()
00207 {
00208     set_subset(NULL);
00209 }
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Defines

SHOGUN Machine Learning Toolbox - Documentation