Labels.cpp

Go to the documentation of this file.
00001 /*
00002  * This program is free software; you can redistribute it and/or modify
00003  * it under the terms of the GNU General Public License as published by
00004  * the Free Software Foundation; either version 3 of the License, or
00005  * (at your option) any later version.
00006  *
00007  * Written (W) 1999-2009 Soeren Sonnenburg
00008  * Written (W) 1999-2008 Gunnar Raetsch
00009  * Copyright (C) 1999-2009 Fraunhofer Institute FIRST and Max-Planck-Society
00010  */
00011 
00012 #include "features/Labels.h"
00013 #include "lib/common.h"
00014 #include "lib/File.h"
00015 #include "lib/io.h"
00016 #include "lib/Mathematics.h"
00017 #include "base/Parameter.h"
00018 
00019 using namespace shogun;
00020 
00021 CLabels::CLabels()
00022 : CSGObject()
00023 {
00024     init(0, 0);
00025 }
00026 
00027 CLabels::CLabels(int32_t num_lab)
00028 : CSGObject()
00029 {
00030     init(num_lab, 0);
00031 
00032     labels=new float64_t[num_lab];
00033     for (int32_t i=0; i<num_lab; i++)
00034         labels[i]=0;
00035 }
00036 
00037 CLabels::CLabels(float64_t* p_labels, int32_t len)
00038 : CSGObject()
00039 {
00040     init(0, 0);
00041 
00042     set_labels(p_labels, len);
00043 
00044     // We don't allocate the confidences matrix, unless it is
00045     // necessary.  For problems with many classes and samples it might
00046     // get really big.
00047     m_num_classes=get_num_classes();
00048     m_confidences=NULL;
00049     m_confidence_classes = 0;
00050     m_confidence_labels = 0;
00051 }
00052 
00053 void CLabels::set_to_one()
00054 {
00055     ASSERT(labels);
00056     for (int32_t i=0; i<num_labels; i++)
00057         labels[i]=+1;
00058 }
00059 
00060 CLabels::CLabels(float64_t* in_confidences, int32_t in_num_labels,
00061                  int32_t in_num_classes)
00062 : CSGObject()
00063 {
00064     init(0, 0);
00065 
00066     labels=new float64_t[in_num_labels];
00067     for (int32_t i=0; i<in_num_labels; i++)
00068         labels[i]=0;
00069 
00070     m_num_classes=in_num_classes;
00071     m_confidences=in_confidences;
00072     m_confidence_classes = in_num_classes;
00073     m_confidence_labels = in_num_labels;
00074     find_labels();
00075 }
00076 
00077 CLabels::CLabels(CFile* loader)
00078 : CSGObject()
00079 {
00080     init(0, 0);
00081 
00082     load(loader);
00083 }
00084 
00085 CLabels::~CLabels()
00086 {
00087     delete[] labels;
00088     delete[] m_confidences;
00089 
00090     num_labels=0;
00091     m_num_classes=0;
00092     labels=NULL;
00093     m_confidences=NULL;
00094     m_confidence_classes = 0;
00095     m_confidence_labels = 0;
00096 }
00097 
00098 void
00099 CLabels::init(int32_t num_labels_, int32_t num_classes)
00100 {
00101     m_parameters->add_vector(&labels, &num_labels, "labels",
00102                              "The labels.");
00103     m_parameters->add_matrix(&m_confidences, &m_confidence_classes,
00104                              &m_confidence_labels, "m_confidences",
00105                              "Confidence matrix.");
00106 
00107     labels = NULL;
00108     num_labels = num_labels_;
00109     m_confidences=NULL;
00110     m_confidence_classes = 0;
00111     m_confidence_labels = 0;
00112     m_num_classes=num_classes;
00113 }
00114 
00115 void CLabels::set_labels(float64_t* p_labels, int32_t len)
00116 {
00117     ASSERT(len>0);
00118     num_labels=len;
00119 
00120     delete[] labels;
00121     labels=CMath::clone_vector(p_labels, len);
00122 }
00123 
00124 void CLabels::set_confidences(float64_t* in_confidences, int32_t in_num_labels, 
00125                               int32_t in_num_classes)
00126 {
00127     if (num_labels && (num_labels != in_num_labels))
00128     {
00129         SG_ERROR("Shape of confidence matrix mismatch (number of "
00130                 "labels = %d does not match %d\n", num_labels, in_num_labels);
00131     }
00132 
00133     if (m_num_classes && (m_num_classes != in_num_classes))
00134     {
00135         SG_ERROR("Shape of confidence matrix mismatch (number of "
00136                 "num_classes = %d does not match %d\n", m_num_classes, in_num_classes);
00137     }
00138 
00139     delete[] m_confidences;
00140 
00141     num_labels=in_num_labels;
00142     m_num_classes=in_num_classes;
00143     m_confidences=in_confidences;
00144     m_confidence_classes = in_num_classes;
00145     m_confidence_labels = in_num_labels;
00146     find_labels();
00147 }
00148 
00149 float64_t* CLabels::get_confidences(int32_t& out_num_labels, int32_t& out_num_classes)
00150 {
00151     out_num_labels=num_labels;
00152     out_num_classes=m_num_classes;
00153     
00154     if (!num_labels || !m_num_classes || !m_confidences)
00155         SG_ERROR("No labels / confidences set\n");
00156 
00157     float64_t* out_conf=new float64_t[num_labels*m_num_classes];
00158     memcpy(out_conf, m_confidences, num_labels*m_num_classes*sizeof(float64_t));
00159     return out_conf;
00160 }
00161 
00162 void CLabels::get_confidences(float64_t** dst, int32_t* out_num_labels, int32_t* out_num_classes)
00163 {
00164     ASSERT(dst && out_num_labels && out_num_classes);
00165 
00166     if (num_labels<=0 || m_num_classes<=0 || !m_confidences)
00167         SG_ERROR("No labels / confidences set\n");
00168 
00169     *dst=NULL;
00170     *out_num_labels=num_labels;
00171     *out_num_classes=m_num_classes;
00172 
00173     float64_t* out_conf= (float64_t*) malloc((size_t) sizeof(float64_t)*num_labels*m_num_classes);
00174     memcpy(out_conf, m_confidences, num_labels*m_num_classes*sizeof(float64_t));
00175     *dst=out_conf;
00176 }
00177 
00178 float64_t* CLabels::get_sample_confidences(const int32_t& in_sample_index, 
00179                                            int32_t& out_num_classes)
00180 {
00181     out_num_classes=m_num_classes;
00182 
00183     if (!(in_sample_index>=0 && in_sample_index<num_labels &&
00184                 m_num_classes && m_confidences))
00185     {
00186         SG_ERROR("No labels / confidences set\n");
00187     }
00188 
00189     float64_t* out_conf=new float64_t[m_num_classes];
00190     for (int32_t n_class=0; n_class<m_num_classes; n_class++)
00191     {
00192         out_conf[n_class]=m_confidences[n_class+in_sample_index*m_num_classes];
00193     }
00194     return out_conf;
00195 }
00196 
00197 void CLabels::find_labels()
00198 {
00199     ASSERT(m_confidences);
00200     ASSERT(labels);
00201     
00202     float64_t max_conf;
00203     int32_t index;
00204     for (int32_t n_samp=0; n_samp<num_labels; n_samp++)
00205     {
00206         max_conf=m_confidences[n_samp];
00207         labels[n_samp]=0;
00208         for (int32_t n_class=1; n_class<m_num_classes; n_class++)
00209         {
00210             index=n_samp+n_class*m_num_classes;
00211             if (m_confidences[index]>max_conf)
00212             {
00213                 max_conf=m_confidences[index];
00214                 labels[n_samp]=n_class;             
00215             }
00216         }
00217     }
00218 }
00219 
00220 bool CLabels::is_two_class_labeling()
00221 {
00222     ASSERT(labels);
00223     bool found_plus_one=false;
00224     bool found_minus_one=false;
00225 
00226     for (int32_t i=0; i<num_labels; i++)
00227     {
00228         if (labels[i]==+1.0)
00229             found_plus_one=true;
00230         else if (labels[i]==-1.0)
00231             found_minus_one=true;
00232         else
00233             SG_ERROR("Not a two class labeling label[%d]=%f (only +1/-1 allowed)\n", i, labels[i]);
00234     }
00235 
00236     if (!found_plus_one)
00237         SG_ERROR("Not a two class labeling - no positively labeled examples found\n");
00238     if (!found_minus_one)
00239         SG_ERROR("Not a two class labeling - no negatively labeled examples found\n");
00240 
00241     return true;
00242 }
00243 
00244 int32_t CLabels::get_num_classes()
00245 {
00246     int32_t n=-1;
00247     int32_t* lab=get_int_labels(n);
00248 
00249     int32_t num_classes=0;
00250     for (int32_t i=0; i<n; i++)
00251         num_classes=CMath::max(num_classes,lab[i]);
00252 
00253     delete[] lab;
00254 
00255     return num_classes+1;
00256 }
00257 
00258 float64_t* CLabels::get_labels(int32_t &len)
00259 {
00260     len=num_labels;
00261 
00262     if (num_labels>0)
00263     {
00264         float64_t* _labels=new float64_t[num_labels] ;
00265         for (int32_t i=0; i<len; i++)
00266             _labels[i]=get_label(i) ;
00267         return _labels ;
00268     }
00269     else 
00270         return NULL;
00271 }
00272 
00273 void CLabels::get_labels(float64_t** p_labels, int32_t* len)
00274 {
00275     ASSERT(p_labels && len);
00276     *p_labels=NULL;
00277     *len=num_labels;
00278 
00279     if (num_labels>0)
00280     {
00281         *p_labels=(float64_t*) malloc(sizeof(float64_t)*num_labels);
00282 
00283         for (int32_t i=0; i<num_labels; i++)
00284             (*p_labels)[i]=get_label(i);
00285     }
00286 }
00287 
00288 int32_t* CLabels::get_int_labels(int32_t &len)
00289 {
00290     len=num_labels;
00291 
00292     if (num_labels>0)
00293     {
00294         int32_t* _labels=new int32_t[num_labels] ;
00295         for (int32_t i=0; i<len; i++)
00296             _labels[i]= (int32_t) get_label(i) ;
00297         return _labels ;
00298     }
00299     else 
00300         return NULL;
00301 }
00302 
00303 void CLabels::set_int_labels(int32_t * mylabels, int32_t len)
00304 {
00305     num_labels = len ;
00306     delete[] labels ;
00307     
00308     labels = new float64_t[num_labels] ;
00309     for (int32_t i=0; i<num_labels; i++)
00310         set_int_label(i, mylabels[i]) ;
00311 }
00312 
00313 void CLabels::load(CFile* loader)
00314 {
00315     SG_SET_LOCALE_C;
00316     delete[] labels;
00317     delete[] m_confidences;
00318     m_confidences = NULL;
00319     m_confidence_classes = 0;
00320     m_confidence_labels = 0;
00321     num_labels=0;
00322     ASSERT(loader);
00323     loader->get_real_vector(labels, num_labels);
00324     m_num_classes=get_num_classes();
00325     SG_RESET_LOCALE;
00326 }
00327 
00328 void CLabels::save(CFile* writer)
00329 {
00330     SG_SET_LOCALE_C;
00331     ASSERT(writer);
00332     ASSERT(labels && labels>0);
00333     writer->set_real_vector(labels, num_labels);
00334     SG_RESET_LOCALE;
00335 }
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Defines

SHOGUN Machine Learning Toolbox - Documentation