PluginEstimate.cpp

Go to the documentation of this file.
00001 /*
00002  * This program is free software; you can redistribute it and/or modify
00003  * it under the terms of the GNU General Public License as published by
00004  * the Free Software Foundation; either version 3 of the License, or
00005  * (at your option) any later version.
00006  *
00007  * Written (W) 1999-2009 Soeren Sonnenburg
00008  * Copyright (C) 1999-2009 Fraunhofer Institute FIRST and Max-Planck-Society
00009  */
00010 
00011 #include <shogun/lib/common.h>
00012 #include <shogun/io/SGIO.h>
00013 #include <shogun/features/StringFeatures.h>
00014 #include <shogun/features/Labels.h>
00015 #include <shogun/distributions/LinearHMM.h>
00016 #include <shogun/classifier/PluginEstimate.h>
00017 
00018 using namespace shogun;
00019 
00020 CPluginEstimate::CPluginEstimate(float64_t pos_pseudo, float64_t neg_pseudo)
00021 : CMachine(), m_pos_pseudo(1e-10), m_neg_pseudo(1e-10),
00022     pos_model(NULL), neg_model(NULL), features(NULL)
00023 {
00024     m_parameters->add(&m_pos_pseudo,
00025             "pos_pseudo","pseudo count for positive class");
00026     m_parameters->add(&m_neg_pseudo,
00027             "neg_pseudo", "pseudo count for negative class");
00028 
00029     m_parameters->add((CSGObject**) &pos_model,
00030             "pos_model", "LinearHMM modelling positive class.");
00031     m_parameters->add((CSGObject**) &neg_model,
00032             "neg_model", "LinearHMM modelling negative class.");
00033 
00034     m_parameters->add((CSGObject**) &features,
00035             "features", "String Features.");
00036 }
00037 
00038 CPluginEstimate::~CPluginEstimate()
00039 {
00040     SG_UNREF(pos_model);
00041     SG_UNREF(neg_model);
00042 
00043     SG_UNREF(features);
00044 }
00045 
00046 bool CPluginEstimate::train_machine(CFeatures* data)
00047 {
00048     ASSERT(labels);
00049     if (data)
00050     {
00051         if (data->get_feature_class() != C_STRING ||
00052                 data->get_feature_type() != F_WORD)
00053         {
00054             SG_ERROR("Features not of class string type word\n");
00055         }
00056 
00057         set_features((CStringFeatures<uint16_t>*) data);
00058     }
00059     ASSERT(features);
00060 
00061     SG_UNREF(pos_model);
00062     SG_UNREF(neg_model);
00063 
00064     pos_model=new CLinearHMM(features);
00065     neg_model=new CLinearHMM(features);
00066 
00067     SG_REF(pos_model);
00068     SG_REF(neg_model);
00069 
00070     int32_t* pos_indizes=SG_MALLOC(int32_t, ((CStringFeatures<uint16_t>*) features)->get_num_vectors());
00071     int32_t* neg_indizes=SG_MALLOC(int32_t, ((CStringFeatures<uint16_t>*) features)->get_num_vectors());
00072 
00073     ASSERT(labels->get_num_labels()==features->get_num_vectors());
00074 
00075     int32_t pos_idx=0;
00076     int32_t neg_idx=0;
00077 
00078     for (int32_t i=0; i<labels->get_num_labels(); i++)
00079     {
00080         if (labels->get_label(i) > 0)
00081             pos_indizes[pos_idx++]=i;
00082         else
00083             neg_indizes[neg_idx++]=i;
00084     }
00085 
00086     SG_INFO( "training using pseudos %f and %f\n", m_pos_pseudo, m_neg_pseudo);
00087     pos_model->train(pos_indizes, pos_idx, m_pos_pseudo);
00088     neg_model->train(neg_indizes, neg_idx, m_neg_pseudo);
00089 
00090     SG_FREE(pos_indizes);
00091     SG_FREE(neg_indizes);
00092     
00093     return true;
00094 }
00095 
00096 CLabels* CPluginEstimate::apply()
00097 {
00098     ASSERT(features);
00099     CLabels* result=new CLabels(features->get_num_vectors());
00100     ASSERT(result->get_num_labels()==features->get_num_vectors());
00101 
00102     for (int32_t vec=0; vec<features->get_num_vectors(); vec++)
00103         result->set_label(vec, apply(vec));
00104 
00105     return result;
00106 }
00107 
00108 CLabels* CPluginEstimate::apply(CFeatures* data)
00109 {
00110     if (!data)
00111         SG_ERROR("No features specified\n");
00112 
00113     if (data->get_feature_class() != C_STRING ||
00114             data->get_feature_type() != F_WORD)
00115     {
00116         SG_ERROR("Features not of class string type word\n");
00117     }
00118 
00119     set_features((CStringFeatures<uint16_t>*) data);
00120     return apply();
00121 }
00122 
00123 float64_t CPluginEstimate::apply(int32_t vec_idx)
00124 {
00125     ASSERT(features);
00126 
00127     int32_t len;
00128     bool free_vec;
00129     uint16_t* vector=features->get_feature_vector(vec_idx, len, free_vec);
00130 
00131     if ((!pos_model) || (!neg_model))
00132         SG_ERROR( "model(s) not assigned\n");
00133       
00134     float64_t result=pos_model->get_log_likelihood_example(vector, len) - neg_model->get_log_likelihood_example(vector, len);
00135     features->free_feature_vector(vector, vec_idx, free_vec);
00136     return result;
00137 }
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Defines

SHOGUN Machine Learning Toolbox - Documentation