Go to the documentation of this file.00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011 #include "lib/common.h"
00012 #include "lib/io.h"
00013 #include "features/StringFeatures.h"
00014 #include "features/Labels.h"
00015 #include "distributions/LinearHMM.h"
00016 #include "classifier/PluginEstimate.h"
00017
00018 using namespace shogun;
00019
00020 CPluginEstimate::CPluginEstimate(float64_t pos_pseudo, float64_t neg_pseudo)
00021 : CClassifier(), m_pos_pseudo(1e-10), m_neg_pseudo(1e-10),
00022 pos_model(NULL), neg_model(NULL), features(NULL)
00023 {
00024 m_parameters->add(&m_pos_pseudo,
00025 "pos_pseudo","pseudo count for positive class");
00026 m_parameters->add(&m_neg_pseudo,
00027 "neg_pseudo", "pseudo count for negative class");
00028
00029 m_parameters->add((CSGObject**) &pos_model,
00030 "pos_model", "LinearHMM modelling positive class.");
00031 m_parameters->add((CSGObject**) &neg_model,
00032 "neg_model", "LinearHMM modelling negative class.");
00033
00034 m_parameters->add((CSGObject**) &features,
00035 "features", "String Features.");
00036 }
00037
00038 CPluginEstimate::~CPluginEstimate()
00039 {
00040 SG_UNREF(pos_model);
00041 SG_UNREF(neg_model);
00042
00043 SG_UNREF(features);
00044 }
00045
00046 bool CPluginEstimate::train(CFeatures* data)
00047 {
00048 ASSERT(labels);
00049 if (data)
00050 {
00051 if (data->get_feature_class() != C_STRING ||
00052 data->get_feature_type() != F_WORD)
00053 {
00054 SG_ERROR("Features not of class string type word\n");
00055 }
00056
00057 set_features((CStringFeatures<uint16_t>*) data);
00058 }
00059 ASSERT(features);
00060
00061 SG_UNREF(pos_model);
00062 SG_UNREF(neg_model);
00063
00064 pos_model=new CLinearHMM(features);
00065 neg_model=new CLinearHMM(features);
00066
00067 SG_REF(pos_model);
00068 SG_REF(neg_model);
00069
00070 int32_t* pos_indizes=new int32_t[((CStringFeatures<uint16_t>*) features)->get_num_vectors()];
00071 int32_t* neg_indizes=new int32_t[((CStringFeatures<uint16_t>*) features)->get_num_vectors()];
00072
00073 ASSERT(labels->get_num_labels()==features->get_num_vectors());
00074
00075 int32_t pos_idx=0;
00076 int32_t neg_idx=0;
00077
00078 for (int32_t i=0; i<labels->get_num_labels(); i++)
00079 {
00080 if (labels->get_label(i) > 0)
00081 pos_indizes[pos_idx++]=i;
00082 else
00083 neg_indizes[neg_idx++]=i;
00084 }
00085
00086 SG_INFO( "training using pseudos %f and %f\n", m_pos_pseudo, m_neg_pseudo);
00087 pos_model->train(pos_indizes, pos_idx, m_pos_pseudo);
00088 neg_model->train(neg_indizes, neg_idx, m_neg_pseudo);
00089
00090 delete[] pos_indizes;
00091 delete[] neg_indizes;
00092
00093 return true;
00094 }
00095
00096 CLabels* CPluginEstimate::classify()
00097 {
00098 ASSERT(features);
00099 CLabels* result=new CLabels(features->get_num_vectors());
00100 ASSERT(result->get_num_labels()==features->get_num_vectors());
00101
00102 for (int32_t vec=0; vec<features->get_num_vectors(); vec++)
00103 result->set_label(vec, classify_example(vec));
00104
00105 return result;
00106 }
00107
00108 CLabels* CPluginEstimate::classify(CFeatures* data)
00109 {
00110 if (!data)
00111 SG_ERROR("No features specified\n");
00112
00113 if (data->get_feature_class() != C_STRING ||
00114 data->get_feature_type() != F_WORD)
00115 {
00116 SG_ERROR("Features not of class string type word\n");
00117 }
00118
00119 set_features((CStringFeatures<uint16_t>*) data);
00120 return classify();
00121 }
00122
00123 float64_t CPluginEstimate::classify_example(int32_t vec_idx)
00124 {
00125 ASSERT(features);
00126
00127 int32_t len;
00128 bool free_vec;
00129 uint16_t* vector=features->get_feature_vector(vec_idx, len, free_vec);
00130
00131 if ((!pos_model) || (!neg_model))
00132 SG_ERROR( "model(s) not assigned\n");
00133
00134 float64_t result=pos_model->get_log_likelihood_example(vector, len) - neg_model->get_log_likelihood_example(vector, len);
00135 features->free_feature_vector(vector, vec_idx, free_vec);
00136 return result;
00137 }