FeatureBlockLogisticRegression.cpp

/*
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 3 of the License, or
 * (at your option) any later version.
 *
 * Copyright (C) 2012 Sergey Lisitsyn
 */

#include <shogun/classifier/FeatureBlockLogisticRegression.h>
#include <shogun/lib/slep/slep_solver.h>
#include <shogun/lib/slep/slep_options.h>

#include <shogun/lib/IndexBlockGroup.h>
#include <shogun/lib/IndexBlockTree.h>

namespace shogun
{

// Defaults mirror the argument-taking constructor so no member is left uninitialized.
CFeatureBlockLogisticRegression::CFeatureBlockLogisticRegression() :
    CLinearMachine(), m_feature_relation(NULL), m_z(0.0), m_q(2.0),
    m_termination(0), m_regularization(0), m_tolerance(1e-3), m_max_iter(1000)
{
    register_parameters();
}

CFeatureBlockLogisticRegression::CFeatureBlockLogisticRegression(
     float64_t z, CDotFeatures* train_features,
     CBinaryLabels* train_labels, CIndexBlockRelation* feature_relation) :
    CLinearMachine(),
    m_feature_relation(NULL)
{
    set_feature_relation(feature_relation);
    set_z(z);
    set_q(2.0);
    set_features(train_features);
    set_labels(train_labels);
    set_termination(0);
    set_regularization(0);
    set_tolerance(1e-3);
    set_max_iter(1000);
    register_parameters();
}

CFeatureBlockLogisticRegression::~CFeatureBlockLogisticRegression()
{
    SG_UNREF(m_feature_relation);
}

void CFeatureBlockLogisticRegression::register_parameters()
{
    SG_ADD((CSGObject**)&m_feature_relation, "feature_relation", "feature relation", MS_NOT_AVAILABLE);
    SG_ADD(&m_z, "z", "regularization coefficient", MS_AVAILABLE);
    SG_ADD(&m_q, "q", "q of L1/Lq", MS_AVAILABLE);
    SG_ADD(&m_termination, "termination", "termination", MS_NOT_AVAILABLE);
    SG_ADD(&m_regularization, "regularization", "regularization", MS_NOT_AVAILABLE);
    SG_ADD(&m_tolerance, "tolerance", "tolerance", MS_NOT_AVAILABLE);
    SG_ADD(&m_max_iter, "max_iter", "maximum number of iterations", MS_NOT_AVAILABLE);
}

CIndexBlockRelation* CFeatureBlockLogisticRegression::get_feature_relation() const
{
    SG_REF(m_feature_relation);
    return m_feature_relation;
}

void CFeatureBlockLogisticRegression::set_feature_relation(CIndexBlockRelation* feature_relation)
{
    // Reference the new relation before releasing the old one so that
    // setting the same object twice cannot destroy it.
    SG_REF(feature_relation);
    SG_UNREF(m_feature_relation);
    m_feature_relation = feature_relation;
}

int32_t CFeatureBlockLogisticRegression::get_max_iter() const
{
    return m_max_iter;
}

int32_t CFeatureBlockLogisticRegression::get_regularization() const
{
    return m_regularization;
}

int32_t CFeatureBlockLogisticRegression::get_termination() const
{
    return m_termination;
}

float64_t CFeatureBlockLogisticRegression::get_tolerance() const
{
    return m_tolerance;
}

float64_t CFeatureBlockLogisticRegression::get_z() const
{
    return m_z;
}

float64_t CFeatureBlockLogisticRegression::get_q() const
{
    return m_q;
}

void CFeatureBlockLogisticRegression::set_max_iter(int32_t max_iter)
{
    ASSERT(max_iter>=0);
    m_max_iter = max_iter;
}

void CFeatureBlockLogisticRegression::set_regularization(int32_t regularization)
{
    ASSERT(regularization==0 || regularization==1);
    m_regularization = regularization;
}

void CFeatureBlockLogisticRegression::set_termination(int32_t termination)
{
    ASSERT(termination>=0 && termination<=4);
    m_termination = termination;
}

void CFeatureBlockLogisticRegression::set_tolerance(float64_t tolerance)
{
    ASSERT(tolerance>0.0);
    m_tolerance = tolerance;
}

void CFeatureBlockLogisticRegression::set_z(float64_t z)
{
    m_z = z;
}

void CFeatureBlockLogisticRegression::set_q(float64_t q)
{
    m_q = q;
}

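/* Training proceeds in three steps: the binary labels are copied into a dense
 * vector, the SLEP solver options are filled from the model's hyperparameters,
 * and slep_solver() is invoked in FEATURE_GROUP or FEATURE_TREE mode depending
 * on the registered CIndexBlockRelation. The resulting weight vector and bias
 * are then copied into the CLinearMachine state (w and bias). */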
bool CFeatureBlockLogisticRegression::train_machine(CFeatures* data)
{
    if (data)
    {
        // A C-style cast cannot verify the feature type; require dot-product features explicitly.
        if (!data->has_property(FP_DOT))
            SG_ERROR("Specified features are not of type CDotFeatures\n");
        set_features((CDotFeatures*)data);
    }

    ASSERT(features);
    ASSERT(m_labels);

    int32_t n_vecs = m_labels->get_num_labels();
    SGVector<float64_t> y(n_vecs);
    for (int32_t i=0; i<n_vecs; i++)
        y[i] = ((CBinaryLabels*)m_labels)->get_label(i);

    slep_options options = slep_options::default_options();
    options.q = m_q;
    options.regularization = m_regularization;
    options.termination = m_termination;
    options.tolerance = m_tolerance;
    options.max_iter = m_max_iter;
    options.loss = LOGISTIC;

    EIndexBlockRelationType relation_type = m_feature_relation->get_relation_type();
    switch (relation_type)
    {
        case GROUP:
        {
            CIndexBlockGroup* feature_group = (CIndexBlockGroup*)m_feature_relation;
            // ind holds the n_feature_blocks+1 block boundaries over feature indices.
            SGVector<index_t> ind = feature_group->get_SLEP_ind();
            options.ind = ind.vector;
            options.n_feature_blocks = ind.vlen-1;
            if (ind[ind.vlen-1] > features->get_dim_feature_space())
                SG_ERROR("Group of features covers more features than available\n");

            options.gWeight = SG_MALLOC(double, options.n_feature_blocks);
            for (int32_t i=0; i<options.n_feature_blocks; i++)
                options.gWeight[i] = 1.0;
            options.mode = FEATURE_GROUP;
            options.n_nodes = 0;
            slep_result_t result = slep_solver(features, y.vector, m_z, options);

            SG_FREE(options.gWeight);
            int32_t n_feats = features->get_dim_feature_space();
            SGVector<float64_t> new_w(n_feats);
            for (int32_t i=0; i<n_feats; i++)
                new_w[i] = result.w[i];
            set_bias(result.c[0]);

            w = new_w;
        }
        break;
        case TREE:
        {
            CIndexBlockTree* feature_tree = (CIndexBlockTree*)m_feature_relation;

            SGVector<float64_t> ind_t = feature_tree->get_SLEP_ind_t();
            SGVector<float64_t> G;
            if (feature_tree->is_general())
            {
                G = feature_tree->get_SLEP_G();
                options.general = true;
            }
            options.ind_t = ind_t.vector;
            options.G = G.vector;
            // Each tree node is encoded by three entries of ind_t.
            options.n_nodes = ind_t.vlen/3;
            options.n_feature_blocks = ind_t.vlen/3;
            options.mode = FEATURE_TREE;

            slep_result_t result = slep_solver(features, y.vector, m_z, options);

            int32_t n_feats = features->get_dim_feature_space();
            SGVector<float64_t> new_w(n_feats);
            for (int32_t i=0; i<n_feats; i++)
                new_w[i] = result.w[i];

            set_bias(result.c[0]);

            w = new_w;
        }
        break;
        default:
            SG_ERROR("Unsupported feature relation type\n");
    }

    return true;
}

float64_t CFeatureBlockLogisticRegression::apply_one(int32_t vec_idx)
{
    // Map the linear score through the same [-1,1] sigmoid used by apply_get_outputs().
    return 2.0/(1.0+CMath::exp(-(features->dense_dot(vec_idx, w.vector, w.vlen) + bias))) - 1.0;
}

SGVector<float64_t> CFeatureBlockLogisticRegression::apply_get_outputs(CFeatures* data)
{
    if (data)
    {
        if (!data->has_property(FP_DOT))
            SG_ERROR("Specified features are not of type CDotFeatures\n");

        set_features((CDotFeatures*) data);
    }

    if (!features)
        return SGVector<float64_t>();

    int32_t num=features->get_num_vectors();
    ASSERT(num>0);
    ASSERT(w.vlen==features->get_dim_feature_space());

    float64_t* out=SG_MALLOC(float64_t, num);
    features->dense_dot_range(out, 0, num, NULL, w.vector, w.vlen, bias);
    for (int32_t i=0; i<num; i++)
        out[i] = 2.0/(1.0+CMath::exp(-out[i])) - 1.0;
    return SGVector<float64_t>(out,num);
}

}
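
A usage sketch follows; it is not part of the generated source above. It shows one way this classifier might be trained and applied through the Shogun 2.x C++ API. The header paths, CDenseFeatures, CIndexBlock/CIndexBlockGroup, and the init_shogun_with_defaults()/exit_shogun() helpers are assumptions about that API rather than anything stated in this file, and the toy data and block boundaries are purely illustrative.

// Hedged usage sketch: group-regularized logistic regression over two feature blocks.
#include <shogun/base/init.h>
#include <shogun/lib/SGMatrix.h>
#include <shogun/lib/SGVector.h>
#include <shogun/features/DenseFeatures.h>
#include <shogun/labels/BinaryLabels.h>
#include <shogun/lib/IndexBlock.h>
#include <shogun/lib/IndexBlockGroup.h>
#include <shogun/classifier/FeatureBlockLogisticRegression.h>

using namespace shogun;

int main()
{
    init_shogun_with_defaults();

    // Toy data: 4 features x 6 vectors, labels in {-1,+1} (illustrative values only).
    SGMatrix<float64_t> X(4, 6);
    SGVector<float64_t> y(6);
    for (int32_t i=0; i<6; i++)
    {
        for (int32_t j=0; j<4; j++)
            X(j, i) = (i < 3) ? 1.0 + 0.1*j : -1.0 - 0.1*j;
        y[i] = (i < 3) ? 1.0 : -1.0;
    }

    CDenseFeatures<float64_t>* features = new CDenseFeatures<float64_t>(X);
    CBinaryLabels* labels = new CBinaryLabels(y);

    // Two feature blocks covering dimensions [0,2) and [2,4).
    CIndexBlockGroup* group = new CIndexBlockGroup();
    group->add_block(new CIndexBlock(0, 2));
    group->add_block(new CIndexBlock(2, 4));

    // z=0.1 is the regularization coefficient handed to slep_solver().
    CFeatureBlockLogisticRegression* clf =
        new CFeatureBlockLogisticRegression(0.1, features, labels, group);
    clf->train();

    CBinaryLabels* predicted = clf->apply_binary(features);
    SG_UNREF(predicted);
    SG_UNREF(clf);

    exit_shogun();
    return 0;
}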