Go to the documentation of this file.00001
00002
00003
00004
00005
00006
00007
00008
00009
00010 #include <shogun/classifier/FeatureBlockLogisticRegression.h>
00011 #include <shogun/lib/slep/slep_solver.h>
00012 #include <shogun/lib/slep/slep_options.h>
00013
00014 #include <shogun/lib/IndexBlockGroup.h>
00015 #include <shogun/lib/IndexBlockTree.h>
00016
00017 namespace shogun
00018 {
00019
00020 CFeatureBlockLogisticRegression::CFeatureBlockLogisticRegression() :
00021 CLinearMachine(),
00022 m_feature_relation(NULL), m_z(0.0)
00023 {
00024 register_parameters();
00025 }
00026
/** Construct a fully configured feature-block logistic regression machine.
 *
 * @param z regularization coefficient forwarded to the SLEP solver
 * @param train_features training features (dot-product capable)
 * @param train_labels binary training labels
 * @param feature_relation group or tree structure over feature indices
 */
CFeatureBlockLogisticRegression::CFeatureBlockLogisticRegression(
	float64_t z, CDotFeatures* train_features,
	CBinaryLabels* train_labels, CIndexBlockRelation* feature_relation) :
	CLinearMachine(),
	m_feature_relation(NULL)
{
	// m_feature_relation must be NULL before set_feature_relation,
	// which unrefs the previous value.
	set_feature_relation(feature_relation);
	set_z(z);
	set_q(2.0);              // default L1/Lq norm parameter
	set_features(train_features);
	set_labels(train_labels);
	set_termination(0);      // SLEP termination criterion 0
	set_regularization(0);   // 0: z is the regularization coefficient
	set_tolerance(1e-3);
	set_max_iter(1000);
	register_parameters();
}
00044
/** Destructor: release our reference to the feature relation. */
CFeatureBlockLogisticRegression::~CFeatureBlockLogisticRegression()
{
	SG_UNREF(m_feature_relation);
}
00049
/** Register model parameters with the serialization/model-selection
 * framework. Only z and q take part in model selection (MS_AVAILABLE).
 */
void CFeatureBlockLogisticRegression::register_parameters()
{
	SG_ADD((CSGObject**)&m_feature_relation, "feature_relation", "feature relation", MS_NOT_AVAILABLE);
	SG_ADD(&m_z, "z", "regularization coefficient", MS_AVAILABLE);
	SG_ADD(&m_q, "q", "q of L1/Lq", MS_AVAILABLE);
	SG_ADD(&m_termination, "termination", "termination", MS_NOT_AVAILABLE);
	SG_ADD(&m_regularization, "regularization", "regularization", MS_NOT_AVAILABLE);
	SG_ADD(&m_tolerance, "tolerance", "tolerance", MS_NOT_AVAILABLE);
	SG_ADD(&m_max_iter, "max_iter", "maximum number of iterations", MS_NOT_AVAILABLE);
}
00060
/** Get the feature relation.
 *
 * @return the feature relation with an added reference — the caller
 *         owns that reference and must SG_UNREF it when done.
 */
CIndexBlockRelation* CFeatureBlockLogisticRegression::get_feature_relation() const
{
	SG_REF(m_feature_relation);
	return m_feature_relation;
}
00066
00067 void CFeatureBlockLogisticRegression::set_feature_relation(CIndexBlockRelation* feature_relation)
00068 {
00069 SG_UNREF(m_feature_relation);
00070 SG_REF(feature_relation);
00071 m_feature_relation = feature_relation;
00072 }
00073
/** @return maximum number of solver iterations */
int32_t CFeatureBlockLogisticRegression::get_max_iter() const
{
	return m_max_iter;
}
00078
/** @return SLEP regularization mode (0 or 1) */
int32_t CFeatureBlockLogisticRegression::get_regularization() const
{
	return m_regularization;
}
00083
/** @return SLEP termination criterion selector (0..4) */
int32_t CFeatureBlockLogisticRegression::get_termination() const
{
	return m_termination;
}
00088
/** @return solver convergence tolerance */
float64_t CFeatureBlockLogisticRegression::get_tolerance() const
{
	return m_tolerance;
}
00093
/** @return regularization coefficient z */
float64_t CFeatureBlockLogisticRegression::get_z() const
{
	return m_z;
}
00098
/** @return q parameter of the L1/Lq norm */
float64_t CFeatureBlockLogisticRegression::get_q() const
{
	return m_q;
}
00103
/** Set the maximum number of solver iterations.
 * @param max_iter iteration cap, must be non-negative
 */
void CFeatureBlockLogisticRegression::set_max_iter(int32_t max_iter)
{
	ASSERT(max_iter>=0);
	m_max_iter = max_iter;
}
00109
/** Set the SLEP regularization mode.
 * @param regularization must be 0 or 1 (SLEP rFlag)
 */
void CFeatureBlockLogisticRegression::set_regularization(int32_t regularization)
{
	ASSERT(regularization==0 || regularization==1);
	m_regularization = regularization;
}
00115
/** Set the SLEP termination criterion.
 * @param termination criterion selector, valid range [0, 4]
 */
void CFeatureBlockLogisticRegression::set_termination(int32_t termination)
{
	ASSERT(termination>=0 && termination<=4);
	m_termination = termination;
}
00121
/** Set the solver convergence tolerance.
 * @param tolerance must be strictly positive
 */
void CFeatureBlockLogisticRegression::set_tolerance(float64_t tolerance)
{
	ASSERT(tolerance>0.0);
	m_tolerance = tolerance;
}
00127
/** Set the regularization coefficient.
 * @param z coefficient passed to the SLEP solver (not validated here)
 */
void CFeatureBlockLogisticRegression::set_z(float64_t z)
{
	m_z = z;
}
00132
/** Set the q of the L1/Lq norm.
 * @param q norm parameter
 *
 * NOTE(review): no validation here although an L1/Lq norm presumably
 * requires q >= 1 — confirm what the SLEP solver accepts.
 */
void CFeatureBlockLogisticRegression::set_q(float64_t q)
{
	m_q = q;
}
00137
00138 bool CFeatureBlockLogisticRegression::train_machine(CFeatures* data)
00139 {
00140 if (data && (CDotFeatures*)data)
00141 set_features((CDotFeatures*)data);
00142
00143 ASSERT(features);
00144 ASSERT(m_labels);
00145
00146 int32_t n_vecs = m_labels->get_num_labels();
00147 SGVector<float64_t> y(n_vecs);
00148 for (int32_t i=0; i<n_vecs; i++)
00149 y[i] = ((CBinaryLabels*)m_labels)->get_label(i);
00150
00151 slep_options options = slep_options::default_options();
00152 options.q = m_q;
00153 options.regularization = m_regularization;
00154 options.termination = m_termination;
00155 options.tolerance = m_tolerance;
00156 options.max_iter = m_max_iter;
00157 options.loss = LOGISTIC;
00158
00159 EIndexBlockRelationType relation_type = m_feature_relation->get_relation_type();
00160 switch (relation_type)
00161 {
00162 case GROUP:
00163 {
00164 CIndexBlockGroup* feature_group = (CIndexBlockGroup*)m_feature_relation;
00165 SGVector<index_t> ind = feature_group->get_SLEP_ind();
00166 options.ind = ind.vector;
00167 options.n_feature_blocks = ind.vlen-1;
00168 if (ind[ind.vlen-1] > features->get_num_vectors())
00169 SG_ERROR("Group of features covers more vectors than available\n");
00170
00171 options.gWeight = SG_MALLOC(double, options.n_feature_blocks);
00172 for (int32_t i=0; i<options.n_feature_blocks; i++)
00173 options.gWeight[i] = 1.0;
00174 options.mode = FEATURE_GROUP;
00175 options.loss = LOGISTIC;
00176 options.n_nodes = 0;
00177 slep_result_t result = slep_solver(features, y.vector, m_z, options);
00178
00179 SG_FREE(options.gWeight);
00180 int32_t n_feats = features->get_dim_feature_space();
00181 SGVector<float64_t> new_w(n_feats);
00182 for (int i=0; i<n_feats; i++)
00183 new_w[i] = result.w[i];
00184 set_bias(result.c[0]);
00185
00186 w = new_w;
00187 }
00188 break;
00189 case TREE:
00190 {
00191 CIndexBlockTree* feature_tree = (CIndexBlockTree*)m_feature_relation;
00192
00193 SGVector<float64_t> ind_t = feature_tree->get_SLEP_ind_t();
00194 SGVector<float64_t> G;
00195 if (feature_tree->is_general())
00196 {
00197 G = feature_tree->get_SLEP_G();
00198 options.general = true;
00199 }
00200 options.ind_t = ind_t.vector;
00201 options.G = G.vector;
00202 options.n_nodes = ind_t.vlen/3;
00203 options.n_feature_blocks = ind_t.vlen/3;
00204 options.mode = FEATURE_TREE;
00205 options.loss = LOGISTIC;
00206
00207 slep_result_t result = slep_solver(features, y.vector, m_z, options);
00208
00209 int32_t n_feats = features->get_dim_feature_space();
00210 SGVector<float64_t> new_w(n_feats);
00211 for (int i=0; i<n_feats; i++)
00212 new_w[i] = result.w[i];
00213
00214 set_bias(result.c[0]);
00215
00216 w = new_w;
00217 }
00218 break;
00219 default:
00220 SG_ERROR("Not supported feature relation type\n");
00221 }
00222
00223 return true;
00224 }
00225
/** Score a single example.
 *
 * Computes exp(-(w.x + bias)) for the vector at vec_idx.
 *
 * NOTE(review): this output scale differs from apply_get_outputs(),
 * which maps the same dot product through 2/(1+exp(-t)) - 1 into
 * (-1, 1). Presumably one of the two is the intended scoring — confirm
 * against callers before relying on apply_one's values.
 *
 * @param vec_idx index of the feature vector to score
 * @return exp(-(w.x + bias))
 */
float64_t CFeatureBlockLogisticRegression::apply_one(int32_t vec_idx)
{
	return CMath::exp(-(features->dense_dot(vec_idx, w.vector, w.vlen) + bias));
}
00230
00231 SGVector<float64_t> CFeatureBlockLogisticRegression::apply_get_outputs(CFeatures* data)
00232 {
00233 if (data)
00234 {
00235 if (!data->has_property(FP_DOT))
00236 SG_ERROR("Specified features are not of type CDotFeatures\n");
00237
00238 set_features((CDotFeatures*) data);
00239 }
00240
00241 if (!features)
00242 return SGVector<float64_t>();
00243
00244 int32_t num=features->get_num_vectors();
00245 ASSERT(num>0);
00246 ASSERT(w.vlen==features->get_dim_feature_space());
00247
00248 float64_t* out=SG_MALLOC(float64_t, num);
00249 features->dense_dot_range(out, 0, num, NULL, w.vector, w.vlen, bias);
00250 for (int32_t i=0; i<num; i++)
00251 out[i] = 2.0/(1.0+CMath::exp(-out[i])) - 1.0;
00252 return SGVector<float64_t>(out,num);
00253 }
00254
00255 }