SHOGUN  4.1.0
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Modules Pages
FeatureBlockLogisticRegression.cpp
Go to the documentation of this file.
1 /*
2  * This program is free software; you can redistribute it and/or modify
3  * it under the terms of the GNU General Public License as published by
4  * the Free Software Foundation; either version 3 of the License, or
5  * (at your option) any later version.
6  *
7  * Copyright (C) 2012 Sergey Lisitsyn
8  */
9 
13 
16 
17 namespace shogun
18 {
19 
// Default constructor: sets all hyper-parameters to their defaults and
// registers them with the parameter framework.
// NOTE(review): the defining signature line was dropped by the doc extractor —
// presumably CFeatureBlockLogisticRegression::CFeatureBlockLogisticRegression()
// with a CLinearMachine() base initializer; confirm against the repository.
{
	init();
	register_parameters();
}
26 
// Full constructor: sets the regularization coefficient z, training features,
// binary labels and the feature-block relation, then registers parameters.
// NOTE(review): the opening of the signature and the base-class initializer
// line were dropped by the doc extractor — confirm against the repository.
	float64_t z, CDotFeatures* train_features,
	CBinaryLabels* train_labels, CIndexBlockRelation* feature_relation) :
{
	init();
	set_feature_relation(feature_relation);
	set_z(z);
	set_features(train_features);
	set_labels(train_labels);
	register_parameters();
}
39 
40 void CFeatureBlockLogisticRegression::init()
41 {
42  m_feature_relation=NULL;
43  m_z=0.0;
44  m_q=2.0;
45  m_termination=0;
47  m_tolerance=1e-3;
48  m_max_iter=1000;
49 }
50 
// Destructor.
// NOTE(review): the signature line and one interior statement were dropped by
// the doc extractor — the missing statement is presumably
// SG_UNREF(m_feature_relation) to release the reference taken in
// set_feature_relation(); confirm against the repository.
{
}
55 
// Registers all hyper-parameters with Shogun's parameter framework so they
// participate in serialization; parameters marked MS_AVAILABLE are also
// exposed to model selection.
void CFeatureBlockLogisticRegression::register_parameters()
{
	SG_ADD((CSGObject**)&m_feature_relation, "feature_relation", "feature relation", MS_NOT_AVAILABLE);
	SG_ADD(&m_z, "z", "regularization coefficient", MS_AVAILABLE);
	SG_ADD(&m_q, "q", "q of L1/Lq", MS_AVAILABLE);
	SG_ADD(&m_termination, "termination", "termination", MS_NOT_AVAILABLE);
	SG_ADD(&m_regularization, "regularization", "regularization", MS_NOT_AVAILABLE);
	SG_ADD(&m_tolerance, "tolerance", "tolerance", MS_NOT_AVAILABLE);
	SG_ADD(&m_max_iter, "max_iter", "maximum number of iterations", MS_NOT_AVAILABLE);
}
66 
// Returns the feature-block relation currently held by the model.
// NOTE(review): the signature line and one interior line were dropped by the
// doc extractor — the missing line is presumably SG_REF(m_feature_relation)
// so the caller receives a counted reference; confirm against the repository.
{
	return m_feature_relation;
}
72 
// Replaces the feature-block relation, taking a reference on the new object.
// NOTE(review): the signature line and one interior line were dropped by the
// doc extractor — the missing line is presumably SG_UNREF(m_feature_relation)
// to drop the reference on the previously held relation before overwriting
// (without it the old relation would leak); confirm against the repository.
{
	SG_REF(feature_relation);
	m_feature_relation = feature_relation;
}
79 
// Returns the maximum number of solver iterations.
// (signature line lost in extraction — presumably int32_t get_max_iter())
{
	return m_max_iter;
}
84 
// Returns the regularization flag (0 or 1, see set_regularization()).
// (signature line lost in extraction)
{
	return m_regularization;
}
89 
// Returns the termination-criterion selector (0..4, see set_termination()).
// (signature line lost in extraction)
{
	return m_termination;
}
94 
// Returns the solver convergence tolerance.
// (signature line lost in extraction)
{
	return m_tolerance;
}
99 
// Returns the regularization coefficient z.
// (signature line lost in extraction)
{
	return m_z;
}
104 
// Returns q of the L1/Lq regularizer.
// (signature line lost in extraction)
{
	return m_q;
}
109 
// Sets the maximum number of solver iterations; must be non-negative.
// (signature line lost in extraction)
{
	ASSERT(max_iter>=0)
	m_max_iter = max_iter;
}
115 
// Sets the regularization flag; only 0 and 1 are accepted.
// (signature line lost in extraction)
{
	ASSERT(regularization==0 || regularization==1)
	m_regularization = regularization;
}
121 
// Sets the termination-criterion selector; must lie in [0, 4].
// (signature line lost in extraction)
{
	ASSERT(termination>=0 && termination<=4)
	m_termination = termination;
}
127 
// Sets the convergence tolerance; must be strictly positive.
// (signature line lost in extraction)
{
	ASSERT(tolerance>0.0)
	m_tolerance = tolerance;
}
133 
// Sets the regularization coefficient z (no validation performed here).
// (signature line lost in extraction)
{
	m_z = z;
}
138 
// Sets q of the L1/Lq regularizer (no validation performed here).
// (signature line lost in extraction)
{
	m_q = q;
}
143 
// Trains the feature-block logistic regression model: dispatches on the
// feature relation type (GROUP or TREE) and runs the SLEP solver with the
// matching options, then copies the learned weight vector and bias into this
// linear machine.
// NOTE(review): the defining signature line (presumably
// bool CFeatureBlockLogisticRegression::train_machine(CFeatures* data)) and
// several interior lines were dropped by the doc extractor — flagged inline.
{
	// NOTE(review): a C-style cast is never NULL, so the right operand of this
	// && is always true; the intent looks like a dynamic_cast type check —
	// confirm against the repository.
	if (data && (CDotFeatures*)data)
		set_features((CDotFeatures*)data);

	// (lines missing here in this extraction — likely NULL checks on
	//  features and m_labels; verify against the original source)

	// Densify the binary labels into a plain double vector for the solver.
	int32_t n_vecs = m_labels->get_num_labels();
	SGVector<float64_t> y(n_vecs);
	for (int32_t i=0; i<n_vecs; i++)
		y[i] = ((CBinaryLabels*)m_labels)->get_label(i);

	// Translate the model hyper-parameters into SLEP solver options.
	slep_options options = slep_options::default_options();
	options.q = m_q;
	options.regularization = m_regularization;
	options.termination = m_termination;
	options.tolerance = m_tolerance;
	options.max_iter = m_max_iter;
	options.loss = LOGISTIC;

	EIndexBlockRelationType relation_type = m_feature_relation->get_relation_type();
	switch (relation_type)
	{
	case GROUP:
	{
		// (line missing here — presumably the cast of m_feature_relation to
		//  CIndexBlockGroup* named feature_group; verify)
		SGVector<index_t> ind = feature_group->get_SLEP_ind();
		options.ind = ind.vector;
		// SLEP's ind holds n_blocks+1 boundary indices, hence vlen-1 blocks.
		options.n_feature_blocks = ind.vlen-1;
		if (ind[ind.vlen-1] > features->get_dim_feature_space())
			SG_ERROR("Group of features covers more features than available\n")

		// Uniform per-block weight of 1.0.
		options.gWeight = SG_MALLOC(double, options.n_feature_blocks);
		for (int32_t i=0; i<options.n_feature_blocks; i++)
			options.gWeight[i] = 1.0;
		options.mode = FEATURE_GROUP;
		options.loss = LOGISTIC;
		options.n_nodes = 0;
		slep_result_t result = slep_solver(features, y.vector, m_z, options);

		SG_FREE(options.gWeight);
		// Copy the learned weights and bias into this linear machine.
		int32_t n_feats = features->get_dim_feature_space();
		SGVector<float64_t> new_w(n_feats);
		for (int i=0; i<n_feats; i++)
			new_w[i] = result.w[i];
		set_bias(result.c[0]);

		w = new_w;
	}
	break;
	case TREE:
	{
		// (line missing here — presumably the cast of m_feature_relation to
		//  CIndexBlockTree* named feature_tree; verify)

		SGVector<float64_t> ind_t = feature_tree->get_SLEP_ind_t();
		// (a declaration of SGVector<float64_t> G appears to be missing here
		//  in this extraction — verify)
		if (feature_tree->is_general())
		{
			G = feature_tree->get_SLEP_G();
			options.general = true;
		}
		options.ind_t = ind_t.vector;
		options.G = G.vector;
		// ind_t stores one triple per tree node.
		options.n_nodes = ind_t.vlen/3;
		options.n_feature_blocks = ind_t.vlen/3;
		options.mode = FEATURE_TREE;
		options.loss = LOGISTIC;

		slep_result_t result = slep_solver(features, y.vector, m_z, options);

		// Copy the learned weights and bias into this linear machine.
		int32_t n_feats = features->get_dim_feature_space();
		SGVector<float64_t> new_w(n_feats);
		for (int i=0; i<n_feats; i++)
			new_w[i] = result.w[i];

		set_bias(result.c[0]);

		w = new_w;
	}
	break;
	default:
		SG_ERROR("Not supported feature relation type\n")
	}

	return true;
}
231 
// Computes exp(-(w·x + bias)) for the feature vector at vec_idx.
// NOTE(review): the signature line was lost in extraction — presumably
// float64_t apply_one(int32_t vec_idx). Note this returns the raw
// exponential, not the (-1,1)-rescaled sigmoid used in apply_get_outputs —
// confirm the intended semantics against callers.
{
	return CMath::exp(-(features->dense_dot(vec_idx, w.vector, w.vlen) + bias));
}
236 
// Applies the trained linear model to every vector of the given (or the
// currently set) features, mapping each linear response r through
// 2/(1+exp(-r)) - 1, i.e. a sigmoid rescaled to the open interval (-1, 1).
// NOTE(review): the signature line was lost in extraction — presumably
// SGVector<float64_t> apply_get_outputs(CFeatures* data); confirm.
{
	if (data)
	{
		// Incoming features must support dot products with the weight vector.
		if (!data->has_property(FP_DOT))
			SG_ERROR("Specified features are not of type CDotFeatures\n")

		set_features((CDotFeatures*) data);
	}

	// No features set at all: return an empty output vector.
	if (!features)
		return SGVector<float64_t>();

	int32_t num=features->get_num_vectors();
	ASSERT(num>0)

	// Batch-compute w·x_i + bias for all vectors, then squash to (-1, 1).
	float64_t* out=SG_MALLOC(float64_t, num);
	features->dense_dot_range(out, 0, num, NULL, w.vector, w.vlen, bias);
	for (int32_t i=0; i<num; i++)
		out[i] = 2.0/(1.0+CMath::exp(-out[i])) - 1.0;
	return SGVector<float64_t>(out,num);
}
260 
261 }
virtual void dense_dot_range(float64_t *output, int32_t start, int32_t stop, float64_t *alphas, float64_t *vec, int32_t dim, float64_t b)
Definition: DotFeatures.cpp:67
void set_feature_relation(CIndexBlockRelation *feature_relation)
class IndexBlockGroup used to represent group-based feature relation.
virtual SGVector< float64_t > get_SLEP_ind_t() const
class IndexBlockTree used to represent tree guided feature relation.
virtual float64_t dense_dot(int32_t vec_idx1, const float64_t *vec2, int32_t vec2_len)=0
virtual int32_t get_num_labels() const =0
class IndexBlockRelation
virtual int32_t get_num_vectors() const =0
CLabels * m_labels
Definition: Machine.h:361
#define SG_ERROR(...)
Definition: SGIO.h:129
SGVector< index_t > get_SLEP_ind()
virtual SGVector< float64_t > get_SLEP_G()
Features that support dot products among other operations.
Definition: DotFeatures.h:44
#define SG_REF(x)
Definition: SGObject.h:51
virtual int32_t get_dim_feature_space() const =0
index_t vlen
Definition: SGVector.h:494
#define ASSERT(x)
Definition: SGIO.h:201
Class SGObject is the base class of all shogun objects.
Definition: SGObject.h:112
double float64_t
Definition: common.h:50
SGVector< float64_t > w
virtual void set_features(CDotFeatures *feat)
Class LinearMachine is a generic interface for all kinds of linear machines like classifiers.
Definition: LinearMachine.h:63
slep_result_t slep_solver(CDotFeatures *features, double *y, double z, const slep_options &options)
CDotFeatures * features
#define SG_UNREF(x)
Definition: SGObject.h:52
All classes and functions are contained in the shogun namespace
Definition: class_list.h:18
virtual EIndexBlockRelationType get_relation_type() const =0
The class Features is the base class of all feature objects.
Definition: Features.h:68
static float64_t exp(float64_t x)
Definition: Math.h:621
Binary Labels for binary classification.
Definition: BinaryLabels.h:37
virtual SGVector< float64_t > apply_get_outputs(CFeatures *data)
virtual void set_bias(float64_t b)
#define SG_ADD(...)
Definition: SGObject.h:81
bool has_property(EFeatureProperty p) const
Definition: Features.cpp:295
virtual void set_labels(CLabels *lab)
Definition: Machine.cpp:65

SHOGUN Machine Learning Toolbox - Documentation