SHOGUN  v3.0.0
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
SVMSGD.cpp
Go to the documentation of this file.
1 /*
2  SVM with stochastic gradient
3  Copyright (C) 2007- Leon Bottou
4 
5  This program is free software; you can redistribute it and/or
6  modify it under the terms of the GNU Lesser General Public
7  License as published by the Free Software Foundation; either
8  version 2.1 of the License, or (at your option) any later version.
9 
10  This program is distributed in the hope that it will be useful,
11  but WITHOUT ANY WARRANTY; without even the implied warranty of
12  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13  GNU General Public License for more details.
14 
15  You should have received a copy of the GNU Lesser General Public
16  License along with this library; if not, write to the Free Software
17  Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
18  $Id: svmsgd.cpp,v 1.13 2007/10/02 20:40:06 cvs Exp $
19 
20  Shogun adjustments (w) 2008-2009 Soeren Sonnenburg
21 */
22 
24 #include <shogun/base/Parameter.h>
25 #include <shogun/lib/Signal.h>
27 #include <shogun/loss/HingeLoss.h>
28 
29 using namespace shogun;
30 
// NOTE(review): the signature line for this body is absent from this listing
// (listing lines 31-32 are skipped); presumably the default constructor -- confirm.
// The body delegates all member setup to init().
33 {
34  init();
35 }
36 
// NOTE(review): the signature line for this body is absent from this listing
// (listing lines 37-38 are skipped); presumably a constructor taking the cost
// constant C -- confirm.
39 {
 // Establish the defaults first, then override both cost constants with C.
40  init();
41 
42  C1=C;
43  C2=C;
44 }
45 
// NOTE(review): the signature line for this body is absent from this listing
// (listing lines 46-47 are skipped); presumably a constructor taking C,
// traindat and trainlab -- confirm.
48 {
 // Establish the defaults first, then override both cost constants with C.
49  init();
50  C1=C;
51  C2=C;
52 
 // Attach the supplied training features and labels to this object.
53  set_features(traindat);
54  set_labels(trainlab);
55 }
56 
// NOTE(review): the signature line for this body is absent from this listing
// (listing line 57 is skipped); presumably the destructor -- confirm.
58 {
 // Drops the reference on the loss object; init() takes one via SG_REF(loss).
59  SG_UNREF(loss);
60 }
61 
// NOTE(review): the signature line for this body is absent from this listing
// (listing line 62 is skipped); presumably set_loss_function(loss_func) -- confirm.
// Replaces the currently held loss object with loss_func.
63 {
 // The new object is referenced before the old one is released; presumably so
 // that passing the currently held object again cannot drop its last
 // reference -- confirm against the SG_REF/SG_UNREF semantics.
64  SG_REF(loss_func);
65  SG_UNREF(loss);
66  loss=loss_func;
67 }
68 
// NOTE(review): the signature line for this body is absent from this listing
// (listing line 69 is skipped); the body reads a pointer named `data` and
// returns true, so this is presumably train_machine(CFeatures* data) -- confirm.
// NOTE(review): listing lines 72-73, 82, 90, 111, 145 and 152 are also absent,
// so some statements of the real function are not visible here.
//
// Visible behaviour: optionally installs `data` as the training features,
// resets w and bias, derives lambda and the starting value of t, calls
// calibrate(), then runs up to `epochs` passes of per-example gradient updates.
70 {
71  // allocate memory for w and initialize everything (w and bias) with 0
74 
 // If features were passed in, they must advertise FP_DOT before being
 // installed as the training features.
75  if (data)
76  {
77  if (!data->has_property(FP_DOT))
78  SG_ERROR("Specified features are not of type CDotFeatures\n")
79  set_features((CDotFeatures*) data);
80  }
81 
83 
84  int32_t num_train_labels=m_labels->get_num_labels();
85  int32_t num_vec=features->get_num_vectors();
86 
 // One label per vector, and at least one training vector.
87  ASSERT(num_vec==num_train_labels)
88  ASSERT(num_vec>0)
89 
 // NOTE(review): listing line 90 is absent; presumably w is (re)allocated
 // there before being zeroed -- confirm.
91  w.zero();
92  bias=0;
93 
 // Regularisation strength derived from C1 and the number of training vectors.
94  float64_t lambda= 1.0/(C1*num_vec);
95 
96  // Shift t in order to have a
97  // reasonable initial learning rate.
98  // This assumes |x| \approx 1.
99  float64_t maxw = 1.0 / sqrt(lambda);
100  float64_t typw = sqrt(maxw);
101  float64_t eta0 = typw / CMath::max(1.0,-loss->first_derivative(-typw,1));
 // With eta = 1/(lambda*t) in the loop below, this choice of t makes the
 // first step size equal to eta0.
102  t = 1 / (eta0 * lambda);
103 
104  SG_INFO("lambda=%f, epochs=%d, eta0=%f\n", lambda, epochs, eta0)
105 
106 
107  //do the sgd
 // calibrate() sets bscale and skip, which the loop below reads.
108  calibrate();
109 
110  SG_INFO("Training on %d vectors\n", num_vec)
112 
 // For the two log-loss variants the update is applied to every example;
 // otherwise it is applied only when z < 1 (see the condition below).
113  ELossType loss_type = loss->get_loss_type();
114  bool is_log_loss = false;
115  if ((loss_type == L_LOGLOSS) || (loss_type == L_LOGLOSSMARGIN))
116  is_log_loss = true;
117 
 // The epoch loop stops early once CSignal::cancel_computations() is true.
118  for(int32_t e=0; e<epochs && (!CSignal::cancel_computations()); e++)
119  {
 // Restart the countdown to the next decay step at the start of each epoch.
120  count = skip;
121  for (int32_t i=0; i<num_vec; i++)
122  {
 // Step size shrinks as t grows; t is incremented once per example.
123  float64_t eta = 1.0 / (lambda * t);
124  float64_t y = ((CBinaryLabels*) m_labels)->get_label(i);
 // z is the label times the current output (w . x_i + bias).
125  float64_t z = y * (features->dense_dot(i, w.vector, w.vlen) + bias);
126 
127  if (z < 1 || is_log_loss)
128  {
 // etd is the step size times the negated first derivative of the loss at z.
129  float64_t etd = -eta * loss->first_derivative(z,1);
130  features->add_to_dense_vec(etd * y / wscale, i, w.vector, w.vlen);
131 
132  if (use_bias)
133  {
 // Optional shrinkage of the bias, applied before the gradient step.
134  if (use_regularized_bias)
135  bias *= 1 - eta * lambda * bscale;
136  bias += etd * y * bscale;
137  }
138  }
139 
 // Every `skip` examples a decay factor r is computed: first the linear
 // form 1 - eta*lambda*skip, replaced by the power form when that falls
 // below 0.8.
140  if (--count <= 0)
141  {
142  float64_t r = 1 - eta * lambda * skip;
143  if (r < 0.8)
144  r = pow(1 - eta * lambda, skip);
 // NOTE(review): listing line 145 is absent; r has no visible use here,
 // presumably w is scaled by r on that line -- confirm.
146  count = skip;
147  }
148  t++;
149  }
150  }
151 
 // NOTE(review): listing line 152 is absent; `wnorm` is not defined in the
 // visible code, presumably it is computed from w on that line -- confirm.
153  SG_INFO("Norm: %.6f, Bias: %.6f\n", wnorm, bias)
154 
155  return true;
156 }
157 
// NOTE(review): the signature line for this body is absent from this listing
// (listing line 158 is skipped); the body returns nothing and is invoked as
// calibrate() from the training code, so this is presumably
// void CSVMSGD::calibrate() -- confirm.
// NOTE(review): listing lines 160 and 179 are also absent.
//
// Visible behaviour: scans training vectors to derive the bias update scale
// (bscale) and the weight-decay interval (skip).
159 {
161  int32_t num_vec=features->get_num_vectors();
162  int32_t c_dim=features->get_dim_feature_space();
163 
164  ASSERT(num_vec>0)
165  ASSERT(c_dim>0)
166 
 // Zero-filled scratch accumulator with one slot per feature dimension;
 // released with SG_FREE at the end of this body.
167  float64_t* c=SG_MALLOC(float64_t, c_dim);
168  memset(c, 0, c_dim*sizeof(float64_t));
169 
170  SG_INFO("Estimating sparsity and bscale num_vec=%d num_feat=%d.\n", num_vec, c_dim)
171 
172  // compute average gradient size
 // n counts the vectors scanned, m tracks the largest accumulator entry.
173  int32_t n = 0;
174  float64_t m = 0;
175  float64_t r = 0;
176 
 // Scan until every vector has been used or the largest entry exceeds 1000.
177  for (int32_t j=0; j<num_vec && m<=1000; j++, n++)
178  {
 // NOTE(review): listing line 179 is absent; r is never updated in the
 // visible code, so as shown the division by r below would divide by zero.
 // Presumably r is accumulated per vector on the absent line -- confirm.
 // Adds vector j with weight 1 into c; the trailing `true` presumably
 // requests absolute values -- confirm against add_to_dense_vec.
180  features->add_to_dense_vec(1, j, c, c_dim, true);
181 
182  //waste cpu cycles for readability
183  //(only changed dims need checking)
184  m=SGVector<float64_t>::max(c, c_dim);
185  }
186 
187  // bias update scaling
 // n >= 1 here: num_vec > 0 is asserted and m starts at 0, so the loop body
 // runs at least once.
188  bscale = 0.5*m/n;
189 
190  // compute weight decay skip
 // NOTE(review): 16 * n * c_dim is evaluated in int32_t arithmetic before the
 // floating-point division and may overflow for large n * c_dim -- confirm.
191  skip = (int32_t) ((16 * n * c_dim) / r);
192  SG_INFO("using %d examples. skip=%d bscale=%.6f\n", n, skip, bscale)
193 
194  SG_FREE(c);
195 }
196 
197 void CSVMSGD::init()
198 {
199  t=1;
200  C1=1;
201  C2=1;
202  wscale=1;
203  bscale=1;
204  epochs=5;
205  skip=1000;
206  count=1000;
207  use_bias=true;
208 
209  use_regularized_bias=false;
210 
211  loss=new CHingeLoss();
212  SG_REF(loss);
213 
214  m_parameters->add(&C1, "C1", "Cost constant 1.");
215  m_parameters->add(&C2, "C2", "Cost constant 2.");
216  m_parameters->add(&wscale, "wscale", "W scale");
217  m_parameters->add(&bscale, "bscale", "b scale");
218  m_parameters->add(&epochs, "epochs", "epochs");
219  m_parameters->add(&skip, "skip", "skip");
220  m_parameters->add(&count, "count", "count");
221  m_parameters->add(&use_bias, "use_bias", "Indicates if bias is used.");
222  m_parameters->add(&use_regularized_bias, "use_regularized_bias", "Indicates if bias is regularized.");
223 }

SHOGUN Machine Learning Toolbox - Documentation