SHOGUN  v2.0.0
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
OnlineSVMSGD.cpp
Go to the documentation of this file.
1 /*
2  SVM with stochastic gradient
3  Copyright (C) 2007- Leon Bottou
4 
5  This program is free software; you can redistribute it and/or
6  modify it under the terms of the GNU Lesser General Public
7  License as published by the Free Software Foundation; either
8  version 2.1 of the License, or (at your option) any later version.
9 
10  This program is distributed in the hope that it will be useful,
11  but WITHOUT ANY WARRANTY; without even the implied warranty of
12  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13  GNU General Public License for more details.
14 
15  You should have received a copy of the GNU General Public License
16  along with this program; if not, write to the Free Software
17  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111, USA
18  $Id: svmsgd.cpp,v 1.13 2007/10/02 20:40:06 cvs Exp $
19 
20  Shogun adjustments (w) 2008-2009 Soeren Sonnenburg
21 */
22 
24 #include <shogun/base/Parameter.h>
25 #include <shogun/lib/Signal.h>
26 #include <shogun/loss/HingeLoss.h>
27 
28 using namespace shogun;
29 
32 {
33  init();
34 }
35 
38 {
39  init();
40 
41  C1=C;
42  C2=C;
43 }
44 
47 {
48  init();
49  C1=C;
50  C2=C;
51 
52  set_features(traindat);
53 }
54 
56 {
57  SG_UNREF(loss);
58 }
59 
61 {
62  if (loss)
63  SG_UNREF(loss);
64  loss=loss_func;
65  SG_REF(loss);
66 }
67 
69 {
70  if (data)
71  {
72  if (!data->has_property(FP_STREAMING_DOT))
73  SG_ERROR("Specified features are not of type CStreamingDotFeatures\n");
75  }
76 
78 
79  // allocate memory for w and initialize everything (w and bias) with 0
82  if (w)
83  SG_FREE(w);
84  w_dim=1;
85  w=new float32_t;
86  bias=0;
87 
88  // Shift t in order to have a
89  // reasonable initial learning rate.
90  // This assumes |x| \approx 1.
91  float64_t maxw = 1.0 / sqrt(lambda);
92  float64_t typw = sqrt(maxw);
93  float64_t eta0 = typw / CMath::max(1.0,-loss->first_derivative(-typw,1));
94  t = 1 / (eta0 * lambda);
95 
96  SG_INFO("lambda=%f, epochs=%d, eta0=%f\n", lambda, epochs, eta0);
97 
98  //do the sgd
99  calibrate();
100  if (features->is_seekable())
102 
104 
105  ELossType loss_type = loss->get_loss_type();
106  bool is_log_loss = false;
107  if ((loss_type == L_LOGLOSS) || (loss_type == L_LOGLOSSMARGIN))
108  is_log_loss = true;
109 
110  int32_t vec_count;
111  for(int32_t e=0; e<epochs && (!CSignal::cancel_computations()); e++)
112  {
113  vec_count=0;
114  count = skip;
115  while (features->get_next_example())
116  {
117  vec_count++;
118  // Expand w vector if more features are seen in this example
120 
121  float64_t eta = 1.0 / (lambda * t);
123  float64_t z = y * (features->dense_dot(w, w_dim) + bias);
124 
125  if (z < 1 || is_log_loss)
126  {
127  float64_t etd = -eta * loss->first_derivative(z,1);
128  features->add_to_dense_vec(etd * y / wscale, w, w_dim);
129 
130  if (use_bias)
131  {
132  if (use_regularized_bias)
133  bias *= 1 - eta * lambda * bscale;
134  bias += etd * y * bscale;
135  }
136  }
137 
138  if (--count <= 0)
139  {
140  float32_t r = 1 - eta * lambda * skip;
141  if (r < 0.8)
142  r = pow(1 - eta * lambda, skip);
144  count = skip;
145  }
146  t++;
147 
149  }
150 
151  // If the stream is seekable, reset the stream to the first
152  // example (for epochs > 1)
153  if (features->is_seekable() && e < epochs-1)
155  else
156  break;
157 
158  }
159 
160  features->end_parser();
162  SG_INFO("Norm: %.6f, Bias: %.6f\n", wnorm, bias);
163 
164  return true;
165 }
166 
// Calibrates the bias-update scale (bscale) and the weight-decay skip
// interval (skip) by reading examples from `features`.
//
// For each example read, the scratch vector c is grown on demand and the
// example is accumulated into it; m tracks the largest entry of c and n the
// number of examples read. Reading stops when the stream is exhausted, when
// n reaches max_vec_num, or when m exceeds 1000.
//
// @param max_vec_num upper bound on the number of examples to read
//
// NOTE(review): this listing omits the statements at embedded line numbers
// 182 and 190, so part of the loop body is not visible here.
167 void COnlineSVMSGD::calibrate(int32_t max_vec_num)
168 {
     // Scratch accumulator, starting with a single element.
     // NOTE(review): `new float32_t` leaves the element uninitialized, and the
     // buffer is later released with SG_FREE rather than delete - confirm both
     // are intended.
169  int32_t c_dim=1;
170  float32_t* c=new float32_t;
171 
172  // compute average gradient size
     // n: examples read so far; m: current maximum entry of c;
     // r: divisor used for the skip computation below.
173  int32_t n = 0;
174  float64_t m = 0;
175  float64_t r = 0;
176 
177  while (features->get_next_example())
178  {
179  //Expand c if more features are seen in this example
180  features->expand_if_required(c, c_dim);
181 
     // NOTE(review): embedded line 182 is missing from this listing; no
     // visible statement updates r - presumably it is accumulated there.
     // Accumulate the current example into c with weight 1.
     // NOTE(review): the meaning of the trailing `true` flag is not visible
     // here - presumably absolute values are added; confirm.
183  features->add_to_dense_vec(1, c, c_dim, true);
184 
185  //waste cpu cycles for readability
186  //(only changed dims need checking)
187  m=SGVector<float32_t>::max(c, c_dim);
188  n++;
189 
     // NOTE(review): embedded line 190 is missing from this listing.
     // Stop once enough examples were read or the accumulated maximum is large.
191  if (n>=max_vec_num || m > 1000)
192  break;
193  }
194 
195  SG_PRINT("Online SGD calibrated using %d vectors.\n", n);
196 
197  // bias update scaling
     // NOTE(review): if no example was read, n is still 0 and this divides by zero.
198  bscale = 0.5*m/n;
199 
200  // compute weight decay skip
     // NOTE(review): r is 0 unless updated on a line missing from this listing;
     // dividing by 0 and casting the result to int32_t would be undefined.
201  skip = (int32_t) ((16 * n * c_dim) / r);
202 
203  SG_INFO("using %d examples. skip=%d bscale=%.6f\n", n, skip, bscale);
204 
205  SG_FREE(c);
206 }
207 
// Sets the solver's members to their default values, installs a hinge loss
// as the default loss function, and registers the tunable members with
// m_parameters under their string names.
208 void COnlineSVMSGD::init()
209 {
     // Default values for the step counter, cost constants, regularization,
     // scaling factors, epoch count and weight-decay skip/counter.
210  t=1;
211  C1=1;
212  C2=1;
213  lambda=1e-4;
214  wscale=1;
215  bscale=1;
216  epochs=1;
217  skip=1000;
218  count=1000;
219  use_bias=true;
220 
221  use_regularized_bias=false;
222 
     // Default loss is the hinge loss; take a reference on the new object.
223  loss=new CHingeLoss();
224  SG_REF(loss);
225 
     // Register each member with a name and a short description.
     // NOTE(review): t and loss are not registered here - confirm that is intended.
226  m_parameters->add(&C1, "C1", "Cost constant 1.");
227  m_parameters->add(&C2, "C2", "Cost constant 2.");
228  m_parameters->add(&lambda, "lambda", "Regularization parameter.");
229  m_parameters->add(&wscale, "wscale", "W scale");
230  m_parameters->add(&bscale, "bscale", "b scale");
231  m_parameters->add(&epochs, "epochs", "epochs");
232  m_parameters->add(&skip, "skip", "skip");
233  m_parameters->add(&count, "count", "count");
234  m_parameters->add(&use_bias, "use_bias", "Indicates if bias is used.");
235  m_parameters->add(&use_regularized_bias, "use_regularized_bias", "Indicates if bias is regularized.");
236 }

SHOGUN Machine Learning Toolbox - Documentation