OnlineSVMSGD.cpp
/*
   SVM with stochastic gradient
   Copyright (C) 2007- Leon Bottou

   This program is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
   License as published by the Free Software Foundation; either
   version 2.1 of the License, or (at your option) any later version.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
   GNU General Public License for more details.

   You should have received a copy of the GNU Lesser General Public
   License along with this library; if not, write to the Free Software
   Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
   $Id: svmsgd.cpp,v 1.13 2007/10/02 20:40:06 cvs Exp $

   Shogun adjustments (w) 2008-2009 Soeren Sonnenburg
*/

#include <shogun/classifier/svm/OnlineSVMSGD.h>
#include <shogun/base/Parameter.h>
#include <shogun/lib/Signal.h>
#include <shogun/loss/HingeLoss.h>

using namespace shogun;

COnlineSVMSGD::COnlineSVMSGD()
: COnlineLinearMachine()
{
    init();
}

COnlineSVMSGD::COnlineSVMSGD(float64_t C)
: COnlineLinearMachine()
{
    init();

    C1=C;
    C2=C;
}

COnlineSVMSGD::COnlineSVMSGD(float64_t C, CStreamingDotFeatures* traindat)
: COnlineLinearMachine()
{
    init();
    C1=C;
    C2=C;

    set_features(traindat);
}

COnlineSVMSGD::~COnlineSVMSGD()
{
    SG_UNREF(loss);
}

void COnlineSVMSGD::set_loss_function(CLossFunction* loss_func)
{
    SG_REF(loss_func);
    SG_UNREF(loss);
    loss=loss_func;
}

bool COnlineSVMSGD::train(CFeatures* data)
{
    if (data)
    {
        if (!data->has_property(FP_STREAMING_DOT))
            SG_ERROR("Specified features are not of type CStreamingDotFeatures\n")
        set_features((CStreamingDotFeatures*) data);
    }

    features->start_parser();

    // allocate memory for w and initialize everything (w and bias) with 0
    ASSERT(features)
    ASSERT(features->get_has_labels())
    if (w)
        SG_FREE(w);
    w_dim=1;
    w=SG_CALLOC(float32_t, 1); // zero-initialized; matches the SG_FREE above
    bias=0;

    // Shift t in order to have a
    // reasonable initial learning rate.
    // This assumes |x| \approx 1.
    float64_t maxw = 1.0 / sqrt(lambda);
    float64_t typw = sqrt(maxw);
    float64_t eta0 = typw / CMath::max(1.0,-loss->first_derivative(-typw,1));
    t = 1 / (eta0 * lambda);
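    // With eta = 1.0/(lambda*t) as used in the epoch loop below, this
    // shift makes the very first iteration run at the calibrated rate
    // eta0 instead of the huge 1/lambda it would get with t = 1.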

    SG_INFO("lambda=%f, epochs=%d, eta0=%f\n", lambda, epochs, eta0)

    //do the sgd
    calibrate();
    if (features->is_seekable())
        features->reset_stream();

    CSignal::clear_cancel();

    ELossType loss_type = loss->get_loss_type();
    bool is_log_loss = false;
    if ((loss_type == L_LOGLOSS) || (loss_type == L_LOGLOSSMARGIN))
        is_log_loss = true;

    int32_t vec_count;
    for(int32_t e=0; e<epochs && (!CSignal::cancel_computations()); e++)
    {
        vec_count=0;
        count = skip;
        while (features->get_next_example())
        {
            vec_count++;
            // Expand w vector if more features are seen in this example
            features->expand_if_required(w, w_dim);

            float64_t eta = 1.0 / (lambda * t);
            float64_t y = features->get_label();
            float64_t z = y * (features->dense_dot(w, w_dim) + bias);

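            // z is the margin y*f(x): hinge-type losses have zero
            // derivative for z >= 1, so their update can be skipped
            // there, while the log losses have a nonvanishing gradient
            // everywhere and must always be applied.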
            if (z < 1 || is_log_loss)
            {
                float64_t etd = -eta * loss->first_derivative(z,1);
                features->add_to_dense_vec(etd * y / wscale, w, w_dim);

                if (use_bias)
                {
                    if (use_regularized_bias)
                        bias *= 1 - eta * lambda * bscale;
                    bias += etd * y * bscale;
                }
            }

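            // Weight decay is amortized: the l2 shrinkage of w is
            // applied only once every `skip` examples, using the
            // first-order factor 1 - eta*lambda*skip and falling back
            // to the exact product (1 - eta*lambda)^skip when the
            // linearization gets too coarse (r < 0.8).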
            if (--count <= 0)
            {
                float32_t r = 1 - eta * lambda * skip;
                if (r < 0.8)
                    r = pow(1 - eta * lambda, skip);
                SGVector<float32_t>::scale_vector(r, w, w_dim);
                count = skip;
            }
            t++;

            features->release_example();
        }

        // If the stream is seekable, reset the stream to the first
        // example (for epochs > 1)
        if (features->is_seekable() && e < epochs-1)
            features->reset_stream();
        else
            break;

    }

    features->end_parser();
    float64_t wnorm = SGVector<float32_t>::dot(w, w, w_dim);
    SG_INFO("Norm: %.6f, Bias: %.6f\n", wnorm, bias)

    return true;
}

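// Scans up to max_vec_num examples (stopping early once m exceeds 1000)
// to collect simple statistics before training: c accumulates |x_i| per
// dimension, m tracks the largest such accumulated value, and r totals
// the nonzero feature counts. These feed the bscale and skip heuristics
// computed at the end.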
void COnlineSVMSGD::calibrate(int32_t max_vec_num)
{
    int32_t c_dim=1;
    float32_t* c=SG_CALLOC(float32_t, 1); // zero-initialized; freed with SG_FREE below

    // compute average gradient size
    int32_t n = 0;
    float64_t m = 0;
    float64_t r = 0;

    while (features->get_next_example())
    {
        //Expand c if more features are seen in this example
        features->expand_if_required(c, c_dim);

        r += features->get_nnz_features_for_vector();
        features->add_to_dense_vec(1, c, c_dim, true);

        //waste cpu cycles for readability
        //(only changed dims need checking)
        m=SGVector<float32_t>::max(c, c_dim);
        n++;

        features->release_example();
        if (n>=max_vec_num || m > 1000)
            break;
    }

    SG_PRINT("Online SGD calibrated using %d vectors.\n", n)

    // bias update scaling
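    // heuristic carried over from Bottou's svmsgd calibration: m/n is
    // roughly the largest accumulated per-dimension feature mass,
    // averaged per example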
    bscale = 0.5*m/n;

    // compute weight decay skip
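    // r/n is the average number of nonzero features per example, so
    // skip ~ 16*c_dim/(r/n): one dense decay of all c_dim weights per
    // roughly 16 examples' worth of sparse update work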
    skip = (int32_t) ((16 * n * c_dim) / r);

    SG_INFO("using %d examples. skip=%d bscale=%.6f\n", n, skip, bscale)

    SG_FREE(c);
}

void COnlineSVMSGD::init()
{
    t=1;
    C1=1;
    C2=1;
    lambda=1e-4;
    wscale=1;
    bscale=1;
    epochs=1;
    skip=1000;
    count=1000;
    use_bias=true;

    use_regularized_bias=false;

    loss=new CHingeLoss();
    SG_REF(loss);

    m_parameters->add(&C1, "C1", "Cost constant 1.");
    m_parameters->add(&C2, "C2", "Cost constant 2.");
    m_parameters->add(&lambda, "lambda", "Regularization parameter.");
    m_parameters->add(&wscale, "wscale", "W scale");
    m_parameters->add(&bscale, "bscale", "b scale");
    m_parameters->add(&epochs, "epochs", "epochs");
    m_parameters->add(&skip, "skip", "skip");
    m_parameters->add(&count, "count", "count");
    m_parameters->add(&use_bias, "use_bias", "Indicates if bias is used.");
    m_parameters->add(&use_regularized_bias, "use_regularized_bias", "Indicates if bias is regularized.");
}
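
A minimal usage sketch (not part of this file; the input file name, its
format, and the choice of streaming sparse features are illustrative
assumptions): stream labelled examples from an ascii file and train the
online SVM on them.

#include <shogun/base/init.h>
#include <shogun/io/streaming/StreamingAsciiFile.h>
#include <shogun/features/streaming/StreamingSparseFeatures.h>
#include <shogun/classifier/svm/OnlineSVMSGD.h>

using namespace shogun;

int main()
{
    init_shogun_with_defaults();

    // hypothetical input file with labelled examples in ascii format
    CStreamingAsciiFile* file = new CStreamingAsciiFile("train.dat");

    // labelled stream; the parser buffers up to 1024 examples
    CStreamingSparseFeatures<float32_t>* feats =
        new CStreamingSparseFeatures<float32_t>(file, true, 1024);

    COnlineSVMSGD* svm = new COnlineSVMSGD(1.0, feats);
    svm->train();

    SG_UNREF(svm);
    exit_shogun();
    return 0;
}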
