SHOGUN  4.1.0
NeuralLinearLayer.cpp
Go to the documentation of this file.
/*
 * Copyright (c) 2014, Shogun Toolbox Foundation
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *
 * 1. Redistributions of source code must retain the above copyright notice,
 * this list of conditions and the following disclaimer.
 *
 * 2. Redistributions in binary form must reproduce the above copyright notice,
 * this list of conditions and the following disclaimer in the documentation
 * and/or other materials provided with the distribution.
 *
 * 3. Neither the name of the copyright holder nor the names of its
 * contributors may be used to endorse or promote products derived from this
 * software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 *
 * Written (W) 2014 Khaled Nasr
 */
#include <shogun/neuralnets/NeuralLinearLayer.h>
#include <shogun/mathematics/Math.h>
#include <shogun/lib/SGVector.h>

#ifdef HAVE_EIGEN3
#include <shogun/mathematics/eigen3.h>
#endif

using namespace shogun;

CNeuralLinearLayer::CNeuralLinearLayer() : CNeuralLayer()
{
}

CNeuralLinearLayer::CNeuralLinearLayer(int32_t num_neurons):
CNeuralLayer(num_neurons)
{
}

void CNeuralLinearLayer::initialize_neural_layer(CDynamicObjectArray* layers,
    SGVector< int32_t > input_indices)
{
    CNeuralLayer::initialize_neural_layer(layers, input_indices);

    // one bias per neuron, plus one weight matrix per input layer
    m_num_parameters = m_num_neurons;
    for (int32_t i=0; i<input_indices.vlen; i++)
        m_num_parameters += m_num_neurons*m_input_sizes[i];
}

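// Parameter layout set up above: the first m_num_neurons entries of the
// parameter vector are the biases, followed, for each input layer l, by an
// m_num_neurons x m_input_sizes[l] weight matrix stored in column-major order.
// The functions below walk this layout using weights_index_offset.
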
void CNeuralLinearLayer::initialize_parameters(SGVector<float64_t> parameters,
    SGVector<bool> parameter_regularizable,
    float64_t sigma)
{
    for (int32_t i=0; i<m_num_parameters; i++)
    {
        // randomly initialize the parameters from a zero-mean normal distribution
        parameters[i] = CMath::normal_random(0.0, sigma);

        // turn regularization off for the biases, on for the weights
        parameter_regularizable[i] = (i>=m_num_neurons);
    }
}

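// Computes the layer's activations as A = b*1^T + sum_l W_l*X_l, where b is
// the bias vector, W_l is the weight matrix for input layer l, and X_l holds
// that layer's activations for the current batch (one column per example).
// With Eigen3 this is done through mapped matrix products; otherwise the
// fallback loops compute the same thing entry by entry.
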
void CNeuralLinearLayer::compute_activations(SGVector<float64_t> parameters,
    CDynamicObjectArray* layers)
{
    float64_t* biases = parameters.vector;

#ifdef HAVE_EIGEN3
    typedef Eigen::Map<Eigen::MatrixXd> EMappedMatrix;
    typedef Eigen::Map<Eigen::VectorXd> EMappedVector;

    EMappedMatrix A(m_activations.matrix, m_num_neurons, m_batch_size);
    EMappedVector B(biases, m_num_neurons);

    A.colwise() = B;
#else
    for (int32_t i=0; i<m_num_neurons; i++)
    {
        for (int32_t j=0; j<m_batch_size; j++)
        {
            m_activations[i+j*m_num_neurons] = biases[i];
        }
    }
#endif

    int32_t weights_index_offset = m_num_neurons;
    for (int32_t l=0; l<m_input_indices.vlen; l++)
    {
        CNeuralLayer* layer =
            (CNeuralLayer*)layers->element(m_input_indices[l]);

        float64_t* weights = parameters.vector + weights_index_offset;
        weights_index_offset += m_num_neurons*layer->get_num_neurons();

#ifdef HAVE_EIGEN3
        EMappedMatrix W(weights, m_num_neurons, layer->get_num_neurons());
        EMappedMatrix X(layer->get_activations().matrix,
            layer->get_num_neurons(), m_batch_size);

        A += W*X;
#else
        // activations = weights*previous_layer_activations
        for (int32_t i=0; i<m_num_neurons; i++)
        {
            for (int32_t j=0; j<m_batch_size; j++)
            {
                float64_t sum = 0;
                for (int32_t k=0; k<layer->get_num_neurons(); k++)
                {
                    sum += weights[i+k*m_num_neurons]*
                        layer->get_activations()(k,j);
                }
                m_activations[i+j*m_num_neurons] += sum;
            }
        }
#endif
        SG_UNREF(layer);
    }
}

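// Backpropagation through the layer. After compute_local_gradients() fills
// m_local_gradients (LG) with dE/d(activations), the bias gradients are the
// row sums of LG, the weight gradients for input layer l are LG*X_l^T, and
// W_l^T*LG is accumulated into that layer's activation gradients so it can
// continue the backward pass. If dropout is enabled, LG is masked before the
// weight and input gradients are computed.
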
void CNeuralLinearLayer::compute_gradients(
    SGVector<float64_t> parameters,
    SGMatrix<float64_t> targets,
    CDynamicObjectArray* layers,
    SGVector<float64_t> parameter_gradients)
{
    compute_local_gradients(targets);

    // compute bias gradients
    float64_t* bias_gradients = parameter_gradients.vector;
#ifdef HAVE_EIGEN3
    typedef Eigen::Map<Eigen::MatrixXd> EMappedMatrix;
    typedef Eigen::Map<Eigen::VectorXd> EMappedVector;

    EMappedVector BG(bias_gradients, m_num_neurons);
    EMappedMatrix LG(m_local_gradients.matrix, m_num_neurons, m_batch_size);

    BG = LG.rowwise().sum();
#else
    for (int32_t i=0; i<m_num_neurons; i++)
    {
        float64_t sum = 0;
        for (int32_t j=0; j<m_batch_size; j++)
        {
            sum += m_local_gradients[i+j*m_num_neurons];
        }
        bias_gradients[i] = sum;
    }
#endif

    // apply dropout to the local gradients
    if (dropout_prop>0.0)
    {
        int32_t len = m_num_neurons*m_batch_size;
        for (int32_t i=0; i<len; i++)
            m_local_gradients[i] *= m_dropout_mask[i];
    }

    int32_t weights_index_offset = m_num_neurons;
    for (int32_t l=0; l<m_input_indices.vlen; l++)
    {
        CNeuralLayer* layer =
            (CNeuralLayer*)layers->element(m_input_indices[l]);

        float64_t* weights = parameters.vector + weights_index_offset;
        float64_t* weight_gradients = parameter_gradients.vector +
            weights_index_offset;

        weights_index_offset += m_num_neurons*layer->get_num_neurons();

#ifdef HAVE_EIGEN3
        EMappedMatrix X(layer->get_activations().matrix,
            layer->get_num_neurons(), m_batch_size);
        EMappedMatrix W(weights, m_num_neurons, layer->get_num_neurons());
        EMappedMatrix WG(weight_gradients,
            m_num_neurons, layer->get_num_neurons());
        EMappedMatrix IG(layer->get_activation_gradients().matrix,
            layer->get_num_neurons(), m_batch_size);

        // compute weight gradients
        WG = LG*X.transpose();

        // compute input gradients
        if (!layer->is_input())
            IG += W.transpose()*LG;
#else
        // weight_gradients = local_gradients*previous_layer_activations.T
        for (int32_t i=0; i<m_num_neurons; i++)
        {
            for (int32_t j=0; j<layer->get_num_neurons(); j++)
            {
                float64_t sum = 0;
                for (int32_t k=0; k<m_batch_size; k++)
                {
                    sum += m_local_gradients(i,k)*layer->get_activations()(j,k);
                }
                weight_gradients[i+j*m_num_neurons] = sum;
            }
        }

        if (!layer->is_input())
        {
            // input_gradients = weights.T*local_gradients
            for (int32_t i=0; i<layer->get_num_neurons(); i++)
            {
                for (int32_t j=0; j<m_batch_size; j++)
                {
                    float64_t sum = 0;
                    for (int32_t k=0; k<m_num_neurons; k++)
                    {
                        sum += weights[k+i*m_num_neurons]*
                            m_local_gradients(k,j);
                    }
                    layer->get_activation_gradients()(i,j) += sum;
                }
            }
        }
#endif
        SG_UNREF(layer);
    }

    if (contraction_coefficient != 0)
    {
        compute_contraction_term_gradients(parameters, parameter_gradients);
    }
}

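// Local gradients dE/d(activations). When targets are given (the layer acts
// as an output layer), the squared error measure is used and the gradient is
// (activations-targets)/batch_size. Otherwise the gradients have already been
// accumulated into m_activation_gradients by the layers above and are copied
// over unchanged, since the identity activation contributes no extra factor.
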
void CNeuralLinearLayer::compute_local_gradients(SGMatrix<float64_t> targets)
{
    if (targets.num_rows != 0)
    {
        // squared error measure
        // local_gradients = (activations-targets)/batch_size
        int32_t length = m_num_neurons*m_batch_size;
        for (int32_t i=0; i<length; i++)
            m_local_gradients[i] = (m_activations[i]-targets[i])/m_batch_size;
    }
    else
    {
        // no targets: use the gradients backpropagated by the layers above
        int32_t length = m_num_neurons*m_batch_size;
        for (int32_t i=0; i<length; i++)
            m_local_gradients[i] = m_activation_gradients[i];
    }
}

float64_t CNeuralLinearLayer::compute_error(SGMatrix<float64_t> targets)
{
    // error = 0.5*sum((targets-activations)^2)/batch_size
    float64_t sum = 0;
    int32_t length = m_num_neurons*m_batch_size;
    for (int32_t i=0; i<length; i++)
        sum += (targets[i]-m_activations[i])*(targets[i]-m_activations[i]);
    sum *= (0.5/m_batch_size);
    return sum;
}

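// Max-norm constraint on the weights: each block of m_input_sizes[l]
// consecutive weights whose L2 norm exceeds max_norm is rescaled so that its
// norm equals max_norm. The biases (the first m_num_neurons parameters) are
// left unchanged.
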
void CNeuralLinearLayer::enforce_max_norm(SGVector<float64_t> parameters,
    float64_t max_norm)
{
    int32_t weights_index_offset = m_num_neurons;
    for (int32_t l=0; l<m_input_indices.vlen; l++)
    {
        float64_t* weights = parameters.vector + weights_index_offset;

        int32_t length = m_num_neurons*m_input_sizes[l];
        for (int32_t i=0; i<length; i+=m_input_sizes[l])
        {
            float64_t norm =
                SGVector<float64_t>::twonorm(weights+i, m_input_sizes[l]);

            if (norm > max_norm)
            {
                float64_t multiplier = max_norm/norm;
                for (int32_t j=0; j<m_input_sizes[l]; j++)
                    weights[i+j] *= multiplier;
            }
        }
    }
}

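// Contraction term, used e.g. by contractive autoencoders: the squared
// Frobenius norm of the Jacobian of the activations with respect to the
// inputs, scaled by contraction_coefficient. For a linear layer that Jacobian
// is just the weight matrix, so the term reduces to the sum of squared weights
// (biases excluded); its gradient, 2*contraction_coefficient*w, is added by
// compute_contraction_term_gradients() below.
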
float64_t CNeuralLinearLayer::compute_contraction_term(SGVector<float64_t> parameters)
{
    float64_t contraction_term = 0;
    for (int32_t i=m_num_neurons; i<parameters.vlen; i++)
        contraction_term += parameters[i]*parameters[i];

    return contraction_coefficient*contraction_term;
}

void CNeuralLinearLayer::compute_contraction_term_gradients(
    SGVector< float64_t > parameters, SGVector< float64_t > gradients)
{
    for (int32_t i=m_num_neurons; i<parameters.vlen; i++)
        gradients[i] += 2*contraction_coefficient*parameters[i];
}

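The listing above defines the layer in isolation; during training it is driven by CNeuralNetwork, which calls compute_activations(), compute_gradients() and compute_error() for every batch. Below is a minimal usage sketch, not part of this file, of a small regression network whose output layer is a CNeuralLinearLayer, so the squared-error code above is what measures the training error. It assumes the 4.1-era API (CNeuralInputLayer, CNeuralLogisticLayer, CNeuralNetwork::quick_connect() and initialize_neural_network()); treat the exact method names as assumptions and check them against the installed headers.

#include <shogun/base/init.h>
#include <shogun/lib/SGMatrix.h>
#include <shogun/lib/SGVector.h>
#include <shogun/lib/DynamicObjectArray.h>
#include <shogun/features/DenseFeatures.h>
#include <shogun/labels/RegressionLabels.h>
#include <shogun/neuralnets/NeuralNetwork.h>
#include <shogun/neuralnets/NeuralInputLayer.h>
#include <shogun/neuralnets/NeuralLogisticLayer.h>
#include <shogun/neuralnets/NeuralLinearLayer.h>

using namespace shogun;

int main()
{
    init_shogun_with_defaults();

    // toy regression data: 2 features, 4 examples, targets y = x0 + x1
    SGMatrix<float64_t> X(2, 4);
    SGVector<float64_t> y(4);
    for (int32_t j=0; j<4; j++)
    {
        X(0,j) = j;
        X(1,j) = 2*j;
        y[j] = X(0,j) + X(1,j);
    }
    CDenseFeatures<float64_t>* features = new CDenseFeatures<float64_t>(X);
    CRegressionLabels* labels = new CRegressionLabels(y);

    // input layer -> logistic hidden layer -> linear output layer
    CDynamicObjectArray* layers = new CDynamicObjectArray();
    layers->append_element(new CNeuralInputLayer(2));
    layers->append_element(new CNeuralLogisticLayer(8));
    layers->append_element(new CNeuralLinearLayer(1));

    CNeuralNetwork* network = new CNeuralNetwork(layers);
    network->quick_connect();             // connect each layer to the one before it
    network->initialize_neural_network(); // allocate and randomly initialize parameters

    network->set_labels(labels);
    network->train(features);

    SG_UNREF(network);
    exit_shogun();
    return 0;
}
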

SHOGUN Machine Learning Toolbox - Documentation