SHOGUN 4.2.0
NeuralLinearLayer.cpp
/*
 * Copyright (c) 2014, Shogun Toolbox Foundation
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *
 * 1. Redistributions of source code must retain the above copyright notice,
 * this list of conditions and the following disclaimer.
 *
 * 2. Redistributions in binary form must reproduce the above copyright notice,
 * this list of conditions and the following disclaimer in the documentation
 * and/or other materials provided with the distribution.
 *
 * 3. Neither the name of the copyright holder nor the names of its
 * contributors may be used to endorse or promote products derived from this
 * software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 *
 * Written (W) 2014 Khaled Nasr
 */

#include <shogun/neuralnets/NeuralLinearLayer.h>
#include <shogun/mathematics/Math.h>
#include <shogun/lib/SGVector.h>

#include <shogun/mathematics/eigen3.h>

using namespace shogun;

CNeuralLinearLayer::CNeuralLinearLayer() : CNeuralLayer()
{
}

CNeuralLinearLayer::CNeuralLinearLayer(int32_t num_neurons):
CNeuralLayer(num_neurons)
{
}

void CNeuralLinearLayer::initialize_neural_layer(CDynamicObjectArray* layers,
        SGVector< int32_t > input_indices)
{
    CNeuralLayer::initialize_neural_layer(layers, input_indices);

    m_num_parameters = m_num_neurons;
    for (int32_t i=0; i<input_indices.vlen; i++)
        m_num_parameters += m_num_neurons*m_input_sizes[i];
}
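
The parameters of this layer live in one flat vector: the first m_num_neurons entries are the biases, followed by one m_num_neurons-by-input_size weight matrix per input layer. As an illustration (sizes chosen arbitrarily), a layer with 8 neurons fed by two layers of sizes 4 and 3 ends up with

    m_num_parameters = 8 + 8*4 + 8*3 = 64    // layout: [ b (8) | W_0 (8x4) | W_1 (8x3) ]

All of the offset arithmetic in the functions below relies on this layout.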

void CNeuralLinearLayer::initialize_parameters(SGVector<float64_t> parameters,
        SGVector<bool> parameter_regularizable,
        float64_t sigma)
{
    for (int32_t i=0; i<m_num_parameters; i++)
    {
        // randomly initialize the parameters
        parameters[i] = CMath::normal_random(0.0, sigma);

        // turn regularization off for the biases, on for the weights
        parameter_regularizable[i] = (i>=m_num_neurons);
    }
}

void CNeuralLinearLayer::compute_activations(SGVector<float64_t> parameters,
        CDynamicObjectArray* layers)
{
    float64_t* biases = parameters.vector;

    typedef Eigen::Map<Eigen::MatrixXd> EMappedMatrix;
    typedef Eigen::Map<Eigen::VectorXd> EMappedVector;

    EMappedMatrix A(m_activations.matrix, m_num_neurons, m_batch_size);
    EMappedVector B(biases, m_num_neurons);

    A.colwise() = B;

    int32_t weights_index_offset = m_num_neurons;
    for (int32_t l=0; l<m_input_indices.vlen; l++)
    {
        CNeuralLayer* layer =
            (CNeuralLayer*)layers->element(m_input_indices[l]);

        float64_t* weights = parameters.vector + weights_index_offset;
        weights_index_offset += m_num_neurons*layer->get_num_neurons();

        EMappedMatrix W(weights, m_num_neurons, layer->get_num_neurons());
        EMappedMatrix X(layer->get_activations().matrix,
            layer->get_num_neurons(), m_batch_size);

        A += W*X;
        SG_UNREF(layer);
    }
}
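
In matrix form, the loop above computes A = b*1^T + sum_l W_l*X_l, where each column of A holds the pre-activations of one example in the batch. A standalone sketch of the same broadcast-then-accumulate pattern in plain Eigen (illustrative sizes; not part of Shogun):

    #include <Eigen/Dense>
    #include <iostream>

    int main()
    {
        const int num_neurons = 3, input_size = 4, batch_size = 2;

        Eigen::VectorXd b = Eigen::VectorXd::Constant(num_neurons, 0.1); // biases
        Eigen::MatrixXd W = Eigen::MatrixXd::Random(num_neurons, input_size);
        Eigen::MatrixXd X = Eigen::MatrixXd::Random(input_size, batch_size);

        Eigen::MatrixXd A(num_neurons, batch_size);
        A.colwise() = b; // every column starts out as the bias vector
        A += W*X;        // accumulate the contribution of one input layer

        std::cout << A << std::endl;
        return 0;
    }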

void CNeuralLinearLayer::compute_gradients(
    SGVector<float64_t> parameters,
    SGMatrix<float64_t> targets,
    CDynamicObjectArray* layers,
    SGVector<float64_t> parameter_gradients)
{
    compute_local_gradients(targets);

    // compute bias gradients
    float64_t* bias_gradients = parameter_gradients.vector;
    typedef Eigen::Map<Eigen::MatrixXd> EMappedMatrix;
    typedef Eigen::Map<Eigen::VectorXd> EMappedVector;

    EMappedVector BG(bias_gradients, m_num_neurons);
    EMappedMatrix LG(m_local_gradients.matrix, m_num_neurons, m_batch_size);

    BG = LG.rowwise().sum();

    // apply dropout to the local gradients
    if (dropout_prop>0.0)
    {
        int32_t len = m_num_neurons*m_batch_size;
        for (int32_t i=0; i<len; i++)
            m_local_gradients[i] *= m_dropout_mask[i];
    }

    int32_t weights_index_offset = m_num_neurons;
    for (int32_t l=0; l<m_input_indices.vlen; l++)
    {
        CNeuralLayer* layer =
            (CNeuralLayer*)layers->element(m_input_indices[l]);

        float64_t* weights = parameters.vector + weights_index_offset;
        float64_t* weight_gradients = parameter_gradients.vector +
            weights_index_offset;

        weights_index_offset += m_num_neurons*layer->get_num_neurons();

        EMappedMatrix X(layer->get_activations().matrix,
            layer->get_num_neurons(), m_batch_size);
        EMappedMatrix W(weights, m_num_neurons, layer->get_num_neurons());
        EMappedMatrix WG(weight_gradients,
            m_num_neurons, layer->get_num_neurons());
        EMappedMatrix IG(layer->get_activation_gradients().matrix,
            layer->get_num_neurons(), m_batch_size);

        // compute weight gradients
        WG = LG*X.transpose();

        // compute input gradients
        if (!layer->is_input())
            IG += W.transpose()*LG;
        SG_UNREF(layer);
    }

    if (contraction_coefficient != 0)
    {
        compute_contraction_term_gradients(parameters, parameter_gradients);
    }
}
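
With G = m_local_gradients, the loop above implements the linear-layer backpropagation identities: bias gradients BG = G*1 (row sums), weight gradients WG = G*X^T, and input gradients IG += W^T*G. A standalone finite-difference check of the weight-gradient identity in plain Eigen (illustrative sizes, bias omitted; not part of Shogun):

    #include <Eigen/Dense>
    #include <iostream>

    int main()
    {
        const int n = 3, d = 4, batch = 5;
        Eigen::MatrixXd W = Eigen::MatrixXd::Random(n, d);
        Eigen::MatrixXd X = Eigen::MatrixXd::Random(d, batch);
        Eigen::MatrixXd T = Eigen::MatrixXd::Random(n, batch);

        // error = 0.5*sum((W*X - T)^2)/batch, as in compute_error()
        auto error = [&](const Eigen::MatrixXd& W_)
        {
            return 0.5*(W_*X - T).squaredNorm()/batch;
        };

        Eigen::MatrixXd G = (W*X - T)/batch;    // local gradients, (A - T)/batch
        Eigen::MatrixXd WG = G*X.transpose();   // analytic weight gradients

        Eigen::MatrixXd Wp = W;                 // numeric gradient of one entry
        const double eps = 1e-6;
        Wp(0, 0) += eps;
        double numeric = (error(Wp) - error(W))/eps;

        std::cout << "analytic: " << WG(0, 0)
                  << ", numeric: " << numeric << std::endl;
        return 0;
    }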

void CNeuralLinearLayer::compute_local_gradients(SGMatrix<float64_t> targets)
{
    if (targets.num_rows != 0)
    {
        // squared error measure
        // local_gradients = (activations-targets)/batch_size
        int32_t length = m_num_neurons*m_batch_size;
        for (int32_t i=0; i<length; i++)
            m_local_gradients[i] = (m_activations[i]-targets[i])/m_batch_size;
    }
    else
    {
        // no targets: this is a hidden layer, so the layers above have
        // already backpropagated their gradients into m_activation_gradients
        int32_t length = m_num_neurons*m_batch_size;
        for (int32_t i=0; i<length; i++)
            m_local_gradients[i] = m_activation_gradients[i];
    }
}

float64_t CNeuralLinearLayer::compute_error(SGMatrix<float64_t> targets)
{
    // error = 0.5*sum((targets-activations)^2)/batch_size
    float64_t sum = 0;
    int32_t length = m_num_neurons*m_batch_size;
    for (int32_t i=0; i<length; i++)
        sum += (targets[i]-m_activations[i])*(targets[i]-m_activations[i]);
    sum *= (0.5/m_batch_size);
    return sum;
}
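
Note that compute_local_gradients() above stores exactly the derivative of this error with respect to each activation:

    d/da_i [ 0.5*(t_i - a_i)^2 / batch_size ] = (a_i - t_i)/batch_size

so the two functions stay consistent by construction.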

void CNeuralLinearLayer::enforce_max_norm(SGVector<float64_t> parameters,
        float64_t max_norm)
{
    int32_t weights_index_offset = m_num_neurons;
    for (int32_t l=0; l<m_input_indices.vlen; l++)
    {
        float64_t* weights = parameters.vector + weights_index_offset;
        // advance to the next input layer's weights (mirrors compute_activations)
        weights_index_offset += m_num_neurons*m_input_sizes[l];

        int32_t length = m_num_neurons*m_input_sizes[l];
        for (int32_t i=0; i<length; i+=m_input_sizes[l])
        {
            float64_t norm =
                SGVector<float64_t>::twonorm(weights+i, m_input_sizes[l]);

            if (norm > max_norm)
            {
                float64_t multiplier = max_norm/norm;
                for (int32_t j=0; j<m_input_sizes[l]; j++)
                    weights[i+j] *= multiplier;
            }
        }
    }
}
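
Each group of m_input_sizes[l] consecutive weights is treated as one incoming-weight vector and rescaled onto the ball of radius max_norm whenever it falls outside it. Worked example: a group w = (3, 4) has ||w||_2 = 5; with max_norm = 3 the multiplier is 3/5 = 0.6, giving w = (1.8, 2.4), whose norm is exactly 3. Groups already inside the ball are left untouched, so the constraint acts as a projection, not a penalty.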

float64_t CNeuralLinearLayer::compute_contraction_term(SGVector<float64_t> parameters)
{
    float64_t contraction_term = 0;
    for (int32_t i=m_num_neurons; i<parameters.vlen; i++)
        contraction_term += parameters[i]*parameters[i];

    return contraction_coefficient*contraction_term;
}

void CNeuralLinearLayer::compute_contraction_term_gradients(
    SGVector< float64_t > parameters, SGVector< float64_t > gradients)
{
    for (int32_t i=m_num_neurons; i<parameters.vlen; i++)
        gradients[i] += 2*contraction_coefficient*parameters[i];
}
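
For a linear layer, the Jacobian of the activations with respect to the inputs is just the weight matrix, so a contractive-autoencoder penalty lambda*||J||_F^2 reduces to the sum of squared weights computed in compute_contraction_term() above (this reading of contraction_coefficient is background context consistent with CNeuralLayer's documentation, not taken from this file). Its derivative with respect to each weight is

    d/dw_i [ lambda*sum_j w_j^2 ] = 2*lambda*w_i

which is exactly what compute_contraction_term_gradients() adds; in both loops the biases (the first m_num_neurons entries) are skipped.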
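
For context, here is a sketch of how this layer is typically wired into a network. This is hypothetical usage, assuming Shogun 4.2's CNeuralNetwork, CNeuralInputLayer and CDynamicObjectArray APIs; it is not taken from this file:

    #include <shogun/base/init.h>
    #include <shogun/neuralnets/NeuralNetwork.h>
    #include <shogun/neuralnets/NeuralInputLayer.h>
    #include <shogun/neuralnets/NeuralLinearLayer.h>
    #include <shogun/lib/DynamicObjectArray.h>

    using namespace shogun;

    int main()
    {
        init_shogun_with_defaults();

        // a minimal linear network: 10 inputs -> 3 linear outputs
        CDynamicObjectArray* layers = new CDynamicObjectArray();
        layers->append_element(new CNeuralInputLayer(10));
        layers->append_element(new CNeuralLinearLayer(3));

        CNeuralNetwork* network = new CNeuralNetwork(layers);
        network->quick_connect();              // connect each layer to the one before it
        network->initialize_neural_network();  // allocate and randomly initialize parameters

        // ... set labels and call network->train(features) here ...

        SG_UNREF(network);
        exit_shogun();
        return 0;
    }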

SHOGUN Machine Learning Toolbox - Documentation