SHOGUN  3.2.1
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
DeepAutoencoder.cpp
Go to the documentation of this file.
1 /*
2  * Copyright (c) 2014, Shogun Toolbox Foundation
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions are met:
7 
8  * 1. Redistributions of source code must retain the above copyright notice,
9  * this list of conditions and the following disclaimer.
10  *
11  * 2. Redistributions in binary form must reproduce the above copyright notice,
12  * this list of conditions and the following disclaimer in the documentation
13  * and/or other materials provided with the distribution.
14  *
15  * 3. Neither the name of the copyright holder nor the names of its
16  * contributors may be used to endorse or promote products derived from this
17  * software without specific prior written permission.
18 
19  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
20  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22  * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
23  * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
24  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
25  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
26  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
27  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
28  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29  * POSSIBILITY OF SUCH DAMAGE.
30  *
31  * Written (W) 2014 Khaled Nasr
32  */
33 
37 
42 
43 #include <string>
44 
45 using namespace shogun;
46 
// NOTE(review): the signature line (Doxygen line 47) was lost in extraction;
// from the body this is the default constructor
// CDeepAutoencoder::CDeepAutoencoder() — confirm against the repository.
48 {
49  init();	// set hyperparameter defaults and register parameters (see init() below)
50 }
51 
// NOTE(review): constructor signature (Doxygen lines 52-53) was lost in
// extraction; presumably CDeepAutoencoder(CDynamicObjectArray* layers,
// float64_t sigma) — confirm against the repository.
54 {
55  set_layers(layers);
56  init();
57  m_sigma = sigma;
58  quick_connect();
59 
// Layers [0 .. (n-1)/2] form the encoder, the rest form the decoder.
60  int32_t num_encoding_layers = (m_num_layers-1)/2;
61  for (int32_t i=0; i<m_num_layers; i++)
62  {
63  if (i<= num_encoding_layers)
// NOTE(review): both branch bodies (Doxygen lines 64 and 66) were stripped
// by extraction — presumably they tag each layer as encoding vs. decoding;
// verify against the repository.
65  else
67  }
68 
// NOTE(review): Doxygen line 69 was stripped here — likely the weight
// initialization call; confirm against the repository.
70 
// Enforce the mirror-symmetric architecture a deep autoencoder requires:
// layer i must have the same neuron count as its mirror layer (n-i-1).
71  for (int32_t i=0; i<m_num_layers; i++)
72  {
73  REQUIRE(get_layer(i)->get_num_neurons()==get_layer(m_num_layers-i-1)->get_num_neurons(),
74  "Layer %i (%i neurons) must have the same number of neurons "
75  "as layer %i (%i neurons)\n", i, get_layer(i)->get_num_neurons(),
76  m_num_layers-i-1, get_layer(m_num_layers-i-1)->get_num_neurons());
77  }
78 }
79 
// Greedy layer-wise pre-training: for each encoding layer i, build a
// single-layer CAutoencoder from a clone of layer i (encoder) and a cloned
// decoding layer, train it on the activations of layer i-1, then copy the
// learned parameters back into this network's flat parameter vector.
// NOTE(review): the signature line (Doxygen line 80) was lost in extraction;
// presumably void CDeepAutoencoder::pre_train(CFeatures* data) — confirm.
81 {
82  SGMatrix<float64_t> data_matrix = features_to_matrix(data);
83 
84  int32_t num_encoding_layers = (m_num_layers-1)/2;
85  for (int32_t i=1; i<=num_encoding_layers; i++)
86  {
87  SG_INFO("Pre-training Layer %i\n", i);
88 
89  CNeuralLayer* ae_encoding_layer = (CNeuralLayer*)get_layer(i)->clone();
90 
91  CNeuralLayer* ae_decoding_layer =
// NOTE(review): the right-hand side of this assignment (Doxygen line 92) was
// stripped by extraction — presumably a clone of the mirror decoding layer
// (layer m_num_layers-i); confirm against the repository.
93 
94  CAutoencoder* ae = NULL;
95 
// Convolutional layers need the spatial geometry passed to the autoencoder;
// dispatch on the runtime class name.
96  if (strcmp(ae_encoding_layer->get_name(), "NeuralConvolutionalLayer")==0)
97  {
98  ae = new CAutoencoder(
99  ae_encoding_layer->get_width(),
100  ae_encoding_layer->get_height(),
// Number of input channels = neurons of the previous layer divided by the
// per-channel spatial size (width*height).
101  get_layer(i-1)->get_num_neurons()
102  /(ae_encoding_layer->get_width()*ae_encoding_layer->get_height()),
103  (CNeuralConvolutionalLayer*)ae_encoding_layer,
104  (CNeuralConvolutionalLayer*)ae_decoding_layer, m_sigma);
105  }
106  else
107  {
108  ae = new CAutoencoder(get_layer(i-1)->get_num_neurons(),
109  ae_encoding_layer, ae_decoding_layer, m_sigma);
110  }
111 
// Drop our local references; the autoencoder holds its own.
112  SG_UNREF(ae_encoding_layer);
113  SG_UNREF(ae_decoding_layer);
114 
// Per-layer pre-training hyperparameters (index i-1 in the pt_* vectors).
// NOTE(review): Doxygen lines 115-120, 122-124 and 126-127 were stripped by
// extraction — presumably the remaining pt_* assignments (noise type/parameter,
// contraction coefficient, optimization method, learning rate, etc.); confirm.
121  ae->epsilon = pt_epsilon[i-1];
125  ae->gd_momentum = pt_gd_momentum[i-1];
128 
129  // forward propagate the data to obtain the training data for the
130  // current autoencoder
131  for (int32_t j=0; j<i; j++)
132  get_layer(j)->set_batch_size(data_matrix.num_cols);
133  SGMatrix<float64_t> ae_input_matrix = forward_propagate(data_matrix, i-1);
134  CDenseFeatures<float64_t> ae_input_features(ae_input_matrix);
// Shrink the batch buffers back to 1 to release the propagation memory.
135  for (int32_t j=0; j<i-1; j++)
136  get_layer(j)->set_batch_size(1);
137 
138  ae->train(&ae_input_features);
139 
// Copy the trained parameters back: the first encoding_layer_params.vlen
// entries belong to encoding layer i, the remainder to its mirror decoding
// layer (m_num_layers-i). get_section() returns writable views into m_params.
140  SGVector<float64_t> ae_params = ae->get_parameters();
141  SGVector<float64_t> encoding_layer_params = get_section(m_params, i);
142  SGVector<float64_t> decoding_layer_params = get_section(m_params, m_num_layers-i);
143 
144  for (int32_t j=0; j<ae_params.vlen;j++)
145  {
146  if (j<encoding_layer_params.vlen)
147  encoding_layer_params[j] = ae_params[j];
148  else
149  decoding_layer_params[j-encoding_layer_params.vlen] = ae_params[j];
150  }
151  SG_UNREF(ae);
152  }
153 
154  set_batch_size(1);
155 }
156 
// Encode: propagate the input through the encoder half only (layers up to
// the middle layer, index (m_num_layers-1)/2) and return the code features.
// NOTE(review): the signature (Doxygen lines 157-158) was lost in extraction;
// presumably CDenseFeatures<float64_t>* CDeepAutoencoder::transform(
// CDenseFeatures<float64_t>* data) — confirm against the repository.
159 {
160  SGMatrix<float64_t> transformed = forward_propagate(data, (m_num_layers-1)/2);
161  return new CDenseFeatures<float64_t>(transformed);
162 }
163 
// Reconstruct: propagate the input through the entire network (encoder and
// decoder) and return the reconstruction as new dense features.
// NOTE(review): the signature (Doxygen lines 164-165) was lost in extraction;
// presumably CDenseFeatures<float64_t>* CDeepAutoencoder::reconstruct(
// CDenseFeatures<float64_t>* data) — confirm against the repository.
166 {
167  SGMatrix<float64_t> reconstructed = forward_propagate(data);
168  return new CDenseFeatures<float64_t>(reconstructed);
169 }
170 
// Build a standalone CNeuralNetwork from the encoder half of this deep
// autoencoder (clones of layers 0..(m_num_layers-1)/2), optionally appending
// a supervised output_layer, and copy the pre-trained encoder parameters in.
// NOTE(review): the first line of the signature (Doxygen line 171) was lost
// in extraction; presumably CNeuralNetwork*
// CDeepAutoencoder::convert_to_neural_network(...) — confirm.
172  CNeuralLayer* output_layer, float64_t sigma)
173 {
// NOTE(review): Doxygen line 174 was stripped — presumably the declaration of
// the `layers` container (a CDynamicObjectArray*); confirm against the repo.
175  for (int32_t i=0; i<=(m_num_layers-1)/2; i++)
176  {
177  CNeuralLayer* layer = (CNeuralLayer*)get_layer(i)->clone();
// NOTE(review): Doxygen line 178 was stripped here; confirm what it did.
179  layers->append_element(layer);
180  SG_UNREF(layer);	// the array now holds its own reference
181  }
182 
183  if (output_layer != NULL)
184  layers->append_element(output_layer);
185 
186  CNeuralNetwork* net = new CNeuralNetwork(layers);
187  net->quick_connect();
188  net->initialize(sigma);
189 
190  SGVector<float64_t> net_params = net->get_parameters();
191 
// Total parameter count of the encoder half: offset of the middle layer ...
192  int32_t len = m_index_offsets[(m_num_layers-1)/2]
// NOTE(review): the continuation of this expression (Doxygen line 193) was
// stripped — presumably "+ get_layer((m_num_layers-1)/2)->get_num_parameters();"
// so that len covers the middle layer too; confirm against the repository.
194 
// Copy the pre-trained encoder parameters into the new network.
195  for (int32_t i=0; i<len; i++)
196  net_params[i] = m_params[i];
197 
198  return net;
199 }
200 
// Base reconstruction error plus, when contraction is enabled, the
// contraction penalty of each encoding layer (contractive autoencoder).
// NOTE(review): the signature (Doxygen line 201) was lost in extraction;
// presumably float64_t CDeepAutoencoder::compute_error(
// SGMatrix<float64_t> targets) — confirm against the repository.
202 {
203  float64_t error = CNeuralNetwork::compute_error(targets);
204 
// Braceless if: it guards the entire for-loop below, so the contraction
// terms are only accumulated when the coefficient is non-zero.
205  if (m_contraction_coefficient != 0.0)
206 
207  for (int32_t i=1; i<=(m_num_layers-1)/2; i++)
208  error +=
209  get_layer(i)->compute_contraction_term(get_section(m_params,i));
210 
211  return error;
212 }
213 
// NOTE(review): this block is heavily damaged by extraction — the signature
// (Doxygen line 214), a statement before the loop (line 216) and the loop
// body (line 218) are all missing. From the surviving loop header it iterates
// over the encoding layers (1..(m_num_layers-1)/2); presumably
// set_contraction_coefficient(coeff), forwarding the coefficient to each
// encoding layer — confirm against the repository before relying on this.
215 {
217  for (int32_t i=1; i<=(m_num_layers-1)/2; i++)
219 }
220 
221 
222 template <class T>
223 SGVector<T> CDeepAutoencoder::get_section(SGVector<T> v, int32_t i)
224 {
225  return SGVector<T>(v.vector+m_index_offsets[i],
226  get_layer(i)->get_num_parameters(), false);
227 }
228 
// Sets default values for all pre-training hyperparameters and registers
// them (and m_sigma) with Shogun's parameter framework via SG_ADD so they
// are serialized and inspectable by name.
229 void CDeepAutoencoder::init()
230 {
231  m_sigma = 0.01;	// default std-dev for gaussian weight initialization
232 
// NOTE(review): Doxygen lines 233-251 and 254-270 were stripped by
// extraction — presumably the sizing and default-value assignments
// (set_const) for the remaining pt_* vectors registered below; only the
// pt_epsilon default survived. Confirm against the repository.
235 
238 
241 
244 
247 
250 
252  pt_epsilon.set_const(1e-5);
253 
256 
259 
262 
265 
268 
271 
// Register every per-layer pre-training hyperparameter vector; none of them
// participates in model selection (MS_NOT_AVAILABLE).
272  SG_ADD(&pt_noise_type, "pt_noise_type",
273  "Pre-training Noise Type", MS_NOT_AVAILABLE);
274  SG_ADD(&pt_noise_parameter, "pt_noise_parameter",
275  "Pre-training Noise Parameter", MS_NOT_AVAILABLE);
276  SG_ADD(&pt_contraction_coefficient, "pt_contraction_coefficient",
277  "Pre-training Contraction Coefficient", MS_NOT_AVAILABLE);
278  SG_ADD(&pt_optimization_method, "pt_optimization_method",
279  "Pre-training Optimization Method", MS_NOT_AVAILABLE);
280  SG_ADD(&pt_gd_mini_batch_size, "pt_gd_mini_batch_size",
281  "Pre-training Gradient Descent Mini-batch size", MS_NOT_AVAILABLE);
282  SG_ADD(&pt_max_num_epochs, "pt_max_num_epochs",
283  "Pre-training Max number of Epochs", MS_NOT_AVAILABLE);
284  SG_ADD(&pt_gd_learning_rate, "pt_gd_learning_rate",
285  "Pre-training Gradient descent learning rate", MS_NOT_AVAILABLE);
286  SG_ADD(&pt_gd_learning_rate_decay, "pt_gd_learning_rate_decay",
287  "Pre-training Gradient descent learning rate decay", MS_NOT_AVAILABLE);
288  SG_ADD(&pt_gd_momentum, "pt_gd_momentum",
289  "Pre-training Gradient Descent Momentum", MS_NOT_AVAILABLE);
290  SG_ADD(&pt_gd_error_damping_coeff, "pt_gd_error_damping_coeff",
291  "Pre-training Gradient Descent Error Damping Coeff", MS_NOT_AVAILABLE);
292  SG_ADD(&pt_epsilon, "pt_epsilon",
293  "Pre-training Epsilon", MS_NOT_AVAILABLE);
294  SG_ADD(&pt_l2_coefficient, "pt_l2_coefficient",
295  "Pre-training L2 regularization coeff", MS_NOT_AVAILABLE);
296  SG_ADD(&pt_l1_coefficient, "pt_l1_coefficient",
297  "Pre-training L1 regularization coeff", MS_NOT_AVAILABLE);
298 
299  SG_ADD(&m_sigma, "m_sigma", "Initialization Sigma", MS_NOT_AVAILABLE);
300 }

SHOGUN Machine Learning Toolbox - Documentation