SHOGUN 4.1.0
DeepBeliefNetwork.cpp
/*
 * Copyright (c) 2014, Shogun Toolbox Foundation
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *
 * 1. Redistributions of source code must retain the above copyright notice,
 * this list of conditions and the following disclaimer.
 *
 * 2. Redistributions in binary form must reproduce the above copyright notice,
 * this list of conditions and the following disclaimer in the documentation
 * and/or other materials provided with the distribution.
 *
 * 3. Neither the name of the copyright holder nor the names of its
 * contributors may be used to endorse or promote products derived from this
 * software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 *
 * Written (W) 2014 Khaled Nasr
 */

#include <shogun/neuralnets/DeepBeliefNetwork.h>

#ifdef HAVE_EIGEN3

#include <shogun/base/Parameter.h>
#include <shogun/mathematics/Math.h>
#include <shogun/mathematics/eigen3.h>
#include <shogun/lib/SGMatrix.h>
#include <shogun/lib/SGVector.h>
#include <shogun/lib/SGMatrixList.h>
#include <shogun/neuralnets/RBM.h>
#include <shogun/neuralnets/NeuralNetwork.h>
#include <shogun/neuralnets/NeuralInputLayer.h>
#include <shogun/neuralnets/NeuralLogisticLayer.h>

using namespace shogun;

CDeepBeliefNetwork::CDeepBeliefNetwork() : CSGObject()
{
    init();
}

CDeepBeliefNetwork::CDeepBeliefNetwork(
    int32_t num_visible_units, ERBMVisibleUnitType unit_type) : CSGObject()
{
    init();
    m_layer_sizes->append_element(num_visible_units);
    m_num_layers++;
    m_visible_units_type = unit_type;
}

CDeepBeliefNetwork::~CDeepBeliefNetwork()
{
    SG_UNREF(m_layer_sizes);
}

void CDeepBeliefNetwork::add_hidden_layer(int32_t num_units)
{
    m_layer_sizes->append_element(num_units);
    m_num_layers++;
}

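/* All biases and weights live in a single flat parameter vector (m_params):
 * for each layer the biases come first, followed (except for the top layer)
 * by the weights connecting it to the layer above. The *_index_offsets
 * vectors record where each block starts.
 */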
void CDeepBeliefNetwork::initialize_neural_network(float64_t sigma)
{
    m_bias_index_offsets = SGVector<int32_t>(m_num_layers);
    m_weights_index_offsets = SGVector<int32_t>(m_num_layers-1);

    m_num_params = 0;
    for (int32_t i=0; i<m_num_layers; i++)
    {
        m_bias_index_offsets[i] = m_num_params;
        m_num_params += m_layer_sizes->element(i);

        if (i<m_num_layers-1)
        {
            m_weights_index_offsets[i] = m_num_params;
            m_num_params += m_layer_sizes->element(i+1)*m_layer_sizes->element(i);
        }
    }

    m_params = SGVector<float64_t>(m_num_params);
    for (int32_t i=0; i<m_num_params; i++)
        m_params[i] = CMath::normal_random(0.0,sigma);

    pt_cd_num_steps = SGVector<int32_t>(m_num_layers-1);
    pt_cd_num_steps.set_const(1);

    pt_cd_persistent = SGVector<bool>(m_num_layers-1);
    pt_cd_persistent.set_const(true);

    pt_cd_sample_visible = SGVector<bool>(m_num_layers-1);
    pt_cd_sample_visible.set_const(false);

    pt_l2_coefficient = SGVector<float64_t>(m_num_layers-1);
    pt_l2_coefficient.set_const(0.0);

    pt_l1_coefficient = SGVector<float64_t>(m_num_layers-1);
    pt_l1_coefficient.set_const(0.0);

    pt_monitoring_interval = SGVector<int32_t>(m_num_layers-1);
    pt_monitoring_interval.set_const(10);

    pt_monitoring_method = SGVector<int32_t>(m_num_layers-1);
    pt_monitoring_method.set_const(RBMMM_RECONSTRUCTION_ERROR);

    pt_max_num_epochs = SGVector<int32_t>(m_num_layers-1);
    pt_max_num_epochs.set_const(1);

    pt_gd_mini_batch_size = SGVector<int32_t>(m_num_layers-1);
    pt_gd_mini_batch_size.set_const(0);

    pt_gd_learning_rate = SGVector<float64_t>(m_num_layers-1);
    pt_gd_learning_rate.set_const(0.1);

    pt_gd_learning_rate_decay = SGVector<float64_t>(m_num_layers-1);
    pt_gd_learning_rate_decay.set_const(1.0);

    pt_gd_momentum = SGVector<float64_t>(m_num_layers-1);
    pt_gd_momentum.set_const(0.9);
}

void CDeepBeliefNetwork::set_batch_size(int32_t batch_size)
{
    if (m_batch_size == batch_size) return;

    m_batch_size = batch_size;

    m_states = SGMatrixList<float64_t>(m_num_layers);

    for (int32_t i=0; i<m_num_layers; i++)
        m_states.set_matrix(i, SGMatrix<float64_t>(
            m_layer_sizes->element(i), m_batch_size));

    reset_chain();
}

void CDeepBeliefNetwork::pre_train(CDenseFeatures<float64_t>* features)
{
    for (int32_t k=0; k<m_num_layers-1; k++)
    {
        SG_INFO("Pre-training RBM %i\n",k);
        pre_train(k, features);
        SG_INFO("Finished pre-training RBM %i\n",k);
    }
}

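/* Greedy layer-wise pre-training: RBM k has the DBN's layer k as its visible
 * units and layer k+1 as its hidden units. Inputs for RBMs above the bottom
 * one are obtained by propagating the data up through the already-trained
 * layers (see transform()). The trained RBM's parameters are then copied
 * back into the DBN's parameter vector.
 */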
void CDeepBeliefNetwork::pre_train(int32_t index,
    CDenseFeatures< float64_t >* features)
{
    CRBM rbm(m_layer_sizes->element(index+1));
    if (index == 0)
        rbm.add_visible_group(m_layer_sizes->element(0), m_visible_units_type);
    else
        rbm.add_visible_group(m_layer_sizes->element(index), RBMVUT_BINARY);
    rbm.initialize_neural_network(m_sigma);

    rbm.cd_num_steps = pt_cd_num_steps[index];
    rbm.cd_persistent = pt_cd_persistent[index];
    rbm.cd_sample_visible = pt_cd_sample_visible[index];
    rbm.l2_coefficient = pt_l2_coefficient[index];
    rbm.l1_coefficient = pt_l1_coefficient[index];
    rbm.monitoring_interval = pt_monitoring_interval[index];
    rbm.monitoring_method = ERBMMonitoringMethod(pt_monitoring_method[index]);
    rbm.max_num_epochs = pt_max_num_epochs[index];
    rbm.gd_mini_batch_size = pt_gd_mini_batch_size[index];
    rbm.gd_learning_rate = pt_gd_learning_rate[index];
    rbm.gd_learning_rate_decay = pt_gd_learning_rate_decay[index];
    rbm.gd_momentum = pt_gd_momentum[index];

    if (index > 0)
    {
        CDenseFeatures<float64_t>* transformed_features =
            transform(features, index);
        rbm.train(transformed_features);
        SG_UNREF(transformed_features);
    }
    else
        rbm.train(features);

    // Copy the trained RBM's biases and weights into the DBN's parameters
    SGVector<float64_t> rbm_b = rbm.get_visible_bias();
    SGVector<float64_t> rbm_c = rbm.get_hidden_bias();
    SGMatrix<float64_t> rbm_w = rbm.get_weights();

    SGVector<float64_t> dbn_b = get_biases(index);
    SGVector<float64_t> dbn_c = get_biases(index+1);
    SGMatrix<float64_t> dbn_w = get_weights(index);

    for (int32_t i=0; i<dbn_b.vlen; i++)
        dbn_b[i] = rbm_b[i];

    for (int32_t i=0; i<dbn_c.vlen; i++)
        dbn_c[i] = rbm_c[i];

    for (int32_t i=0; i<dbn_w.num_rows*dbn_w.num_cols; i++)
        dbn_w[i] = rbm_w[i];
}

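/* Fine-tunes the whole DBN with the up-down (wake-sleep) algorithm
 * (Hinton, Osindero & Teh, 2006): the top two layers are treated as an RBM
 * trained with contrastive divergence, while the lower layers get separate
 * generative (down) and recognition (up) parameters, updated from the
 * wake- and sleep-phase statistics computed in wake_sleep().
 */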
void CDeepBeliefNetwork::train(CDenseFeatures<float64_t>* features)
{
    REQUIRE(features != NULL, "Invalid (NULL) feature pointer\n");
    REQUIRE(features->get_num_features()==m_layer_sizes->element(0),
        "Number of features (%i) must match the DBN's number of visible units "
        "(%i)\n", features->get_num_features(), m_layer_sizes->element(0));

    SGMatrix<float64_t> inputs = features->get_feature_matrix();

    int32_t training_set_size = inputs.num_cols;
    if (gd_mini_batch_size==0) gd_mini_batch_size = training_set_size;
    set_batch_size(gd_mini_batch_size);

    // Generative parameters live in m_params, recognition ones in rec_params
    SGVector<float64_t> rec_params(m_num_params);
    for (int32_t i=0; i<rec_params.vlen; i++)
        rec_params[i] = m_params[i];

    SGVector<float64_t> gradients(m_num_params);
    SGVector<float64_t> rec_gradients(m_num_params);
    gradients.zero();
    rec_gradients.zero();

    SGVector<float64_t> param_updates(m_num_params);
    SGVector<float64_t> rec_param_updates(m_num_params);
    param_updates.zero();
    rec_param_updates.zero();

    SGMatrixList<float64_t> sleep_states = m_states;
    SGMatrixList<float64_t> wake_states(m_num_layers);
    SGMatrixList<float64_t> psleep_states(m_num_layers);
    SGMatrixList<float64_t> pwake_states(m_num_layers);

    for (int32_t i=0; i<m_num_layers; i++)
    {
        wake_states.set_matrix(i, SGMatrix<float64_t>(
            m_layer_sizes->element(i), gd_mini_batch_size));
        psleep_states.set_matrix(i, SGMatrix<float64_t>(
            m_layer_sizes->element(i), gd_mini_batch_size));
        pwake_states.set_matrix(i, SGMatrix<float64_t>(
            m_layer_sizes->element(i), gd_mini_batch_size));
    }

    // The top two layers form an RBM whose parameters are a view into the
    // tail of the DBN's parameter vector
    CRBM top_rbm(m_layer_sizes->element(m_num_layers-1));
    if (m_num_layers > 2)
        top_rbm.add_visible_group(m_layer_sizes->element(m_num_layers-2), RBMVUT_BINARY);
    else
        top_rbm.add_visible_group(m_layer_sizes->element(0), m_visible_units_type);

    top_rbm.initialize_neural_network();
    top_rbm.m_params = SGVector<float64_t>(
        m_params.vector+m_bias_index_offsets[m_num_layers-2],
        top_rbm.get_num_parameters(), false);

    top_rbm.cd_num_steps = cd_num_steps;
    top_rbm.cd_persistent = false;
    top_rbm.set_batch_size(gd_mini_batch_size);

    float64_t alpha = gd_learning_rate;

    int32_t counter = 0;
    for (int32_t i=0; i<max_num_epochs; i++)
    {
        for (int32_t j=0; j < training_set_size; j += gd_mini_batch_size)
        {
            alpha = gd_learning_rate_decay*alpha;

            if (j+gd_mini_batch_size>training_set_size)
                j = training_set_size-gd_mini_batch_size;

            SGMatrix<float64_t> inputs_batch(inputs.matrix+j*inputs.num_rows,
                inputs.num_rows, gd_mini_batch_size, false);

            // Momentum look-ahead before computing the gradients
            for (int32_t k=0; k<m_num_params; k++)
            {
                m_params[k] += gd_momentum*param_updates[k];
                rec_params[k] += gd_momentum*rec_param_updates[k];
            }

            wake_sleep(inputs_batch, &top_rbm, sleep_states, wake_states,
                psleep_states, pwake_states, m_params,
                rec_params, gradients, rec_gradients);

            // Gradient descent step with momentum
            for (int32_t k=0; k<m_num_params; k++)
            {
                param_updates[k] = gd_momentum*param_updates[k]
                    -alpha*gradients[k];
                m_params[k] -= alpha*gradients[k];

                rec_param_updates[k] = gd_momentum*rec_param_updates[k]
                    -alpha*rec_gradients[k];
                rec_params[k] -= alpha*rec_gradients[k];
            }

            if (counter%monitoring_interval == 0)
            {
                // Monitor the mean squared reconstruction error over the batch
                SGMatrix<float64_t> reconstruction = sleep_states[0];
                float64_t error = 0;
                for (int32_t k=0; k<inputs_batch.num_rows*inputs_batch.num_cols; k++)
                    error += CMath::pow(reconstruction[k]-inputs_batch[k],2);

                error /= m_batch_size;

                SG_INFO("Epoch %i: reconstruction error = %f\n",i, error);
            }
            counter++;
        }
    }
}

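/* Propagates the features up through the network and returns the activations
 * of layer i (the last layer by default) as a new feature object.
 */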
CDenseFeatures<float64_t>* CDeepBeliefNetwork::transform(
    CDenseFeatures< float64_t >* features, int32_t i)
{
    if (i==-1)
        i = m_num_layers-1;

    set_batch_size(features->get_num_vectors());

    SGMatrix<float64_t> transformed_feature_matrix = features->get_feature_matrix();
    for (int32_t h=1; h<=i; h++)
    {
        SGMatrix<float64_t> m(m_layer_sizes->element(h), m_batch_size);
        up_step(h, m_params, transformed_feature_matrix, m, false);
        transformed_feature_matrix = m;
    }

    return new CDenseFeatures<float64_t>(transformed_feature_matrix);
}

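/* Draws samples from the model: Gibbs sampling alternates between the top
 * two layers (which form an RBM), then the result is propagated down to the
 * visible layer.
 */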
CDenseFeatures<float64_t>* CDeepBeliefNetwork::sample(
    int32_t num_gibbs_steps, int32_t batch_size)
{
    set_batch_size(batch_size);

    for (int32_t i=0; i<num_gibbs_steps; i++)
    {
        up_step(m_num_layers-1, m_params,
            m_states[m_num_layers-2], m_states[m_num_layers-1]);
        down_step(m_num_layers-2, m_params,
            m_states[m_num_layers-1], m_states[m_num_layers-2]);
    }

    for (int32_t i=m_num_layers-3; i>=0; i--)
        down_step(i, m_params, m_states[i+1], m_states[i]);

    return new CDenseFeatures<float64_t>(m_states[0]);
}

void CDeepBeliefNetwork::reset_chain()
{
    // Randomize the states of the top RBM's visible layer (the Gibbs chain)
    SGMatrix<float64_t> s = m_states[m_num_layers-2];

    for (int32_t i=0; i<s.num_rows*s.num_cols; i++)
        s[i] = CMath::random(0.0,1.0) > 0.5;
}

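/* Converts the DBN into a feed-forward CNeuralNetwork: an input layer
 * followed by logistic layers carrying the DBN's (generative) biases and
 * weights, optionally topped with a user-supplied output layer.
 */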
CNeuralNetwork* CDeepBeliefNetwork::convert_to_neural_network(
    CNeuralLayer* output_layer, float64_t sigma)
{
    CDynamicObjectArray* layers = new CDynamicObjectArray();
    layers->append_element(new CNeuralInputLayer(m_layer_sizes->element(0)));

    for (int32_t i=1; i<m_num_layers; i++)
        layers->append_element(new CNeuralLogisticLayer(m_layer_sizes->element(i)));

    if (output_layer!=NULL)
        layers->append_element(output_layer);

    CNeuralNetwork* network = new CNeuralNetwork(layers);

    network->quick_connect();
    network->initialize_neural_network(sigma);

    for (int32_t i=1; i<m_num_layers; i++)
    {
        SGVector<float64_t> b = get_biases(i);
        SGMatrix<float64_t> W = get_weights(i-1);

        for (int32_t j=0; j<b.vlen; j++)
            network->m_params[j+network->m_index_offsets[i]] = b[j];

        for (int32_t j=0; j<W.num_rows*W.num_cols; j++)
            network->m_params[j+network->m_index_offsets[i]+b.vlen] = W[j];
    }

    return network;
}

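/* Generative (top-down) pass: computes the states of layer 'index' from the
 * layer above it, as activation = W'*input + bias, followed by a logistic
 * nonlinearity (or a softmax for softmax visible units at index 0) and
 * optional binary sampling.
 */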
void CDeepBeliefNetwork::down_step(int32_t index, SGVector< float64_t > params,
    SGMatrix< float64_t > input, SGMatrix< float64_t > result, bool sample_states)
{
    typedef Eigen::Map<Eigen::MatrixXd> EMatrix;
    typedef Eigen::Map<Eigen::VectorXd> EVector;

    EMatrix In(input.matrix, input.num_rows, input.num_cols);
    EMatrix Out(result.matrix, result.num_rows, result.num_cols);
    EVector B(get_biases(index,params).vector, m_layer_sizes->element(index));

    Out.colwise() = B;

    if (index < m_num_layers-1)
    {
        EMatrix W(get_weights(index,params).matrix,
            m_layer_sizes->element(index+1), m_layer_sizes->element(index));
        Out += W.transpose()*In;
    }

    if (index > 0 || (index==0 && m_visible_units_type==RBMVUT_BINARY))
    {
        int32_t len = m_layer_sizes->element(index)*m_batch_size;
        for (int32_t i=0; i<len; i++)
            result[i] = 1.0/(1.0+CMath::exp(-1.0*result[i]));
    }

    if (index == 0 && m_visible_units_type==RBMVUT_SOFTMAX)
    {
        // Numerically stable column-wise softmax (log-sum-exp trick)
        float64_t max = Out.maxCoeff();

        for (int32_t j=0; j<m_batch_size; j++)
        {
            float64_t sum = 0;
            for (int32_t i=0; i<m_layer_sizes->element(0); i++)
                sum += CMath::exp(Out(i,j)-max);

            float64_t normalizer = CMath::log(sum);
            for (int32_t k=0; k<m_layer_sizes->element(0); k++)
                Out(k,j) = CMath::exp(Out(k,j)-max-normalizer);
        }
    }

    if (sample_states && index>0)
    {
        int32_t len = m_layer_sizes->element(index)*m_batch_size;
        for (int32_t i=0; i<len; i++)
            result[i] = CMath::random(0.0,1.0) < result[i];
    }
}

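/* Recognition (bottom-up) pass: computes the states of layer 'index' from
 * the layer below it, as activation = W*input + bias, followed by a logistic
 * nonlinearity and optional binary sampling.
 */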
void CDeepBeliefNetwork::up_step(int32_t index, SGVector< float64_t > params,
    SGMatrix< float64_t > input, SGMatrix< float64_t > result, bool sample_states)
{
    typedef Eigen::Map<Eigen::MatrixXd> EMatrix;
    typedef Eigen::Map<Eigen::VectorXd> EVector;

    EMatrix In(input.matrix, input.num_rows, input.num_cols);
    EMatrix Out(result.matrix, result.num_rows, result.num_cols);
    EVector C(get_biases(index, params).vector, m_layer_sizes->element(index));

    Out.colwise() = C;

    if (index>0)
    {
        EMatrix W(get_weights(index-1, params).matrix,
            m_layer_sizes->element(index), m_layer_sizes->element(index-1));
        Out += W*In;
    }

    int32_t len = result.num_rows*result.num_cols;
    for (int32_t i=0; i<len; i++)
        result[i] = 1.0/(1.0+CMath::exp(-1.0*result[i]));

    if (sample_states && index>0)
    {
        for (int32_t i=0; i<len; i++)
            result[i] = CMath::random(0.0,1.0) < result[i];
    }
}

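/* One iteration of the up-down (wake-sleep) algorithm on a mini-batch:
 * a wake phase driven by the data through the recognition parameters,
 * contrastive divergence in the top RBM, a sleep phase generated down from
 * the top RBM's visible state, and prediction passes from which the
 * gradients for the generative and recognition parameters are formed.
 */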
void CDeepBeliefNetwork::wake_sleep(SGMatrix<float64_t> data, CRBM* top_rbm,
    SGMatrixList<float64_t> sleep_states, SGMatrixList<float64_t> wake_states,
    SGMatrixList<float64_t> psleep_states, SGMatrixList<float64_t> pwake_states,
    SGVector<float64_t> gen_params,
    SGVector<float64_t> rec_params,
    SGVector<float64_t> gen_gradients,
    SGVector<float64_t> rec_gradients)
{
    typedef Eigen::Map<Eigen::MatrixXd> EMatrix;
    typedef Eigen::Map<Eigen::VectorXd> EVector;

    // Wake phase
    for (int32_t i=0; i<data.num_rows*data.num_cols; i++)
        wake_states[0][i] = data[i];

    for (int32_t i=1; i<m_num_layers-1; i++)
        up_step(i, rec_params, wake_states[i-1], wake_states[i]);

    // Contrastive divergence in the top RBM
    SGVector<float64_t> top_rbm_gradients(
        gen_gradients.vector+m_bias_index_offsets[m_num_layers-2],
        top_rbm->get_num_parameters(), false);
    top_rbm->contrastive_divergence(wake_states[m_num_layers-2], top_rbm_gradients);

    // Sleep phase
    sleep_states.set_matrix(m_num_layers-2, top_rbm->visible_state);
    for (int32_t i=m_num_layers-3; i>=0; i--)
        down_step(i, gen_params, sleep_states[i+1], sleep_states[i]);

    // Predictions
    for (int32_t i=1; i<m_num_layers-1; i++)
        up_step(i, rec_params, sleep_states[i-1], psleep_states[i]);
    for (int32_t i=0; i<m_num_layers-2; i++)
        down_step(i, gen_params, wake_states[i+1], pwake_states[i]);

    // Gradients for generative parameters
    for (int32_t i=0; i<m_num_layers-2; i++)
    {
        EMatrix wake_i(wake_states[i].matrix,
            wake_states[i].num_rows, wake_states[i].num_cols);
        EMatrix wake_i_plus_one(wake_states[i+1].matrix,
            wake_states[i+1].num_rows, wake_states[i+1].num_cols);
        EMatrix pwake_i(pwake_states[i].matrix,
            pwake_states[i].num_rows, pwake_states[i].num_cols);

        EMatrix WG_gen(get_weights(i,gen_gradients).matrix,
            m_layer_sizes->element(i+1), m_layer_sizes->element(i));
        EVector BG_gen(get_biases(i,gen_gradients).vector, m_layer_sizes->element(i));

        pwake_i = pwake_i - wake_i;
        BG_gen = pwake_i.rowwise().sum()/m_batch_size;
        WG_gen = wake_i_plus_one*pwake_i.transpose()/m_batch_size;
    }

    // Gradients for recognition parameters
    for (int32_t i=1; i<m_num_layers-1; i++)
    {
        EMatrix sleep_i(sleep_states[i].matrix,
            sleep_states[i].num_rows, sleep_states[i].num_cols);
        EMatrix psleep_i(psleep_states[i].matrix,
            psleep_states[i].num_rows, psleep_states[i].num_cols);
        EMatrix sleep_i_minus_one(sleep_states[i-1].matrix,
            sleep_states[i-1].num_rows, sleep_states[i-1].num_cols);

        EMatrix WG_rec(get_weights(i-1,rec_gradients).matrix,
            m_layer_sizes->element(i), m_layer_sizes->element(i-1));
        EVector BG_rec(get_biases(i,rec_gradients).vector, m_layer_sizes->element(i));

        psleep_i = psleep_i - sleep_i;
        BG_rec = psleep_i.rowwise().sum()/m_batch_size;
        WG_rec = psleep_i*sleep_i_minus_one.transpose()/m_batch_size;
    }
}

SGMatrix<float64_t> CDeepBeliefNetwork::get_weights(int32_t index,
    SGVector<float64_t> p)
{
    if (p.vlen==0)
        return SGMatrix<float64_t>(m_params.vector+m_weights_index_offsets[index],
            m_layer_sizes->element(index+1), m_layer_sizes->element(index), false);
    else
        return SGMatrix<float64_t>(p.vector+m_weights_index_offsets[index],
            m_layer_sizes->element(index+1), m_layer_sizes->element(index), false);
}

SGVector<float64_t> CDeepBeliefNetwork::get_biases(int32_t index,
    SGVector<float64_t> p)
{
    if (p.vlen==0)
        return SGVector<float64_t>(m_params.vector+m_bias_index_offsets[index],
            m_layer_sizes->element(index), false);
    else
        return SGVector<float64_t>(p.vector+m_bias_index_offsets[index],
            m_layer_sizes->element(index), false);
}

void CDeepBeliefNetwork::init()
{
    cd_num_steps = 1;
    monitoring_interval = 10;

    gd_mini_batch_size = 0;
    max_num_epochs = 1;
    gd_learning_rate = 0.1;
    gd_learning_rate_decay = 1.0;
    gd_momentum = 0.9;

    m_visible_units_type = RBMVUT_BINARY;
    m_num_layers = 0;
    m_layer_sizes = new CDynamicArray<int32_t>();
    m_batch_size = 0;
    m_num_params = 0;
    m_sigma = 0.01;

    SG_ADD((machine_int_t*)&m_visible_units_type, "visible_units_type",
        "Type of the visible units", MS_NOT_AVAILABLE);
    SG_ADD(&m_num_layers, "num_layers",
        "Number of layers", MS_NOT_AVAILABLE);
    SG_ADD((CSGObject**)&m_layer_sizes, "layer_sizes",
        "Size of each hidden layer", MS_NOT_AVAILABLE);

    SG_ADD(&m_params, "params",
        "Parameters of the network", MS_NOT_AVAILABLE);
    SG_ADD(&m_num_params, "num_params",
        "Number of parameters", MS_NOT_AVAILABLE);
    SG_ADD(&m_bias_index_offsets, "bias_index_offsets",
        "Index offsets of the biases", MS_NOT_AVAILABLE);
    SG_ADD(&m_weights_index_offsets, "weights_index_offsets",
        "Index offsets of the weights", MS_NOT_AVAILABLE);

    SG_ADD(&pt_cd_num_steps, "pt_cd_num_steps",
        "Pre-training Number of CD Steps", MS_NOT_AVAILABLE);
    SG_ADD(&pt_cd_persistent, "pt_cd_persistent",
        "Pre-training Persistent CD", MS_NOT_AVAILABLE);
    SG_ADD(&pt_cd_sample_visible, "pt_cd_sample_visible",
        "Pre-training CD Sample Visible", MS_NOT_AVAILABLE);
    SG_ADD(&pt_l2_coefficient, "pt_l2_coefficient",
        "Pre-training L2 regularization coeff", MS_NOT_AVAILABLE);
    SG_ADD(&pt_l1_coefficient, "pt_l1_coefficient",
        "Pre-training L1 regularization coeff", MS_NOT_AVAILABLE);
    SG_ADD(&pt_monitoring_interval, "pt_monitoring_interval",
        "Pre-training Monitoring Interval", MS_NOT_AVAILABLE);
    SG_ADD(&pt_monitoring_method, "pt_monitoring_method",
        "Pre-training Monitoring Method", MS_NOT_AVAILABLE);
    SG_ADD(&pt_gd_mini_batch_size, "pt_gd_mini_batch_size",
        "Pre-training Gradient Descent Mini-batch size", MS_NOT_AVAILABLE);
    SG_ADD(&pt_max_num_epochs, "pt_max_num_epochs",
        "Pre-training Max number of Epochs", MS_NOT_AVAILABLE);
    SG_ADD(&pt_gd_learning_rate, "pt_gd_learning_rate",
        "Pre-training Gradient descent learning rate", MS_NOT_AVAILABLE);
    SG_ADD(&pt_gd_learning_rate_decay, "pt_gd_learning_rate_decay",
        "Pre-training Gradient descent learning rate decay", MS_NOT_AVAILABLE);
    SG_ADD(&pt_gd_momentum, "pt_gd_momentum",
        "Pre-training Gradient Descent Momentum", MS_NOT_AVAILABLE);

    SG_ADD(&cd_num_steps, "cd_num_steps", "Number of CD Steps", MS_NOT_AVAILABLE);
    SG_ADD(&monitoring_interval, "monitoring_interval",
        "Monitoring Interval", MS_NOT_AVAILABLE);

    SG_ADD(&gd_mini_batch_size, "gd_mini_batch_size",
        "Gradient Descent Mini-batch size", MS_NOT_AVAILABLE);
    SG_ADD(&max_num_epochs, "max_num_epochs",
        "Max number of Epochs", MS_NOT_AVAILABLE);
    SG_ADD(&gd_learning_rate, "gd_learning_rate",
        "Gradient descent learning rate", MS_NOT_AVAILABLE);
    SG_ADD(&gd_learning_rate_decay, "gd_learning_rate_decay",
        "Gradient descent learning rate decay", MS_NOT_AVAILABLE);
    SG_ADD(&gd_momentum, "gd_momentum",
        "Gradient Descent Momentum", MS_NOT_AVAILABLE);

    SG_ADD(&m_sigma, "m_sigma", "Initialization Sigma", MS_NOT_AVAILABLE);
}

#endif
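
A minimal usage sketch (not part of the listed file), assuming the Shogun 4.x C++ API shown above; the layer sizes, data dimensions and step counts are illustrative:

#include <shogun/base/init.h>
#include <shogun/features/DenseFeatures.h>
#include <shogun/neuralnets/DeepBeliefNetwork.h>

using namespace shogun;

int main()
{
    init_shogun_with_defaults();

    // A DBN with 784 binary visible units and two hidden layers
    CDeepBeliefNetwork* dbn = new CDeepBeliefNetwork(784, RBMVUT_BINARY);
    dbn->add_hidden_layer(256);
    dbn->add_hidden_layer(64);
    dbn->initialize_neural_network();

    // 'data' is a (num_visible_units x num_examples) matrix, filled elsewhere
    SGMatrix<float64_t> data(784, 100);
    CDenseFeatures<float64_t>* features = new CDenseFeatures<float64_t>(data);

    dbn->pre_train(features); // greedy layer-wise RBM pre-training
    dbn->train(features);     // wake-sleep fine-tuning

    // Draw 10 samples using 100 Gibbs steps
    CDenseFeatures<float64_t>* samples = dbn->sample(100, 10);

    SG_UNREF(samples);
    SG_UNREF(features);
    SG_UNREF(dbn);
    exit_shogun();
    return 0;
}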