SHOGUN  4.2.0
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Modules Pages
DeepBeliefNetwork.cpp
Go to the documentation of this file.
1 /*
2  * Copyright (c) 2014, Shogun Toolbox Foundation
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions are met:
7 
8  * 1. Redistributions of source code must retain the above copyright notice,
9  * this list of conditions and the following disclaimer.
10  *
11  * 2. Redistributions in binary form must reproduce the above copyright notice,
12  * this list of conditions and the following disclaimer in the documentation
13  * and/or other materials provided with the distribution.
14  *
15  * 3. Neither the name of the copyright holder nor the names of its
16  * contributors may be used to endorse or promote products derived from this
17  * software without specific prior written permission.
18 
19  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
20  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22  * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
23  * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
24  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
25  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
26  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
27  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
28  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29  * POSSIBILITY OF SUCH DAMAGE.
30  *
31  * Written (W) 2014 Khaled Nasr
32  */
33 
35 
36 
37 #include <shogun/base/Parameter.h>
40 #include <shogun/lib/SGMatrix.h>
41 #include <shogun/lib/SGVector.h>
47 
48 using namespace shogun;
49 
51 {
52  init();
53 }
54 
56  int32_t num_visible_units, ERBMVisibleUnitType unit_type) : CSGObject()
57 {
58  init();
59  m_layer_sizes->append_element(num_visible_units);
60  m_num_layers++;
61  m_visible_units_type = unit_type;
62 }
63 
65 {
67 }
68 
69 void CDeepBeliefNetwork::add_hidden_layer(int32_t num_units)
70 {
71  m_layer_sizes->append_element(num_units);
72  m_num_layers++;
73 }
74 
76 {
79 
80  m_num_params = 0;
81  for (int32_t i=0; i<m_num_layers; i++)
82  {
84  m_num_params += m_layer_sizes->element(i);
85 
86  if (i<m_num_layers-1)
87  {
88  m_weights_index_offsets[i] = m_num_params;
89  m_num_params += m_layer_sizes->element(i+1)*m_layer_sizes->element(i);
90  }
91  }
92 
94  for (int32_t i=0; i<m_num_params; i++)
95  m_params[i] = CMath::normal_random(0.0,sigma);
96 
97  pt_cd_num_steps = SGVector<int32_t>(m_num_layers-1);
99 
100  pt_cd_persistent = SGVector<bool>(m_num_layers-1);
102 
103  pt_cd_sample_visible = SGVector<bool>(m_num_layers-1);
105 
106  pt_l2_coefficient = SGVector<float64_t>(m_num_layers-1);
108 
109  pt_l1_coefficient = SGVector<float64_t>(m_num_layers-1);
111 
112  pt_monitoring_interval = SGVector<int32_t>(m_num_layers-1);
114 
115  pt_monitoring_method = SGVector<int32_t>(m_num_layers-1);
117 
118  pt_max_num_epochs = SGVector<int32_t>(m_num_layers-1);
120 
121  pt_gd_mini_batch_size = SGVector<int32_t>(m_num_layers-1);
123 
124  pt_gd_learning_rate = SGVector<float64_t>(m_num_layers-1);
126 
129 
130  pt_gd_momentum = SGVector<float64_t>(m_num_layers-1);
132 }
133 
// Changes the batch size the network works with.
//
// Does nothing when batch_size already equals m_batch_size. Otherwise the new
// value is stored and reset_chain() is called.
//
// batch_size: the new batch size
//
// NOTE(review): the listing's embedded numbering skips 140 and 143, so the
// statement before the loop and the loop's own body are missing from this
// copy - confirm against the upstream file what is done per layer.
134 void CDeepBeliefNetwork::set_batch_size(int32_t batch_size)
135 {
// Nothing to do when the size is unchanged.
136  if (m_batch_size == batch_size) return;
137 
138  m_batch_size = batch_size;
139 
141 
// Visits every layer index; the per-layer statement (listing line 143) is absent here.
142  for (int32_t i=0; i<m_num_layers; i++)
144 
145  reset_chain();
146 }
147 
149 {
150  for (int32_t k=0; k<m_num_layers-1; k++)
151  {
152  SG_INFO("Pre-training RBM %i\n",k);
153  pre_train(k, features);
154  SG_INFO("Finished pre-training RBM %i\n",k);
155  }
156 }
157 
// Pre-trains a single RBM whose visible side is layer `index` and whose
// hidden side is layer `index+1`, then copies what it learned into the
// vectors/matrix returned by get_biases() and get_weights() for those layers
// (presumably views into this network's parameters - confirm).
//
// index:    position of the RBM's visible layer in m_layer_sizes
// features: training data; used as-is when index == 0, otherwise first
//           passed through transform(features, index)
//
// NOTE(review): the listing's embedded numbering skips 163, so the statement
// belonging to the `index == 0` branch is missing from this copy - confirm
// against the upstream file.
158 void CDeepBeliefNetwork::pre_train(int32_t index,
159  CDenseFeatures< float64_t >* features)
160 {
// The RBM gets as many hidden units as layer index+1 has units.
161  CRBM rbm(m_layer_sizes->element(index+1));
162  if (index == 0)
164  else
// For index > 0 the visible group is binary and sized like layer `index`.
165  rbm.add_visible_group(m_layer_sizes->element(index), RBMVUT_BINARY);
166  rbm.initialize_neural_network(m_sigma);
167 
// Hand the per-RBM pre-training settings stored at position `index` to the RBM.
168  rbm.cd_num_steps = pt_cd_num_steps[index];
169  rbm.cd_persistent = pt_cd_persistent[index];
170  rbm.cd_sample_visible = pt_cd_sample_visible[index];
171  rbm.l2_coefficient = pt_l2_coefficient[index];
172  rbm.l1_coefficient = pt_l1_coefficient[index];
173  rbm.monitoring_interval = pt_monitoring_interval[index];
// The monitoring method is stored as int32_t and converted back to the enum here.
174  rbm.monitoring_method = ERBMMonitoringMethod(pt_monitoring_method[index]);
175  rbm.max_num_epochs = pt_max_num_epochs[index];
176  rbm.gd_mini_batch_size = pt_gd_mini_batch_size[index];
177  rbm.gd_learning_rate = pt_gd_learning_rate[index];
178  rbm.gd_learning_rate_decay = pt_gd_learning_rate_decay[index];
179  rbm.gd_momentum = pt_gd_momentum[index];
180 
// RBMs above the first one are trained on the transformed features; the
// temporary features object is released right after training.
181  if (index > 0)
182  {
183  CDenseFeatures<float64_t>* transformed_features =
184  transform(features, index);
185  rbm.train(transformed_features);
186  SG_UNREF(transformed_features);
187  }
188  else
189  rbm.train(features);
190 
// Parameters learned by the RBM.
191  SGVector<float64_t> rbm_b = rbm.get_visible_bias();
192  SGVector<float64_t> rbm_c = rbm.get_hidden_bias();
193  SGMatrix<float64_t> rbm_w = rbm.get_weights();
194 
// Matching destinations in this network: biases of layer `index`, biases of
// layer `index+1`, and the weights between them.
195  SGVector<float64_t> dbn_b = get_biases(index);
196  SGVector<float64_t> dbn_c = get_biases(index+1);
197  SGMatrix<float64_t> dbn_w = get_weights(index);
198 
// Element-wise copies; the loop bounds come from the destination sizes.
199  for (int32_t i=0; i<dbn_b.vlen; i++)
200  dbn_b[i] = rbm_b[i];
201 
202  for (int32_t i=0; i<dbn_c.vlen; i++)
203  dbn_c[i] = rbm_c[i];
204 
205  for (int32_t i=0; i<dbn_w.num_rows*dbn_w.num_cols; i++)
206  dbn_w[i] = rbm_w[i];
207 }
208 
210 {
211  REQUIRE(features != NULL, "Invalid (NULL) feature pointer\n");
213  "Number of features (%i) must match the DBN's number of visible units "
214  "(%i)\n", features->get_num_features(), m_layer_sizes->element(0));
215 
216  SGMatrix<float64_t> inputs = features->get_feature_matrix();
217 
218  int32_t training_set_size = inputs.num_cols;
219  if (gd_mini_batch_size==0) gd_mini_batch_size = training_set_size;
221 
222  SGVector<float64_t> rec_params(m_num_params);
223  for (int32_t i=0; i<rec_params.vlen; i++)
224  rec_params[i] = m_params[i];
225 
227  SGVector<float64_t> rec_gradients(m_num_params);
228  gradients.zero();
229  rec_gradients.zero();
230 
231  SGVector<float64_t> param_updates(m_num_params);
232  SGVector<float64_t> rec_param_updates(m_num_params);
233  param_updates.zero();
234  rec_param_updates.zero();
235 
236  SGMatrixList<float64_t> sleep_states = m_states;
238  SGMatrixList<float64_t> psleep_states(m_num_layers);
240 
241  for (int32_t i=0; i<m_num_layers; i++)
242  {
246  }
247 
248  CRBM top_rbm(m_layer_sizes->element(m_num_layers-1));
249  if (m_num_layers > 2)
250  top_rbm.add_visible_group(m_layer_sizes->element(m_num_layers-2), RBMVUT_BINARY);
251  else
252  top_rbm.add_visible_group(m_layer_sizes->element(0), m_visible_units_type);
253 
254  top_rbm.initialize_neural_network();
255  top_rbm.m_params = SGVector<float64_t>(
256  m_params.vector+m_bias_index_offsets[m_num_layers-2],
257  top_rbm.get_num_parameters(), false);
258 
259  top_rbm.cd_num_steps = cd_num_steps;
260  top_rbm.cd_persistent = false;
261  top_rbm.set_batch_size(gd_mini_batch_size);
262 
263  float64_t alpha = gd_learning_rate;
264 
265  int32_t counter = 0;
266  for (int32_t i=0; i<max_num_epochs; i++)
267  {
268  for (int32_t j=0; j < training_set_size; j += gd_mini_batch_size)
269  {
270  alpha = gd_learning_rate_decay*alpha;
271 
272  if (j+gd_mini_batch_size>training_set_size)
273  j = training_set_size-gd_mini_batch_size;
274 
275  SGMatrix<float64_t> inputs_batch(inputs.matrix+j*inputs.num_rows,
276  inputs.num_rows, gd_mini_batch_size, false);
277 
278  for (int32_t k=0; k<m_num_params; k++)
279  {
280  m_params[k] += gd_momentum*param_updates[k];
281  rec_params[k] += gd_momentum*rec_param_updates[k];
282  }
283 
284  wake_sleep(inputs_batch, &top_rbm, sleep_states, wake_states,
285  psleep_states, pwake_states, m_params,
286  rec_params, gradients, rec_gradients);
287 
288  for (int32_t k=0; k<m_num_params; k++)
289  {
290  param_updates[k] = gd_momentum*param_updates[k]
291  -alpha*gradients[k];
292  m_params[k] -= alpha*gradients[k];
293 
294  rec_param_updates[k] = gd_momentum*rec_param_updates[k]
295  -alpha*rec_gradients[k];
296  rec_params[k] -= alpha*rec_gradients[k];
297  }
298 
299  if (counter%monitoring_interval == 0)
300  {
301  SGMatrix<float64_t> reconstruction = sleep_states[0];
302  float64_t error = 0;
303  for (int32_t k=0; k<inputs_batch.num_rows*inputs_batch.num_cols; k++)
304  error += CMath::pow(reconstruction[k]-inputs_batch[k],2);
305 
306  error /= m_batch_size;
307 
308  SG_INFO("Epoch %i: reconstruction Error = %f\n",i, error);
309  }
310  counter++;
311  }
312  }
313 }
314 
316  CDenseFeatures< float64_t >* features, int32_t i)
317 {
318  if (i==-1)
319  i = m_num_layers-1;
320 
321  SGMatrix<float64_t> transformed_feature_matrix = features->get_feature_matrix();
322  for (int32_t h=1; h<=i; h++)
323  {
325  up_step(h, m_params, transformed_feature_matrix, m, false);
326  transformed_feature_matrix = m;
327  }
328 
329  return new CDenseFeatures<float64_t>(transformed_feature_matrix);
330 }
331 
333  int32_t num_gibbs_steps, int32_t batch_size)
334 {
335  set_batch_size(batch_size);
336 
337  for (int32_t i=0; i<num_gibbs_steps; i++)
338  {
343  }
344 
345  for (int32_t i=m_num_layers-3; i>=0; i--)
346  down_step(i, m_params, m_states[i+1], m_states[i]);
347 
348  return new CDenseFeatures<float64_t>(m_states[0]);
349 }
350 
352 {
354 
355  for (int32_t i=0; i<s.num_rows*s.num_cols; i++)
356  s[i] = CMath::random(0.0,1.0) > 0.5;
357 }
358 
360  CNeuralLayer* output_layer, float64_t sigma)
361 {
363 
365 
366  for (int32_t i=1; i<m_num_layers; i++)
368 
369  if (output_layer!=NULL)
370  layers->append_element(output_layer);
371 
372  CNeuralNetwork* network = new CNeuralNetwork(layers);
373 
374  network->quick_connect();
375  network->initialize_neural_network(sigma);
376 
377  for (int32_t i=1; i<m_num_layers; i++)
378  {
381 
382  for (int32_t j=0; j<b.vlen; j++)
383  network->m_params[j+network->m_index_offsets[i]] = b[j];
384 
385  for (int32_t j=0; j<W.num_rows*W.num_cols; j++)
386  network->m_params[j+network->m_index_offsets[i]+b.vlen] = W[j];
387  }
388 
389  return network;
390 }
391 
393  SGMatrix< float64_t > input, SGMatrix< float64_t > result, bool sample_states)
394 {
395  typedef Eigen::Map<Eigen::MatrixXd> EMatrix;
396  typedef Eigen::Map<Eigen::VectorXd> EVector;
397 
398  EMatrix In(input.matrix, input.num_rows, input.num_cols);
399  EMatrix Out(result.matrix, result.num_rows, result.num_cols);
400  EVector B(get_biases(index,params).vector, m_layer_sizes->element(index));
401 
402  Out.colwise() = B;
403 
404  if (index < m_num_layers-1)
405  {
406  EMatrix W(get_weights(index,params).matrix,
407  m_layer_sizes->element(index+1), m_layer_sizes->element(index));
408  Out += W.transpose()*In;
409  }
410 
411  if (index > 0 || (index==0 && m_visible_units_type==RBMVUT_BINARY))
412  {
413  int32_t len = m_layer_sizes->element(index)*m_batch_size;
414  for (int32_t i=0; i<len; i++)
415  result[i] = 1.0/(1.0+CMath::exp(-1.0*result[i]));
416  }
417 
418  if (index == 0 && m_visible_units_type==RBMVUT_SOFTMAX)
419  {
420  float64_t max = Out.maxCoeff();
421 
422  for (int32_t j=0; j<m_batch_size; j++)
423  {
424  float64_t sum = 0;
425  for (int32_t i=0; i<m_layer_sizes->element(0); i++)
426  sum += CMath::exp(Out(i,j)-max);
427 
428  float64_t normalizer = CMath::log(sum);
429  for (int32_t k=0; k<m_layer_sizes->element(0); k++)
430  Out(k,j) = CMath::exp(Out(k,j)-max-normalizer);
431  }
432  }
433 
434  if (sample_states && index>0)
435  {
436  int32_t len = m_layer_sizes->element(index)*m_batch_size;
437  for (int32_t i=0; i<len; i++)
438  result[i] = CMath::random(0.0,1.0) < result[i];
439  }
440 }
441 
443  SGMatrix< float64_t > input, SGMatrix< float64_t > result, bool sample_states)
444 {
445  typedef Eigen::Map<Eigen::MatrixXd> EMatrix;
446  typedef Eigen::Map<Eigen::VectorXd> EVector;
447 
448  EMatrix In(input.matrix, input.num_rows, input.num_cols);
449  EMatrix Out(result.matrix, result.num_rows, result.num_cols);
450  EVector C(get_biases(index, params).vector, m_layer_sizes->element(index));
451 
452  Out.colwise() = C;
453 
454  if (index>0)
455  {
456  EMatrix W(get_weights(index-1, params).matrix,
457  m_layer_sizes->element(index), m_layer_sizes->element(index-1));
458  Out += W*In;
459  }
460 
461  int32_t len = result.num_rows*result.num_cols;
462  for (int32_t i=0; i<len; i++)
463  result[i] = 1.0/(1.0+CMath::exp(-1.0*result[i]));
464 
465  if (sample_states && index>0)
466  {
467  for (int32_t i=0; i<len; i++)
468  result[i] = CMath::random(0.0,1.0) < result[i];
469  }
470 }
471 
473  SGMatrixList<float64_t> sleep_states, SGMatrixList<float64_t> wake_states,
474  SGMatrixList<float64_t> psleep_states, SGMatrixList<float64_t> pwake_states,
475  SGVector<float64_t> gen_params,
476  SGVector<float64_t> rec_params,
477  SGVector<float64_t> gen_gradients,
478  SGVector<float64_t> rec_gradients)
479 {
480  typedef Eigen::Map<Eigen::MatrixXd> EMatrix;
481  typedef Eigen::Map<Eigen::VectorXd> EVector;
482 
483  // Wake phase
484  for (int32_t i=0; i<data.num_rows*data.num_cols; i++)
485  wake_states[0][i] = data[i];
486 
487  for (int32_t i=1; i<m_num_layers-1; i++)
488  up_step(i, rec_params, wake_states[i-1], wake_states[i]);
489 
490  // Contrastive divergence in the top RBM
491  SGVector<float64_t> top_rbm_gradients(
492  gen_gradients.vector+m_bias_index_offsets[m_num_layers-2],
493  top_rbm->get_num_parameters(), false);
494  top_rbm->contrastive_divergence(wake_states[m_num_layers-2], top_rbm_gradients);
495 
496  // Sleep phase
497  sleep_states.set_matrix(m_num_layers-2, top_rbm->visible_state);
498  for (int32_t i=m_num_layers-3; i>=0; i--)
499  down_step(i, gen_params, sleep_states[i+1], sleep_states[i]);
500 
501  // Predictions
502  for (int32_t i=1; i<m_num_layers-1; i++)
503  up_step(i, rec_params, sleep_states[i-1], psleep_states[i]);
504  for (int32_t i=0; i<m_num_layers-2; i++)
505  down_step(i, gen_params, wake_states[i+1], pwake_states[i]);
506 
507  // Gradients for generative parameters
508  for (int32_t i=0; i<m_num_layers-2; i++)
509  {
510  EMatrix wake_i(wake_states[i].matrix,
511  wake_states[i].num_rows, wake_states[i].num_cols);
512  EMatrix wake_i_plus_one(wake_states[i+1].matrix,
513  wake_states[i+1].num_rows, wake_states[i+1].num_cols);
514  EMatrix pwake_i(pwake_states[i].matrix,
515  pwake_states[i].num_rows, pwake_states[i].num_cols);
516 
517  EMatrix WG_gen(get_weights(i,gen_gradients).matrix,
519  EVector BG_gen(get_biases(i,gen_gradients).vector, m_layer_sizes->element(i));
520 
521  pwake_i = pwake_i - wake_i;
522  BG_gen = pwake_i.rowwise().sum()/m_batch_size;
523  WG_gen = wake_i_plus_one*pwake_i.transpose()/m_batch_size;
524  }
525 
526  // Gradients for reconstruction parameters
527  for (int32_t i=1; i<m_num_layers-1; i++)
528  {
529  EMatrix sleep_i(sleep_states[i].matrix,
530  sleep_states[i].num_rows, sleep_states[i].num_cols);
531  EMatrix psleep_i(psleep_states[i].matrix,
532  psleep_states[i].num_rows, psleep_states[i].num_cols);
533  EMatrix sleep_i_minus_one(sleep_states[i-1].matrix,
534  sleep_states[i-1].num_rows, sleep_states[i-1].num_cols);
535 
536  EMatrix WG_rec(get_weights(i-1,rec_gradients).matrix,
538  EVector BG_rec(get_biases(i,rec_gradients).vector, m_layer_sizes->element(i));
539 
540  psleep_i = psleep_i - sleep_i;
541  BG_rec = psleep_i.rowwise().sum()/m_batch_size;
542  WG_rec = psleep_i*sleep_i_minus_one.transpose()/m_batch_size;
543  }
544 }
545 
548 {
549  if (p.vlen==0)
551  m_layer_sizes->element(index+1), m_layer_sizes->element(index), false);
552  else
554  m_layer_sizes->element(index+1), m_layer_sizes->element(index), false);
555 }
556 
559 {
560  if (p.vlen==0)
562  m_layer_sizes->element(index), false);
563  else
565  m_layer_sizes->element(index), false);;
566 }
567 
568 void CDeepBeliefNetwork::init()
569 {
570  cd_num_steps = 1;
571  monitoring_interval = 10;
572 
573  gd_mini_batch_size = 0;
574  max_num_epochs = 1;
575  gd_learning_rate = 0.1;
577  gd_momentum = 0.9;
578 
580  m_num_layers = 0;
582  m_batch_size = 0;
583  m_num_params = 0;
584  m_sigma = 0.01;
585 
586  SG_ADD((machine_int_t*)&m_visible_units_type, "visible_units_type",
587  "Type of the visible units", MS_NOT_AVAILABLE);
588  SG_ADD(&m_num_layers, "num_layers",
589  "Number of layers", MS_NOT_AVAILABLE);
590  SG_ADD((CSGObject**)&m_layer_sizes, "layer_sizes",
591  "Size of each hidden layer", MS_NOT_AVAILABLE);
592 
593  SG_ADD(&m_params, "params",
594  "Parameters of the network", MS_NOT_AVAILABLE);
595  SG_ADD(&m_num_params, "num_params",
596  "Number of parameters", MS_NOT_AVAILABLE);
597  SG_ADD(&m_bias_index_offsets, "bias_index_offsets",
598  "Index offsets of the biases", MS_NOT_AVAILABLE);
599  SG_ADD(&m_weights_index_offsets, "weights_index_offsets",
600  "Index offsets of the weights", MS_NOT_AVAILABLE);
601 
602  SG_ADD(&pt_cd_num_steps, "pt_cd_num_steps",
603  "Pre-training Number of CD Steps", MS_NOT_AVAILABLE);
604  SG_ADD(&pt_cd_persistent, "pt_cd_persistent",
605  "Pre-training Persistent CD", MS_NOT_AVAILABLE);
606  SG_ADD(&pt_cd_sample_visible, "pt_cd_sample_visible",
607  "Pre-training Number of CD Sample Visible", MS_NOT_AVAILABLE);
608  SG_ADD(&pt_l2_coefficient, "pt_l2_coefficient",
609  "Pre-training L2 regularization coeff", MS_NOT_AVAILABLE);
610  SG_ADD(&pt_l1_coefficient, "pt_l1_coefficient",
611  "Pre-training L1 regularization coeff", MS_NOT_AVAILABLE);
612  SG_ADD(&pt_monitoring_interval, "pt_monitoring_interval",
613  "Pre-training Monitoring Interval", MS_NOT_AVAILABLE);
614  SG_ADD(&pt_monitoring_method, "pt_monitoring_method",
615  "Pre-training Monitoring Method", MS_NOT_AVAILABLE);
616  SG_ADD(&pt_cd_num_steps, "pt_gd_mini_batch_size",
617  "Pre-training Gradient Descent Mini-batch size", MS_NOT_AVAILABLE);
618  SG_ADD(&pt_max_num_epochs, "pt_max_num_epochs",
619  "Pre-training Max number of Epochs", MS_NOT_AVAILABLE);
620  SG_ADD(&pt_gd_learning_rate, "pt_gd_learning_rate",
621  "Pre-training Gradient descent learning rate", MS_NOT_AVAILABLE);
622  SG_ADD(&pt_gd_learning_rate_decay, "pt_gd_learning_rate_decay",
623  "Pre-training Gradient descent learning rate decay", MS_NOT_AVAILABLE);
624  SG_ADD(&pt_gd_momentum, "pt_gd_momentum",
625  "Pre-training Gradient Descent Momentum", MS_NOT_AVAILABLE);
626 
627  SG_ADD(&cd_num_steps, "cd_num_steps", "Number of CD Steps", MS_NOT_AVAILABLE);
628  SG_ADD(&monitoring_interval, "monitoring_interval",
629  "Monitoring Interval", MS_NOT_AVAILABLE);
630 
631  SG_ADD(&gd_mini_batch_size, "gd_mini_batch_size",
632  "Gradient Descent Mini-batch size", MS_NOT_AVAILABLE);
633  SG_ADD(&max_num_epochs, "max_num_epochs",
634  "Max number of Epochs", MS_NOT_AVAILABLE);
635  SG_ADD(&gd_learning_rate, "gd_learning_rate",
636  "Gradient descent learning rate", MS_NOT_AVAILABLE);
637  SG_ADD(&gd_learning_rate_decay, "gd_learning_rate_decay",
638  "Gradient descent learning rate decay", MS_NOT_AVAILABLE);
639  SG_ADD(&gd_momentum, "gd_momentum",
640  "Gradient Descent Momentum", MS_NOT_AVAILABLE);
641 
642  SG_ADD(&m_sigma, "m_sigma", "Initialization Sigma", MS_NOT_AVAILABLE);
643 }
644 
A Restricted Boltzmann Machine.
Definition: RBM.h:122
virtual void wake_sleep(SGMatrix< float64_t > data, CRBM *top_rbm, SGMatrixList< float64_t > sleep_states, SGMatrixList< float64_t > wake_states, SGMatrixList< float64_t > psleep_states, SGMatrixList< float64_t > pwake_states, SGVector< float64_t > gen_params, SGVector< float64_t > rec_params, SGVector< float64_t > gen_gradients, SGVector< float64_t > rec_gradients)
#define SG_INFO(...)
Definition: SGIO.h:118
SGVector< int32_t > m_index_offsets
virtual void train(CDenseFeatures< float64_t > *features)
virtual CDenseFeatures< float64_t > * transform(CDenseFeatures< float64_t > *features, int32_t i=-1)
virtual int32_t get_num_parameters()
Definition: RBM.h:344
SGMatrixList< float64_t > m_states
SGVector< int32_t > pt_gd_mini_batch_size
virtual void initialize_neural_network(float64_t sigma=0.01f)
SGVector< int32_t > m_bias_index_offsets
int32_t get_num_features() const
SGMatrix< ST > get_feature_matrix()
static float32_t normal_random(float32_t mean, float32_t std_dev)
Definition: Math.h:1095
SGVector< float64_t > m_params
virtual void add_visible_group(int32_t num_units, ERBMVisibleUnitType unit_type)
Definition: RBM.cpp:69
CDynamicArray< int32_t > * m_layer_sizes
A generic multi-layer neural network.
#define REQUIRE(x,...)
Definition: SGIO.h:206
virtual void pre_train(CDenseFeatures< float64_t > *features)
SGVector< float64_t > pt_gd_momentum
index_t num_cols
Definition: SGMatrix.h:376
virtual void down_step(int32_t index, SGVector< float64_t > params, SGMatrix< float64_t > input, SGMatrix< float64_t > result, bool sample_states=true)
Base class for neural network layers.
Definition: NeuralLayer.h:87
SGVector< int32_t > pt_monitoring_interval
virtual void quick_connect()
index_t num_rows
Definition: SGMatrix.h:374
ERBMVisibleUnitType m_visible_units_type
static uint64_t random()
Definition: Math.h:1019
SGVector< int32_t > m_weights_index_offsets
index_t vlen
Definition: SGVector.h:492
virtual void contrastive_divergence(SGMatrix< float64_t > visible_batch, SGVector< float64_t > gradients)
Definition: RBM.cpp:355
virtual void up_step(int32_t index, SGVector< float64_t > params, SGMatrix< float64_t > input, SGMatrix< float64_t > result, bool sample_states=true)
SGVector< float64_t > m_params
Class SGObject is the base class of all shogun objects.
Definition: SGObject.h:115
void set_matrix(index_t index, const SGMatrix< T > matrix)
virtual int32_t get_num_vectors() const
SGVector< float64_t > pt_gd_learning_rate
virtual void add_hidden_layer(int32_t num_units)
double float64_t
Definition: common.h:50
virtual void set_batch_size(int32_t batch_size)
SGVector< float64_t > pt_gd_learning_rate_decay
SGVector< bool > pt_cd_persistent
Dynamic array class for CSGObject pointers that creates an array that can be used like a list or an a...
SGVector< float64_t > pt_l1_coefficient
ERBMVisibleUnitType
Definition: RBM.h:54
SGVector< int32_t > pt_monitoring_method
#define SG_UNREF(x)
Definition: SGObject.h:55
SGVector< float64_t > pt_l2_coefficient
Represents an input layer. The layer can be either connected to all the input features that a network...
Neural layer with linear neurons, with a logistic activation function. can be used as a hidden layer ...
all of classes and functions are contained in the shogun namespace
Definition: class_list.h:18
virtual CNeuralNetwork * convert_to_neural_network(CNeuralLayer *output_layer=NULL, float64_t sigma=0.01)
int machine_int_t
Definition: common.h:59
static float64_t exp(float64_t x)
Definition: Math.h:621
static float64_t log(float64_t v)
Definition: Math.h:922
SGMatrix< float64_t > visible_state
Definition: RBM.h:433
virtual CDenseFeatures< float64_t > * sample(int32_t num_gibbs_steps=1, int32_t batch_size=1)
const T & element(int32_t idx1, int32_t idx2=0, int32_t idx3=0) const
Definition: DynamicArray.h:224
SGVector< bool > pt_cd_sample_visible
virtual SGMatrix< float64_t > get_weights(int32_t index, SGVector< float64_t > p=SGVector< float64_t >())
Matrix::Scalar max(Matrix m)
Definition: Redux.h:68
#define SG_ADD(...)
Definition: SGObject.h:84
virtual void initialize_neural_network(float64_t sigma=0.01)
ERBMMonitoringMethod
Definition: RBM.h:48
SGVector< int32_t > pt_max_num_epochs
static int32_t pow(bool x, int32_t n)
Definition: Math.h:535
void set_const(T const_elem)
Definition: SGVector.cpp:150
virtual SGVector< float64_t > get_biases(int32_t index, SGVector< float64_t > p=SGVector< float64_t >())
bool append_element(CSGObject *e)
SGVector< int32_t > pt_cd_num_steps

SHOGUN Machine Learning Toolbox - Documentation