SHOGUN  4.2.0
NeuralNetwork.cpp
/*
 * Copyright (c) 2014, Shogun Toolbox Foundation
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *
 * 1. Redistributions of source code must retain the above copyright notice,
 * this list of conditions and the following disclaimer.
 *
 * 2. Redistributions in binary form must reproduce the above copyright notice,
 * this list of conditions and the following disclaimer in the documentation
 * and/or other materials provided with the distribution.
 *
 * 3. Neither the name of the copyright holder nor the names of its
 * contributors may be used to endorse or promote products derived from this
 * software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 *
 * Written (W) 2014 Khaled Nasr
 */

#include <shogun/neuralnets/NeuralNetwork.h>
#include <shogun/neuralnets/NeuralLayer.h>
#include <shogun/labels/BinaryLabels.h>
#include <shogun/labels/MulticlassLabels.h>
#include <shogun/labels/RegressionLabels.h>
#include <shogun/features/DenseFeatures.h>
#include <shogun/mathematics/Math.h>
#include <shogun/optimization/lbfgs/lbfgs.h>

using namespace shogun;

CNeuralNetwork::CNeuralNetwork()
: CMachine()
{
    init();
}

CNeuralNetwork::CNeuralNetwork(CDynamicObjectArray* layers)
{
    init();
    set_layers(layers);
}

void CNeuralNetwork::set_layers(CDynamicObjectArray* layers)
{
    REQUIRE(layers, "Layers should not be NULL")

    SG_UNREF(m_layers);
    SG_REF(layers);
    m_layers = layers;

    m_num_layers = m_layers->get_num_elements();
    m_adj_matrix = SGMatrix<bool>(m_num_layers, m_num_layers);
    m_adj_matrix.zero();

    m_num_inputs = 0;
    for (int32_t i=0; i<m_num_layers; i++)
    {
        if (get_layer(i)->is_input())
            m_num_inputs += get_layer(i)->get_num_neurons();
    }
}

void CNeuralNetwork::connect(int32_t i, int32_t j)
{
    REQUIRE(i<j, "i(%i) must be less than j(%i)\n", i, j);
    m_adj_matrix(i,j) = true;
}

void CNeuralNetwork::quick_connect()
{
    m_adj_matrix.zero();
    for (int32_t i=1; i<m_num_layers; i++)
        m_adj_matrix(i-1, i) = true;
}

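/* A minimal construction sketch (assumptions: CNeuralInputLayer and
 * CNeuralLogisticLayer from shogun/neuralnets/ are available; the layer
 * sizes are arbitrary). It shows how set_layers(), quick_connect() and
 * initialize_neural_network() fit together:
 *
 *     CDynamicObjectArray* layers = new CDynamicObjectArray();
 *     layers->append_element(new CNeuralInputLayer(4));    // layer 0: inputs
 *     layers->append_element(new CNeuralLogisticLayer(8)); // layer 1: hidden
 *     layers->append_element(new CNeuralLogisticLayer(1)); // layer 2: output
 *
 *     CNeuralNetwork* net = new CNeuralNetwork(layers);    // calls set_layers()
 *     net->quick_connect();                                // chain 0 -> 1 -> 2
 *     net->initialize_neural_network();                    // allocate m_params
 */
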
void CNeuralNetwork::disconnect(int32_t i, int32_t j)
{
    m_adj_matrix(i,j) = false;
}

void CNeuralNetwork::disconnect_all()
{
    m_adj_matrix.zero();
}

void CNeuralNetwork::initialize_neural_network(float64_t sigma)
{
    for (int32_t j=0; j<m_num_layers; j++)
    {
        if (!get_layer(j)->is_input())
        {
            int32_t num_inputs = 0;
            for (int32_t i=0; i<m_num_layers; i++)
                num_inputs += m_adj_matrix(i,j);

            SGVector<int32_t> input_indices(num_inputs);

            int32_t k = 0;
            for (int i=0; i<m_num_layers; i++)
            {
                if (m_adj_matrix(i,j))
                {
                    input_indices[k] = i;
                    k++;
                }
            }

            get_layer(j)->initialize_neural_layer(m_layers, input_indices);
        }
    }

    m_index_offsets = SGVector<int32_t>(m_num_layers);

    m_total_num_parameters = get_layer(0)->get_num_parameters();
    m_index_offsets[0] = 0;
    for (int32_t i=1; i<m_num_layers; i++)
    {
        m_index_offsets[i] = m_total_num_parameters;
        m_total_num_parameters += get_layer(i)->get_num_parameters();
    }

    m_params = SGVector<float64_t>(m_total_num_parameters);
    m_param_regularizable = SGVector<bool>(m_total_num_parameters);

    m_params.zero();
    m_param_regularizable.set_const(true);

    for (int32_t i=0; i<m_num_layers; i++)
    {
        SGVector<float64_t> layer_param = get_section(m_params, i);
        SGVector<bool> layer_param_regularizable =
            get_section(m_param_regularizable, i);

        get_layer(i)->initialize_parameters(layer_param,
            layer_param_regularizable, sigma);

        get_layer(i)->set_batch_size(m_batch_size);
    }
}

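/* Parameter layout: all layer parameters share the flat vector m_params;
 * m_index_offsets[i] marks where layer i's block begins, and
 * get_section(m_params, i) returns a no-copy view of that block. With
 * hypothetical per-layer parameter counts {0, 40, 9}, the offsets come out
 * as {0, 0, 40} and m_total_num_parameters as 49.
 */
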
CNeuralNetwork::~CNeuralNetwork()
{
    SG_UNREF(m_layers);
}

CBinaryLabels* CNeuralNetwork::apply_binary(CFeatures* data)
{
    SGMatrix<float64_t> output_activations = forward_propagate(data);
    CBinaryLabels* labels = new CBinaryLabels(m_batch_size);

    for (int32_t i=0; i<m_batch_size; i++)
    {
        if (get_num_outputs()==1)
        {
            if (output_activations[i]>0.5) labels->set_label(i, 1);
            else labels->set_label(i, -1);

            labels->set_value(output_activations[i], i);
        }
        else if (get_num_outputs()==2)
        {
            float64_t v1 = output_activations[2*i];
            float64_t v2 = output_activations[2*i+1];
            if (v1>v2)
                labels->set_label(i, 1);
            else labels->set_label(i, -1);

            labels->set_value(v2/(v1+v2), i);
        }
    }

    return labels;
}

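/* Worked example of the two-output decision rule above: activations
 * (v1, v2) = (0.7, 0.3) yield label +1 (since v1 > v2) with stored value
 * v2/(v1+v2) = 0.3; activations (0.2, 0.8) yield label -1 with value 0.8.
 */
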
CRegressionLabels* CNeuralNetwork::apply_regression(CFeatures* data)
{
    SGMatrix<float64_t> output_activations = forward_propagate(data);
    SGVector<float64_t> labels_vec(m_batch_size);

    for (int32_t i=0; i<m_batch_size; i++)
        labels_vec[i] = output_activations[i];

    return new CRegressionLabels(labels_vec);
}

CMulticlassLabels* CNeuralNetwork::apply_multiclass(CFeatures* data)
{
    SGMatrix<float64_t> output_activations = forward_propagate(data);
    SGVector<float64_t> labels_vec(m_batch_size);

    for (int32_t i=0; i<m_batch_size; i++)
    {
        labels_vec[i] = CMath::arg_max(
            output_activations.matrix+i*get_num_outputs(), 1, get_num_outputs());
    }

    CMulticlassLabels* labels = new CMulticlassLabels(labels_vec);

    labels->allocate_confidences_for(get_num_outputs());
    for (int32_t i=0; i<m_batch_size; i++)
    {
        labels->set_multiclass_confidences(i, SGVector<float64_t>(
            output_activations.matrix, get_num_outputs(), i*get_num_outputs()));
    }

    return labels;
}

CDenseFeatures<float64_t>* CNeuralNetwork::transform(
    CDenseFeatures<float64_t>* data)
{
    SGMatrix<float64_t> output_activations = forward_propagate(data);
    return new CDenseFeatures<float64_t>(output_activations);
}

bool CNeuralNetwork::train_machine(CFeatures* data)
{
    REQUIRE(m_max_num_epochs>=0,
        "Maximum number of epochs (%i) must be >= 0\n", m_max_num_epochs);

    SGMatrix<float64_t> inputs = features_to_matrix(data);
    SGMatrix<float64_t> targets = labels_to_matrix(m_labels);

    for (int32_t i=0; i<m_num_layers-1; i++)
    {
        get_layer(i)->dropout_prop =
            get_layer(i)->is_input() ? m_dropout_input : m_dropout_hidden;
    }
    get_layer(m_num_layers-1)->dropout_prop = 0.0;

    m_is_training = true;
    for (int32_t i=0; i<m_num_layers; i++)
        get_layer(i)->is_training = true;

    bool result = false;
    if (m_optimization_method==NNOM_GRADIENT_DESCENT)
        result = train_gradient_descent(inputs, targets);
    else if (m_optimization_method==NNOM_LBFGS)
        result = train_lbfgs(inputs, targets);

    for (int32_t i=0; i<m_num_layers; i++)
        get_layer(i)->is_training = false;
    m_is_training = false;

    return result;
}

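/* Typical call sequence around train_machine() (a sketch; `net`,
 * `train_feats`, `train_labs` and `test_feats` are placeholders):
 *
 *     net->set_labels(train_labs);   // the label type fixes the problem type
 *     net->train(train_feats);       // CMachine::train() dispatches to here
 *     CBinaryLabels* out = net->apply_binary(test_feats);
 */
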
bool CNeuralNetwork::train_gradient_descent(SGMatrix<float64_t> inputs,
        SGMatrix<float64_t> targets)
{
    REQUIRE(m_gd_learning_rate>0,
        "Gradient descent learning rate (%f) must be > 0\n", m_gd_learning_rate);
    REQUIRE(m_gd_momentum>=0,
        "Gradient descent momentum (%f) must be >= 0\n", m_gd_momentum);

    int32_t training_set_size = inputs.num_cols;
    if (m_gd_mini_batch_size==0) m_gd_mini_batch_size = training_set_size;
    set_batch_size(m_gd_mini_batch_size);

    int32_t n_param = get_num_parameters();
    SGVector<float64_t> gradients(n_param);

    // needed for momentum
    SGVector<float64_t> param_updates(n_param);
    param_updates.zero();

    float64_t error_last_time = -1.0, error = -1.0;

    float64_t c = m_gd_error_damping_coeff;
    if (c==-1.0)
        c = 0.99*(float64_t)m_gd_mini_batch_size/training_set_size + 1e-2;

    bool continue_training = true;
    float64_t alpha = m_gd_learning_rate;

    for (int32_t i=0; continue_training; i++)
    {
        if (m_max_num_epochs!=0)
            if (i>=m_max_num_epochs) break;

        for (int32_t j=0; j < training_set_size; j += m_gd_mini_batch_size)
        {
            alpha = m_gd_learning_rate_decay*alpha;

            if (j+m_gd_mini_batch_size>training_set_size)
                j = training_set_size-m_gd_mini_batch_size;

            SGMatrix<float64_t> targets_batch(targets.matrix+j*get_num_outputs(),
                get_num_outputs(), m_gd_mini_batch_size, false);

            SGMatrix<float64_t> inputs_batch(inputs.matrix+j*m_num_inputs,
                m_num_inputs, m_gd_mini_batch_size, false);

            for (int32_t k=0; k<n_param; k++)
                m_params[k] += m_gd_momentum*param_updates[k];

            float64_t e = compute_gradients(inputs_batch, targets_batch, gradients);

            for (int32_t k=0; k<m_num_layers; k++)
            {
                SGVector<float64_t> layer_gradients = get_section(gradients, k);
                if (layer_gradients.vlen > 0)
                {
                    SG_INFO("Layer %i (%s), Max Gradient: %g, Mean Gradient: %g.\n", k,
                        get_layer(k)->get_name(),
                        CMath::max(layer_gradients.vector, layer_gradients.vlen),
                        SGVector<float64_t>::sum(layer_gradients.vector,
                            layer_gradients.vlen)/layer_gradients.vlen);
                }
            }

            // filter the errors
            if (error==-1.0)
                error = e;
            else
                error = (1.0-c) * error + c*e;

            for (int32_t k=0; k<n_param; k++)
            {
                param_updates[k] = m_gd_momentum*param_updates[k]
                    -alpha*gradients[k];

                m_params[k] -= alpha*gradients[k];
            }

            if (error_last_time!=-1.0)
            {
                float64_t error_change = (error_last_time-error)/error;
                if (error_change < m_epsilon && error_change >= 0)
                {
                    SG_INFO("Gradient Descent Optimization Converged\n");
                    continue_training = false;
                    break;
                }

                SG_INFO("Epoch %i: Error = %f\n", i, error);
            }
            error_last_time = error;
        }
    }

    return true;
}

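/* The mini-batch loop above is momentum gradient descent with the gradient
 * evaluated at a look-ahead point (Nesterov-style): the parameters are first
 * shifted by momentum*updates, gradients are computed there, and then
 *
 *     updates <- momentum*updates - alpha*gradients
 *     params  <- params - alpha*gradients
 *
 * with alpha decayed by m_gd_learning_rate_decay every mini-batch, and the
 * error tracked as an exponential moving average with coefficient c.
 */
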
bool CNeuralNetwork::train_lbfgs(const SGMatrix<float64_t> inputs,
        const SGMatrix<float64_t> targets)
{
    int32_t training_set_size = inputs.num_cols;
    set_batch_size(training_set_size);

    lbfgs_parameter_t lbfgs_param;
    lbfgs_parameter_init(&lbfgs_param);
    lbfgs_param.max_iterations = m_max_num_epochs;
    lbfgs_param.epsilon = 0;
    lbfgs_param.past = 1;
    lbfgs_param.delta = m_epsilon;

    m_lbfgs_temp_inputs = &inputs;
    m_lbfgs_temp_targets = &targets;

    int32_t result = lbfgs(m_total_num_parameters,
        m_params,
        NULL,
        &CNeuralNetwork::lbfgs_evaluate,
        &CNeuralNetwork::lbfgs_progress,
        this,
        &lbfgs_param);

    m_lbfgs_temp_inputs = NULL;
    m_lbfgs_temp_targets = NULL;

    if (result==LBFGS_SUCCESS)
    {
        SG_INFO("L-BFGS Optimization Converged\n");
    }
    else if (result==LBFGSERR_MAXIMUMITERATION)
    {
        SG_INFO("L-BFGS Max Number of Epochs reached\n");
    }
    else
    {
        SG_INFO("L-BFGS optimization ended with return code %i\n", result);
    }
    return true;
}

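/* Note on the parameter mapping above: epsilon = 0 disables liblbfgs'
 * gradient-norm stopping test, while past = 1 and delta = m_epsilon are
 * assumed to make it stop once the relative decrease in error over one
 * iteration falls below m_epsilon, roughly mirroring the convergence test
 * used in train_gradient_descent().
 */
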
float64_t CNeuralNetwork::lbfgs_evaluate(void* userdata,
    const float64_t* W,
    float64_t* grad,
    const int32_t n,
    const float64_t step)
{
    CNeuralNetwork* network = static_cast<CNeuralNetwork*>(userdata);

    SGVector<float64_t> grad_vector(grad, network->get_num_parameters(), false);

    return network->compute_gradients(*network->m_lbfgs_temp_inputs,
        *network->m_lbfgs_temp_targets, grad_vector);
}

int CNeuralNetwork::lbfgs_progress(void* instance,
    const float64_t* x,
    const float64_t* g,
    const float64_t fx,
    const float64_t xnorm,
    const float64_t gnorm,
    const float64_t step,
    int n, int k, int ls)
{
    SG_SINFO("Epoch %i: Error = %f\n", k, fx);

    CNeuralNetwork* network = static_cast<CNeuralNetwork*>(instance);
    SGVector<float64_t> gradients((float64_t*)g, network->get_num_parameters(), false);
    for (int32_t i=0; i<network->m_num_layers; i++)
    {
        SGVector<float64_t> layer_gradients = network->get_section(gradients, i);
        if (layer_gradients.vlen > 0)
        {
            SG_SINFO("Layer %i (%s), Max Gradient: %g, Mean Gradient: %g.\n", i,
                network->get_layer(i)->get_name(),
                CMath::max(layer_gradients.vector, layer_gradients.vlen),
                SGVector<float64_t>::sum(layer_gradients.vector,
                    layer_gradients.vlen)/layer_gradients.vlen);
        }
    }
    return 0;
}

SGMatrix<float64_t> CNeuralNetwork::forward_propagate(CFeatures* data, int32_t j)
{
    SGMatrix<float64_t> inputs = features_to_matrix(data);
    set_batch_size(data->get_num_vectors());
    return forward_propagate(inputs, j);
}

SGMatrix<float64_t> CNeuralNetwork::forward_propagate(
    SGMatrix<float64_t> inputs, int32_t j)
{
    if (j==-1)
        j = m_num_layers-1;

    for (int32_t i=0; i<=j; i++)
    {
        CNeuralLayer* layer = get_layer(i);

        if (layer->is_input())
            layer->compute_activations(inputs);
        else
            layer->compute_activations(get_section(m_params, i), m_layers);

        layer->dropout_activations();
    }

    return get_layer(j)->get_activations();
}

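/* Partial propagation example: forward_propagate(x, 1) runs layers 0 and 1
 * only and returns layer 1's activations; with j = -1 (the default used by
 * transform() and the apply_*() methods) the whole network is run. Dropout
 * is applied per layer; per NeuralLayer's implementation it is assumed to be
 * a no-op unless the layer's is_training flag is set.
 */
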
float64_t CNeuralNetwork::compute_gradients(SGMatrix<float64_t> inputs,
    SGMatrix<float64_t> targets, SGVector<float64_t> gradients)
{
    forward_propagate(inputs);

    for (int32_t i=0; i<m_num_layers; i++)
        get_layer(i)->get_activation_gradients().zero();

    for (int32_t i=m_num_layers-1; i>=0; i--)
    {
        if (i==m_num_layers-1)
            get_layer(i)->compute_gradients(get_section(m_params,i), targets,
                m_layers, get_section(gradients,i));
        else
            get_layer(i)->compute_gradients(get_section(m_params,i),
                SGMatrix<float64_t>(), m_layers, get_section(gradients,i));
    }

    // L2 regularization
    if (m_l2_coefficient != 0.0)
    {
        for (int32_t i=0; i<m_total_num_parameters; i++)
        {
            if (m_param_regularizable[i])
                gradients[i] += m_l2_coefficient*m_params[i];
        }
    }

    // L1 regularization
    if (m_l1_coefficient != 0.0)
    {
        for (int32_t i=0; i<m_total_num_parameters; i++)
        {
            if (m_param_regularizable[i])
                gradients[i] +=
                    m_l1_coefficient*CMath::sign<float64_t>(m_params[i]);
        }
    }

    // max-norm regularization
    if (m_max_norm != -1.0)
    {
        for (int32_t i=0; i<m_num_layers; i++)
        {
            SGVector<float64_t> layer_params = get_section(m_params,i);
            get_layer(i)->enforce_max_norm(layer_params, m_max_norm);
        }
    }

    return compute_error(targets);
}

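/* Regularizer gradients added above, for each parameter w marked
 * regularizable:
 *
 *     L2: d/dw (0.5 * l2 * w^2) = l2 * w
 *     L1: d/dw (l1 * |w|)       = l1 * sign(w)   (subgradient at w = 0)
 *
 * matching the error terms added in compute_error() below.
 */
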
float64_t CNeuralNetwork::compute_error(SGMatrix<float64_t> targets)
{
    float64_t error = get_layer(m_num_layers-1)->compute_error(targets);

    // L2 regularization
    if (m_l2_coefficient != 0.0)
    {
        for (int32_t i=0; i<m_total_num_parameters; i++)
        {
            if (m_param_regularizable[i])
                error += 0.5*m_l2_coefficient*m_params[i]*m_params[i];
        }
    }

    // L1 regularization
    if (m_l1_coefficient != 0.0)
    {
        for (int32_t i=0; i<m_total_num_parameters; i++)
        {
            if (m_param_regularizable[i])
                error += m_l1_coefficient*CMath::abs(m_params[i]);
        }
    }

    return error;
}

float64_t CNeuralNetwork::compute_error(SGMatrix<float64_t> inputs,
    SGMatrix<float64_t> targets)
{
    forward_propagate(inputs);
    return compute_error(targets);
}

float64_t CNeuralNetwork::check_gradients(float64_t approx_epsilon, float64_t s)
{
    // some random inputs and outputs
    SGMatrix<float64_t> x(m_num_inputs, 1);
    SGMatrix<float64_t> y(get_num_outputs(), 1);

    for (int32_t i=0; i<x.num_rows; i++)
        x[i] = CMath::random(0.0,1.0);

    // the outputs are set up in the form of a probability distribution (in case
    // that is required by the output layer, e.g. softmax)
    for (int32_t i=0; i<y.num_rows; i++)
        y[i] = CMath::random(0.0,1.0);

    float64_t y_sum = SGVector<float64_t>::sum(y.matrix, y.num_rows);
    for (int32_t i=0; i<y.num_rows; i++)
        y[i] /= y_sum;

    set_batch_size(1);

    // numerically compute gradients
    SGVector<float64_t> gradients_numerical(m_total_num_parameters);

    for (int32_t i=0; i<m_total_num_parameters; i++)
    {
        float64_t c =
            CMath::max<float64_t>(CMath::abs(approx_epsilon*m_params[i]), s);

        m_params[i] += c;
        float64_t error_plus = compute_error(x,y);
        m_params[i] -= 2*c;
        float64_t error_minus = compute_error(x,y);
        m_params[i] += c;

        gradients_numerical[i] = (error_plus-error_minus)/(2*c);
    }

    // compute gradients using backpropagation
    SGVector<float64_t> gradients_backprop(m_total_num_parameters);
    compute_gradients(x, y, gradients_backprop);

    float64_t sum = 0.0;
    for (int32_t i=0; i<m_total_num_parameters; i++)
    {
        sum += CMath::abs(gradients_backprop[i]-gradients_numerical[i]);
    }

    return sum/m_total_num_parameters;
}

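/* check_gradients() compares backpropagation against the symmetric
 * difference quotient
 *
 *     dE/dw_i ~= (E(w_i + c) - E(w_i - c)) / (2c),  c = max(|approx_epsilon * w_i|, s)
 *
 * and returns the mean absolute deviation over all parameters; a result
 * close to zero (e.g. below 1e-6, a suggested threshold rather than one
 * fixed by this file) indicates consistent gradients.
 */
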
void CNeuralNetwork::set_batch_size(int32_t batch_size)
{
    if (batch_size!=m_batch_size)
    {
        m_batch_size = batch_size;
        for (int32_t i=0; i<m_num_layers; i++)
            get_layer(i)->set_batch_size(batch_size);
    }
}

SGMatrix<float64_t> CNeuralNetwork::features_to_matrix(CFeatures* features)
{
    REQUIRE(features != NULL, "Invalid (NULL) feature pointer\n");
    REQUIRE(features->get_feature_type() == F_DREAL,
        "Feature type must be F_DREAL\n");
    REQUIRE(features->get_feature_class() == C_DENSE,
        "Feature class must be C_DENSE\n");

    CDenseFeatures<float64_t>* inputs = (CDenseFeatures<float64_t>*) features;
    REQUIRE(inputs->get_num_features()==m_num_inputs,
        "Number of features (%i) must match the network's number of inputs "
        "(%i)\n", inputs->get_num_features(), get_num_inputs());

    return inputs->get_feature_matrix();
}

SGMatrix<float64_t> CNeuralNetwork::labels_to_matrix(CLabels* labs)
{
    REQUIRE(labs != NULL, "Invalid (NULL) labels pointer\n");

    SGMatrix<float64_t> targets(get_num_outputs(), labs->get_num_labels());
    targets.zero();

    if (labs->get_label_type() == LT_MULTICLASS)
    {
        CMulticlassLabels* labels_mc = (CMulticlassLabels*) labs;
        REQUIRE(labels_mc->get_num_classes()==get_num_outputs(),
            "Number of classes (%i) must match the network's number of "
            "outputs (%i)\n", labels_mc->get_num_classes(), get_num_outputs());

        for (int32_t i=0; i<labels_mc->get_num_labels(); i++)
            targets[((int32_t)labels_mc->get_label(i))+ i*get_num_outputs()]
                = 1.0;
    }
    else if (labs->get_label_type() == LT_BINARY)
    {
        CBinaryLabels* labels_bin = (CBinaryLabels*) labs;
        if (get_num_outputs()==1)
        {
            for (int32_t i=0; i<labels_bin->get_num_labels(); i++)
                targets[i] = (labels_bin->get_label(i)==1);
        }
        else if (get_num_outputs()==2)
        {
            for (int32_t i=0; i<labels_bin->get_num_labels(); i++)
            {
                targets[i*2] = (labels_bin->get_label(i)==1);
                targets[i*2+1] = (labels_bin->get_label(i)==-1);
            }
        }
    }
    else if (labs->get_label_type() == LT_REGRESSION)
    {
        CRegressionLabels* labels_reg = (CRegressionLabels*) labs;
        for (int32_t i=0; i<labels_reg->get_num_labels(); i++)
            targets[i] = labels_reg->get_label(i);
    }

    return targets;
}

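/* Encoding example for the column-major targets matrix built above: with 3
 * outputs and multiclass labels {2, 0}, column 0 is (0, 0, 1) and column 1
 * is (1, 0, 0). Binary labels with two outputs map +1 to (1, 0) and -1 to
 * (0, 1).
 */
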
EProblemType CNeuralNetwork::get_machine_problem_type() const
{
    // problem type depends on the type of labels given to the network
    // if no labels are given yet, just return PT_MULTICLASS
    if (m_labels==NULL)
        return PT_MULTICLASS;

    if (m_labels->get_label_type() == LT_BINARY)
        return PT_BINARY;
    else if (m_labels->get_label_type() == LT_REGRESSION)
        return PT_REGRESSION;
    else return PT_MULTICLASS;
}

bool CNeuralNetwork::is_label_valid(CLabels* lab) const
{
    return (lab->get_label_type() == LT_MULTICLASS ||
        lab->get_label_type() == LT_BINARY ||
        lab->get_label_type() == LT_REGRESSION);
}

void CNeuralNetwork::set_labels(CLabels* lab)
{
    if (lab->get_label_type() == LT_BINARY)
    {
        REQUIRE(get_num_outputs() <= 2, "Cannot use %s in a neural network "
            "with more than 2 output neurons\n", lab->get_name());
    }
    else if (lab->get_label_type() == LT_REGRESSION)
    {
        REQUIRE(get_num_outputs() == 1, "Cannot use %s in a neural network "
            "with more than 1 output neuron\n", lab->get_name());
    }

    CMachine::set_labels(lab);
}

SGVector<float64_t>* CNeuralNetwork::get_layer_parameters(int32_t i)
{
    REQUIRE(i<m_num_layers && i>=0, "Layer index (%i) out of range\n", i);

    int32_t n = get_layer(i)->get_num_parameters();
    SGVector<float64_t>* p = new SGVector<float64_t>(n);

    memcpy(p->vector, get_section(m_params, i), n*sizeof(float64_t));
    return p;
}

CNeuralLayer* CNeuralNetwork::get_layer(int32_t i)
{
    CNeuralLayer* layer = (CNeuralLayer*)m_layers->element(i);
    // needed because m_layers->element(i) increases the reference count of
    // layer i
    SG_UNREF(layer);
    return layer;
}

template <class T>
SGVector<T> CNeuralNetwork::get_section(SGVector<T> v, int32_t i)
{
    return SGVector<T>(v.vector+m_index_offsets[i],
        get_layer(i)->get_num_parameters(), false);
}

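/* The `false` flag makes the returned SGVector a non-owning view into the
 * underlying buffer (m_params or a gradient vector), so writing through a
 * section (as initialize_parameters() and the optimizers do) updates that
 * buffer in place with no copying.
 */
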
int32_t CNeuralNetwork::get_num_outputs()
{
    return get_layer(m_num_layers-1)->get_num_neurons();
}

CDynamicObjectArray* CNeuralNetwork::get_layers()
{
    SG_REF(m_layers);
    return m_layers;
}

void CNeuralNetwork::init()
{
    m_optimization_method = NNOM_LBFGS;
    m_dropout_hidden = 0.0;
    m_dropout_input = 0.0;
    m_max_norm = -1.0;
    m_l2_coefficient = 0.0;
    m_l1_coefficient = 0.0;
    m_gd_mini_batch_size = 0;
    m_max_num_epochs = 0;
    m_gd_learning_rate = 0.1;
    m_gd_learning_rate_decay = 1.0;
    m_gd_momentum = 0.9;
    m_gd_error_damping_coeff = -1.0;
    m_epsilon = 1.0e-5;
    m_num_inputs = 0;
    m_num_layers = 0;
    m_layers = NULL;
    m_total_num_parameters = 0;
    m_batch_size = 1;
    m_lbfgs_temp_inputs = NULL;
    m_lbfgs_temp_targets = NULL;
    m_is_training = false;

    SG_ADD((machine_int_t*)&m_optimization_method, "optimization_method",
        "Optimization Method", MS_NOT_AVAILABLE);
    SG_ADD(&m_gd_mini_batch_size, "gd_mini_batch_size",
        "Gradient Descent Mini-batch size", MS_NOT_AVAILABLE);
    SG_ADD(&m_max_num_epochs, "max_num_epochs",
        "Max number of Epochs", MS_NOT_AVAILABLE);
    SG_ADD(&m_gd_learning_rate, "gd_learning_rate",
        "Gradient descent learning rate", MS_NOT_AVAILABLE);
    SG_ADD(&m_gd_learning_rate_decay, "gd_learning_rate_decay",
        "Gradient descent learning rate decay", MS_NOT_AVAILABLE);
    SG_ADD(&m_gd_momentum, "gd_momentum",
        "Gradient Descent Momentum", MS_NOT_AVAILABLE);
    SG_ADD(&m_gd_error_damping_coeff, "gd_error_damping_coeff",
        "Gradient Descent Error Damping Coeff", MS_NOT_AVAILABLE);
    SG_ADD(&m_epsilon, "epsilon",
        "Epsilon", MS_NOT_AVAILABLE);
    SG_ADD(&m_num_inputs, "num_inputs",
        "Number of Inputs", MS_NOT_AVAILABLE);
    SG_ADD(&m_num_layers, "num_layers",
        "Number of Layers", MS_NOT_AVAILABLE);
    SG_ADD(&m_adj_matrix, "adj_matrix",
        "Adjacency Matrix", MS_NOT_AVAILABLE);
    SG_ADD(&m_l2_coefficient, "l2_coefficient",
        "L2 regularization coeff", MS_NOT_AVAILABLE);
    SG_ADD(&m_l1_coefficient, "l1_coefficient",
        "L1 regularization coeff", MS_NOT_AVAILABLE);
    SG_ADD(&m_dropout_hidden, "dropout_hidden",
        "Hidden neuron dropout probability", MS_NOT_AVAILABLE);
    SG_ADD(&m_dropout_input, "dropout_input",
        "Input neuron dropout probability", MS_NOT_AVAILABLE);
    SG_ADD(&m_max_norm, "max_norm",
        "Max Norm", MS_NOT_AVAILABLE);
    SG_ADD(&m_total_num_parameters, "total_num_parameters",
        "Total number of parameters", MS_NOT_AVAILABLE);
    SG_ADD(&m_index_offsets, "index_offsets",
        "Index Offsets", MS_NOT_AVAILABLE);
    SG_ADD(&m_params, "params",
        "Parameters", MS_NOT_AVAILABLE);
    SG_ADD(&m_param_regularizable, "param_regularizable",
        "Parameter Regularizable", MS_NOT_AVAILABLE);
    SG_ADD((CSGObject**)&m_layers, "layers",
        "DynamicObjectArray of NeuralLayer objects",
        MS_NOT_AVAILABLE);
    SG_ADD(&m_is_training, "is_training",
        "is_training", MS_NOT_AVAILABLE);
}