SHOGUN 4.2.0
AdamUpdater.cpp
/*
 * Copyright (c) The Shogun Machine Learning Toolbox
 * Written (w) 2015 Wu Lin
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *
 * 1. Redistributions of source code must retain the above copyright notice, this
 *    list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright notice,
 *    this list of conditions and the following disclaimer in the documentation
 *    and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
 * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 * The views and conclusions contained in the software and documentation are those
 * of the authors and should not be interpreted as representing official policies,
 * either expressed or implied, of the Shogun Development Team.
 *
 */

#include <shogun/optimization/AdamUpdater.h>
#include <shogun/base/Parameter.h>

using namespace shogun;

AdamUpdater::AdamUpdater()
	:DescendUpdaterWithCorrection()
{
	init();
}

AdamUpdater::AdamUpdater(float64_t learning_rate, float64_t epsilon,
	float64_t first_moment_decay_factor,
	float64_t second_moment_decay_factor)
	:DescendUpdaterWithCorrection()
{
	init();
	set_learning_rate(learning_rate);
	set_epsilon(epsilon);
	set_first_moment_decay_factor(first_moment_decay_factor);
	set_second_moment_decay_factor(second_moment_decay_factor);
}

void AdamUpdater::set_learning_rate(float64_t learning_rate)
{
	REQUIRE(learning_rate>0, "Learning rate (%f) must be positive\n",
		learning_rate);
	m_log_learning_rate=CMath::log(learning_rate);
}

void AdamUpdater::set_epsilon(float64_t epsilon)
{
	REQUIRE(epsilon>0, "Epsilon (%f) must be positive\n",
		epsilon);
	m_epsilon=epsilon;
}

void AdamUpdater::set_first_moment_decay_factor(float64_t decay_factor)
{
	REQUIRE(decay_factor>0.0 && decay_factor<=1.0,
		"Decay factor (%f) for first moment must be in (0,1]\n",
		decay_factor);
	m_decay_factor_first_moment=decay_factor;
}

void AdamUpdater::set_second_moment_decay_factor(float64_t decay_factor)
{
	REQUIRE(decay_factor>0.0 && decay_factor<=1.0,
		"Decay factor (%f) for second moment must be in (0,1]\n",
		decay_factor);
	m_decay_factor_second_moment=decay_factor;
}

AdamUpdater::~AdamUpdater() { }

void AdamUpdater::init()
{
	// Default hyper-parameters (the standard Adam settings)
	m_decay_factor_first_moment=0.9;
	m_decay_factor_second_moment=0.999;
	m_epsilon=1e-8;
	m_log_scale_pre_iteration=0.0;
	m_log_learning_rate=CMath::log(0.001);
	m_iteration_counter=0;
	m_gradient_first_moment=SGVector<float64_t>();
	m_gradient_second_moment=SGVector<float64_t>();

	SG_ADD(&m_decay_factor_first_moment, "AdamUpdater__m_decay_factor_first_moment",
		"decay_factor_first_moment in AdamUpdater", MS_NOT_AVAILABLE);
	SG_ADD(&m_decay_factor_second_moment, "AdamUpdater__m_decay_factor_second_moment",
		"decay_factor_second_moment in AdamUpdater", MS_NOT_AVAILABLE);
	SG_ADD(&m_gradient_first_moment, "AdamUpdater__m_gradient_first_moment",
		"m_gradient_first_moment in AdamUpdater", MS_NOT_AVAILABLE);
	SG_ADD(&m_gradient_second_moment, "AdamUpdater__m_gradient_second_moment",
		"m_gradient_second_moment in AdamUpdater", MS_NOT_AVAILABLE);
	SG_ADD(&m_epsilon, "AdamUpdater__m_epsilon",
		"epsilon in AdamUpdater", MS_NOT_AVAILABLE);
	SG_ADD(&m_log_scale_pre_iteration, "AdamUpdater__m_log_scale_pre_iteration",
		"log_scale_pre_iteration in AdamUpdater", MS_NOT_AVAILABLE);
	SG_ADD(&m_log_learning_rate, "AdamUpdater__m_log_learning_rate",
		"m_log_learning_rate in AdamUpdater", MS_NOT_AVAILABLE);
	SG_ADD(&m_iteration_counter, "AdamUpdater__m_iteration_counter",
		"m_iteration_counter in AdamUpdater", MS_NOT_AVAILABLE);
}

float64_t AdamUpdater::get_negative_descend_direction(float64_t variable,
	float64_t gradient, index_t idx, float64_t learning_rate)
{
	REQUIRE(idx>=0 && idx<m_gradient_first_moment.vlen,
		"Index (%d) is out of range of the first moment vector\n", idx);
	REQUIRE(idx>=0 && idx<m_gradient_second_moment.vlen,
		"Index (%d) is out of range of the second moment vector\n", idx);

	// Biased first moment estimate: m = beta_1*m + (1-beta_1)*g
	float64_t scale_first_moment=m_decay_factor_first_moment*m_gradient_first_moment[idx]+
		(1.0-m_decay_factor_first_moment)*gradient;
	m_gradient_first_moment[idx]=scale_first_moment;

	// Biased second raw moment estimate: v = beta_2*v + (1-beta_2)*g^2
	float64_t scale_second_moment=m_decay_factor_second_moment*m_gradient_second_moment[idx]+
		(1.0-m_decay_factor_second_moment)*gradient*gradient;
	m_gradient_second_moment[idx]=scale_second_moment;

	// exp(m_log_scale_pre_iteration) carries the step size and bias corrections,
	// computed once per iteration in update_variable()
	float64_t res=CMath::exp(m_log_scale_pre_iteration)*scale_first_moment/
		(CMath::sqrt(scale_second_moment)+m_epsilon);
	return res;
}
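/* How the function above relates to the Adam update of Kingma & Ba (2015).
 * Writing beta_1 = m_decay_factor_first_moment, beta_2 = m_decay_factor_second_moment,
 * g = gradient and t = m_iteration_counter, the two accumulators are the usual
 * biased moment estimates
 *
 *   m_t = beta_1 * m_{t-1} + (1 - beta_1) * g
 *   v_t = beta_2 * v_{t-1} + (1 - beta_2) * g^2
 *
 * and update_variable() below sets
 *
 *   m_log_scale_pre_iteration = log(alpha) + 0.5*log(1 - beta_2^t) - log(1 - beta_1^t),
 *
 * so the returned step equals
 *
 *   alpha * sqrt(1 - beta_2^t) / (1 - beta_1^t) * m_t / (sqrt(v_t) + epsilon),
 *
 * i.e. the bias-corrected step alpha * m_hat_t / (sqrt(v_hat_t) + epsilon), with
 * epsilon applied to the uncorrected sqrt(v_t) as in the paper's efficient
 * formulation. Note that the learning_rate argument is not used here; the step
 * size comes from m_log_learning_rate set via set_learning_rate().
 */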

void AdamUpdater::update_variable(SGVector<float64_t> variable_reference,
	SGVector<float64_t> raw_negative_descend_direction, float64_t learning_rate)
{
	REQUIRE(variable_reference.vlen==raw_negative_descend_direction.vlen,
		"Length of variable (%d) and length of gradient (%d) do not match\n",
		variable_reference.vlen, raw_negative_descend_direction.vlen);

	// Lazily allocate the per-coordinate moment vectors on the first call
	if(m_gradient_first_moment.vlen==0)
	{
		m_gradient_first_moment=SGVector<float64_t>(variable_reference.vlen);
		m_gradient_first_moment.set_const(0.0);

		m_gradient_second_moment=SGVector<float64_t>(variable_reference.vlen);
		m_gradient_second_moment.set_const(0.0);
	}

	// Precompute the bias-corrected log step size for this iteration
	m_iteration_counter++;
	m_log_scale_pre_iteration=m_log_learning_rate+
		0.5*CMath::log(1.0-CMath::pow(m_decay_factor_second_moment,(float64_t)m_iteration_counter))-
		CMath::log(1.0-CMath::pow(m_decay_factor_first_moment,(float64_t)m_iteration_counter));

	DescendUpdaterWithCorrection::update_variable(variable_reference, raw_negative_descend_direction,
		learning_rate);
}
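
For reference, a minimal usage sketch (not taken from the Shogun examples) that drives the updater directly on a toy quadratic, using only the constructor and update_variable() defined above. The include paths, the toy objective, and the direct call without a minimizer or descend correction are assumptions of this sketch, not part of the documented file.

#include <shogun/optimization/AdamUpdater.h>
#include <shogun/lib/SGVector.h>

using namespace shogun;

int main()
{
	// learning_rate=0.01, epsilon=1e-8, beta_1=0.9, beta_2=0.999
	AdamUpdater updater(0.01, 1e-8, 0.9, 0.999);

	// Minimize f(x) = 0.5*||x||^2, whose gradient is x itself
	SGVector<float64_t> x(2);
	x.set_const(5.0);

	for (int32_t iter=0; iter<2000; iter++)
	{
		SGVector<float64_t> grad(x.vlen);
		for (index_t i=0; i<x.vlen; i++)
			grad[i]=x[i];

		// The learning_rate argument is not used by AdamUpdater itself
		// (see get_negative_descend_direction above)
		updater.update_variable(x, grad, 1.0);
	}
	// x should now be close to the minimizer (0, 0)
	return 0;
}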