Commit 5b8a0c5d authored by dzhwinter

"optimizer remove init create with proto"

Parent 3158efe9
include_directories(${CMAKE_CURRENT_BINARY_DIR})
set(OPITMIZER_SRCS
-    adadelta_optimizer.cc
-    adagrad_optimizer.cc
-    adam_optimizer.cc
+    # adadelta_optimizer.cc
+    # adagrad_optimizer.cc
+    # adam_optimizer.cc
    optimizer.cc
    parameter_optimizer.cc
    sgd_optmizer.cc
@@ -11,9 +11,9 @@ set(OPITMIZER_SRCS
)
set(OPITMIZER_Headers
-    adadelta_optimizer.h
-    adagrad_optimizer.h
-    adam_optimizer.h
+    # adadelta_optimizer.h
+    # adagrad_optimizer.h
+    # adam_optimizer.h
    lr_policy.h
    optimizer.h
    parameter_optimizer.h
......
@@ -5,6 +5,7 @@
 */
#include <string.h>
+#include "optimizer.h"
#include "paddle/math/BaseMatrix.h"
namespace paddle {
@@ -16,10 +17,14 @@ using TensorBase = BaseMatrixT<T>;
template <class T>
class Tensor : public TensorBase<T> {
public:
-  Tensor(T* data, int size) : TensorBase<T>(size, 1, 0, data, false, false) {}
+  Tensor(T* data, int size) : TensorBase<T>(1, size, 0, data, false, false) {}
  T* get_buffer() { return this->data_; }
+  T& operator[](const int idx) {
+    CHECK(idx >= 0 && idx < this->width_) << " out of index range";
+    return this->data_[idx];
+  }
  // TODO: replace with tensorshape
-  size_t width() { return this->width_; }
+  size_t size() const { return this->width_; }
};
}  // namespace optimizer
......
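Note on the Tensor change above: the buffer is now wrapped as a 1 x size row vector, indexing is range-checked, and width() became size(). A minimal usage sketch (not part of the commit; the include path and the availability of glog's CHECK are assumptions):

```cpp
#include "Tensor.h"  // assumed path of the header shown above

void tensor_usage_sketch() {
  float buf[4] = {1.0f, 2.0f, 3.0f, 4.0f};
  // Wraps the external buffer as a 1 x 4 row vector; no copy is made.
  paddle::optimizer::Tensor<float> t(buf, 4);
  t[2] = 10.0f;         // operator[] is range-checked via CHECK
  size_t n = t.size();  // reports width_, i.e. 4
  (void)n;
}
```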
@@ -3,21 +3,14 @@
namespace paddle {
namespace optimizer {
-template <class T>
-AdadeltaOptimizer<T>::AdadeltaOptimizer(const ::paddle::OptimizerConfig& config)
-    : ParameterOptimizer<T>(config) {
-  rho = config.adadelta().rho();
-  epsilon = config.adadelta().epsilon();
-  decay = config.adadelta().decay();
-}
template <class T>
void AdadeltaOptimizer<T>::set_weight(const Tensor<T>* p) {
-  size_t size = p->width();
+  size_t size = p->size();
  T* gptr = new T[size];
  accum_gradient = Tensor<T>(gptr, size);
  T* dptr = new T[size];
-  accum_delta = Tensor<T>(dtpr, size);
+  accum_delta = Tensor<T>(dptr, size);
  T* dptr_current = new T[size];
  update_delta = Tensor<T>(dptr_current, size);
}
@@ -25,8 +18,8 @@ void AdadeltaOptimizer<T>::set_weight(const Tensor<T>* p) {
template <class T>
void AdadeltaOptimizer<T>::update(const Tensor<T>& gradient) {
  num_sample_passed += 1;
-  double learning_rate = lr_policy->get_learning_rate();
+  double learning_rate = lr_policy->get_learning_rate(num_sample_passed);
-  for (size_t i = 0; i < parameter_.size(); ++i) {
+  for (size_t i = 0; i < parameter_->size(); ++i) {
    accum_gradient[i] =
        rho * accum_gradient[i] + (1.0 - rho) * gradient[i] * gradient[i];
@@ -36,7 +29,8 @@ void AdadeltaOptimizer<T>::update(const Tensor<T>& gradient) {
    accum_delta[i] =
        rho * accum_delta[i] + (1.0 - rho) * update_delta[i] * update_delta[i];
-    parameter_[i] -= update_delta[i] + decay * parameter_[i];
+    parameter_[i] -=
+        learning_rate * update_delta[i] + learning_rate * decay * parameter_[i];
  }
}
......
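For reference, the accumulators in the Adadelta loop above follow the standard recurrences below. The computation of update_delta sits in a hunk that is not shown, so its formula here is the conventional one (an assumption); the other three lines mirror the committed code, with η the policy learning rate and λ the decay.

```latex
\begin{aligned}
E[g^2]_t &= \rho\, E[g^2]_{t-1} + (1-\rho)\, g_t^2 \\
\Delta\theta_t &= \frac{\sqrt{E[\Delta\theta^2]_{t-1} + \epsilon}}{\sqrt{E[g^2]_t + \epsilon}}\; g_t \\
E[\Delta\theta^2]_t &= \rho\, E[\Delta\theta^2]_{t-1} + (1-\rho)\, \Delta\theta_t^2 \\
\theta_{t+1} &= \theta_t - \eta\,(\Delta\theta_t + \lambda\,\theta_t)
\end{aligned}
```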
@@ -9,7 +9,12 @@ namespace optimizer {
template <class T>
class AdadeltaOptimizer : public ParameterOptimizer<T> {
public:
-  AdadeltaOptimizer(const OptimizerConfig &config);
+  using ParameterOptimizer<T>::parameter_;
+  using ParameterOptimizer<T>::num_sample_passed;
+  using ParameterOptimizer<T>::lr_policy;
+  AdadeltaOptimizer(double rho, double epsilon, double decay, BaseLr *lr)
+      : ParameterOptimizer<T>(lr), rho(rho), epsilon(epsilon), decay(decay) {}
  ~AdadeltaOptimizer() {
    if (accum_gradient) delete accum_gradient;
    if (accum_delta) delete accum_delta;
......
@@ -3,11 +3,6 @@
namespace paddle {
namespace optimizer {
-template <class T>
-AdagradOptimizer<T>::AdagradOptimizer(const ::paddle::OptimizerConfig& config)
-    : ParameterOptimizer<T>(config) {
-  epsilon = config.adagrad().epsilon();
-  decay = config.adagrad().decay();
-}
template <class T>
void AdagradOptimizer<T>::set_weight(const Tensor<T>* p) {
......
@@ -9,7 +9,11 @@ namespace optimizer {
template <class T>
class AdagradOptimizer : public ParameterOptimizer<T> {
public:
-  AdagradOptimizer(const OptimizerConfig &config);
+  using ParameterOptimizer<T>::parameter_;
+  using ParameterOptimizer<T>::num_sample_passed;
+  using ParameterOptimizer<T>::lr_policy;
+  AdagradOptimizer(double epsilon, double decay, BaseLr *lr)
+      : ParameterOptimizer<T>(lr), epsilon(epsilon), decay(decay) {}
  ~AdagradOptimizer() {
    if (accum_gradient) delete accum_gradient;
  }
......
@@ -2,14 +2,6 @@
namespace paddle {
namespace optimizer {
-template <class T>
-AdamOptimizer<T>::AdamOptimizer(const ::paddle::OptimizerConfig &config)
-    : ParameterOptimizer<T>(config) {
-  beta_1 = config.adam().beta_1();
-  beta_2 = config.adam().beta_2();
-  epsilon = config.adam().epsilon();
-  decay = config.adam().decay();
-}
template <class T>
void AdamOptimizer<T>::set_weight(const Tensor<T> *p) {
@@ -23,11 +15,16 @@ void AdamOptimizer<T>::set_weight(const Tensor<T> *p) {
template <class T>
void AdamOptimizer<T>::update(const Tensor<T> &gradient) {
  num_sample_passed += 1;
-  double learning_rate = lr_policy->get_learning_rate();
-  for (size_t i = 0; i < parameter_.size(); ++i) {
-    accum_gradient[i] += gradient[i] * gradient[i];
-    parameter_[i] +=
-        learning_rate * (gradient[i] / std::sqrt(accum_gradient[i] + epsilon) +
+  double learning_rate = lr_policy->get_learning_rate(num_sample_passed);
+  double coef1 = 1.0 - std::pow(beta_1, num_sample_passed);
+  double coef2 = 1.0 - std::pow(beta_2, num_sample_passed);
+  learning_rate *= std::sqrt(coef2) / coef1;
+  for (size_t i = 0; i < parameter_->size(); ++i) {
+    momentums_[i] = beta_1 * momentums_[i] + (1.0 - beta_1) * gradient[i];
+    velocitys_[i] =
+        beta_2 * velocitys_[i] + (1.0 - beta_2) * gradient[i] * gradient[i];
+    parameter_[i] -=
+        learning_rate * (momentums_[i] / std::sqrt(velocitys_[i] + epsilon) +
                         decay * parameter_[i]);
  }
}
......
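The rewritten Adam loop maintains first and second moment estimates and folds the bias correction into the learning rate, which is the standard Adam update (here t is num_sample_passed, η the policy rate, λ the decay):

```latex
\begin{aligned}
m_t &= \beta_1\, m_{t-1} + (1-\beta_1)\, g_t \\
v_t &= \beta_2\, v_{t-1} + (1-\beta_2)\, g_t^2 \\
\hat{\eta}_t &= \eta\, \frac{\sqrt{1-\beta_2^{\,t}}}{1-\beta_1^{\,t}} \\
\theta_{t+1} &= \theta_t - \hat{\eta}_t \left( \frac{m_t}{\sqrt{v_t + \epsilon}} + \lambda\, \theta_t \right)
\end{aligned}
```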
@@ -9,8 +9,20 @@ namespace optimizer {
template <class T>
class AdamOptimizer : public ParameterOptimizer<T> {
public:
-  AdamOptimizer(const OptimizerConfig &config);
-  ~AdamOptimizer() {}
+  using ParameterOptimizer<T>::parameter_;
+  using ParameterOptimizer<T>::num_sample_passed;
+  using ParameterOptimizer<T>::lr_policy;
+  AdamOptimizer(
+      double beta_1, double beta_2, double epsilon, double decay, BaseLr *lr)
+      : ParameterOptimizer<T>(lr),
+        beta_1(beta_1),
+        beta_2(beta_2),
+        epsilon(epsilon),
+        decay(decay) {}
+  ~AdamOptimizer() {
+    if (momentums_) delete momentums_;
+    if (velocitys_) delete velocitys_;
+  }
  void update(const Tensor<T> &gradient);
  void set_weight(const Tensor<T> *p);
  T *get_weight() const;
......
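With the proto-based constructors removed, an optimizer is now built directly from hyperparameters plus a learning-rate policy. A small sketch (not from the commit; ConstLr is assumed to be a BaseLr subclass returning a fixed rate, and the hyperparameter values are placeholders):

```cpp
#include "adam_optimizer.h"
#include "lr_policy.h"  // assumed to also provide ConstLr

void build_adam_sketch() {
  using namespace paddle::optimizer;
  BaseLr *lr = new ConstLr(0.001);  // fixed learning-rate policy
  auto *adam = new AdamOptimizer<float>(/*beta_1=*/0.9, /*beta_2=*/0.999,
                                        /*epsilon=*/1e-8, /*decay=*/0.0, lr);
  (void)adam;  // set_weight() and update() would follow in real use
}
```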
#ifndef PADDLE_OPTIMIZER_LR_POLICY_H_
#define PADDLE_OPTIMIZER_LR_POLICY_H_
-#include "OptimizerConfig.ph.h"
+#include "OptimizerConfig.pb.h"
namespace paddle {
namespace optimizer {
class BaseLr {
public:
-  LrPolicyBase(const OpitmizerConfig &config) {
-    learning_rate = config.lr_config().learning_rate();
-  }
+  BaseLr(double lr) : learning_rate(lr) {}
+  virtual ~BaseLr() {}
  virtual double get_learning_rate(const uint64_t num_sample_passed) = 0;
-private:
+protected:
  double learning_rate;
};
......
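BaseLr is now seeded with a plain double and exposes learning_rate to subclasses. The factory below expects a ConstLr policy whose definition is outside the shown hunks; a minimal sketch of such a subclass, assuming it lives next to BaseLr:

```cpp
// Sketch only, not the committed implementation.
class ConstLr : public BaseLr {
public:
  explicit ConstLr(double lr) : BaseLr(lr) {}
  double get_learning_rate(const uint64_t /*num_sample_passed*/) override {
    return learning_rate;  // accessible now that the member is protected
  }
};
```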
@@ -3,7 +3,7 @@
#include "parameter_optimizer.h"
-template <class T>
+template <paddle_element_type T>
struct EnumToType {};
template <class T>
@@ -11,15 +11,14 @@ struct TypeToEnum {};
#define MATCH_ENUM_TYPE(TYPE, ENUM) \
  template <> \
-  struct TypeToEnum<ENUM> { \
+  struct TypeToEnum<TYPE> { \
    static paddle_element_type v() { return ENUM; }; \
-    static constexpr TYPE value = ENUM;
-  }
-  ;
-  template <>
-  struct EnumToType<ENUM> {
-    typedef TYPE Type;
-  }
+    static constexpr TYPE value = ENUM; \
+  }; \
+  template <> \
+  struct EnumToType<ENUM> { \
+    typedef TYPE Type; \
+  }
MATCH_ENUM_TYPE(int32_t, PADDLE_ELEMENT_TYPE_INT32);
MATCH_ENUM_TYPE(uint32_t, PADDLE_ELEMENT_TYPE_UINT32);
@@ -27,11 +26,10 @@ MATCH_ENUM_TYPE(int64_t, PADDLE_ELEMENT_TYPE_INT64);
MATCH_ENUM_TYPE(uint64_t, PADDLE_ELEMENT_TYPE_UINT64);
MATCH_ENUM_TYPE(float, PADDLE_ELEMENT_TYPE_FLOAT32);
MATCH_ENUM_TYPE(double, PADDLE_ELEMENT_TYPE_FLOAT64);
-
struct paddle_optimizer {
  /*! \brief optmizer in C++ side */
-  paddle::optimizer::ParameterOptimzier* impl;
+  paddle::optimizer::ParameterOptimizerBase* impl;
};
paddle_optimizer* paddle_create_optimizer(const unsigned char* config_proto,
@@ -48,7 +46,7 @@ int paddle_release_optimizer(paddle_optimizer* o) {
}
int paddle_update_parameter(paddle_optimizer* o,
-                            paddle_element_type data_type,
+                            const paddle_element_type data_type,
                            const void* grad_buffer,
                            int num_bytes) {
  auto type = EnumToType<data_type>::Type;
@@ -59,7 +57,7 @@ int paddle_update_parameter(paddle_optimizer* o,
}
int paddle_optimizer_set_weights(paddle_optimizer* o,
-                                 paddle_element_type data_type,
+                                 const paddle_element_type data_type,
                                 void* param_buffer,
                                 int num_bytes) {
  auto type = EnumToType<data_type>::Type;
......
@@ -64,7 +64,7 @@ int paddle_release_optimizer(paddle_optimizer* o);
 * @return return exec status
 */
int paddle_update_parameter(paddle_optimizer* o,
-                            paddle_element_type data_type,
+                            const paddle_element_type data_type,
                            const void* gradient,
                            int num_bytes);
@@ -76,7 +76,7 @@ int paddle_update_parameter(paddle_optimizer* o,
 * @return return exec status
 */
int paddle_optimizer_set_weights(paddle_optimizer* o,
-                                 paddle_element_type data_type,
+                                 const paddle_element_type data_type,
                                 void* param_buffer,
                                 int num_bytes);
......
#include "parameter_optimizer.h"
#include <glog/logging.h> #include <glog/logging.h>
#include "optimizer_factory.h" #include "adadelta_optimizer.h"
#include "adagrad_optimizer.h"
#include "adam_optimizer.h"
#include "lr_policy.h"
#include "sgd_optimizer.h"
#include "parameter_optimizer.h"
namespace paddle { namespace paddle {
namespace optimizer { namespace optimizer {
...@@ -12,29 +17,40 @@ ParameterOptimizer<T> *ParameterOptimizer<T>::create( ...@@ -12,29 +17,40 @@ ParameterOptimizer<T> *ParameterOptimizer<T>::create(
CHECK(config.ParseFromString(config_proto) == 0) CHECK(config.ParseFromString(config_proto) == 0)
<< "error : optimizer config"; << "error : optimizer config";
CHECK(config_valid(config) == 0) << "error : invalid optimizer config "; CHECK(config_valid(config) == 0) << "error : invalid optimizer config ";
BaseLr *lr = nullptr;
switch (config.lr_policy()) {
case "ConstLr":
lr = new ConstLr(config.lr_config().learning_rate());
break;
}
ParameterOptimizer<T> *opt = nullptr; ParameterOptimizer<T> *opt = nullptr;
switch (config.optimizer_name()) { switch (config.optimizer_name()) {
case "SGD": case "SGD":
opt = new SGDOptimizer<T>(config); opt = new SGDOptimizer<T>(config.sgd().momentum(),
config.sgd().decay(),
config.sgd().nesterov(),
lr);
break; break;
case "Adagrad": case "Adagrad":
opt = new AdagradOptimizer<T>(config); opt = new AdagradOptimizer<T>(
config.adagrad().epsilon(), config.adagrad().decay(), lr);
break; break;
case "Adadelta": case "Adadelta":
opt = new AdadeltaOptimizer<T>(config); opt = new AdadeltaOptimizer<T>(config.adadelta().rho(),
config.adadelta().epsilon(),
config.adadelta().decay(),
lr);
break; break;
case "Adam": case "Adam":
opt = new AdamOptimizer<T>(config); opt = new AdamOptimizer<T>(config.adam().beta_1(),
config.adam().beta_2(),
config.adam().epsilon(),
config.adam().decay(),
lr);
break; break;
default:
opt = new SGDOptimizer<T>(config);
} }
switch (config.lr_policy()) {
case "ConstLr":
opt.lr_policy = new ConstLr(config);
break;
}
return opt; return opt;
} }
......
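A caveat on the factory above: C++ does not allow string literals as switch case labels, so `switch (config.lr_policy())` with `case "ConstLr":` (and the optimizer_name() switch) will not compile as written. A sketch of dispatch with the same intent, assuming both accessors return strings:

```cpp
// Sketch only; mirrors the factory's intent, not the committed code.
BaseLr *lr = nullptr;
if (config.lr_policy() == "ConstLr") {
  lr = new ConstLr(config.lr_config().learning_rate());
}
ParameterOptimizer<T> *opt = nullptr;
const std::string name = config.optimizer_name();
if (name == "SGD") {
  opt = new SGDOptimizer<T>(config.sgd().momentum(), config.sgd().decay(),
                            config.sgd().nesterov(), lr);
} else if (name == "Adagrad") {
  opt = new AdagradOptimizer<T>(config.adagrad().epsilon(),
                                config.adagrad().decay(), lr);
}  // Adadelta and Adam follow the same pattern.
return opt;
```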
@@ -11,6 +11,12 @@
namespace paddle {
namespace optimizer {
+class ParameterOptimizerBase {
+private:
+  ParameterOptimizerBase(const ParameterOptimizerBase &) = delete;
+  ParameterOptimizerBase &operator=(const ParameterOptimizerBase &) = delete;
+};
template <class T>
class ParameterOptimizer {
public:
@@ -18,18 +24,18 @@ public:
   * @brief update hook for algorithm need to traverse parameter more than
   * once.
   */
-  // use config for pack trainig state
  ParameterOptimizer(const OptimizerConfig &config) : config_(config){};
+  ParameterOptimizer(BaseLr *lr) : lr_policy(lr), num_sample_passed(0) {}
+  virtual ~ParameterOptimizer() { delete parameter_; };
  static ParameterOptimizer *create(const ::std::string &config_proto);
-  virtual void update(const Tensor &gradient) = 0;
+  virtual void update(const Tensor<T> &gradient) = 0;
-  virtual void destroy() = 0;
  virtual T *get_weight() const;
  virtual void set_weight(const Tensor<T> *parameter);
-  // package optimizer config proto in runtime for saving checkpoint
-  virtual char *get_config_proto();
-  ~ParameterOptimzier() { delete parameter_; }
-private:
+public:
  bool config_valid(::std::string &config) const;
  OptimizerConfig config_;
  Tensor<T> *parameter_;
@@ -37,12 +43,6 @@ private:
  // learning rate policy
  BaseLr *lr_policy;
  uint64_t num_sample_passed;
-  ParameterOptimizer(const ParameterOptimizer &) = delete;
-  ParameterOptimizer &operator=(const ParameterOptimizer &) = delete;
-  /**
-   * @brief indicate if use L1, L2 regularizer
-   */
};
}  // namespace optimizer
......
@@ -19,6 +19,8 @@ Regularizer<T>* Regularizer<T>::create(const std::string& config) {
template class L1Regularizer<float>;
template class L1Regularizer<double>;
+template class L2Regularizer<float>;
+template class L2Regularizer<double>;
}  // namespace optimizer
}  // namespace paddle
@@ -9,8 +9,18 @@ namespace optimizer {
template <class T>
class SGDOptimizer : public ParameterOptimizer<T> {
public:
-  SGDOptimizer(const ::paddle::OptimizerConfig& config);
-  ~SGDOptimizer() {
+  using ParameterOptimizer<T>::parameter_;
+  using ParameterOptimizer<T>::num_sample_passed;
+  using ParameterOptimizer<T>::lr_policy;
+  SGDOptimizer(double m,
+               double d,
+               bool n,
+               double learning_rate,
+               uint64_t num_sample_passed,
+               BaseLr* lr)
+      : ParameterOptimizer<T>(lr), momentum(m), decay(d), nesterov(n) {}
+  virtual ~SGDOptimizer() {
    // clear memory by Tensor library
    delete momentums_;
  }
@@ -18,7 +28,6 @@ public:
  void set_weight(const Tensor<T>* p);
  T* get_weight() const;
-  char* get_config_proto();
private:
  Tensor<T>* momentums_;
......
@@ -3,18 +3,10 @@
namespace paddle {
namespace optimizer {
-template <class T>
-SGDOptimizer<T>::SGDOptimizer(const ::paddle::OptimizerConfig &config)
-    : ParameterOptimizer<T>(config) {
-  momentum = config.sgd().momentum();
-  decay = config.sgd().decay();
-  nesterov = config.sgd().nesterov();
-}
template <class T>
void SGDOptimizer<T>::set_weight(const Tensor<T> *p) {
  // ParameterOptimizer::set_weight(p);
-  size_t size = p->width();
+  size_t size = p->size();
  // TODO: fix it with align aware allocator bind to Tensor
  if (momentum != 0.0) {
    T *ptr = new T[size];
@@ -27,7 +19,7 @@ void SGDOptimizer<T>::update(const Tensor<T> &gradient) {
  num_sample_passed += 1;
  double learning_rate = lr_policy->get_learning_rate(num_sample_passed);
  double velocity = 0.0;
-  for (size_t i = 0; i < parameter_.size(); ++i) {
+  for (size_t i = 0; i < parameter_->size(); ++i) {
    if (momentum == 0.0) {
      velocity =
          -learning_rate * gradient[i] - learning_rate * decay * parameter_[i];
@@ -44,15 +36,6 @@ void SGDOptimizer<T>::update(const Tensor<T> &gradient) {
  }
}
-template <class T>
-char *SGDOptimizer<T>::get_config_proto() {
-  ParameterOptimizer::get_config_proto();
-  config.set_learning_rate(learning_rate);
-  config.set_decay(decay);
-  config.set_nesterov(nesterov);
-  return config.SerializeAsString().c_str();
-}
template class SGDOptimizer<float>;
template class SGDOptimizer<double>;
......
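For the branch of the SGD loop that is visible above (momentum == 0), each parameter moves by a velocity that combines the gradient step with weight decay; the momentum branch lies in a hidden hunk, so its line below is the conventional form (an assumption), not a quote of the committed code:

```latex
\begin{aligned}
\text{momentum}=0:&\quad v_i = -\eta\, g_i - \eta\,\lambda\,\theta_i, \qquad \theta_i \leftarrow \theta_i + v_i \\
\text{momentum}=m:&\quad v_i \leftarrow m\, v_i - \eta\, g_i - \eta\,\lambda\,\theta_i, \qquad \theta_i \leftarrow \theta_i + v_i
\end{aligned}
```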
@@ -12,7 +12,7 @@ message SGDConfig {
  optional double momentum = 21 [default = 0.0];
  optional double decay = 23 [default = 0.0];
  optional bool nesterov =24 [default = false];
+}
message AdadeltaConfig {
@@ -95,5 +95,4 @@ message OptimizerConfig {
  // common config of optimizer
  optional double clipnorm = 101;
  optional double clipvalue = 102;
}