From 62cd5c7ae65d085d3ca0dd85ee9a1b30b542d604 Mon Sep 17 00:00:00 2001 From: dzhwinter Date: Sun, 4 Jun 2017 22:38:54 +0800 Subject: [PATCH] "failed to resolve conflict. apply to HEAD" --- paddle/optimizer/CMakeLists.txt | 29 +++++++ paddle/optimizer/Tensor.h | 28 +++++++ paddle/optimizer/adadelta_optimizer.cc | 47 +++++++++++ paddle/optimizer/adadelta_optimizer.h | 35 ++++++++ paddle/optimizer/adagrad_optimizer.cc | 38 +++++++++ paddle/optimizer/adagrad_optimizer.h | 29 +++++++ paddle/optimizer/adam_optimizer.cc | 38 +++++++++ paddle/optimizer/adam_optimizer.h | 29 +++++++ paddle/optimizer/lr_policy.h | 31 +++++++ paddle/optimizer/optimizer.cc | 75 +++++++++++++++++ paddle/optimizer/optimizer.h | 92 +++++++++++++++++++++ paddle/optimizer/optimizer_factory_test.cpp | 32 +++++++ paddle/optimizer/optimizer_test.cpp | 11 +++ paddle/optimizer/parameter_optimizer.cc | 71 ++++++++++++++++ paddle/optimizer/parameter_optimizer.h | 51 ++++++++++++ paddle/optimizer/regularizer.cc | 24 ++++++ paddle/optimizer/regularizer.h | 45 ++++++++++ paddle/optimizer/sgd_optimizer.h | 33 ++++++++ paddle/optimizer/sgd_optmizer.cc | 60 ++++++++++++++ 19 files changed, 798 insertions(+) create mode 100644 paddle/optimizer/CMakeLists.txt create mode 100644 paddle/optimizer/Tensor.h create mode 100644 paddle/optimizer/adadelta_optimizer.cc create mode 100644 paddle/optimizer/adadelta_optimizer.h create mode 100644 paddle/optimizer/adagrad_optimizer.cc create mode 100644 paddle/optimizer/adagrad_optimizer.h create mode 100644 paddle/optimizer/adam_optimizer.cc create mode 100644 paddle/optimizer/adam_optimizer.h create mode 100644 paddle/optimizer/lr_policy.h create mode 100644 paddle/optimizer/optimizer.cc create mode 100644 paddle/optimizer/optimizer.h create mode 100644 paddle/optimizer/optimizer_factory_test.cpp create mode 100644 paddle/optimizer/optimizer_test.cpp create mode 100644 paddle/optimizer/parameter_optimizer.cc create mode 100644 paddle/optimizer/parameter_optimizer.h create mode 100644 paddle/optimizer/regularizer.cc create mode 100644 paddle/optimizer/regularizer.h create mode 100644 paddle/optimizer/sgd_optimizer.h create mode 100644 paddle/optimizer/sgd_optmizer.cc diff --git a/paddle/optimizer/CMakeLists.txt b/paddle/optimizer/CMakeLists.txt new file mode 100644 index 00000000000..134ca9e9d64 --- /dev/null +++ b/paddle/optimizer/CMakeLists.txt @@ -0,0 +1,29 @@ +include_directories(${CMAKE_CURRENT_BINARY_DIR}) + +set(OPITMIZER_SRCS + adadelta_optimizer.cc + adagrad_optimizer.cc + adam_optimizer.cc + optimizer.cc + parameter_optimizer.cc + sgd_optmizer.cc + regularizer.cc + ) + +set(OPITMIZER_Headers + adadelta_optimizer.h + adagrad_optimizer.h + adam_optimizer.h + lr_policy.h + optimizer.h + parameter_optimizer.h + regularizer.h + sgd_optimizer.h + Tensor.h + ) + +add_library(optimizer STATIC ${OPITMIZER_SRCS}) +add_dependencies(optimizer gen_proto_cpp) + +add_simple_unittest(optimizer_test) +add_simple_unittest(optimizer_factory_test) diff --git a/paddle/optimizer/Tensor.h b/paddle/optimizer/Tensor.h new file mode 100644 index 00000000000..a8387c4df41 --- /dev/null +++ b/paddle/optimizer/Tensor.h @@ -0,0 +1,28 @@ +#ifndef PADDLE_OPTIMIZER_TENSOR_H_ +#define PADDLE_OPTIMIZER_TENSOR_H_ +/** + * @brief tensor used by optimizer + */ + +#include +#include "paddle/math/BaseMatrix.h" + +namespace paddle { +namespace optimizer { + +template +using TensorBase = BaseMatrixT; + +template +class Tensor : public TensorBase { +public: + Tensor(T* data, int size) : TensorBase(size, 1, 0, data, false, false) {} 
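+  // Tensor is a thin wrapper over a caller-provided buffer of `size`
+  // elements, reusing BaseMatrixT for storage; get_buffer() returns the
+  // raw pointer and width() is what the optimizers use as the element
+  // count (see the TODO below about switching to a real tensor shape).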
+ T* get_buffer() { return this->data_; } + // TODO: replace with tensorshape + size_t width() { return this->width_; } +}; + +} // namespace optimizer +} // namespace paddle + +#endif diff --git a/paddle/optimizer/adadelta_optimizer.cc b/paddle/optimizer/adadelta_optimizer.cc new file mode 100644 index 00000000000..39d465cebe6 --- /dev/null +++ b/paddle/optimizer/adadelta_optimizer.cc @@ -0,0 +1,47 @@ +#include "adadelta_optimizer.h" +#include + +namespace paddle { +namespace optimizer { +template +AdadeltaOptimizer::AdadeltaOptimizer(const ::paddle::OptimizerConfig& config) + : ParameterOptimizer(config) { + rho = config.adadelta().rho(); + epsilon = config.adadelta().epsilon(); + decay = config.adadelta().decay(); +} + +template +void AdadeltaOptimizer::set_weight(const Tensor* p) { + size_t size = p->width(); + T* gptr = new T[size]; + accum_gradient = Tensor(gptr, size); + T* dptr = new T[size]; + accum_delta = Tensor(dtpr, size); + T* dptr_current = new T[size]; + update_delta = Tensor(dptr_current, size); +} + +template +void AdadeltaOptimizer::update(const Tensor& gradient) { + num_sample_passed += 1; + double learning_rate = lr_policy->get_learning_rate(); + for (size_t i = 0; i < parameter_.size(); ++i) { + accum_gradient[i] = + rho * accum_gradient[i] + (1.0 - rho) * gradient[i] * gradient[i]; + + update_delta[i] = std::sqrt(accum_delta[i] + epsilon) / + std::sqrt(accum_gradient[i] + epsilon) * gradient[i]; + + accum_delta[i] = + rho * accum_delta[i] + (1.0 - rho) * update_delta[i] * update_delta[i]; + + parameter_[i] -= update_delta[i] + decay * parameter_[i]; + } +} + +template class AdadeltaOptimizer; +template class AdadeltaOptimizer; + +} // namespace optimizer +} // namespace paddle diff --git a/paddle/optimizer/adadelta_optimizer.h b/paddle/optimizer/adadelta_optimizer.h new file mode 100644 index 00000000000..1a8c03f2682 --- /dev/null +++ b/paddle/optimizer/adadelta_optimizer.h @@ -0,0 +1,35 @@ +#ifndef PADDLE_ADADELTA_OPTIMIZER_H_ +#define PADDLE_ADADELTA_OPTIMIZER_H_ + +#include "parameter_optimizer.h" + +namespace paddle { +namespace optimizer { + +template +class AdadeltaOptimizer : public ParameterOptimizer { +public: + AdadeltaOptimizer(const OptimizerConfig &config); + ~AdadeltaOptimizer() { + if (accum_gradient) delete accum_gradient; + if (accum_delta) delete accum_delta; + if (update_delta) delete update_delta; + } + void update(const Tensor &gradient); + void set_weight(const Tensor *p); + T *get_weight() const; + +private: + Tensor *accum_gradient; + Tensor *accum_delta; + Tensor *update_delta; + + double rho; + double epsilon; + double decay; +}; + +} // namespace optimizer +} // namespace paddle + +#endif diff --git a/paddle/optimizer/adagrad_optimizer.cc b/paddle/optimizer/adagrad_optimizer.cc new file mode 100644 index 00000000000..40402a67108 --- /dev/null +++ b/paddle/optimizer/adagrad_optimizer.cc @@ -0,0 +1,38 @@ +#include "adagrad_optimizer.h" + +namespace paddle { +namespace optimizer { +template +AdagradOptimizer::AdagradOptimizer(const ::paddle::OptimizerConfig& config) + : ParameterOptimizer(config) { + epsilon = config.adagrad().epsilon(); + decay = config.adagrad().decay(); +} + +template +void AdagradOptimizer::set_weight(const Tensor* p) { + size_t size = p->width(); + T* gptr = new T[size]; + accum_gradient = Tensor(gptr, size); + T* dptr = new T[size]; + accum_delta = Tensor(dtpr, size); + T* dptr_current = new T[size]; + update_delta = Tensor(dptr_current, size); +} + +template +void AdagradOptimizer::update(const Tensor& gradient) { + 
num_sample_passed += 1; + double learning_rate = lr_policy->get_learning_rate(); + for (size_t i = 0; i < parameter_.size(); ++i) { + accum_gradient[i] += gradient[i] * gradient[i]; + parameter_[i] += + learning_rate * (gradient[i] / std::sqrt(accum_gradient[i] + epsilon) + + decay * parameter_[i]); + } +} + +template class AdagradOptimizer; +template class AdagradOptimizer; +} // namespace optimizer +} // namespace paddle diff --git a/paddle/optimizer/adagrad_optimizer.h b/paddle/optimizer/adagrad_optimizer.h new file mode 100644 index 00000000000..1ec438fd05a --- /dev/null +++ b/paddle/optimizer/adagrad_optimizer.h @@ -0,0 +1,29 @@ +#ifndef PADDLE_ADAGRAD_OPTIMIZER_H_ +#define PADDLE_ADAGRAD_OPTIMIZER_H_ + +#include "parameter_optimizer.h" + +namespace paddle { +namespace optimizer { + +template +class AdagradOptimizer : public ParameterOptimizer { +public: + AdagradOptimizer(const OptimizerConfig &config); + ~AdagradOptimizer() { + if (accum_gradient) delete accum_gradient; + } + void update(const Tensor &gradient); + void set_weight(const Tensor *p); + T *get_weight() const; + +private: + Tensor *accum_gradient; + double epsilon; + double decay; +}; + +} // namespace optimizer +} // namespace paddle + +#endif diff --git a/paddle/optimizer/adam_optimizer.cc b/paddle/optimizer/adam_optimizer.cc new file mode 100644 index 00000000000..c2303c6545e --- /dev/null +++ b/paddle/optimizer/adam_optimizer.cc @@ -0,0 +1,38 @@ +#include "adam_optimizer.h" + +namespace paddle { +namespace optimizer { +template +AdamOptimizer::AdamOptimizer(const ::paddle::OptimizerConfig &config) + : ParameterOptimizer(config) { + beta_1 = config.adam().beta_1(); + beta_2 = config.adam().beta_2(); + epsilon = config.adam().epsilon(); + decay = config.adam().decay(); +} + +template +void AdamOptimizer::set_weight(const Tensor *p) { + size_t size = p->width(); + T *mptr = new T[size]; + momentums_ = Tensor(mptr, size); + T *vptr = new T[size]; + velocitys_ = Tensor(vtpr, size); +} + +template +void AdamOptimizer::update(const Tensor &gradient) { + num_sample_passed += 1; + double learning_rate = lr_policy->get_learning_rate(); + for (size_t i = 0; i < parameter_.size(); ++i) { + accum_gradient[i] += gradient[i] * gradient[i]; + parameter_[i] += + learning_rate * (gradient[i] / std::sqrt(accum_gradient[i] + epsilon) + + decay * parameter_[i]); + } +} + +template class AdamOptimizer; +template class AdamOptimizer; +} // namespace optimizer +} // namespace paddle diff --git a/paddle/optimizer/adam_optimizer.h b/paddle/optimizer/adam_optimizer.h new file mode 100644 index 00000000000..ceec18eb336 --- /dev/null +++ b/paddle/optimizer/adam_optimizer.h @@ -0,0 +1,29 @@ +#ifndef PADDLE_ADAM_OPTIMIZER_H_ +#define PADDLE_ADAM_OPTIMIZER_H_ + +#include "parameter_optimizer.h" + +namespace paddle { +namespace optimizer { + +template +class AdamOptimizer : public ParameterOptimizer { +public: + AdamOptimizer(const OptimizerConfig &config); + ~AdamOptimizer() {} + void update(const Tensor &gradient); + void set_weight(const Tensor *p); + T *get_weight() const; + +private: + Tensor *momentums_; + Tensor *velocitys_; + double beta_1; + double beta_2; + double epsilon; + double decay; +}; + +} // namespace optimizer +} // namespace paddle +#endif diff --git a/paddle/optimizer/lr_policy.h b/paddle/optimizer/lr_policy.h new file mode 100644 index 00000000000..6977b68de7b --- /dev/null +++ b/paddle/optimizer/lr_policy.h @@ -0,0 +1,31 @@ +#ifndef PADDLE_OPTIMIZER_LR_POLICY_H_ +#define PADDLE_OPTIMIZER_LR_POLICY_H_ + +#include 
"OptimizerConfig.ph.h" + +namespace paddle { +namespace optimizer { + +class BaseLr { +public: + LrPolicyBase(const OpitmizerConfig &config) { + learning_rate = config.lr_config().learning_rate(); + } + virtual double get_learning_rate(const uint64_t num_sample_passed) = 0; + +private: + double learning_rate; +}; + +// constant learning rate policy +class ConstLr final : public BaseLr { +public: + double get_learning_rate(const uint64_t num_sample_passed) { + return learning_rate; + } +}; + +} // namespace optimizer +} // namespace paddle + +#endif diff --git a/paddle/optimizer/optimizer.cc b/paddle/optimizer/optimizer.cc new file mode 100644 index 00000000000..e72881e5d0d --- /dev/null +++ b/paddle/optimizer/optimizer.cc @@ -0,0 +1,75 @@ +#include "optimizer.h" +#include + +#include "parameter_optimizer.h" + +template +struct EnumToType {}; + +template +struct TypeToEnum {}; + +#define MATCH_ENUM_TYPE(TYPE, ENUM) \ + template <> \ + struct TypeToEnum { \ + static paddle_element_type v() { return ENUM; }; \ + static constexpr TYPE value = ENUM; +} +; +template <> +struct EnumToType { + typedef TYPE Type; +} + +MATCH_ENUM_TYPE(int32_t, PADDLE_ELEMENT_TYPE_INT32); +MATCH_ENUM_TYPE(uint32_t, PADDLE_ELEMENT_TYPE_UINT32); +MATCH_ENUM_TYPE(int64_t, PADDLE_ELEMENT_TYPE_INT64); +MATCH_ENUM_TYPE(uint64_t, PADDLE_ELEMENT_TYPE_UINT64); +MATCH_ENUM_TYPE(float, PADDLE_ELEMENT_TYPE_FLOAT32); +MATCH_ENUM_TYPE(double, PADDLE_ELEMENT_TYPE_FLOAT64); + +struct paddle_optimizer { + /*! \brief optmizer in C++ side */ + + paddle::optimizer::ParameterOptimzier* impl; +}; + +paddle_optimizer* paddle_create_optimizer(const unsigned char* config_proto, + int config_proto_len) { + paddle_optimizer* optimizer; + std::string config(config_proto, config_proto + config_proto_len); + optimizer->impl->create(config_proto); + return optimizer; +} + +int paddle_release_optimizer(paddle_optimizer* o) { + if (o != nullptr) delete o->impl; + return PADDLE_SUCCESS; +} + +int paddle_update_parameter(paddle_optimizer* o, + paddle_element_type data_type, + const void* grad_buffer, + int num_bytes) { + auto type = EnumToType::Type; + paddle::Tensor gradient(reinterpret_cast(grad_buffer), + num_bytes); + o->impl->update(gradient); + return PADDLE_SUCCESS; +} + +int paddle_optimizer_set_weights(paddle_optimizer* o, + paddle_element_type data_type, + void* param_buffer, + int num_bytes) { + auto type = EnumToType::Type; + paddle::Tensor* param = new paddle::Tensor( + reinterpret_cast(param_buffer), num_bytes); + o->impl->set_weight(param); + return PADDLE_SUCCESS; +} + +void* paddle_optimizer_get_weights(paddle_optimizer* o) { + void* buffer = (void*)o->impl->get_weight(); + return buffer; +} diff --git a/paddle/optimizer/optimizer.h b/paddle/optimizer/optimizer.h new file mode 100644 index 00000000000..0eba2e78118 --- /dev/null +++ b/paddle/optimizer/optimizer.h @@ -0,0 +1,92 @@ +#ifndef PADDLE_LIB_OPTIMIZER_H_ +#define PADDLE_LIB_OPTIMIZER_H_ +#include +#include + +/*! \brief optimizer export C API. which will be used in + Case A, on Trainer (On ParameterServer Client) optimize gradient + + Case B, on ParameterServer side optimize gradient + + To simplify the configuration parsing. optimizer *do not* parse any config + e.g. learning rate should be calculated by the caller + */ + +#ifdef __cplusplus +extern "C" { +#endif +/*! 
\brief datatypes */ +typedef enum { + PADDLE_ELEMENT_TYPE_INT32 = 0, + PADDLE_ELEMENT_TYPE_UINT32 = 1, + PADDLE_ELEMENT_TYPE_INT64 = 2, + PADDLE_ELEMENT_TYPE_UINT64 = 3, + PADDLE_ELEMENT_TYPE_FLOAT32 = 4, + PADDLE_ELEMENT_TYPE_FLOAT64 = 5, +} paddle_element_type; + +/*! \brief execute status code */ +const int32_t PADDLE_SUCCESS = 0; +const int32_t PADDLE_ERROR = -1; + +typedef struct paddle_optimizer paddle_optimizer; +/** + * this group interface called in order : + * 1. create optimizer with config + * 2. set weights + * 3. update_parameter + * 4. get_weights + * 5. release optimizer + */ + +/** + * @brief create optimizer with proto_config + * @param config_proto, optimizer protobuf, see OptimizerConfig.proto in detail + * @return return optimizer instance + */ +paddle_optimizer* paddle_create_optimizer(const unsigned char* config_proto, + int config_proto_len); + +/** + * @brief release optimizer + * @param optimizer + * @return return exec status + */ +int paddle_release_optimizer(paddle_optimizer* o); + +/** + * @brief optimizer instance + * @param datatype of gradient and parameter + * @param gradient, calculate by optimzizer caller. + * TODO(zhihong): just pass loss to reduce communicate overhead. + * Project Adam Ms'14 paper for detail + * @param num_bytes, gradient size + * @return return exec status + */ +int paddle_update_parameter(paddle_optimizer* o, + paddle_element_type data_type, + const void* gradient, + int num_bytes); + +/** + * @brief optimizer instance + * @param data_type datatype of gradient + * @param param_buffer, initilized parameter buffer + * @param num_bytes, parameter size + * @return return exec status + */ +int paddle_optimizer_set_weights(paddle_optimizer* o, + paddle_element_type data_type, + void* param_buffer, + int num_bytes); + +/** + * @brief optimizer instance + * @return return content of parameter buffer in optimizer + */ +void* paddle_optimizer_get_weights(paddle_optimizer* o); + +#ifdef __cplusplus +} +#endif +#endif diff --git a/paddle/optimizer/optimizer_factory_test.cpp b/paddle/optimizer/optimizer_factory_test.cpp new file mode 100644 index 00000000000..67a9506996f --- /dev/null +++ b/paddle/optimizer/optimizer_factory_test.cpp @@ -0,0 +1,32 @@ +#include "optimizer_factory.h" +#include "gtest/gtest.h" +#include "parameter_optimizer.h" + +#define float TestType; + +class OptimizerTest : public testing::Test { +public: + virtual void SetUp() { + paddle::OptimizerConfig config; + config.set_learning_rate(0.01); + config.set_decay(0.0); + config.set_momentum(0.0); + config.set_nesterov(false); + config.set_lr_decay_a(0.9); + config.set_lr_decay_b(0.1); + + std::string config_proto = config.SerializeAsString(); + ParameterOptimizer::create(config_proto, ) + } + virtual void TearDown() {} + +private: + ParameterOptimizer* o; +}; + +TEST_F(OptimizerTest, createOptimizer) {} + +int main(int argc, char** argv) { + testing::InitGoogleTest(&argc, argv); + return RUN_ALL_TESTS(); +} diff --git a/paddle/optimizer/optimizer_test.cpp b/paddle/optimizer/optimizer_test.cpp new file mode 100644 index 00000000000..1bdc6f40fca --- /dev/null +++ b/paddle/optimizer/optimizer_test.cpp @@ -0,0 +1,11 @@ +#include "optimizer.h" +#include "gtest/gtest.h" + +template +class Opitmizer_C_Test : public testing::Test { +private: + Tensor parameter; + Tensor gradient; +}; + +void applyGradientDescent_TEST() {} diff --git a/paddle/optimizer/parameter_optimizer.cc b/paddle/optimizer/parameter_optimizer.cc new file mode 100644 index 00000000000..c5e9e0acc30 --- /dev/null +++ 
b/paddle/optimizer/parameter_optimizer.cc @@ -0,0 +1,71 @@ +#include "parameter_optimizer.h" +#include +#include "optimizer_factory.h" + +namespace paddle { +namespace optimizer { + +template +ParameterOptimizer *ParameterOptimizer::create( + const ::std::string &config_proto) { + paddle::OptimizerConfig config; + CHECK(config.ParseFromString(config_proto) == 0) + << "error : optimizer config"; + CHECK(config_valid(config) == 0) << "error : invalid optimizer config "; + ParameterOptimizer *opt = nullptr; + switch (config.optimizer_name()) { + case "SGD": + opt = new SGDOptimizer(config); + break; + case "Adagrad": + opt = new AdagradOptimizer(config); + break; + case "Adadelta": + opt = new AdadeltaOptimizer(config); + break; + case "Adam": + opt = new AdamOptimizer(config); + break; + default: + opt = new SGDOptimizer(config); + } + + switch (config.lr_policy()) { + case "ConstLr": + opt.lr_policy = new ConstLr(config); + break; + } + return opt; +} + +template +T *ParameterOptimizer::get_weight() const { + return parameter.get().get_buffer(); +} + +template +char *ParameterOptimizer::get_config_proto() const { + // set config dynamic value for save checkpoint + config_.lr_policy().set_learning_rate( + lr_policy->get_learning_rate(num_sample_passed)); + config_.set_num_sample_passed(num_sample_passed); + config_.set_iterations(iterations); + return config_.SerializeAsString().c_str(); +} + +template +void ParameterOptimizer::set_weight(const Tensor *p) { + parameter_ = p; +} + +template +bool ParameterOptimizer::config_valid(const ::std::string &config) const { + // TODO(zhihong) : add more value checker, failed ASAP + return true; +} + +template class ParameterOptimzier; +template class ParameterOptimzier; + +} // namespace optimizer +} // namespace paddle diff --git a/paddle/optimizer/parameter_optimizer.h b/paddle/optimizer/parameter_optimizer.h new file mode 100644 index 00000000000..d5914857af0 --- /dev/null +++ b/paddle/optimizer/parameter_optimizer.h @@ -0,0 +1,51 @@ +#ifndef PADDLE_PARAMETER_OPTIMIZER_H_ +#define PADDLE_PARAMETER_OPTIMIZER_H_ + +#include +#include +#include +#include "OptimizerConfig.pb.h" +#include "Tensor.h" +#include "lr_policy.h" + +namespace paddle { +namespace optimizer { + +template +class ParameterOptimizer { +public: + /** + * @brief update hook for algorithm need to traverse parameter more than + * once. 
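+   * ParameterOptimizer is the common interface: create() builds a concrete
+   * optimizer (SGD, Adagrad, Adadelta, Adam) from a serialized
+   * OptimizerConfig proto, set_weight()/get_weight() expose the parameter
+   * buffer, and update() applies one gradient step.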
+ */ + ParameterOptimizer(const OptimizerConfig &config) : config_(config){}; + + static ParameterOptimizer *create(const ::std::string &config_proto); + virtual void update(const Tensor &gradient) = 0; + virtual void destroy() = 0; + virtual T *get_weight() const; + virtual void set_weight(const Tensor *parameter); + // package optimizer config proto in runtime for saving checkpoint + virtual char *get_config_proto(); + ~ParameterOptimzier() { delete parameter_; } + +private: + bool config_valid(::std::string &config) const; + OptimizerConfig config_; + Tensor *parameter_; + + // learning rate policy + BaseLr *lr_policy; + uint64_t num_sample_passed; + + ParameterOptimizer(const ParameterOptimizer &) = delete; + ParameterOptimizer &operator=(const ParameterOptimizer &) = delete; + /** + * @brief indicate if use L1, L2 regularizer + */ +}; + +} // namespace optimizer +} // namespace paddle + +#endif diff --git a/paddle/optimizer/regularizer.cc b/paddle/optimizer/regularizer.cc new file mode 100644 index 00000000000..dd21c20e711 --- /dev/null +++ b/paddle/optimizer/regularizer.cc @@ -0,0 +1,24 @@ +#include "regularizer.h" + +namespace paddle { +namespace optimizer { + +template +Regularizer* Regularizer::create(const std::string& config) { + paddle::OptimizerConfig config; + Regularizer* r; + if (config.regularizer_type() == paddle::OptimizerConfig_RegularizerType_L1) { + r = new L1Regularizer(config); + } else if (config.regularizer_type() == + paddle::OptimizerConfig_RegularizerType_L2) { + r = new L2Regularizer(config); + break; + } + return r; +} + +template class L1Regularizer; +template class L1Regularizer; + +} // namespace optimizer +} // namespace paddle diff --git a/paddle/optimizer/regularizer.h b/paddle/optimizer/regularizer.h new file mode 100644 index 00000000000..e37211ce230 --- /dev/null +++ b/paddle/optimizer/regularizer.h @@ -0,0 +1,45 @@ +#ifndef PADDLE_OPITMIZER_REGULARIZER_H_ +#define PADDLE_OPTIMIZER_REGULARIZER_H_ + +#include "OptimizerConfig.pb.h" +#include "Tensor.h" + +namespace paddle { +namespace optimizer { + +/** + * @brief regularizer in L1, L2 + */ + +template +class Regularizer { +public: + /** + * @brief regularizer update interface + * @param param need to update + * @return void + */ + static Regularizer *create(const std::string &config); + virtual void update(Tensor ¶meter) = 0; + +private: + std::string regularizer_name; + OptimizerConfig config_; +}; + +template +class L1Regularizer { +public: + void update(Tensor ¶meter); +}; + +template +class L2Regularizer { +public: + void update(Tensor ¶meter); +}; + +} // namespace optimizer +} // namespace paddle + +#endif diff --git a/paddle/optimizer/sgd_optimizer.h b/paddle/optimizer/sgd_optimizer.h new file mode 100644 index 00000000000..4e1d9669c96 --- /dev/null +++ b/paddle/optimizer/sgd_optimizer.h @@ -0,0 +1,33 @@ +#ifndef PADDLE_SGD_OPTIMIZER_H_ +#define PADDLE_SGD_OPTIMIZER_H_ + +#include "parameter_optimizer.h" + +namespace paddle { +namespace optimizer { + +template +class SGDOptimizer : public ParameterOptimizer { +public: + SGDOptimizer(const ::paddle::OptimizerConfig& config); + ~SGDOptimizer() { + // clear memory by Tensor library + delete momentums_; + } + void update(const Tensor& gradient); + + void set_weight(const Tensor* p); + T* get_weight() const; + char* get_config_proto(); + +private: + Tensor* momentums_; + double momentum; + double decay; + bool nesterov; +}; + +} // namespace optimizer +} // namespace paddle + +#endif diff --git a/paddle/optimizer/sgd_optmizer.cc 
b/paddle/optimizer/sgd_optmizer.cc
new file mode 100644
index 00000000000..ff23d46dc6f
--- /dev/null
+++ b/paddle/optimizer/sgd_optmizer.cc
@@ -0,0 +1,60 @@
+#include "sgd_optimizer.h"
+
+namespace paddle {
+namespace optimizer {
+
+template <class T>
+SGDOptimizer<T>::SGDOptimizer(const ::paddle::OptimizerConfig &config)
+    : ParameterOptimizer<T>(config) {
+  momentum = config.sgd().momentum();
+  decay = config.sgd().decay();
+  nesterov = config.sgd().nesterov();
+}
+
+template <class T>
+void SGDOptimizer<T>::set_weight(const Tensor<T> *p) {
+  //  ParameterOptimizer::set_weight(p);
+  size_t size = p->width();
+  // TODO: fix it with align aware allocator bind to Tensor
+  if (momentum != 0.0) {
+    T *ptr = new T[size];
+    momentums_ = new Tensor<T>(ptr, size);
+  }
+}
+
+template <class T>
+void SGDOptimizer<T>::update(const Tensor<T> &gradient) {
+  num_sample_passed += 1;
+  double learning_rate = lr_policy->get_learning_rate(num_sample_passed);
+  double velocity = 0.0;
+  for (size_t i = 0; i < parameter_->width(); ++i) {
+    if (momentum == 0.0) {
+      velocity = -learning_rate * gradient[i] -
+                 learning_rate * decay * (*parameter_)[i];
+    } else {
+      (*momentums_)[i] = momentum * (*momentums_)[i] -
+          learning_rate * (gradient[i] + decay * (*parameter_)[i]);
+      velocity = (*momentums_)[i];
+    }
+    if (nesterov) {
+      (*parameter_)[i] += momentum * velocity - learning_rate * gradient[i];
+    } else {
+      (*parameter_)[i] += velocity;
+    }
+  }
+}
+
+template <class T>
+char *SGDOptimizer<T>::get_config_proto() {
+  ParameterOptimizer<T>::get_config_proto();
+  config_.mutable_sgd()->set_momentum(momentum);
+  config_.mutable_sgd()->set_decay(decay);
+  config_.mutable_sgd()->set_nesterov(nesterov);
+  return config_.SerializeAsString().c_str();
+}
+
+template class SGDOptimizer<float>;
+template class SGDOptimizer<double>;
+
+}  // namespace optimizer
+}  // namespace paddle
--
GitLab
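For reference, a minimal sketch of how a caller is expected to drive the C API declared in optimizer.h, in the order documented there (create, set weights, update, read back, release). The `config_bytes`/`config_len` arguments stand in for a serialized OptimizerConfig message produced elsewhere, and the buffers and sizes below are placeholders, not values from this patch; note that optimizer.cc currently forwards `num_bytes` straight into the Tensor constructor as the element count.

#include "optimizer.h"

// Minimal usage sketch (hypothetical caller, not part of the patch).
void run_one_step(const unsigned char* config_bytes, int config_len) {
  float param[4] = {0.f, 0.f, 0.f, 0.f};
  float grad[4] = {0.1f, -0.2f, 0.3f, -0.4f};

  // 1. create the optimizer from the protobuf config
  paddle_optimizer* o = paddle_create_optimizer(config_bytes, config_len);
  // 2. hand over the parameter buffer
  paddle_optimizer_set_weights(o, PADDLE_ELEMENT_TYPE_FLOAT32, param, 4);
  // 3. apply one gradient step
  paddle_update_parameter(o, PADDLE_ELEMENT_TYPE_FLOAT32, grad, 4);
  // 4. read the updated weights back
  float* updated = static_cast<float*>(paddle_optimizer_get_weights(o));
  (void)updated;
  // 5. release the optimizer
  paddle_release_optimizer(o);
}

Separately, adam_optimizer.cc reads beta_1, beta_2, epsilon and decay and allocates momentums_ and velocitys_ in set_weight(), while its update() body mirrors the Adagrad rule. For comparison, a standard per-element Adam step over such buffers looks roughly as follows; this is a generic sketch with a hypothetical adam_step helper (t is the 1-based step count), not code taken from the patch.

#include <cmath>
#include <cstddef>

// Generic Adam step: first/second moment estimates with bias correction.
template <class T>
void adam_step(T* param, const T* grad, T* m, T* v, size_t n, double lr,
               double beta1, double beta2, double epsilon, size_t t) {
  const double bc1 = 1.0 - std::pow(beta1, static_cast<double>(t));
  const double bc2 = 1.0 - std::pow(beta2, static_cast<double>(t));
  for (size_t i = 0; i < n; ++i) {
    m[i] = beta1 * m[i] + (1.0 - beta1) * grad[i];            // first moment
    v[i] = beta2 * v[i] + (1.0 - beta2) * grad[i] * grad[i];  // second moment
    const double m_hat = m[i] / bc1;  // bias-corrected estimates
    const double v_hat = v[i] / bc2;
    param[i] -= lr * m_hat / (std::sqrt(v_hat) + epsilon);
  }
}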