Commit 62cd5c7a authored by dzhwinter

"failed to resolve conflict. apply to HEAD"

Parent e1dbb2fc
include_directories(${CMAKE_CURRENT_BINARY_DIR})
set(OPTIMIZER_SRCS
adadelta_optimizer.cc
adagrad_optimizer.cc
adam_optimizer.cc
optimizer.cc
parameter_optimizer.cc
sgd_optimizer.cc
regularizer.cc
)
set(OPTIMIZER_HEADERS
adadelta_optimizer.h
adagrad_optimizer.h
adam_optimizer.h
lr_policy.h
optimizer.h
parameter_optimizer.h
regularizer.h
sgd_optimizer.h
Tensor.h
)
add_library(optimizer STATIC ${OPTIMIZER_SRCS})
add_dependencies(optimizer gen_proto_cpp)
add_simple_unittest(optimizer_test)
add_simple_unittest(optimizer_factory_test)
#ifndef PADDLE_OPTIMIZER_TENSOR_H_
#define PADDLE_OPTIMIZER_TENSOR_H_
/**
* @brief tensor used by optimizer
*/
#include <string.h>
#include "paddle/math/BaseMatrix.h"
namespace paddle {
namespace optimizer {
template <class T>
using TensorBase = BaseMatrixT<T>;
template <class T>
class Tensor : public TensorBase<T> {
public:
Tensor(T* data, int size) : TensorBase<T>(size, 1, 0, data, false, false) {}
T* get_buffer() { return this->data_; }
T& operator[](const size_t idx) { return this->data_[idx]; }
const T& operator[](const size_t idx) const { return this->data_[idx]; }
// TODO: replace with tensorshape
// number of elements; the buffer is wrapped as a size x 1 column vector
size_t size() const { return this->height_ * this->width_; }
};
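// Illustrative use (the names below are placeholders, not part of this header):
//   float buf[16] = {0};
//   Tensor<float> t(buf, 16);
//   t[0] = 1.0f;                   // element access through operator[]
//   float* raw = t.get_buffer();   // raw pointer to the wrapped buffer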
} // namespace optimizer
} // namespace paddle
#endif
#include "adadelta_optimizer.h"
#include <algorithm>
namespace paddle {
namespace optimizer {
template <class T>
AdadeltaOptimizer<T>::AdadeltaOptimizer(const ::paddle::OptimizerConfig& config)
: ParameterOptimizer<T>(config) {
rho = config.adadelta().rho();
epsilon = config.adadelta().epsilon();
decay = config.adadelta().decay();
}
template <class T>
void AdadeltaOptimizer<T>::set_weight(const Tensor<T>* p) {
ParameterOptimizer<T>::set_weight(p);
size_t size = p->size();
// zero-initialize the accumulators
T* gptr = new T[size]();
accum_gradient = new Tensor<T>(gptr, size);
T* dptr = new T[size]();
accum_delta = new Tensor<T>(dptr, size);
T* dptr_current = new T[size]();
update_delta = new Tensor<T>(dptr_current, size);
}
template <class T>
void AdadeltaOptimizer<T>::update(const Tensor<T>& gradient) {
this->num_sample_passed += 1;
Tensor<T>& param = *this->parameter_;
Tensor<T>& accum_g = *accum_gradient;
Tensor<T>& accum_d = *accum_delta;
Tensor<T>& delta = *update_delta;
// AdaDelta keeps running averages of the squared gradients and squared
// updates; each step is scaled by RMS(update) / RMS(gradient), so no global
// learning rate is applied here.
for (size_t i = 0; i < param.size(); ++i) {
accum_g[i] = rho * accum_g[i] + (1.0 - rho) * gradient[i] * gradient[i];
delta[i] = std::sqrt(accum_d[i] + epsilon) /
std::sqrt(accum_g[i] + epsilon) * gradient[i];
accum_d[i] = rho * accum_d[i] + (1.0 - rho) * delta[i] * delta[i];
param[i] -= delta[i] + decay * param[i];
}
}
template class AdadeltaOptimizer<float>;
template class AdadeltaOptimizer<double>;
} // namespace optimizer
} // namespace paddle
#ifndef PADDLE_ADADELTA_OPTIMIZER_H_
#define PADDLE_ADADELTA_OPTIMIZER_H_
#include "parameter_optimizer.h"
namespace paddle {
namespace optimizer {
template <class T>
class AdadeltaOptimizer : public ParameterOptimizer<T> {
public:
AdadeltaOptimizer(const OptimizerConfig &config);
~AdadeltaOptimizer() {
if (accum_gradient) delete accum_gradient;
if (accum_delta) delete accum_delta;
if (update_delta) delete update_delta;
}
void update(const Tensor<T> &gradient);
void set_weight(const Tensor<T> *p);
private:
Tensor<T> *accum_gradient = nullptr;
Tensor<T> *accum_delta = nullptr;
Tensor<T> *update_delta = nullptr;
double rho;
double epsilon;
double decay;
};
} // namespace optimizer
} // namespace paddle
#endif
#include "adagrad_optimizer.h"
namespace paddle {
namespace optimizer {
template <class T>
AdagradOptimizer<T>::AdagradOptimizer(const ::paddle::OptimizerConfig& config)
: ParameterOptimizer<T>(config) {
epsilon = config.adagrad().epsilon();
decay = config.adagrad().decay();
}
template <class T>
void AdagradOptimizer<T>::set_weight(const Tensor<T>* p) {
ParameterOptimizer<T>::set_weight(p);
size_t size = p->size();
// AdaGrad only needs the squared-gradient accumulator; zero-initialize it
T* gptr = new T[size]();
accum_gradient = new Tensor<T>(gptr, size);
}
template <class T>
void AdagradOptimizer<T>::update(const Tensor<T>& gradient) {
this->num_sample_passed += 1;
double learning_rate =
this->lr_policy->get_learning_rate(this->num_sample_passed);
Tensor<T>& param = *this->parameter_;
Tensor<T>& accum_g = *accum_gradient;
for (size_t i = 0; i < param.size(); ++i) {
accum_g[i] += gradient[i] * gradient[i];
// descend along the gradient, scaled by the accumulated squared gradients
param[i] -= learning_rate * (gradient[i] / std::sqrt(accum_g[i] + epsilon) +
decay * param[i]);
}
}
template class AdagradOptimizer<float>;
template class AdagradOptimizer<double>;
} // namespace optimizer
} // namespace paddle
#ifndef PADDLE_ADAGRAD_OPTIMIZER_H_
#define PADDLE_ADAGRAD_OPTIMIZER_H_
#include "parameter_optimizer.h"
namespace paddle {
namespace optimizer {
template <class T>
class AdagradOptimizer : public ParameterOptimizer<T> {
public:
AdagradOptimizer(const OptimizerConfig &config);
~AdagradOptimizer() {
if (accum_gradient) delete accum_gradient;
}
void update(const Tensor<T> &gradient);
void set_weight(const Tensor<T> *p);
private:
Tensor<T> *accum_gradient = nullptr;
double epsilon;
double decay;
};
} // namespace optimizer
} // namespace paddle
#endif
#include "adam_optimizer.h"
namespace paddle {
namespace optimizer {
template <class T>
AdamOptimizer<T>::AdamOptimizer(const ::paddle::OptimizerConfig &config)
: ParameterOptimizer<T>(config) {
beta_1 = config.adam().beta_1();
beta_2 = config.adam().beta_2();
epsilon = config.adam().epsilon();
decay = config.adam().decay();
}
template <class T>
void AdamOptimizer<T>::set_weight(const Tensor<T> *p) {
ParameterOptimizer<T>::set_weight(p);
size_t size = p->size();
T *mptr = new T[size]();
momentums_ = new Tensor<T>(mptr, size);
T *vptr = new T[size]();
velocitys_ = new Tensor<T>(vptr, size);
}
template <class T>
void AdamOptimizer<T>::update(const Tensor<T> &gradient) {
this->num_sample_passed += 1;
double learning_rate =
this->lr_policy->get_learning_rate(this->num_sample_passed);
Tensor<T> &param = *this->parameter_;
Tensor<T> &m = *momentums_;
Tensor<T> &v = *velocitys_;
// Adam keeps first (m) and second (v) moment estimates of the gradient;
// bias correction is omitted in this sketch of the update
for (size_t i = 0; i < param.size(); ++i) {
m[i] = beta_1 * m[i] + (1.0 - beta_1) * gradient[i];
v[i] = beta_2 * v[i] + (1.0 - beta_2) * gradient[i] * gradient[i];
param[i] -=
learning_rate * (m[i] / (std::sqrt(v[i]) + epsilon) + decay * param[i]);
}
}
template class AdamOptimizer<float>;
template class AdamOptimizer<double>;
} // namespace optimizer
} // namespace paddle
#ifndef PADDLE_ADAM_OPTIMIZER_H_
#define PADDLE_ADAM_OPTIMIZER_H_
#include "parameter_optimizer.h"
namespace paddle {
namespace optimizer {
template <class T>
class AdamOptimizer : public ParameterOptimizer<T> {
public:
AdamOptimizer(const OptimizerConfig &config);
~AdamOptimizer() {
if (momentums_) delete momentums_;
if (velocitys_) delete velocitys_;
}
void update(const Tensor<T> &gradient);
void set_weight(const Tensor<T> *p);
private:
Tensor<T> *momentums_ = nullptr;
Tensor<T> *velocitys_ = nullptr;
double beta_1;
double beta_2;
double epsilon;
double decay;
};
} // namespace optimizer
} // namespace paddle
#endif
#ifndef PADDLE_OPTIMIZER_LR_POLICY_H_
#define PADDLE_OPTIMIZER_LR_POLICY_H_
#include "OptimizerConfig.ph.h"
namespace paddle {
namespace optimizer {
class BaseLr {
public:
explicit BaseLr(const OptimizerConfig &config) {
learning_rate = config.lr_config().learning_rate();
}
virtual ~BaseLr() {}
virtual double get_learning_rate(const uint64_t num_sample_passed) = 0;
protected:
// derived policies read the base learning rate directly
double learning_rate;
};
// constant learning rate policy
class ConstLr final : public BaseLr {
public:
explicit ConstLr(const OptimizerConfig &config) : BaseLr(config) {}
double get_learning_rate(const uint64_t num_sample_passed) {
return learning_rate;
}
};
} // namespace optimizer
} // namespace paddle
#endif
#include "optimizer.h"
#include <string>
#include "parameter_optimizer.h"
template <paddle_element_type VALUE>
struct EnumToType {};
template <class T>
struct TypeToEnum {};
#define MATCH_ENUM_TYPE(TYPE, ENUM)                   \
template <>                                           \
struct TypeToEnum<TYPE> {                             \
static paddle_element_type v() { return ENUM; }       \
static constexpr paddle_element_type value = ENUM;    \
};                                                    \
template <>                                           \
struct EnumToType<ENUM> {                             \
typedef TYPE Type;                                    \
};
MATCH_ENUM_TYPE(int32_t, PADDLE_ELEMENT_TYPE_INT32);
MATCH_ENUM_TYPE(uint32_t, PADDLE_ELEMENT_TYPE_UINT32);
MATCH_ENUM_TYPE(int64_t, PADDLE_ELEMENT_TYPE_INT64);
MATCH_ENUM_TYPE(uint64_t, PADDLE_ELEMENT_TYPE_UINT64);
MATCH_ENUM_TYPE(float, PADDLE_ELEMENT_TYPE_FLOAT32);
MATCH_ENUM_TYPE(double, PADDLE_ELEMENT_TYPE_FLOAT64);
struct paddle_optimizer {
/*! \brief optimizer on the C++ side; the C wrapper currently assumes float32 buffers */
paddle::optimizer::ParameterOptimizer<float>* impl;
};
paddle_optimizer* paddle_create_optimizer(const unsigned char* config_proto,
int config_proto_len) {
paddle_optimizer* optimizer = new paddle_optimizer;
std::string config(config_proto, config_proto + config_proto_len);
optimizer->impl = paddle::optimizer::ParameterOptimizer<float>::create(config);
return optimizer;
}
int paddle_release_optimizer(paddle_optimizer* o) {
if (o == nullptr) return PADDLE_SUCCESS;
delete o->impl;
delete o;
return PADDLE_SUCCESS;
}
int paddle_update_parameter(paddle_optimizer* o,
paddle_element_type data_type,
const void* grad_buffer,
int num_bytes) {
// TODO(zhihong): dispatch on data_type; only float32 is wired up for now
float* buffer = reinterpret_cast<float*>(const_cast<void*>(grad_buffer));
paddle::optimizer::Tensor<float> gradient(buffer, num_bytes / sizeof(float));
o->impl->update(gradient);
return PADDLE_SUCCESS;
}
int paddle_optimizer_set_weights(paddle_optimizer* o,
paddle_element_type data_type,
void* param_buffer,
int num_bytes) {
// TODO(zhihong): dispatch on data_type; only float32 is wired up for now
paddle::optimizer::Tensor<float>* param = new paddle::optimizer::Tensor<float>(
reinterpret_cast<float*>(param_buffer), num_bytes / sizeof(float));
o->impl->set_weight(param);
return PADDLE_SUCCESS;
}
void* paddle_optimizer_get_weights(paddle_optimizer* o) {
void* buffer = (void*)o->impl->get_weight();
return buffer;
}
#ifndef PADDLE_LIB_OPTIMIZER_H_
#define PADDLE_LIB_OPTIMIZER_H_
#include <stdbool.h>
#include <stdint.h>
/*! \brief optimizer exported C API, which will be used in
Case A: on the Trainer (ParameterServer client) to optimize gradients
Case B: on the ParameterServer side to optimize gradients
To simplify configuration parsing, the optimizer does *not* parse any config
itself, e.g. the learning rate should be calculated by the caller
*/
#ifdef __cplusplus
extern "C" {
#endif
/*! \brief datatypes */
typedef enum {
PADDLE_ELEMENT_TYPE_INT32 = 0,
PADDLE_ELEMENT_TYPE_UINT32 = 1,
PADDLE_ELEMENT_TYPE_INT64 = 2,
PADDLE_ELEMENT_TYPE_UINT64 = 3,
PADDLE_ELEMENT_TYPE_FLOAT32 = 4,
PADDLE_ELEMENT_TYPE_FLOAT64 = 5,
} paddle_element_type;
/*! \brief execute status code */
const int32_t PADDLE_SUCCESS = 0;
const int32_t PADDLE_ERROR = -1;
typedef struct paddle_optimizer paddle_optimizer;
/**
* These interfaces are called in the following order
* (see the usage sketch below):
* 1. create optimizer with config
* 2. set weights
* 3. update_parameter
* 4. get_weights
* 5. release optimizer
*/
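/* A minimal usage sketch (illustrative only; `proto`, `proto_len`, `param`,
* `grad` and `num_bytes` below are placeholder names, not part of this API):
*
*   paddle_optimizer* o = paddle_create_optimizer(proto, proto_len);
*   paddle_optimizer_set_weights(o, PADDLE_ELEMENT_TYPE_FLOAT32, param, num_bytes);
*   paddle_update_parameter(o, PADDLE_ELEMENT_TYPE_FLOAT32, grad, num_bytes);
*   void* weights = paddle_optimizer_get_weights(o);
*   paddle_release_optimizer(o);
*/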
/**
* @brief create optimizer with proto_config
* @param config_proto, optimizer protobuf; see OptimizerConfig.proto for details
* @return return optimizer instance
*/
paddle_optimizer* paddle_create_optimizer(const unsigned char* config_proto,
int config_proto_len);
/**
* @brief release optimizer
* @param optimizer
* @return return exec status
*/
int paddle_release_optimizer(paddle_optimizer* o);
/**
* @brief apply one gradient update step to the parameter
* @param data_type datatype of gradient and parameter
* @param gradient, calculated by the optimizer caller.
* TODO(zhihong): just pass loss to reduce communication overhead.
* See the Project Adam '14 paper for details
* @param num_bytes, gradient size
* @return return exec status
*/
int paddle_update_parameter(paddle_optimizer* o,
paddle_element_type data_type,
const void* gradient,
int num_bytes);
/**
* @brief set the initial weights of the optimizer
* @param data_type datatype of parameter
* @param param_buffer, initialized parameter buffer
* @param num_bytes, parameter size
* @return return exec status
*/
int paddle_optimizer_set_weights(paddle_optimizer* o,
paddle_element_type data_type,
void* param_buffer,
int num_bytes);
/**
* @brief get the current weights
* @return return content of parameter buffer in optimizer
*/
void* paddle_optimizer_get_weights(paddle_optimizer* o);
#ifdef __cplusplus
}
#endif
#endif
#include "optimizer_factory.h"
#include "gtest/gtest.h"
#include "parameter_optimizer.h"
typedef float TestType;
using paddle::optimizer::ParameterOptimizer;
class OptimizerTest : public testing::Test {
public:
virtual void SetUp() {
paddle::OptimizerConfig config;
config.set_learning_rate(0.01);
config.set_decay(0.0);
config.set_momentum(0.0);
config.set_nesterov(false);
config.set_lr_decay_a(0.9);
config.set_lr_decay_b(0.1);
std::string config_proto = config.SerializeAsString();
o = ParameterOptimizer<TestType>::create(config_proto);
}
virtual void TearDown() {}
private:
ParameterOptimizer<TestType>* o;
};
TEST_F(OptimizerTest, createOptimizer) {}
int main(int argc, char** argv) {
testing::InitGoogleTest(&argc, argv);
return RUN_ALL_TESTS();
}
#include "optimizer.h"
#include "gtest/gtest.h"
template <class T>
class Optimizer_C_Test : public testing::Test {
private:
Tensor<T> parameter;
Tensor<T> gradient;
};
void applyGradientDescent_TEST() {}
#include "parameter_optimizer.h"
#include <glog/logging.h>
#include "optimizer_factory.h"
namespace paddle {
namespace optimizer {
template <class T>
ParameterOptimizer<T> *ParameterOptimizer<T>::create(
const ::std::string &config_proto) {
paddle::OptimizerConfig config;
CHECK(config.ParseFromString(config_proto)) << "error : optimizer config";
CHECK(config_valid(config_proto)) << "error : invalid optimizer config";
ParameterOptimizer<T> *opt = nullptr;
// C++ cannot switch on a string, so dispatch on the optimizer name with if/else
const std::string &name = config.optimizer_name();
if (name == "SGD") {
opt = new SGDOptimizer<T>(config);
} else if (name == "Adagrad") {
opt = new AdagradOptimizer<T>(config);
} else if (name == "Adadelta") {
opt = new AdadeltaOptimizer<T>(config);
} else if (name == "Adam") {
opt = new AdamOptimizer<T>(config);
} else {
opt = new SGDOptimizer<T>(config);
}
if (config.lr_policy() == "ConstLr") {
opt->lr_policy = new ConstLr(config);
}
return opt;
}
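// Typical use of the factory (a sketch; `config_proto` holds a serialized
// OptimizerConfig string, `param` and `grad` are a Tensor<float>* / Tensor<float>
// prepared by the caller):
//   auto *opt = ParameterOptimizer<float>::create(config_proto);
//   opt->set_weight(param);
//   opt->update(grad);
//   float *weights = opt->get_weight();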
template <class T>
T *ParameterOptimizer<T>::get_weight() const {
return parameter_->get_buffer();
}
template <class T>
char *ParameterOptimizer<T>::get_config_proto() {
// refresh the dynamic values so that a checkpoint can be restored later
config_.mutable_lr_config()->set_learning_rate(
lr_policy->get_learning_rate(num_sample_passed));
config_.set_num_sample_passed(num_sample_passed);
// keep the serialized proto alive; returning c_str() of a temporary would dangle
config_proto_str_ = config_.SerializeAsString();
return const_cast<char *>(config_proto_str_.c_str());
}
template <class T>
void ParameterOptimizer<T>::set_weight(const Tensor<T> *p) {
// TODO: the optimizer mutates the parameter in update(), so the const in this
// interface is misleading; cast it away for now
parameter_ = const_cast<Tensor<T> *>(p);
}
template <class T>
bool ParameterOptimizer<T>::config_valid(const ::std::string &config) {
// TODO(zhihong) : add more value checkers, fail fast on an invalid config
return true;
}
template class ParameterOptimizer<float>;
template class ParameterOptimizer<double>;
} // namespace optimizer
} // namespace paddle
#ifndef PADDLE_PARAMETER_OPTIMIZER_H_
#define PADDLE_PARAMETER_OPTIMIZER_H_
#include <glog/logging.h>
#include <functional>
#include <string>
#include "OptimizerConfig.pb.h"
#include "Tensor.h"
#include "lr_policy.h"
namespace paddle {
namespace optimizer {
template <class T>
class ParameterOptimizer {
public:
/**
* @brief update hook for algorithms that need to traverse the parameter more
* than once.
*/
ParameterOptimizer(const OptimizerConfig &config)
: config_(config), parameter_(nullptr), lr_policy(nullptr), num_sample_passed(0) {}
static ParameterOptimizer *create(const ::std::string &config_proto);
virtual void update(const Tensor<T> &gradient) = 0;
virtual void destroy() {}
virtual T *get_weight() const;
virtual void set_weight(const Tensor<T> *parameter);
// package the optimizer config proto at runtime for saving checkpoints
virtual char *get_config_proto();
virtual ~ParameterOptimizer() {
delete parameter_;
delete lr_policy;
}
protected:
// derived optimizers (SGD, Adagrad, ...) need access to these members
static bool config_valid(const ::std::string &config);
OptimizerConfig config_;
Tensor<T> *parameter_;
// learning rate policy
BaseLr *lr_policy;
uint64_t num_sample_passed;
// keeps the serialized proto returned by get_config_proto() alive
::std::string config_proto_str_;
private:
ParameterOptimizer(const ParameterOptimizer &) = delete;
ParameterOptimizer &operator=(const ParameterOptimizer &) = delete;
/**
* @brief indicate if use L1, L2 regularizer
*/
};
} // namespace optimizer
} // namespace paddle
#endif
#include "regularizer.h"
namespace paddle {
namespace optimizer {
template <class T>
Regularizer<T>* Regularizer<T>::create(const std::string& config) {
paddle::OptimizerConfig config;
Regularizer<T>* r;
if (config.regularizer_type() == paddle::OptimizerConfig_RegularizerType_L1) {
r = new L1Regularizer<T>(config);
} else if (config.regularizer_type() ==
paddle::OptimizerConfig_RegularizerType_L2) {
r = new L2Regularizer<T>(config);
break;
}
return r;
}
template class L1Regularizer<float>;
template class L1Regularizer<double>;
template class L2Regularizer<float>;
template class L2Regularizer<double>;
} // namespace optimizer
} // namespace paddle
#ifndef PADDLE_OPTIMIZER_REGULARIZER_H_
#define PADDLE_OPTIMIZER_REGULARIZER_H_
#include "OptimizerConfig.pb.h"
#include "Tensor.h"
namespace paddle {
namespace optimizer {
/**
* @brief regularizer in L1, L2
*/
template <class T>
class Regularizer {
public:
/**
* @brief regularizer update interface
* @param param need to update
* @return void
*/
static Regularizer *create(const std::string &config);
virtual void update(Tensor<T> &parameter) = 0;
virtual ~Regularizer() {}
private:
std::string regularizer_name;
OptimizerConfig config_;
};
template <class T>
class L1Regularizer : public Regularizer<T> {
public:
void update(Tensor<T> &parameter);
};
template <class T>
class L2Regularizer : public Regularizer<T> {
public:
void update(Tensor<T> &parameter);
};
} // namespace optimizer
} // namespace paddle
#endif
#ifndef PADDLE_SGD_OPTIMIZER_H_
#define PADDLE_SGD_OPTIMIZER_H_
#include "parameter_optimizer.h"
namespace paddle {
namespace optimizer {
template <class T>
class SGDOptimizer : public ParameterOptimizer<T> {
public:
SGDOptimizer(const ::paddle::OptimizerConfig& config);
~SGDOptimizer() {
// clear memory by Tensor library
delete momentums_;
}
void update(const Tensor<T>& gradient);
void set_weight(const Tensor<T>* p);
char* get_config_proto();
private:
Tensor<T>* momentums_ = nullptr;
double momentum;
double decay;
bool nesterov;
};
} // namespace optimizer
} // namespace paddle
#endif
#include "sgd_optimizer.h"
namespace paddle {
namespace optimizer {
template <class T>
SGDOptimizer<T>::SGDOptimizer(const ::paddle::OptimizerConfig &config)
: ParameterOptimizer<T>(config) {
momentum = config.sgd().momentum();
decay = config.sgd().decay();
nesterov = config.sgd().nesterov();
}
template <class T>
void SGDOptimizer<T>::set_weight(const Tensor<T> *p) {
ParameterOptimizer<T>::set_weight(p);
size_t size = p->size();
// TODO: fix it with an alignment-aware allocator bound to Tensor
if (momentum != 0.0) {
T *ptr = new T[size]();
momentums_ = new Tensor<T>(ptr, size);
}
}
template <class T>
void SGDOptimizer<T>::update(const Tensor<T> &gradient) {
this->num_sample_passed += 1;
double learning_rate =
this->lr_policy->get_learning_rate(this->num_sample_passed);
Tensor<T> &param = *this->parameter_;
double velocity = 0.0;
for (size_t i = 0; i < param.size(); ++i) {
if (momentum == 0.0) {
velocity =
-learning_rate * gradient[i] - learning_rate * decay * param[i];
} else {
Tensor<T> &m = *momentums_;
m[i] = momentum * m[i] - learning_rate * gradient[i] -
learning_rate * decay * param[i];
velocity = m[i];
}
if (nesterov) {
param[i] += momentum * velocity - learning_rate * gradient[i];
} else {
param[i] += velocity;
}
}
}
template <class T>
char *SGDOptimizer<T>::get_config_proto() {
// fill in the shared fields (learning rate, num_sample_passed) first
ParameterOptimizer<T>::get_config_proto();
this->config_.mutable_sgd()->set_momentum(momentum);
this->config_.mutable_sgd()->set_decay(decay);
this->config_.mutable_sgd()->set_nesterov(nesterov);
this->config_proto_str_ = this->config_.SerializeAsString();
return const_cast<char *>(this->config_proto_str_.c_str());
}
template class SGDOptimizer<float>;
template class SGDOptimizer<double>;
} // namespace optimizer
} // namespace paddle