Commit 8610ba1c authored by D dzhwinter

"remove get config proto"

Parent 5b8a0c5d
......@@ -5,7 +5,6 @@
*/
#include <string.h>
#include "optimizer.h"
#include "paddle/math/BaseMatrix.h"
namespace paddle {
......@@ -15,18 +14,27 @@ template <class T>
using TensorBase = BaseMatrixT<T>;
template <class T>
class Tensor : public TensorBase<T> {
class TensorT : public TensorBase<T> {
public:
Tensor(T* data, int size) : TensorBase<T>(1, size, 0, data, false, false) {}
TensorT(T* data, int size) : TensorBase<T>(1, size, 0, data, false, false) {}
TensorT(const TensorT& t)
: TensorBase<T>(1, t.size(), 0, t.get_buffer(), false, false) {}
TensorT& operator=(const TensorT& t) {
this->size_ = t.size();
this->data_ = t.get_buffer();
}
T* get_buffer() { return this->data_; }
T& operator[](const int idx) {
CHECK(idx >= 0 && idx < this->width_) << " out of index range";
CHECK(idx >= 0 && idx < this->width_) << "out of index range";
return this->data_[idx];
}
// TODO: replace with tensorshape
size_t size() const { return this->width_; }
};
// TODO(zhihong): design problem of dynamic datatype, need to fix
typedef TensorT<real> Tensor;
} // namespace optimizer
} // namespace paddle
......
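Side note: the TensorT introduced above is a thin, non-owning 1-D view over a caller-managed buffer, with the element type fixed to real through the Tensor typedef. A minimal self-contained sketch of the same idea, without the BaseMatrixT dependency (all names below are illustrative, not Paddle's):
#include <cassert>
#include <cstddef>
// Non-owning 1-D view over an existing buffer; the caller keeps ownership.
struct FloatView {
  FloatView(float* data, std::size_t size) : data_(data), size_(size) {}
  float& operator[](std::size_t idx) {
    assert(idx < size_ && "out of index range");
    return data_[idx];
  }
  std::size_t size() const { return size_; }
  float* get_buffer() { return data_; }
 private:
  float* data_;       // not owned
  std::size_t size_;
};
int main() {
  float storage[4] = {1.f, 2.f, 3.f, 4.f};
  FloatView v(storage, 4);
  v[2] = 7.f;         // writes straight through to storage
  return static_cast<int>(v.size());
}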
......@@ -4,19 +4,17 @@
namespace paddle {
namespace optimizer {
template <class T>
void AdadeltaOptimizer<T>::set_weight(const Tensor<T>* p) {
void AdadeltaOptimizer::set_weight(Tensor* p) {
size_t size = p->size();
T* gptr = new T[size];
accum_gradient = Tensor<T>(gptr, size);
T* dptr = new T[size];
accum_delta = Tensor<T>(dptr, size);
T* dptr_current = new T[size];
update_delta = Tensor<T>(dptr_current, size);
real* gptr = new real[size];
accum_gradient = Tensor(gptr, size);
real* dptr = new real[size];
accum_delta = Tensor(dptr, size);
real* dptr_current = new real[size];
update_delta = Tensor(dptr_current, size);
}
template <class T>
void AdadeltaOptimizer<T>::update(const Tensor<T>& gradient) {
void AdadeltaOptimizer::update(const Tensor& gradient) {
num_sample_passed += 1;
double learning_rate = lr_policy->get_learning_rate(num_sample_passed);
for (size_t i = 0; i < parameter_->size(); ++i) {
......@@ -33,9 +31,5 @@ void AdadeltaOptimizer<T>::update(const Tensor<T>& gradient) {
learning_rate * update_delta[i] + learning_rate * decay * parameter_[i];
}
}
template class AdadeltaOptimizer<float>;
template class AdadeltaOptimizer<double>;
} // namespace optimizer
} // namespace paddle
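For reference, the loop above implements Zeiler's Adadelta rule, with a learning-rate scale and an L2 decay term on top. A standalone sketch of one step on plain arrays (names and the exact decay handling are illustrative, not copied from the elided loop body):
#include <cmath>
#include <cstddef>
// One Adadelta step over `size` parameters (illustrative sketch).
void adadelta_step(float* param, const float* grad,
                   float* accum_grad, float* accum_delta, std::size_t size,
                   double rho, double epsilon,
                   double learning_rate, double decay) {
  for (std::size_t i = 0; i < size; ++i) {
    const double g = grad[i];
    // running average of squared gradients
    accum_grad[i] = rho * accum_grad[i] + (1.0 - rho) * g * g;
    // step scaled by the ratio of the two running averages
    const double update =
        -std::sqrt((accum_delta[i] + epsilon) / (accum_grad[i] + epsilon)) * g;
    // running average of squared updates
    accum_delta[i] = rho * accum_delta[i] + (1.0 - rho) * update * update;
    // apply the step plus simple L2 weight decay
    param[i] += learning_rate * update - learning_rate * decay * param[i];
  }
}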
......@@ -6,28 +6,27 @@
namespace paddle {
namespace optimizer {
template <class T>
class AdadeltaOptimizer : public ParameterOptimizer<T> {
class AdadeltaOptimizer : public ParameterOptimizer {
public:
using ParameterOptimizer<T>::parameter_;
using ParameterOptimizer<T>::num_sample_passed;
using ParameterOptimizer<T>::lr_policy;
using ParameterOptimizer::parameter_;
using ParameterOptimizer::num_sample_passed;
using ParameterOptimizer::lr_policy;
AdadeltaOptimizer(double rho, double epsilon, double decay, BaseLr *lr)
: ParameterOptimizer<T>(lr), rho(rho), epsilon(epsilon), decay(decay) {}
: ParameterOptimizer(lr), rho(rho), epsilon(epsilon), decay(decay) {}
~AdadeltaOptimizer() {
if (accum_gradient) delete accum_gradient;
if (accum_delta) delete accum_delta;
if (update_delta) delete update_delta;
}
void update(const Tensor<T> &gradient);
void set_weight(const Tensor<T> *p);
T *get_weight() const;
void update(const Tensor &gradient);
void set_weight(Tensor *p);
real *get_weight() const;
private:
Tensor<T> *accum_gradient;
Tensor<T> *accum_delta;
Tensor<T> *update_delta;
Tensor *accum_gradient;
Tensor *accum_delta;
Tensor *update_delta;
double rho;
double epsilon;
......
......@@ -2,21 +2,18 @@
namespace paddle {
namespace optimizer {
template <class T>
template <class T>
void AdagradOptimizer<T>::set_weight(const Tensor<T>* p) {
void AdagradOptimizer::set_weight(Tensor* p) {
size_t size = p->width();
T* gptr = new T[size];
accum_gradient = Tensor<T>(gptr, size);
T* dptr = new T[size];
accum_delta = Tensor<T>(dptr, size);
T* dptr_current = new T[size];
update_delta = Tensor<T>(dptr_current, size);
real* gptr = new real[size];
accum_gradient = Tensor(gptr, size);
real* dptr = new real[size];
accum_delta = Tensor(dptr, size);
real* dptr_current = new real[size];
update_delta = Tensor(dptr_current, size);
}
template <class T>
void AdagradOptimizer<T>::update(const Tensor<T>& gradient) {
void AdagradOptimizer::update(const Tensor& gradient) {
num_sample_passed += 1;
double learning_rate = lr_policy->get_learning_rate();
for (size_t i = 0; i < parameter_.size(); ++i) {
......@@ -27,7 +24,5 @@ void AdagradOptimizer<T>::update(const Tensor<T>& gradient) {
}
}
template class AdagradOptimizer<float>;
template class AdagradOptimizer<double>;
} // namespace optimizer
} // namespace paddle
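Adagrad accumulates squared gradients and divides each step by the square root of that sum. A standalone sketch of one step on plain arrays (illustrative names; the standard rule plus an L2 decay term like the one used elsewhere in these optimizers):
#include <cmath>
#include <cstddef>
// One Adagrad step over `size` parameters (illustrative sketch).
void adagrad_step(float* param, const float* grad, float* accum_grad,
                  std::size_t size, double learning_rate,
                  double epsilon, double decay) {
  for (std::size_t i = 0; i < size; ++i) {
    const double g = grad[i];
    accum_grad[i] += g * g;  // running sum of squared gradients
    param[i] -= learning_rate * g / (std::sqrt(accum_grad[i]) + epsilon) +
                learning_rate * decay * param[i];  // L2 weight decay
  }
}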
......@@ -6,23 +6,19 @@
namespace paddle {
namespace optimizer {
template <class T>
class AdagradOptimizer : public ParameterOptimizer<T> {
class AdagradOptimizer : public ParameterOptimizer {
public:
using ParameterOptimizer<T>::parameter_;
using ParameterOptimizer<T>::num_sample_passed;
using ParameterOptimizer<T>::lr_policy;
AdagradOptimizer(double epsilon, double decay, BaseLr *lr)
: ParameterOptimizer<T>(lr), epsilon(epsilon), decay(decay) {}
: ParameterOptimizer(lr), epsilon(epsilon), decay(decay) {}
~AdagradOptimizer() {
if (accum_gradient) delete accum_gradient;
}
void update(const Tensor<T> &gradient);
void set_weight(const Tensor<T> *p);
T *get_weight() const;
void update(const Tensor &gradient);
void set_weight(Tensor *p);
real *get_weight() const;
private:
Tensor<T> *accum_gradient;
Tensor *accum_gradient;
double epsilon;
double decay;
};
......
......@@ -3,17 +3,15 @@
namespace paddle {
namespace optimizer {
template <class T>
void AdamOptimizer<T>::set_weight(const Tensor<T> *p) {
void AdamOptimizer::set_weight(Tensor *p) {
size_t size = p->width();
T *mptr = new T[size];
momentums_ = Tensor<T>(mptr, size);
T *vptr = new T[size];
velocitys_ = Tensor<T>(vptr, size);
real *mptr = new real[size];
momentums_ = Tensor(mptr, size);
real *vptr = new real[size];
velocitys_ = Tensor(vptr, size);
}
template <class T>
void AdamOptimizer<T>::update(const Tensor<T> &gradient) {
void AdamOptimizer::update(const Tensor &gradient) {
num_sample_passed += 1;
double learning_rate = lr_policy->get_learning_rate(num_sample_passed);
double coef1 = 1.0 - std::pow(beta_1, num_sample_passed);
......@@ -28,8 +26,5 @@ void AdamOptimizer<T>::update(const Tensor<T> &gradient) {
decay * parameter_[i]);
}
}
template class AdamOptimizer<float>;
template class AdamOptimizer<double>;
} // namespace optimizer
} // namespace paddle
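In the Adam update, coef1 = 1 - beta_1^t and coef2 = 1 - beta_2^t are the usual bias-correction factors for the first- and second-moment estimates. A standalone sketch of one step on plain arrays (illustrative names, standard Adam plus an L2 decay term):
#include <cmath>
#include <cstddef>
#include <cstdint>
// One Adam step over `size` parameters at time step t (illustrative sketch).
void adam_step(float* param, const float* grad, float* m, float* v,
               std::size_t size, std::uint64_t t, double learning_rate,
               double beta_1, double beta_2, double epsilon, double decay) {
  const double coef1 = 1.0 - std::pow(beta_1, static_cast<double>(t));
  const double coef2 = 1.0 - std::pow(beta_2, static_cast<double>(t));
  for (std::size_t i = 0; i < size; ++i) {
    const double g = grad[i];
    m[i] = beta_1 * m[i] + (1.0 - beta_1) * g;      // 1st moment
    v[i] = beta_2 * v[i] + (1.0 - beta_2) * g * g;  // 2nd moment
    const double m_hat = m[i] / coef1;              // bias-corrected
    const double v_hat = v[i] / coef2;
    param[i] -= learning_rate *
                (m_hat / (std::sqrt(v_hat) + epsilon) + decay * param[i]);
  }
}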
......@@ -6,15 +6,11 @@
namespace paddle {
namespace optimizer {
template <class T>
class AdamOptimizer : public ParameterOptimizer<T> {
class AdamOptimizer : public ParameterOptimizer {
public:
using ParameterOptimizer<T>::parameter_;
using ParameterOptimizer<T>::num_sample_passed;
using ParameterOptimizer<T>::lr_policy;
AdamOptimizer(
double beta_1, double beta_2, double epsilon, double decay, BaseLr *lr)
: ParameterOptimizer<T>(lr),
: ParameterOptimizer(lr),
beta_1(beta_1),
beta_2(beta_2),
epsilon(epsilon),
......@@ -23,13 +19,13 @@ public:
if (momentums_) delete momentums_;
if (velocitys_) delete velocitys_;
}
void update(const Tensor<T> &gradient);
void set_weight(const Tensor<T> *p);
T *get_weight() const;
void update(const Tensor &gradient);
void set_weight(Tensor *p);
real *get_weight() const;
private:
Tensor<T> *momentums_;
Tensor<T> *velocitys_;
Tensor *momentums_;
Tensor *velocitys_;
double beta_1;
double beta_2;
double epsilon;
......
#ifndef PADDLE_OPTIMIZER_LR_POLICY_H_
#define PADDLE_OPTIMIZER_LR_POLICY_H_
#include <algorithm>
#include "OptimizerConfig.pb.h"
namespace paddle {
......@@ -19,11 +20,25 @@ protected:
// constant learning rate policy
class ConstLr final : public BaseLr {
public:
ConstLr(double lr) : BaseLr(lr){};
double get_learning_rate(const uint64_t num_sample_passed) {
return learning_rate;
}
};
class LinearLr final : public BaseLr {
public:
LinearLr(double lr, double lr_decay_a, double lr_decay_b)
: BaseLr(lr), lr_decay_a(lr_decay_a), lr_decay_b(lr_decay_b) {}
double get_learning_rate(const uint64_t num_sample_passed) {
return std::max(learning_rate - lr_decay_a * num_sample_passed, lr_decay_b);
}
private:
double lr_decay_a;
double lr_decay_b;
};
} // namespace optimizer
} // namespace paddle
......
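ConstLr always returns the configured rate, while the new LinearLr decays it linearly with the number of samples seen and floors it at lr_decay_b. A quick standalone illustration of the two formulas (the numbers are made up):
#include <algorithm>
#include <cstdint>
#include <cstdio>
double const_lr(double lr, std::uint64_t /*num_sample_passed*/) { return lr; }
double linear_lr(double lr, double lr_decay_a, double lr_decay_b,
                 std::uint64_t num_sample_passed) {
  // linear decay, never dropping below lr_decay_b
  return std::max(lr - lr_decay_a * num_sample_passed, lr_decay_b);
}
int main() {
  std::printf("%g\n", const_lr(0.1, 1000));                // 0.1
  std::printf("%g\n", linear_lr(0.1, 1e-5, 0.01, 1000));   // 0.09
  std::printf("%g\n", linear_lr(0.1, 1e-5, 0.01, 100000)); // floored at 0.01
  return 0;
}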
......@@ -2,8 +2,9 @@
#include <string>
#include "parameter_optimizer.h"
using namespace paddle::optimizer;
template <paddle_element_type T>
template <paddle_element_type VALUE>
struct EnumToType {};
template <class T>
......@@ -26,17 +27,16 @@ MATCH_ENUM_TYPE(int64_t, PADDLE_ELEMENT_TYPE_INT64);
MATCH_ENUM_TYPE(uint64_t, PADDLE_ELEMENT_TYPE_UINT64);
MATCH_ENUM_TYPE(float, PADDLE_ELEMENT_TYPE_FLOAT32);
MATCH_ENUM_TYPE(double, PADDLE_ELEMENT_TYPE_FLOAT64);
 struct paddle_optimizer {
/*! \brief optmizer in C++ side */
paddle::optimizer::ParameterOptimizerBase* impl;
struct paddle_optimizer {
paddle::optimizer::ParameterOptimizer* impl;
};
paddle_optimizer* paddle_create_optimizer(const unsigned char* config_proto,
int config_proto_len) {
paddle_optimizer* optimizer;
paddle_optimizer* optimizer = new paddle_optimizer;
std::string config(config_proto, config_proto + config_proto_len);
optimizer->impl->create(config_proto);
optimizer->impl = ParameterOptimizer::create(config);
return optimizer;
}
......@@ -49,9 +49,9 @@ int paddle_update_parameter(paddle_optimizer* o,
const paddle_element_type data_type,
const void* grad_buffer,
int num_bytes) {
auto type = EnumToType<data_type>::Type;
paddle::Tensor<type> gradient(reinterpret_cast<type*>(grad_buffer),
num_bytes);
// TODO(zhihong): datatype handling does not work yet; need to add runtime datatype support
auto grad = reinterpret_cast<const real*>(grad_buffer);
Tensor gradient(const_cast<real*>(grad), num_bytes);
o->impl->update(gradient);
return PADDLE_SUCCESS;
}
......@@ -60,9 +60,8 @@ int paddle_optimizer_set_weights(paddle_optimizer* o,
const paddle_element_type data_type,
void* param_buffer,
int num_bytes) {
auto type = EnumToType<data_type>::Type;
paddle::Tensor<type>* param = new paddle::Tensor<type>(
reinterpret_cast<type*>(param_buffer), num_bytes);
// TODO(zhihong): datatype handling does not work yet; need to add runtime datatype support
Tensor* param = new Tensor(reinterpret_cast<real*>(param_buffer), num_bytes);
o->impl->set_weight(param);
return PADDLE_SUCCESS;
}
......
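From the caller's side, the C interface above is used roughly as: create an optimizer from a serialized OptimizerConfig, register the parameter buffer, then feed gradients each step. A hedged sketch of that flow (the config bytes are a placeholder, error checking is omitted, and note that in this revision the num_bytes argument is passed straight into the Tensor constructor as an element count):
#include "optimizer.h"  // assumed to declare the C API shown above
// Illustrative only: `serialized_config` / `config_len` stand in for a real
// serialized OptimizerConfig protobuf produced by the trainer.
void train_step_example(const unsigned char* serialized_config, int config_len,
                        float* params, int param_count,
                        const float* grads, int grad_count) {
  paddle_optimizer* opt =
      paddle_create_optimizer(serialized_config, config_len);
  // Hand the optimizer the parameter buffer it should update in place.
  paddle_optimizer_set_weights(opt, PADDLE_ELEMENT_TYPE_FLOAT32,
                               params, param_count);
  // Apply one gradient update.
  paddle_update_parameter(opt, PADDLE_ELEMENT_TYPE_FLOAT32,
                          grads, grad_count);
}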
......@@ -10,78 +10,60 @@
namespace paddle {
namespace optimizer {
template <class T>
ParameterOptimizer<T> *ParameterOptimizer<T>::create(
ParameterOptimizer *ParameterOptimizer::create(
const ::std::string &config_proto) {
paddle::OptimizerConfig config;
CHECK(config.ParseFromString(config_proto) == 0)
<< "error : optimizer config";
CHECK(config_valid(config) == 0) << "error : invalid optimizer config ";
BaseLr *lr = nullptr;
switch (config.lr_policy()) {
case "ConstLr":
lr = new ConstLr(config.lr_config().learning_rate());
break;
}
ParameterOptimizer<T> *opt = nullptr;
switch (config.optimizer_name()) {
case "SGD":
opt = new SGDOptimizer<T>(config.sgd().momentum(),
auto select_lr_policy = [=](const OptimizerConfig &config) -> BaseLr * {
std::string s(config.lr_policy());
if (s == "ConstLr") return new ConstLr(config.lr_config().learning_rate());
if (s == "LinearLr")
return new LinearLr(config.lr_config().learning_rate(),
config.lr_config().lr_decay_a(),
config.lr_config().lr_decay_b());
// default
return new ConstLr(config.lr_config().learning_rate());
};
BaseLr *lr = select_lr_policy(config);
auto select_optimizer =
[=](const OptimizerConfig &config) -> ParameterOptimizer * {
std::string s(config.optimizer_name());
if (s == "SGD") {
return new SGDOptimizer(config.sgd().momentum(),
config.sgd().decay(),
config.sgd().nesterov(),
lr);
break;
case "Adagrad":
opt = new AdagradOptimizer<T>(
}
if (s == "Adadelta") {
return new AdagradOptimizer(
config.adagrad().epsilon(), config.adagrad().decay(), lr);
}
if (s == "Adagrad") {
return new AdagradOptimizer(
config.adagrad().epsilon(), config.adagrad().decay(), lr);
break;
case "Adadelta":
opt = new AdadeltaOptimizer<T>(config.adadelta().rho(),
}
if (s == "Adam") {
return new AdadeltaOptimizer(config.adadelta().rho(),
config.adadelta().epsilon(),
config.adadelta().decay(),
lr);
break;
case "Adam":
opt = new AdamOptimizer<T>(config.adam().beta_1(),
config.adam().beta_2(),
config.adam().epsilon(),
config.adam().decay(),
lr);
break;
}
return opt;
}
template <class T>
T *ParameterOptimizer<T>::get_weight() const {
return parameter.get().get_buffer();
}
template <class T>
char *ParameterOptimizer<T>::get_config_proto() const {
// set config dynamic value for save checkpoint
config_.lr_policy().set_learning_rate(
lr_policy->get_learning_rate(num_sample_passed));
config_.set_num_sample_passed(num_sample_passed);
config_.set_iterations(iterations);
return config_.SerializeAsString().c_str();
}
template <class T>
void ParameterOptimizer<T>::set_weight(const Tensor<T> *p) {
parameter_ = p;
// default
return new SGDOptimizer(config.sgd().momentum(),
config.sgd().decay(),
config.sgd().nesterov(),
lr);
};
return select_optimizer(config);
}
template <class T>
bool ParameterOptimizer<T>::config_valid(const ::std::string &config) const {
// TODO(zhihong) : add more value checker, failed ASAP
return true;
real *ParameterOptimizer::get_weight() const {
return parameter_->get_buffer();
}
template class ParameterOptimzier<float>;
template class ParameterOptimzier<double>;
void ParameterOptimizer::set_weight(Tensor *p) { parameter_ = p; }
} // namespace optimizer
} // namespace paddle
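The rewritten create() replaces the switch-on-string (not legal C++ in any case, since case labels cannot be string literals) with lambdas that compare config.lr_policy() and config.optimizer_name() and fall back to defaults. Note that, as written above, the "Adadelta" branch constructs an AdagradOptimizer and the "Adam" branch an AdadeltaOptimizer. A small self-contained sketch of the same name-to-constructor dispatch pattern, with the mapping spelled out explicitly (types and names here are illustrative):
#include <functional>
#include <map>
#include <string>
struct Optimizer { virtual ~Optimizer() {} };
struct SGD : Optimizer {};
struct Adagrad : Optimizer {};
struct Adadelta : Optimizer {};
struct Adam : Optimizer {};
// String-keyed registry standing in for the lambda dispatch above.
Optimizer* create_by_name(const std::string& name) {
  static const std::map<std::string, std::function<Optimizer*()>> registry = {
      {"SGD", [] { return new SGD; }},
      {"Adagrad", [] { return new Adagrad; }},
      {"Adadelta", [] { return new Adadelta; }},
      {"Adam", [] { return new Adam; }},
  };
  auto it = registry.find(name);
  return it != registry.end() ? it->second() : new SGD;  // default: SGD
}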
......@@ -11,13 +11,6 @@
namespace paddle {
namespace optimizer {
class ParameterOptimizerBase {
private:
ParameterOptimizerBase(const ParameterOptimizerBase &) = delete;
ParameterOptimizerBase &operator=(const ParameterOptimizerBase &) = delete;
};
template <class T>
class ParameterOptimizer {
public:
/**
......@@ -31,14 +24,13 @@ public:
virtual ~ParameterOptimizer() { delete parameter_; };
static ParameterOptimizer *create(const ::std::string &config_proto);
virtual void update(const Tensor<T> &gradient) = 0;
virtual T *get_weight() const;
virtual void set_weight(const Tensor<T> *parameter);
virtual void update(const Tensor &gradient) = 0;
virtual real *get_weight() const;
virtual void set_weight(Tensor *parameter);
public:
bool config_valid(::std::string &config) const;
OptimizerConfig config_;
Tensor<T> *parameter_;
Tensor *parameter_;
// learning rate policy
BaseLr *lr_policy;
......
......@@ -6,31 +6,22 @@
namespace paddle {
namespace optimizer {
template <class T>
class SGDOptimizer : public ParameterOptimizer<T> {
class SGDOptimizer : public ParameterOptimizer {
public:
using ParameterOptimizer<T>::parameter_;
using ParameterOptimizer<T>::num_sample_passed;
using ParameterOptimizer<T>::lr_policy;
SGDOptimizer(double m,
double d,
bool n,
double learning_rate,
uint64_t num_sample_passed,
BaseLr* lr)
: ParameterOptimizer<T>(lr), momentum(m), decay(d), nesterov(n) {}
virtual ~SGDOptimizer() {
// clear memory by Tensor library
delete momentums_;
}
void update(const Tensor<T>& gradient);
void set_weight(const Tensor<T>* p);
T* get_weight() const;
using ParameterOptimizer::parameter_;
using ParameterOptimizer::num_sample_passed;
using ParameterOptimizer::lr_policy;
SGDOptimizer(double m, double d, bool n, BaseLr* lr)
: ParameterOptimizer(lr), momentum(m), decay(d), nesterov(n) {}
virtual ~SGDOptimizer() { delete momentums_; }
void update(const Tensor& gradient);
void set_weight(Tensor* p);
real* get_weight() const;
private:
Tensor<T>* momentums_;
Tensor* momentums_;
double momentum;
double decay;
bool nesterov;
......
......@@ -3,23 +3,21 @@
namespace paddle {
namespace optimizer {
template <class T>
void SGDOptimizer<T>::set_weight(const Tensor<T> *p) {
void SGDOptimizer::set_weight(Tensor *p) {
// ParameterOptimizer::set_weight(p);
size_t size = p->size();
// TODO: fix it with align aware allocator bind to Tensor
if (momentum != 0.0) {
T *ptr = new T[size];
momentums_ = Tensor<T>(ptr, size);
real *ptr = new real[size];
momentums_ = new Tensor(ptr, size);
}
}
template <class T>
void SGDOptimizer<T>::update(const Tensor<T> &gradient) {
void SGDOptimizer::update(const Tensor &gradient) {
num_sample_passed += 1;
double learning_rate = lr_policy->get_learning_rate(num_sample_passed);
double velocity = 0.0;
Tensor<T> &for (size_t i = 0; i < parameter_->size(); ++i) {
for (size_t i = 0; i < parameter_->size(); ++i) {
if (momentum == 0.0) {
velocity =
-learning_rate * gradient[i] - learning_rate * decay * parameter_[i];
......@@ -36,8 +34,5 @@ void SGDOptimizer<T>::update(const Tensor<T> &gradient) {
}
}
template class SGDOptimizer<float>;
template class SGDOptimizer<double>;
} // namespace optimizer
} // namespace paddle
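The loop body, partly elided in this hunk, is plain momentum SGD with L2 decay and an optional Nesterov correction. A standalone sketch of the standard per-element update on plain arrays (illustrative names, not copied from the elided lines):
#include <cstddef>
// One momentum-SGD step over `size` parameters (illustrative sketch).
void sgd_step(float* param, const float* grad, float* momentums,
              std::size_t size, double learning_rate, double momentum,
              double decay, bool nesterov) {
  for (std::size_t i = 0; i < size; ++i) {
    double velocity;
    if (momentum == 0.0) {
      // plain SGD with L2 weight decay
      velocity = -learning_rate * grad[i] -
                 learning_rate * decay * param[i];
    } else {
      // heavy-ball momentum
      momentums[i] = momentum * momentums[i] -
                     learning_rate * grad[i] -
                     learning_rate * decay * param[i];
      velocity = momentums[i];
    }
    if (nesterov) {
      // Nesterov look-ahead variant
      param[i] += momentum * velocity - learning_rate * grad[i];
    } else {
      param[i] += velocity;
    }
  }
}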