From c624417c6f5f1d61ab539aa9c88e95b929a19054 Mon Sep 17 00:00:00 2001
From: Qiao Longfei
Date: Fri, 14 Dec 2018 11:27:14 +0800
Subject: [PATCH] change sparse mode to lazy mode

---
 paddle/fluid/operators/optimizers/adam_op.cc         |  2 +-
 paddle/fluid/operators/optimizers/adam_op.h          | 12 ++++++------
 python/paddle/fluid/optimizer.py                     |  6 +++---
 .../paddle/fluid/tests/unittests/test_adam_op.py     | 16 ++++++++--------
 4 files changed, 18 insertions(+), 18 deletions(-)

diff --git a/paddle/fluid/operators/optimizers/adam_op.cc b/paddle/fluid/operators/optimizers/adam_op.cc
index b2c2e5c32..799322432 100644
--- a/paddle/fluid/operators/optimizers/adam_op.cc
+++ b/paddle/fluid/operators/optimizers/adam_op.cc
@@ -111,7 +111,7 @@ class AdamOpMaker : public framework::OpProtoAndCheckerMaker {
                    "Constant for numerical stability")
         .SetDefault(1.0e-8f);
     AddAttr<bool>(
-        "sparse_mode",
+        "lazy_mode",
         "(bool, default false) "
         "only update the parameter that has gradient in sparse update")
         .SetDefault(false);
diff --git a/paddle/fluid/operators/optimizers/adam_op.h b/paddle/fluid/operators/optimizers/adam_op.h
index 25e23c5f9..5870557bb 100644
--- a/paddle/fluid/operators/optimizers/adam_op.h
+++ b/paddle/fluid/operators/optimizers/adam_op.h
@@ -177,13 +177,13 @@ struct SparseAdamFunctor {
   const int64_t* rows_;
   int64_t row_numel_;
   int64_t row_count_;
-  bool sparse_mode_;
+  bool lazy_mode_;
 
   SparseAdamFunctor(T beta1, T beta2, T epsilon, const T* beta1_pow,
                     const T* beta2_pow, const T* mom1, T* mom1_out,
                     const T* mom2, T* mom2_out, const T* lr, const T* grad,
                     const T* param, T* param_out, const int64_t* rows,
-                    int64_t row_numel, int64_t row_count, bool sparse_mode)
+                    int64_t row_numel, int64_t row_count, bool lazy_mode)
       : beta1_(beta1),
         beta2_(beta2),
         epsilon_(epsilon),
@@ -200,7 +200,7 @@ struct SparseAdamFunctor {
         rows_(rows),
         row_numel_(row_numel),
         row_count_(row_count),
-        sparse_mode_(sparse_mode) {}
+        lazy_mode_(lazy_mode) {}
 
   inline HOSTDEVICE void adam_update(size_t i, T g) const {
     // The following code is the same as dense
@@ -245,7 +245,7 @@ class AdamOpKernel : public framework::OpKernel<T> {
     using paddle::framework::LoDTensor;
     using paddle::operators::detail::Ref;
 
-    bool sparse_mode = ctx.Attr<bool>("sparse_mode");
+    bool lazy_mode = ctx.Attr<bool>("lazy_mode");
     T beta1 = static_cast<T>(ctx.Attr<float>("beta1"));
     T beta2 = static_cast<T>(ctx.Attr<float>("beta2"));
     T epsilon = static_cast<T>(ctx.Attr<float>("epsilon"));
@@ -357,8 +357,8 @@ class AdamOpKernel : public framework::OpKernel<T> {
           mom2_out.template mutable_data<T>(ctx.GetPlace()),
           lr.template data<T>(), grad_data, param.template data<T>(),
           param_out.template mutable_data<T>(ctx.GetPlace()), rows, row_numel,
-          grad_merge.rows().size(), sparse_mode);
-      if (sparse_mode) {
+          grad_merge.rows().size(), lazy_mode);
+      if (lazy_mode) {
         size_t row_count = grad_merge.rows().size();
         for (size_t row_index = 0; row_index < row_count; ++row_index) {
           for (size_t offset = 0; offset < row_numel; ++offset) {
diff --git a/python/paddle/fluid/optimizer.py b/python/paddle/fluid/optimizer.py
index 9c7482bc4..c53bf4913 100644
--- a/python/paddle/fluid/optimizer.py
+++ b/python/paddle/fluid/optimizer.py
@@ -664,7 +664,7 @@ class AdamOptimizer(Optimizer):
                  epsilon=1e-8,
                  regularization=None,
                  name=None,
-                 sparse_mode=False):
+                 lazy_mode=False):
         assert learning_rate is not None
         assert beta1 is not None
         assert beta2 is not None
@@ -677,7 +677,7 @@ class AdamOptimizer(Optimizer):
         self._beta1 = beta1
         self._beta2 = beta2
         self._epsilon = epsilon
-        self._sparse_mode = sparse_mode
+        self._lazy_mode = lazy_mode
 
     def _create_accumulators(self, block, parameters):
         assert isinstance(block, framework.Block)
@@ -732,7 +732,7 @@ class AdamOptimizer(Optimizer):
                 "beta1": self._beta1,
                 "beta2": self._beta2,
                 "epsilon": self._epsilon,
-                "sparse_mode": self._sparse_mode
+                "lazy_mode": self._lazy_mode
             })
 
         return adam_op
diff --git a/python/paddle/fluid/tests/unittests/test_adam_op.py b/python/paddle/fluid/tests/unittests/test_adam_op.py
index da91875a1..461196689 100644
--- a/python/paddle/fluid/tests/unittests/test_adam_op.py
+++ b/python/paddle/fluid/tests/unittests/test_adam_op.py
@@ -195,7 +195,7 @@ def adam_step(inputs, attributes):
 
 
 def adam_step_sparse(inputs, attributes, height, rows, row_numel, np_grad,
-                     sparse_mode):
+                     lazy_mode):
     '''
     Simulate one step of the adam optimizer
     :param inputs: dict of inputs
@@ -231,7 +231,7 @@ class TestSparseAdamOp(unittest.TestCase):
-    def setup(self, scope, place, sparse_mode):
+    def setup(self, scope, place, lazy_mode):
         beta1 = 0.78
         beta2 = 0.836
         epsilon = 1e-4
@@ -265,19 +265,19 @@ class TestSparseAdamOp(unittest.TestCase):
         param_out, mom1, mom2 = adam_step_sparse(self.dense_inputs, self.attrs,
                                                  height, rows, row_numel,
-                                                 np_array, sparse_mode)
+                                                 np_array, lazy_mode)
         self.outputs = {
             "ParamOut": param_out,
             "Moment1Out": mom1,
             "Moment2Out": mom2
         }
 
-    def check_with_place(self, place, sparse_mode):
+    def check_with_place(self, place, lazy_mode):
         scope = core.Scope()
-        self.setup(scope, place, sparse_mode)
+        self.setup(scope, place, lazy_mode)
 
         op_args = dict()
-        op_args['sparse_mode'] = sparse_mode
+        op_args['lazy_mode'] = lazy_mode
         for key, np_array in self.dense_inputs.items():
             var = scope.var(key).get_tensor()
             var.set(np_array, place)
@@ -313,8 +313,8 @@ class TestSparseAdamOp(unittest.TestCase):
         if core.is_compiled_with_cuda():
             places.append(core.CUDAPlace(0))
         for place in places:
-            for sparse_mode in (True, False):
-                self.check_with_place(place, sparse_mode)
+            for lazy_mode in (True, False):
+                self.check_with_place(place, lazy_mode)
 
 
 if __name__ == "__main__":
-- 
GitLab
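
For reference, a minimal usage sketch of the renamed flag, based only on the
AdamOptimizer signature and the `lazy_mode` attribute visible in this patch;
the learning rate value is an illustrative assumption, not part of the change:

    import paddle.fluid as fluid

    # After this patch the flag is spelled `lazy_mode` (previously `sparse_mode`).
    # When enabled, the adam op only updates the rows of a parameter that
    # actually received a gradient in a sparse update.
    optimizer = fluid.optimizer.AdamOptimizer(
        learning_rate=0.001,  # illustrative value, not taken from this patch
        lazy_mode=True)

    # Calling optimizer.minimize(loss) on a program with sparse gradients
    # would then emit an adam op carrying the `lazy_mode` attribute
    # registered in adam_op.cc.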