Commit c624417c authored by Qiao Longfei

change sparse mode to lazy mode

Parent 4035e4ba
@@ -111,7 +111,7 @@ class AdamOpMaker : public framework::OpProtoAndCheckerMaker {
                    "Constant for numerical stability")
         .SetDefault(1.0e-8f);
     AddAttr<bool>(
-        "sparse_mode",
+        "lazy_mode",
         "(bool, default false) "
         "only update the parameter that has gradient in sparse update")
         .SetDefault(false);
......
@@ -177,13 +177,13 @@ struct SparseAdamFunctor {
   const int64_t* rows_;
   int64_t row_numel_;
   int64_t row_count_;
-  bool sparse_mode_;
+  bool lazy_mode_;

   SparseAdamFunctor(T beta1, T beta2, T epsilon, const T* beta1_pow,
                     const T* beta2_pow, const T* mom1, T* mom1_out,
                     const T* mom2, T* mom2_out, const T* lr, const T* grad,
                     const T* param, T* param_out, const int64_t* rows,
-                    int64_t row_numel, int64_t row_count, bool sparse_mode)
+                    int64_t row_numel, int64_t row_count, bool lazy_mode)
       : beta1_(beta1),
         beta2_(beta2),
         epsilon_(epsilon),
@@ -200,7 +200,7 @@ struct SparseAdamFunctor {
         rows_(rows),
         row_numel_(row_numel),
         row_count_(row_count),
-        sparse_mode_(sparse_mode) {}
+        lazy_mode_(lazy_mode) {}

   inline HOSTDEVICE void adam_update(size_t i, T g) const {
     // The following code is the same as dense
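For reference, the comment above notes that `adam_update` applies the same per-element rule as the dense Adam path. A minimal NumPy sketch of that rule (illustrative only; the variable names are hypothetical, and the bias correction is folded into the learning rate as in the Paddle kernel):

```python
import numpy as np

def adam_update(param, grad, mom1, mom2, lr,
                beta1, beta2, epsilon, beta1_pow, beta2_pow):
    # Element-wise moment estimates, same as the dense path.
    mom1 = beta1 * mom1 + (1 - beta1) * grad
    mom2 = beta2 * mom2 + (1 - beta2) * grad * grad
    # Bias correction folded into the learning rate.
    lr_t = lr * np.sqrt(1 - beta2_pow) / (1 - beta1_pow)
    param = param - lr_t * mom1 / (np.sqrt(mom2) + epsilon)
    return param, mom1, mom2
```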
@@ -245,7 +245,7 @@ class AdamOpKernel : public framework::OpKernel<T> {
     using paddle::framework::LoDTensor;
     using paddle::operators::detail::Ref;

-    bool sparse_mode = ctx.Attr<bool>("sparse_mode");
+    bool lazy_mode = ctx.Attr<bool>("lazy_mode");
     T beta1 = static_cast<T>(ctx.Attr<float>("beta1"));
     T beta2 = static_cast<T>(ctx.Attr<float>("beta2"));
     T epsilon = static_cast<T>(ctx.Attr<float>("epsilon"));
@@ -357,8 +357,8 @@ class AdamOpKernel : public framework::OpKernel<T> {
           mom2_out.template mutable_data<T>(ctx.GetPlace()),
           lr.template data<T>(), grad_data, param.template data<T>(),
           param_out.template mutable_data<T>(ctx.GetPlace()), rows, row_numel,
-          grad_merge.rows().size(), sparse_mode);
-      if (sparse_mode) {
+          grad_merge.rows().size(), lazy_mode);
+      if (lazy_mode) {
         size_t row_count = grad_merge.rows().size();
         for (size_t row_index = 0; row_index < row_count; ++row_index) {
           for (size_t offset = 0; offset < row_numel; ++offset) {
......
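The branch above is what the new name refers to: with lazy_mode enabled, the kernel walks only the rows that appear in the merged sparse gradient (the nested row_index/offset loop) instead of sweeping every parameter row. A rough NumPy sketch of that behaviour, assuming `rows` holds the row indices present in the gradient and `grad` holds the corresponding rows of values (names are hypothetical):

```python
import numpy as np

def lazy_sparse_adam_step(param, mom1, mom2, rows, grad, lr,
                          beta1, beta2, epsilon, beta1_pow, beta2_pow):
    lr_t = lr * np.sqrt(1 - beta2_pow) / (1 - beta1_pow)
    for i, row in enumerate(rows):
        g = grad[i]  # gradient values for this row only
        mom1[row] = beta1 * mom1[row] + (1 - beta1) * g
        mom2[row] = beta2 * mom2[row] + (1 - beta2) * g * g
        param[row] -= lr_t * mom1[row] / (np.sqrt(mom2[row]) + epsilon)
    return param, mom1, mom2
```

With the flag left at its default (false), the non-lazy path (not shown in this diff) updates every parameter row, treating rows absent from the gradient as having a zero gradient, so their moments still decay; lazy mode skips those rows entirely, which is what the attribute description means by only updating parameters that actually received a gradient.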
@@ -664,7 +664,7 @@ class AdamOptimizer(Optimizer):
                  epsilon=1e-8,
                  regularization=None,
                  name=None,
-                 sparse_mode=False):
+                 lazy_mode=False):
         assert learning_rate is not None
         assert beta1 is not None
         assert beta2 is not None
@@ -677,7 +677,7 @@ class AdamOptimizer(Optimizer):
         self._beta1 = beta1
         self._beta2 = beta2
         self._epsilon = epsilon
-        self._sparse_mode = sparse_mode
+        self._lazy_mode = lazy_mode

     def _create_accumulators(self, block, parameters):
         assert isinstance(block, framework.Block)
@@ -732,7 +732,7 @@ class AdamOptimizer(Optimizer):
                 "beta1": self._beta1,
                 "beta2": self._beta2,
                 "epsilon": self._epsilon,
-                "sparse_mode": self._sparse_mode
+                "lazy_mode": self._lazy_mode
             })

         return adam_op
......
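On the Python side the rename surfaces as the lazy_mode argument of AdamOptimizer. A hedged usage sketch (the network below is a placeholder; only the lazy_mode argument comes from this change, and a sparse embedding lookup is the typical case it targets):

```python
import paddle.fluid as fluid

ids = fluid.layers.data(name='ids', shape=[1], dtype='int64')
# is_sparse=True makes the embedding gradient a SelectedRows (sparse) gradient.
emb = fluid.layers.embedding(input=ids, size=[100000, 64], is_sparse=True)
loss = fluid.layers.reduce_mean(emb)

# lazy_mode=True: only the embedding rows that were looked up get updated.
optimizer = fluid.optimizer.AdamOptimizer(learning_rate=1e-3, lazy_mode=True)
optimizer.minimize(loss)
```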
@@ -195,7 +195,7 @@ def adam_step(inputs, attributes):


 def adam_step_sparse(inputs, attributes, height, rows, row_numel, np_grad,
-                     sparse_mode):
+                     lazy_mode):
     '''
     Simulate one step of the adam optimizer
     :param inputs: dict of inputs
@@ -231,7 +231,7 @@ def adam_step_sparse(inputs, attributes, height, rows, row_numel, np_grad,


 class TestSparseAdamOp(unittest.TestCase):
-    def setup(self, scope, place, sparse_mode):
+    def setup(self, scope, place, lazy_mode):
         beta1 = 0.78
         beta2 = 0.836
         epsilon = 1e-4
@@ -265,19 +265,19 @@ class TestSparseAdamOp(unittest.TestCase):
         param_out, mom1, mom2 = adam_step_sparse(self.dense_inputs, self.attrs,
                                                  height, rows, row_numel,
-                                                 np_array, sparse_mode)
+                                                 np_array, lazy_mode)

         self.outputs = {
             "ParamOut": param_out,
             "Moment1Out": mom1,
             "Moment2Out": mom2
         }

-    def check_with_place(self, place, sparse_mode):
+    def check_with_place(self, place, lazy_mode):
         scope = core.Scope()
-        self.setup(scope, place, sparse_mode)
+        self.setup(scope, place, lazy_mode)

         op_args = dict()
-        op_args['sparse_mode'] = sparse_mode
+        op_args['lazy_mode'] = lazy_mode
         for key, np_array in self.dense_inputs.items():
             var = scope.var(key).get_tensor()
             var.set(np_array, place)
@@ -313,8 +313,8 @@ class TestSparseAdamOp(unittest.TestCase):
         if core.is_compiled_with_cuda():
             places.append(core.CUDAPlace(0))
         for place in places:
-            for sparse_mode in (True, False):
-                self.check_with_place(place, sparse_mode)
+            for lazy_mode in (True, False):
+                self.check_with_place(place, lazy_mode)


 if __name__ == "__main__":
......