From 00e8791f66186663bed67353722875a27a5e3256 Mon Sep 17 00:00:00 2001
From: dzhwinter
Date: Wed, 17 Oct 2018 19:29:47 +0800
Subject: [PATCH] fix compile in cpu error. test=develop

---
 paddle/fluid/operators/momentum_op.cc         | 15 +++++---
 paddle/fluid/operators/momentum_op.h          | 22 ++++++-----
 .../fluid/tests/unittests/test_momentum_op.py | 38 ++++++++-----------
 3 files changed, 37 insertions(+), 38 deletions(-)

diff --git a/paddle/fluid/operators/momentum_op.cc b/paddle/fluid/operators/momentum_op.cc
index fad6f80166c..12b916fcebd 100644
--- a/paddle/fluid/operators/momentum_op.cc
+++ b/paddle/fluid/operators/momentum_op.cc
@@ -45,12 +45,15 @@ class MomentumOp : public framework::OperatorWithKernel {
                    "Output(VelocityOut) of Momentum should not be null.");
 
     auto param_dim = ctx->GetInputDim("Param");
-    PADDLE_ENFORCE_EQ(
-        param_dim, ctx->GetInputDim("Grad"),
-        "Param and Grad input of MomentumOp should have the same dimension.");
-    PADDLE_ENFORCE_EQ(
-        param_dim, ctx->GetInputDim("Velocity"),
-        "Param and Velocity of MomentumOp should have the same dimension.");
+    if (ctx->GetInputsVarType("Grad")[0] ==
+        framework::proto::VarType::LOD_TENSOR) {
+      PADDLE_ENFORCE_EQ(
+          param_dim, ctx->GetInputDim("Grad"),
+          "Param and Grad input of MomentumOp should have the same dimension.");
+      PADDLE_ENFORCE_EQ(
+          param_dim, ctx->GetInputDim("Velocity"),
+          "Param and Velocity of MomentumOp should have the same dimension.");
+    }
 
     PADDLE_ENFORCE_EQ(framework::product(ctx->GetInputDim("LearningRate")), 1,
                       "Learning_rate should be a scalar");
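Note on the hunk above: the dimension check is now applied only when Grad is a
dense LOD_TENSOR, because a SelectedRows gradient carries just the rows that
were actually updated, so its value tensor is legitimately smaller than the
parameter and the old unconditional check would reject it. A minimal NumPy
sketch of the shape relationship (the names and sizes here are made up purely
for illustration):

    import numpy as np

    height, row_numel = 12, 4  # hypothetical full parameter shape
    rows = [0, 5, 9]           # rows that actually received a gradient

    param = np.zeros((height, row_numel), dtype=np.float32)
    # A SelectedRows gradient stores only the touched rows:
    grad_value = np.ones((len(rows), row_numel), dtype=np.float32)

    assert grad_value.shape == (len(rows), row_numel)
    assert grad_value.shape != param.shape  # a dense-grad check would fail here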
diff --git a/paddle/fluid/operators/momentum_op.h b/paddle/fluid/operators/momentum_op.h
index 4a74c078e6a..6b4d00f56ca 100644
--- a/paddle/fluid/operators/momentum_op.h
+++ b/paddle/fluid/operators/momentum_op.h
@@ -13,6 +13,7 @@ See the License for the specific language governing permissions and
 limitations under the License. */
 
 #pragma once
+#include <string>
 #include "paddle/fluid/framework/eigen.h"
 #include "paddle/fluid/framework/op_registry.h"
 #include "paddle/fluid/operators/math/algorithm.h"
@@ -303,28 +304,30 @@ class MomentumOpKernel : public framework::OpKernel<T> {
       auto* merged_grad = const_cast<framework::Scope&>(ctx.scope())
                               .Var()
                               ->GetMutable<framework::SelectedRows>();
-
       math::scatter::MergeAdd<DeviceContext, T> merge_func;
       merge_func(ctx.template device_context<DeviceContext>(), *grad,
                  merged_grad);
 
-      platform::ForRange<DeviceContext> for_range(
-          static_cast<const DeviceContext&>(ctx.device_context()),
-          param->numel());
-
       const int64_t* rows = nullptr;
+#ifdef PADDLE_WITH_CUDA
       if (platform::is_gpu_place(ctx.GetPlace())) {
         rows = merged_grad->rows().CUDAData(ctx.GetPlace());
       } else {
+#endif
         rows = merged_grad->rows().data();
+#ifdef PADDLE_WITH_CUDA
       }
-
+#endif
+      int64_t row_numel =
+          merged_grad->value().numel() / merged_grad->rows().size();
+      platform::ForRange<DeviceContext> for_range(
+          static_cast<const DeviceContext&>(ctx.device_context()),
+          param->numel());
       if (use_nesterov) {
         SparseMomentumFunctor<T, UseNesterov> functor(
             param->data<T>(), merged_grad->value().data<T>(),
-            velocity->data<T>(), learning_rate->data<T>(), mu, rows,
+            velocity->data<T>(), learning_rate->data<T>(), mu, rows, row_numel,
             static_cast<int64_t>(merged_grad->rows().size()),
-            static_cast<int64_t>(merged_grad->height()),
             param_out->mutable_data<T>(ctx.GetPlace()),
             velocity_out->mutable_data<T>(ctx.GetPlace()));
         for_range(functor);
@@ -332,9 +335,8 @@
       } else {
         SparseMomentumFunctor<T, NoNesterov> functor(
             param->data<T>(), merged_grad->value().data<T>(),
-            velocity->data<T>(), learning_rate->data<T>(), mu, rows,
+            velocity->data<T>(), learning_rate->data<T>(), mu, rows, row_numel,
             static_cast<int64_t>(merged_grad->rows().size()),
-            static_cast<int64_t>(merged_grad->height()),
             param_out->mutable_data<T>(ctx.GetPlace()),
             velocity_out->mutable_data<T>(ctx.GetPlace()));
         for_range(functor);
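Two things change in the kernel hunks above. First, SelectedRows::rows().CUDAData()
exists only in CUDA builds, so the GPU branch is now fenced with
#ifdef PADDLE_WITH_CUDA; without the guard a CPU-only build fails to compile,
which is the error named in the subject line. Second, the sparse functors now
receive row_numel (columns per merged gradient row, i.e.
value().numel() / rows().size()) instead of the SelectedRows height, and
Velocity is treated as a dense tensor shaped like Param. Below is a NumPy
sketch of the update the functors compute, assuming rows has already been
merged free of duplicates; this is a reference model of the math, not the
C++ functor itself:

    import numpy as np

    def sparse_momentum_ref(param, rows, grad_value, velocity, lr, mu,
                            use_nesterov=False):
        # Scatter the merged sparse rows into a dense gradient.
        grad = np.zeros_like(param)
        grad[rows] = grad_value
        # Dense velocity update: rows without a gradient still decay by mu.
        velocity_out = mu * velocity + grad
        if use_nesterov:
            param_out = param - (grad + velocity_out * mu) * lr
        else:
            param_out = param - lr * velocity_out
        return param_out, velocity_out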
diff --git a/python/paddle/fluid/tests/unittests/test_momentum_op.py b/python/paddle/fluid/tests/unittests/test_momentum_op.py
index 9bbffaa7ebb..a3d89610b40 100644
--- a/python/paddle/fluid/tests/unittests/test_momentum_op.py
+++ b/python/paddle/fluid/tests/unittests/test_momentum_op.py
@@ -121,22 +121,13 @@ class TestSparseMomentumOp(unittest.TestCase):
         grad_tensor = grad_selected_rows.get_tensor()
         grad_tensor.set(grad_np_array, place)
 
-        velocity_selected_rows = scope.var('Velocity').get_selected_rows()
-        velocity_selected_rows.set_height(height)
-        velocity_selected_rows.set_rows(rows)
-        velocity_np_array = np.ones((len(rows), row_numel)).astype("float32")
-        velocity_np_array[0, 0] = 2.0
-        velocity_np_array[2, 8] = 2.0
-        velocity_tensor = velocity_selected_rows.get_tensor()
-        velocity_tensor.set(velocity_np_array, place)
-        velocity_out_selected_rows = scope.var('VelocityOut').get_selected_rows(
-        )
-        velocity_out_selected_rows.set_height(height)
-        velocity_out_selected_rows.set_rows(rows)
-        velocity_out_np_array = np.full((len(rows), row_numel),
+        velocity = scope.var('Velocity').get_tensor()
+        velocity_np_array = np.ones((height, row_numel)).astype("float32")
+        velocity.set(velocity_np_array, place)
+        velocity_out = scope.var('VelocityOut').get_tensor()
+        velocity_out_np_array = np.full((height, row_numel),
                                         0.0).astype("float32")
-        velocity_out_tensor = velocity_out_selected_rows.get_tensor()
-        velocity_out_tensor.set(velocity_out_np_array, place)
+        velocity_out.set(velocity_out_np_array, place)
 
         # create and initialize LeraningRate Variable
         lr = scope.var('LearningRate').get_tensor()
@@ -158,19 +149,22 @@ class TestSparseMomentumOp(unittest.TestCase):
 
         # get and compare result
         param_out_np_array = np.array(param_out)
-        velocity_out_np_array = np.array(velocity_out_tensor)
+        velocity_out_np_array = np.array(velocity_out)
 
         # TODO(dzh): add a more suitable general numpy interface
        # for sparse update.
-        _velocity_out = mu * velocity_np_array + grad_np_array
-        _param = param_array[rows]
+        _grad_np_array = np.full((height, row_numel), 0.0).astype("float32")
+        for i in range(len(rows)):
+            _grad_np_array[rows[i]] = grad_np_array[i]
+        _velocity_out = mu * velocity_np_array + _grad_np_array
+        _param = param_array
         if use_nesterov:
-            _param_out = _param - grad_np_array * lr_array - \
-                         _velocity_out * mu * lr_array
+            _param_out = _param - (_grad_np_array + _velocity_out * mu
+                                   ) * lr_array
         else:
-            _param_out = _param - lr * _velocity_out
-        self.assertTrue((_param_out == param_out_np_array[rows]).all())
+            _param_out = _param - lr_array * _velocity_out
         self.assertTrue((_velocity_out == velocity_out_np_array).all())
+        self.assertTrue((_param_out == param_out_np_array).all())
 
     def init_kernel(self):
         pass
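The reworked test follows the same dense-velocity model: Velocity and
VelocityOut become plain (height, row_numel) tensors, the sparse gradient rows
are scattered into a dense array before computing the reference result, and the
final comparison covers the whole parameter rather than just
param_out_np_array[rows], since with a dense velocity every row is updated, not
only the touched ones. Reusing the sketch above with made-up sizes (the real
test defines its own height, rows, and row_numel earlier in the file):

    param = np.random.rand(12, 4).astype("float32")
    velocity = np.ones_like(param)  # mirrors the test's np.ones init
    rows = [0, 5, 9]
    grad_value = np.random.rand(len(rows), 4).astype("float32")

    p_out, v_out = sparse_momentum_ref(param, rows, grad_value, velocity,
                                       lr=0.001, mu=0.9, use_nesterov=True)
    assert v_out.shape == param.shape            # velocity is dense now
    assert not np.allclose(p_out[1], param[1])   # untouched rows move too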