Fix GPU build error

f9681459 · qijun · ab8cc401 · f9681459 · f9681459 · f9681459
5 changed files
--- a/paddle/operators/sgd_op.cc
+++ b/paddle/operators/sgd_op.cc
@@ -61,7 +61,7 @@ param_out = param - learning_rate * grad;

 template <typename T>
 struct SparseSGDFunctor<platform::CPUPlace, T> {
-  void operator()(const platform::DeviceContext& ctx,
+  void operator()(const platform::DeviceContext& context,
                  const framework::SelectedRows& input,
                  const framework::Tensor& learning_rate,
                  framework::Tensor* output) {

--- a/paddle/operators/sgd_op.cu
+++ b/paddle/operators/sgd_op.cu
@@ -34,15 +34,15 @@ __global__ void SparseSGDFunctorKernel(const T* selected_rows,
  for (int index = tid; index < row_numel; index += block_size) {
    // Since index in rows of SelectedRows can be duplicate, we have to use
    // Atomic Operation to avoid concurrent write error.
-    paddle::platform::CudaAtomicSub(tensor_out + index,
-                                    learning_rate[0] * selected_rows[index]);
+    paddle::platform::CudaAtomicAdd(
+        tensor_out + index, -1.0 * learning_rate[0] * selected_rows[index]);
  }
 }
 }  // namespace

 template <typename T>
 struct SparseSGDFunctor<platform::GPUPlace, T> {
-  void operator()(const platform::DeviceContext& ctx,
+  void operator()(const platform::DeviceContext& context,
                  const framework::SelectedRows& input,
                  const framework::Tensor& learning_rate,
                  framework::Tensor* output) {

--- a/paddle/operators/sgd_op.h
+++ b/paddle/operators/sgd_op.h
@@ -22,7 +22,7 @@ namespace operators {

 template <typename Place, typename T>
 struct SparseSGDFunctor {
-  void operator()(const platform::DeviceContext& ctx,
+  void operator()(const platform::DeviceContext& context,
                  const framework::SelectedRows& input,
                  const framework::Tensor& learning_rate,
                  framework::Tensor* output);

--- a/paddle/pybind/pybind.cc
+++ b/paddle/pybind/pybind.cc
@@ -153,7 +153,15 @@ PYBIND11_PLUGIN(core) {
           py::return_value_policy::reference)
      .def("set_height", &SelectedRows::set_height)
      .def("height", &SelectedRows::height)
-      .def("set_rows", &SelectedRows::set_rows)
+      .def("set_rows",
+           [](SelectedRows &self, std::vector<int64_t> rows) {
+#ifndef PADDLE_WITH_CUDA
+             self.set_rows(rows);
+#else
+        Vector<int64_t> new_rows(rows);
+        self.set_rows(new_rows);
+#endif
+           })
      .def("rows", [](SelectedRows &self) {
 #ifndef PADDLE_WITH_CUDA
        return self.rows();

--- a/python/paddle/v2/framework/tests/test_sgd_op.py
+++ b/python/paddle/v2/framework/tests/test_sgd_op.py
@@ -20,11 +20,10 @@ class TestSGDOp(OpTest):


 class TestSparseSGDOp(unittest.TestCase):
-    def test_sparse_sgd(self):
+    def check_with_place(self, place):
        scope = core.Scope()

        # create and initialize Grad Variable   
-        place = core.CPUPlace()
        height = 10
        rows = [0, 4, 7]
        row_numel = 12
@@ -35,6 +34,7 @@ class TestSparseSGDOp(unittest.TestCase):
        np_array = np.ones((len(rows), row_numel)).astype("float32")
        np_array[0, 0] = 2.0
        np_array[2, 8] = 4.0
+
        grad_tensor = grad_selected_rows.get_tensor()
        grad_tensor.set(np_array, place)

@@ -76,6 +76,13 @@ class TestSparseSGDOp(unittest.TestCase):
        # rows[2] = 7, 5.0 - 2.0 * 4.0
        self.assertAlmostEqual(-3.0, result_array[rows[2], 8])

+    def test_sparse_sgd(self):
+        places = [core.CPUPlace()]
+        if core.is_compile_gpu():
+            places.append(core.GPUPlace(0))
+        for place in places:
+            self.check_with_place(place)
+

 if __name__ == "__main__":
    unittest.main()