From f9681459b2075e8067e6bda45a62967fc4baec62 Mon Sep 17 00:00:00 2001
From: qijun
Date: Tue, 17 Oct 2017 16:33:52 -0700
Subject: [PATCH] fix gpu build error

---
 paddle/operators/sgd_op.cc                      |  2 +-
 paddle/operators/sgd_op.cu                      |  6 +++---
 paddle/operators/sgd_op.h                       |  2 +-
 paddle/pybind/pybind.cc                         | 10 +++++++++-
 python/paddle/v2/framework/tests/test_sgd_op.py | 11 +++++++++--
 5 files changed, 23 insertions(+), 8 deletions(-)

diff --git a/paddle/operators/sgd_op.cc b/paddle/operators/sgd_op.cc
index e26a1c7893..2acb96d1b4 100644
--- a/paddle/operators/sgd_op.cc
+++ b/paddle/operators/sgd_op.cc
@@ -61,7 +61,7 @@ param_out = param - learning_rate * grad;
 
 template <typename T>
 struct SparseSGDFunctor<platform::CPUPlace, T> {
-  void operator()(const platform::DeviceContext& ctx,
+  void operator()(const platform::DeviceContext& context,
                   const framework::SelectedRows& input,
                   const framework::Tensor& learning_rate,
                   framework::Tensor* output) {
diff --git a/paddle/operators/sgd_op.cu b/paddle/operators/sgd_op.cu
index 5c28314141..106f9b746b 100644
--- a/paddle/operators/sgd_op.cu
+++ b/paddle/operators/sgd_op.cu
@@ -34,15 +34,15 @@ __global__ void SparseSGDFunctorKernel(const T* selected_rows,
   for (int index = tid; index < row_numel; index += block_size) {
     // Since index in rows of SelectedRows can be duplicate, we have to use
     // Atomic Operation to avoid concurrent write error.
-    paddle::platform::CudaAtomicSub(tensor_out + index,
-                                    learning_rate[0] * selected_rows[index]);
+    paddle::platform::CudaAtomicAdd(
+        tensor_out + index, -1.0 * learning_rate[0] * selected_rows[index]);
   }
 }
 }  // namespace
 
 template <typename T>
 struct SparseSGDFunctor<platform::GPUPlace, T> {
-  void operator()(const platform::DeviceContext& ctx,
+  void operator()(const platform::DeviceContext& context,
                   const framework::SelectedRows& input,
                   const framework::Tensor& learning_rate,
                   framework::Tensor* output) {
diff --git a/paddle/operators/sgd_op.h b/paddle/operators/sgd_op.h
index 8c28d5e66b..78b595fc6c 100644
--- a/paddle/operators/sgd_op.h
+++ b/paddle/operators/sgd_op.h
@@ -22,7 +22,7 @@ namespace operators {
 
 template <typename Place, typename T>
 struct SparseSGDFunctor {
-  void operator()(const platform::DeviceContext& ctx,
+  void operator()(const platform::DeviceContext& context,
                   const framework::SelectedRows& input,
                   const framework::Tensor& learning_rate,
                   framework::Tensor* output);
diff --git a/paddle/pybind/pybind.cc b/paddle/pybind/pybind.cc
index 65e265b614..80854fb0c5 100644
--- a/paddle/pybind/pybind.cc
+++ b/paddle/pybind/pybind.cc
@@ -153,7 +153,15 @@ PYBIND11_PLUGIN(core) {
            py::return_value_policy::reference)
       .def("set_height", &SelectedRows::set_height)
       .def("height", &SelectedRows::height)
-      .def("set_rows", &SelectedRows::set_rows)
+      .def("set_rows",
+           [](SelectedRows &self, std::vector<int64_t> rows) {
+#ifndef PADDLE_WITH_CUDA
+             self.set_rows(rows);
+#else
+             Vector<int64_t> new_rows(rows);
+             self.set_rows(new_rows);
+#endif
+           })
       .def("rows", [](SelectedRows &self) {
 #ifndef PADDLE_WITH_CUDA
         return self.rows();
diff --git a/python/paddle/v2/framework/tests/test_sgd_op.py b/python/paddle/v2/framework/tests/test_sgd_op.py
index c7d6a3b345..01262bba4d 100644
--- a/python/paddle/v2/framework/tests/test_sgd_op.py
+++ b/python/paddle/v2/framework/tests/test_sgd_op.py
@@ -20,11 +20,10 @@ class TestSGDOp(OpTest):
 
 
 class TestSparseSGDOp(unittest.TestCase):
-    def test_sparse_sgd(self):
+    def check_with_place(self, place):
         scope = core.Scope()
 
         # create and initialize Grad Variable
-        place = core.CPUPlace()
         height = 10
         rows = [0, 4, 7]
         row_numel = 12
@@ -35,6 +34,7 @@ class TestSparseSGDOp(unittest.TestCase):
         np_array = np.ones((len(rows), row_numel)).astype("float32")
         np_array[0, 0] = 2.0
         np_array[2, 8] = 4.0
+
         grad_tensor = grad_selected_rows.get_tensor()
         grad_tensor.set(np_array, place)
 
@@ -76,6 +76,13 @@ class TestSparseSGDOp(unittest.TestCase):
         # rows[2] = 7, 5.0 - 2.0 * 4.0
         self.assertAlmostEqual(-3.0, result_array[rows[2], 8])
 
+    def test_sparse_sgd(self):
+        places = [core.CPUPlace()]
+        if core.is_compile_gpu():
+            places.append(core.GPUPlace(0))
+        for place in places:
+            self.check_with_place(place)
+
 
 if __name__ == "__main__":
     unittest.main()
-- 
GitLab
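Note on the sgd_op.cu change: the GPU build presumably failed because paddle/platform does not provide a CudaAtomicSub helper, so the patch rewrites the SGD update as an atomic add of the negated term. Below is a minimal standalone sketch of that pattern using the raw CUDA atomicAdd intrinsic instead of paddle::platform::CudaAtomicAdd; the kernel name and parameters are illustrative, not taken from the patch.

    #include <cuda_runtime.h>

    // Sketch of the "subtract via atomicAdd of a negated value" pattern used in
    // the patched SparseSGDFunctorKernel. Rows in a SelectedRows gradient may be
    // duplicated, so concurrent blocks can update the same output row; the
    // atomic add keeps those updates race-free.
    __global__ void sparse_row_sgd_update(const float* grad_row,
                                          const float* learning_rate,
                                          float* param_row, int row_numel) {
      for (int i = threadIdx.x; i < row_numel; i += blockDim.x) {
        // Equivalent to param_row[i] -= learning_rate[0] * grad_row[i], but
        // written as an atomic add because CUDA has no float atomicSub.
        atomicAdd(param_row + i, -1.0f * learning_rate[0] * grad_row[i]);
      }
    }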