diff --git a/paddle/operators/sgd_op.cc b/paddle/operators/sgd_op.cc
index e26a1c7893d6b47fb8126dba646f7f428e973195..2acb96d1b4f5903ff6c57b10e7621c8adaf73171 100644
--- a/paddle/operators/sgd_op.cc
+++ b/paddle/operators/sgd_op.cc
@@ -61,7 +61,7 @@ param_out = param - learning_rate * grad;
 
 template <typename T>
 struct SparseSGDFunctor<platform::CPUPlace, T> {
-  void operator()(const platform::DeviceContext& ctx,
+  void operator()(const platform::DeviceContext& context,
                   const framework::SelectedRows& input,
                   const framework::Tensor& learning_rate,
                   framework::Tensor* output) {
diff --git a/paddle/operators/sgd_op.cu b/paddle/operators/sgd_op.cu
index 5c28314141cebe606ea4d32cd0dd0deee9406d14..106f9b746ba6614d8fa68b677c47ec04ed26fb81 100644
--- a/paddle/operators/sgd_op.cu
+++ b/paddle/operators/sgd_op.cu
@@ -34,15 +34,15 @@ __global__ void SparseSGDFunctorKernel(const T* selected_rows,
   for (int index = tid; index < row_numel; index += block_size) {
     // Since index in rows of SelectedRows can be duplicate, we have to use
     // Atomic Operation to avoid concurrent write error.
-    paddle::platform::CudaAtomicSub(tensor_out + index,
-                                    learning_rate[0] * selected_rows[index]);
+    paddle::platform::CudaAtomicAdd(
+        tensor_out + index, -1.0 * learning_rate[0] * selected_rows[index]);
   }
 }
 }  // namespace
 
 template <typename T>
 struct SparseSGDFunctor<platform::GPUPlace, T> {
-  void operator()(const platform::DeviceContext& ctx,
+  void operator()(const platform::DeviceContext& context,
                   const framework::SelectedRows& input,
                   const framework::Tensor& learning_rate,
                   framework::Tensor* output) {
diff --git a/paddle/operators/sgd_op.h b/paddle/operators/sgd_op.h
index 8c28d5e66b3f17be97d80bde653dfd5f8f7e77b5..78b595fc6c63d775b627f23cafa9458f1dadd4e5 100644
--- a/paddle/operators/sgd_op.h
+++ b/paddle/operators/sgd_op.h
@@ -22,7 +22,7 @@ namespace operators {
 
 template <typename Place, typename T>
 struct SparseSGDFunctor {
-  void operator()(const platform::DeviceContext& ctx,
+  void operator()(const platform::DeviceContext& context,
                   const framework::SelectedRows& input,
                   const framework::Tensor& learning_rate,
                   framework::Tensor* output);
diff --git a/paddle/pybind/pybind.cc b/paddle/pybind/pybind.cc
index 65e265b6142c922f8c2904ece465cbbb661d4ad5..80854fb0c5013fe0de857fba93749f9f3574b2ab 100644
--- a/paddle/pybind/pybind.cc
+++ b/paddle/pybind/pybind.cc
@@ -153,7 +153,15 @@ PYBIND11_PLUGIN(core) {
            py::return_value_policy::reference)
       .def("set_height", &SelectedRows::set_height)
       .def("height", &SelectedRows::height)
-      .def("set_rows", &SelectedRows::set_rows)
+      .def("set_rows",
+           [](SelectedRows &self, std::vector<int64_t> rows) {
+#ifndef PADDLE_WITH_CUDA
+             self.set_rows(rows);
+#else
+             Vector<int64_t> new_rows(rows);
+             self.set_rows(new_rows);
+#endif
+           })
       .def("rows", [](SelectedRows &self) {
 #ifndef PADDLE_WITH_CUDA
         return self.rows();
diff --git a/python/paddle/v2/framework/tests/test_sgd_op.py b/python/paddle/v2/framework/tests/test_sgd_op.py
index c7d6a3b345f6d5f88a7c263a3b1169a6e34e1174..01262bba4d43adaed179baef88ccab6e69b0884b 100644
--- a/python/paddle/v2/framework/tests/test_sgd_op.py
+++ b/python/paddle/v2/framework/tests/test_sgd_op.py
@@ -20,11 +20,10 @@ class TestSGDOp(OpTest):
 
 
 class TestSparseSGDOp(unittest.TestCase):
-    def test_sparse_sgd(self):
+    def check_with_place(self, place):
         scope = core.Scope()
 
         # create and initialize Grad Variable
-        place = core.CPUPlace()
         height = 10
         rows = [0, 4, 7]
         row_numel = 12
@@ -35,6 +34,7 @@ class TestSparseSGDOp(unittest.TestCase):
         np_array = np.ones((len(rows), row_numel)).astype("float32")
         np_array[0, 0] = 2.0
         np_array[2, 8] = 4.0
+
         grad_tensor = grad_selected_rows.get_tensor()
         grad_tensor.set(np_array, place)
 
@@ -76,6 +76,13 @@ class TestSparseSGDOp(unittest.TestCase):
         # rows[2] = 7, 5.0 - 2.0 * 4.0
         self.assertAlmostEqual(-3.0, result_array[rows[2], 8])
 
+    def test_sparse_sgd(self):
+        places = [core.CPUPlace()]
+        if core.is_compile_gpu():
+            places.append(core.GPUPlace(0))
+        for place in places:
+            self.check_with_place(place)
+
 
 if __name__ == "__main__":
     unittest.main()
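
Note on the CudaAtomicSub to CudaAtomicAdd change above: CUDA's built-in atomicSub only has integer overloads, while atomicAdd does have a float overload, which is presumably why the kernel now adds a negated update instead of subtracting. The sketch below is a minimal standalone illustration of that pattern, not the Paddle kernel; the kernel name, launch configuration, and test values are invented for illustration.

// sparse_sub_demo.cu: atomic "subtract" expressed as atomicAdd of a negated value.
#include <cstdio>
#include <cuda_runtime.h>

__global__ void ScaledSubKernel(const float* grad, const float* lr, float* out,
                                int n) {
  int i = blockIdx.x * blockDim.x + threadIdx.x;
  if (i < n) {
    // There is no float overload of atomicSub, so subtract by atomically
    // adding the negated value, as the patched kernel does.
    atomicAdd(out + i, -1.0f * lr[0] * grad[i]);
  }
}

int main() {
  const int n = 4;
  float h_grad[n] = {1.f, 2.f, 3.f, 4.f};
  float h_lr[1] = {0.1f};
  float h_out[n] = {10.f, 10.f, 10.f, 10.f};

  float *d_grad, *d_lr, *d_out;
  cudaMalloc(&d_grad, n * sizeof(float));
  cudaMalloc(&d_lr, sizeof(float));
  cudaMalloc(&d_out, n * sizeof(float));
  cudaMemcpy(d_grad, h_grad, n * sizeof(float), cudaMemcpyHostToDevice);
  cudaMemcpy(d_lr, h_lr, sizeof(float), cudaMemcpyHostToDevice);
  cudaMemcpy(d_out, h_out, n * sizeof(float), cudaMemcpyHostToDevice);

  ScaledSubKernel<<<1, 32>>>(d_grad, d_lr, d_out, n);
  cudaMemcpy(h_out, d_out, n * sizeof(float), cudaMemcpyDeviceToHost);

  // Expected: 9.9 9.8 9.7 9.6, i.e. out[i] - lr * grad[i].
  for (int i = 0; i < n; ++i) printf("%f ", h_out[i]);
  printf("\n");

  cudaFree(d_grad);
  cudaFree(d_lr);
  cudaFree(d_out);
  return 0;
}

Compile with nvcc sparse_sub_demo.cu and run; the point is only that atomicAdd with a negative operand reproduces the removed subtraction while staying safe under concurrent updates to the same element, which is the duplicate-row case the kernel comment describes.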