提交 f9681459 编写于 作者: Q qijun

fix gpu build error

上级 ab8cc401
...@@ -61,7 +61,7 @@ param_out = param - learning_rate * grad; ...@@ -61,7 +61,7 @@ param_out = param - learning_rate * grad;
template <typename T> template <typename T>
struct SparseSGDFunctor<platform::CPUPlace, T> { struct SparseSGDFunctor<platform::CPUPlace, T> {
void operator()(const platform::DeviceContext& ctx, void operator()(const platform::DeviceContext& context,
const framework::SelectedRows& input, const framework::SelectedRows& input,
const framework::Tensor& learning_rate, const framework::Tensor& learning_rate,
framework::Tensor* output) { framework::Tensor* output) {
......
...@@ -34,15 +34,15 @@ __global__ void SparseSGDFunctorKernel(const T* selected_rows, ...@@ -34,15 +34,15 @@ __global__ void SparseSGDFunctorKernel(const T* selected_rows,
for (int index = tid; index < row_numel; index += block_size) { for (int index = tid; index < row_numel; index += block_size) {
// Since row indices in SelectedRows may be duplicated, we have to use // Since row indices in SelectedRows may be duplicated, we have to use
// an atomic operation to avoid concurrent write errors. // an atomic operation to avoid concurrent write errors.
paddle::platform::CudaAtomicSub(tensor_out + index, paddle::platform::CudaAtomicAdd(
learning_rate[0] * selected_rows[index]); tensor_out + index, -1.0 * learning_rate[0] * selected_rows[index]);
} }
} }
} // namespace } // namespace
template <typename T> template <typename T>
struct SparseSGDFunctor<platform::GPUPlace, T> { struct SparseSGDFunctor<platform::GPUPlace, T> {
void operator()(const platform::DeviceContext& ctx, void operator()(const platform::DeviceContext& context,
const framework::SelectedRows& input, const framework::SelectedRows& input,
const framework::Tensor& learning_rate, const framework::Tensor& learning_rate,
framework::Tensor* output) { framework::Tensor* output) {
......
...@@ -22,7 +22,7 @@ namespace operators { ...@@ -22,7 +22,7 @@ namespace operators {
template <typename Place, typename T> template <typename Place, typename T>
struct SparseSGDFunctor { struct SparseSGDFunctor {
void operator()(const platform::DeviceContext& ctx, void operator()(const platform::DeviceContext& context,
const framework::SelectedRows& input, const framework::SelectedRows& input,
const framework::Tensor& learning_rate, const framework::Tensor& learning_rate,
framework::Tensor* output); framework::Tensor* output);
......
...@@ -153,7 +153,15 @@ PYBIND11_PLUGIN(core) { ...@@ -153,7 +153,15 @@ PYBIND11_PLUGIN(core) {
py::return_value_policy::reference) py::return_value_policy::reference)
.def("set_height", &SelectedRows::set_height) .def("set_height", &SelectedRows::set_height)
.def("height", &SelectedRows::height) .def("height", &SelectedRows::height)
.def("set_rows", &SelectedRows::set_rows) .def("set_rows",
[](SelectedRows &self, std::vector<int64_t> rows) {
#ifndef PADDLE_WITH_CUDA
self.set_rows(rows);
#else
Vector<int64_t> new_rows(rows);
self.set_rows(new_rows);
#endif
})
.def("rows", [](SelectedRows &self) { .def("rows", [](SelectedRows &self) {
#ifndef PADDLE_WITH_CUDA #ifndef PADDLE_WITH_CUDA
return self.rows(); return self.rows();
......
...@@ -20,11 +20,10 @@ class TestSGDOp(OpTest): ...@@ -20,11 +20,10 @@ class TestSGDOp(OpTest):
class TestSparseSGDOp(unittest.TestCase): class TestSparseSGDOp(unittest.TestCase):
def test_sparse_sgd(self): def check_with_place(self, place):
scope = core.Scope() scope = core.Scope()
# create and initialize Grad Variable # create and initialize Grad Variable
place = core.CPUPlace()
height = 10 height = 10
rows = [0, 4, 7] rows = [0, 4, 7]
row_numel = 12 row_numel = 12
...@@ -35,6 +34,7 @@ class TestSparseSGDOp(unittest.TestCase): ...@@ -35,6 +34,7 @@ class TestSparseSGDOp(unittest.TestCase):
np_array = np.ones((len(rows), row_numel)).astype("float32") np_array = np.ones((len(rows), row_numel)).astype("float32")
np_array[0, 0] = 2.0 np_array[0, 0] = 2.0
np_array[2, 8] = 4.0 np_array[2, 8] = 4.0
grad_tensor = grad_selected_rows.get_tensor() grad_tensor = grad_selected_rows.get_tensor()
grad_tensor.set(np_array, place) grad_tensor.set(np_array, place)
...@@ -76,6 +76,13 @@ class TestSparseSGDOp(unittest.TestCase): ...@@ -76,6 +76,13 @@ class TestSparseSGDOp(unittest.TestCase):
# rows[2] = 7, 5.0 - 2.0 * 4.0 # rows[2] = 7, 5.0 - 2.0 * 4.0
self.assertAlmostEqual(-3.0, result_array[rows[2], 8]) self.assertAlmostEqual(-3.0, result_array[rows[2], 8])
def test_sparse_sgd(self):
    """Run check_with_place once for every place this build offers.

    CPUPlace is always exercised; GPUPlace(0) is added only when
    core.is_compile_gpu() reports true.
    """
    # Build the candidate list in one expression: CPU first, then the
    # optional GPU entry, so the checks run in the same order as before.
    targets = [core.CPUPlace()] + (
        [core.GPUPlace(0)] if core.is_compile_gpu() else [])
    for target in targets:
        self.check_with_place(target)
if __name__ == "__main__": if __name__ == "__main__":
unittest.main() unittest.main()
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册