提交 f9681459 编写于 作者: Q qijun

fix gpu build error

上级 ab8cc401
......@@ -61,7 +61,7 @@ param_out = param - learning_rate * grad;
template <typename T>
struct SparseSGDFunctor<platform::CPUPlace, T> {
void operator()(const platform::DeviceContext& ctx,
void operator()(const platform::DeviceContext& context,
const framework::SelectedRows& input,
const framework::Tensor& learning_rate,
framework::Tensor* output) {
......
......@@ -34,15 +34,15 @@ __global__ void SparseSGDFunctorKernel(const T* selected_rows,
for (int index = tid; index < row_numel; index += block_size) {
// Row indices in SelectedRows can be duplicated, so we have to use an
// atomic operation to avoid concurrent write errors.
paddle::platform::CudaAtomicSub(tensor_out + index,
learning_rate[0] * selected_rows[index]);
paddle::platform::CudaAtomicAdd(
tensor_out + index, -1.0 * learning_rate[0] * selected_rows[index]);
}
}
} // namespace
template <typename T>
struct SparseSGDFunctor<platform::GPUPlace, T> {
void operator()(const platform::DeviceContext& ctx,
void operator()(const platform::DeviceContext& context,
const framework::SelectedRows& input,
const framework::Tensor& learning_rate,
framework::Tensor* output) {
......
......@@ -22,7 +22,7 @@ namespace operators {
template <typename Place, typename T>
struct SparseSGDFunctor {
void operator()(const platform::DeviceContext& ctx,
void operator()(const platform::DeviceContext& context,
const framework::SelectedRows& input,
const framework::Tensor& learning_rate,
framework::Tensor* output);
......
......@@ -153,7 +153,15 @@ PYBIND11_PLUGIN(core) {
py::return_value_policy::reference)
.def("set_height", &SelectedRows::set_height)
.def("height", &SelectedRows::height)
.def("set_rows", &SelectedRows::set_rows)
.def("set_rows",
[](SelectedRows &self, std::vector<int64_t> rows) {
#ifndef PADDLE_WITH_CUDA
self.set_rows(rows);
#else
Vector<int64_t> new_rows(rows);
self.set_rows(new_rows);
#endif
})
.def("rows", [](SelectedRows &self) {
#ifndef PADDLE_WITH_CUDA
return self.rows();
......
......@@ -20,11 +20,10 @@ class TestSGDOp(OpTest):
class TestSparseSGDOp(unittest.TestCase):
def test_sparse_sgd(self):
def check_with_place(self, place):
scope = core.Scope()
# create and initialize Grad Variable
place = core.CPUPlace()
height = 10
rows = [0, 4, 7]
row_numel = 12
......@@ -35,6 +34,7 @@ class TestSparseSGDOp(unittest.TestCase):
np_array = np.ones((len(rows), row_numel)).astype("float32")
np_array[0, 0] = 2.0
np_array[2, 8] = 4.0
grad_tensor = grad_selected_rows.get_tensor()
grad_tensor.set(np_array, place)
......@@ -76,6 +76,13 @@ class TestSparseSGDOp(unittest.TestCase):
# rows[2] = 7, 5.0 - 2.0 * 4.0
self.assertAlmostEqual(-3.0, result_array[rows[2], 8])
def test_sparse_sgd(self):
places = [core.CPUPlace()]
if core.is_compile_gpu():
places.append(core.GPUPlace(0))
for place in places:
self.check_with_place(place)
if __name__ == "__main__":
unittest.main()
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册