diff --git a/paddle/fluid/eager/accumulation/accumulation_node.cc b/paddle/fluid/eager/accumulation/accumulation_node.cc
index 09db68399f332ba6411bc72c472ab014ee763205..0cfe8942ae0f965b302a4a6af58fc894858ae256 100644
--- a/paddle/fluid/eager/accumulation/accumulation_node.cc
+++ b/paddle/fluid/eager/accumulation/accumulation_node.cc
@@ -22,6 +22,7 @@
 #include "paddle/fluid/platform/errors.h"
 #include "paddle/phi/api/all.h"
 #include "paddle/phi/core/dense_tensor.h"
+#include "paddle/phi/core/sparse_coo_tensor.h"
 
 namespace egr {
 
@@ -49,6 +50,22 @@ static void CopyOrAddTensor(paddle::experimental::Tensor* tensor,
       paddle::imperative::SelectedRowsAddTensor(*tensor, t, &new_buffer);
       tensor->set_impl(new_buffer.impl());
     }
+  } else if (LIKELY(t.is_sparse_coo_tensor())) {
+    // In fact, the gradient of SparseTensor is still a SparseTensor
+    if (LIKELY(tensor->is_sparse_coo_tensor())) {
+      auto t_sparse =
+          std::dynamic_pointer_cast<phi::SparseCooTensor>(t.impl());
+      paddle::experimental::Tensor t_values(
+          std::make_shared<phi::DenseTensor>(
+              t_sparse->non_zero_elements()));
+      auto tensor_sparse =
+          std::dynamic_pointer_cast<phi::SparseCooTensor>(tensor->impl());
+      paddle::experimental::Tensor tensor_values(
+          std::make_shared<phi::DenseTensor>(
+              tensor_sparse->non_zero_elements()));
+      paddle::imperative::TensorAdd<paddle::experimental::Tensor>(
+          t_values, &tensor_values);
+    }
   } else {
     // TODO(jiabin): Support Other TensorBase later
     // TODO(zhanlve): Replace SelectedRowsAddTensor with
diff --git a/paddle/fluid/eager/grad_tensor_holder.cc b/paddle/fluid/eager/grad_tensor_holder.cc
index 6abf759cdba7aebfeb776f23611b4f2622be8e21..af2fcee6084f122280bac3886a8a40e1159b7b25 100644
--- a/paddle/fluid/eager/grad_tensor_holder.cc
+++ b/paddle/fluid/eager/grad_tensor_holder.cc
@@ -18,6 +18,7 @@
 #include "paddle/fluid/framework/convert_utils.h"
 #include "paddle/fluid/framework/var_type.h"
 #include "paddle/fluid/imperative/gradient_accumulator.h"
+#include "paddle/phi/core/sparse_coo_tensor.h"
 #include "paddle/phi/kernels/funcs/math_function.h"
 
 namespace egr {
@@ -130,6 +131,25 @@ void GradTensorHolder::add(size_t slot_id, size_t rank,
             &new_buffer);
         buffer_tensor.set_impl(new_buffer.impl());
       }
+    } else if (t.is_sparse_coo_tensor()) {
+      auto t_sparse = std::dynamic_pointer_cast<phi::SparseCooTensor>(t.impl());
+      paddle::experimental::Tensor t_values(
+          std::make_shared<phi::DenseTensor>(t_sparse->non_zero_elements()));
+      // In fact, the gradient of SparseTensor is still a SparseTensor
+      if (buffer_tensor.is_sparse_coo_tensor()) {
+        auto buffer_sparse = std::dynamic_pointer_cast<phi::SparseCooTensor>(
+            buffer_tensor.impl());
+        paddle::experimental::Tensor buffer_values(
+            std::make_shared<phi::DenseTensor>(
+                buffer_sparse->non_zero_elements()));
+        if (create_graph) {
+          buffer_values =
+              add_final_state_dygraph_function(t_values, buffer_values);
+        } else {
+          paddle::imperative::TensorAdd<paddle::experimental::Tensor>(
+              t_values, &buffer_values);
+        }
+      }
     } else {
       // TODO(jiabin): Support Other TensorBase later
       // TODO(zhanlve): Replace SelectedRowsAddTensor with add_dygraph_function
diff --git a/python/paddle/fluid/tests/unittests/test_sparse_model.py b/python/paddle/fluid/tests/unittests/test_sparse_model.py
new file mode 100644
index 0000000000000000000000000000000000000000..90f30e383174c53f923b3e7ac4a58823f16d36eb
--- /dev/null
+++ b/python/paddle/fluid/tests/unittests/test_sparse_model.py
@@ -0,0 +1,64 @@
+# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import unittest
+import numpy as np
+import paddle
+from paddle.incubate import sparse
+from paddle.incubate.sparse import nn
+from paddle.fluid.framework import _test_eager_guard
+
+
+class TestGradientAdd(unittest.TestCase):
+
+    def sparse(self, sp_x):
+        identity = sp_x
+        out = nn.functional.relu(sp_x)
+        values = out.values() + identity.values()
+        out = sparse.sparse_coo_tensor(out.indices(),
+                                       values,
+                                       shape=out.shape,
+                                       stop_gradient=out.stop_gradient)
+        return out
+
+    def dense(self, x):
+        identity = x
+        out = paddle.nn.functional.relu(x)
+        out = out + identity
+        return out
+
+    def test(self):
+        with _test_eager_guard():
+            x = paddle.randn((3, 3))
+            sparse_x = x.to_sparse_coo(sparse_dim=2)
+
+            x.stop_gradient = False
+            sparse_x.stop_gradient = False
+
+            dense_out = self.dense(x)
+            loss = dense_out.mean()
+            loss.backward(retain_graph=True)
+
+            sparse_out = self.sparse(sparse_x)
+            sparse_loss = sparse_out.values().mean()
+            sparse_loss.backward(retain_graph=True)
+
+            assert np.allclose(dense_out.numpy(), sparse_out.to_dense().numpy())
+            assert np.allclose(loss.numpy(), sparse_loss.numpy())
+            assert np.allclose(x.grad.numpy(), sparse_x.grad.to_dense().numpy())
+
+            loss.backward()
+            sparse_loss.backward()
+
+            assert np.allclose(x.grad.numpy(), sparse_x.grad.to_dense().numpy())
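
Note on the change: both C++ hunks route `SparseCooTensor` gradients through `paddle::imperative::TensorAdd` on the tensors' `non_zero_elements()`, so a second `backward()` adds into the existing sparse gradient instead of falling through to the unsupported-TensorBase error path. Below is a minimal illustrative sketch of that behavior, written against the same eager-mode guard and `paddle.incubate.sparse` API that `test_sparse_model.py` uses; it is not part of the patch, and it assumes sparse `relu` preserves the input's sparsity pattern so the leaf gradient doubles exactly across two passes.

# Sketch only: repeated backward() calls should now accumulate into a leaf
# SparseCooTensor's grad via the new CopyOrAddTensor branch above.
import numpy as np
import paddle
from paddle.incubate.sparse import nn
from paddle.fluid.framework import _test_eager_guard

with _test_eager_guard():
    sp_x = paddle.randn((3, 3)).to_sparse_coo(sparse_dim=2)
    sp_x.stop_gradient = False

    loss = nn.functional.relu(sp_x).values().mean()
    loss.backward(retain_graph=True)
    first = sp_x.grad.to_dense().numpy()

    loss.backward()  # second pass: TensorAdd on non_zero_elements()
    assert np.allclose(sp_x.grad.to_dense().numpy(), 2 * first)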