未验证 提交 acf3e526 编写于 作者: Z zhangkaihuo 提交者: GitHub

[Sparse]fix sparse bug (#53390)

上级 4ea1d041
......@@ -126,8 +126,13 @@ class SparseCooTensor : public TensorBase,
bool valid() const noexcept override { return non_zero_elements_.valid(); }
/// \brief Test whether the non_zero_elements_ storage is allocated.
/// return Whether the non_zero_elements_ storage is allocated.
bool initialized() const override { return non_zero_elements_.initialized(); }
/// In special cases, when nnz=0, non_zero_elements_ will not need to be
/// initialized, but it is neccessary to return true here, otherwise the
/// gradient will be None. return Whether the non_zero_elements_ storage is
/// allocated.
bool initialized() const override {
return values().initialized() || (nnz() == 0 && numel() > 0);
}
/// \brief resize sparse coo tensor.
/// \param dense_dims The dims of original dense tensor.
......
......@@ -131,8 +131,13 @@ class SparseCsrTensor : public TensorBase,
bool valid() const noexcept override { return non_zero_elements_.valid(); }
/// \brief Test whether the non_zero_elements_ storage is allocated.
/// return Whether the non_zero_elements_ storage is allocated.
bool initialized() const override { return non_zero_elements_.initialized(); }
/// In special cases, when nnz=0, non_zero_elements_ will not need to be
/// initialized, but it is neccessary to return true here, otherwise the
/// gradient will be None. return Whether the non_zero_elements_ storage is
/// allocated.
bool initialized() const override {
return values().initialized() || (nnz() == 0 && numel() > 0);
}
/// \brief resize sparse csr tensor.
/// \param dense_dims The dims of original dense tensor.
......
......@@ -113,13 +113,6 @@ void CsrToCooCPUKernel(const CPUContext& dev_ctx,
SparseCooTensor* out) {
const DDim& x_dims = x.dims();
const int64_t non_zero_num = x.cols().numel();
const auto& csr_crows = x.crows();
const auto& csr_cols = x.cols();
const auto& csr_values = x.values();
const IntT* csr_crows_data = csr_crows.data<IntT>();
const IntT* csr_cols_data = csr_cols.data<IntT>();
const T* csr_values_data = csr_values.data<T>();
int64_t sparse_dim = 2;
if (x_dims.size() == 3) {
sparse_dim = 3;
......@@ -127,6 +120,17 @@ void CsrToCooCPUKernel(const CPUContext& dev_ctx,
phi::DenseTensor indices =
phi::Empty<IntT>(dev_ctx, {sparse_dim, non_zero_num});
phi::DenseTensor values = phi::Empty<T>(dev_ctx, {non_zero_num});
if (x.nnz() <= 0) {
out->SetMember(indices, values, x_dims, true);
return;
}
const auto& csr_crows = x.crows();
const auto& csr_cols = x.cols();
const auto& csr_values = x.values();
const IntT* csr_crows_data = csr_crows.data<IntT>();
const IntT* csr_cols_data = csr_cols.data<IntT>();
const T* csr_values_data = csr_values.data<T>();
IntT* coo_indices = indices.data<IntT>();
IntT* batch_ptr = x_dims.size() == 2 ? nullptr : coo_indices;
IntT* coo_rows_data =
......@@ -177,7 +181,6 @@ void CooToCsrCPUKernel(const CPUContext& dev_ctx,
phi::errors::InvalidArgument(
"SparseCsrTensor only support 2-D or 3-D matrix"));
const int64_t non_zero_num = x.nnz();
if (non_zero_num <= 0) return;
int batchs = x_dims.size() == 2 ? 1 : x_dims[0];
int rows = x_dims.size() == 2 ? x_dims[0] : x_dims[1];
......@@ -185,6 +188,10 @@ void CooToCsrCPUKernel(const CPUContext& dev_ctx,
phi::DenseTensor crows = phi::Empty<IntT>(dev_ctx, {batchs * (rows + 1)});
phi::DenseTensor cols = phi::Empty<IntT>(dev_ctx, {non_zero_num});
phi::DenseTensor values = phi::EmptyLike<T, CPUContext>(dev_ctx, x.values());
if (non_zero_num <= 0) {
out->SetMember(crows, cols, values, x_dims);
return;
}
IntT* csr_crows_data = crows.data<IntT>();
IntT* csr_cols_data = cols.data<IntT>();
T* csr_values_data = values.data<T>();
......@@ -268,6 +275,12 @@ void CooToDenseCPUKernel(const CPUContext& dev_ctx,
const T* x_data = values.data<T>();
dev_ctx.template Alloc<T>(out);
T* out_data = out->data<T>();
memset(out_data, 0, sizeof(T) * out->numel());
if (x.nnz() <= 0) {
return;
}
int64_t base_offset = 1;
for (int64_t i = 0; i < dense_dim; i++) {
base_offset *= dense_dims[sparse_dim + i];
......@@ -279,7 +292,6 @@ void CooToDenseCPUKernel(const CPUContext& dev_ctx,
offset *= dense_dims[i];
}
memset(out_data, 0, sizeof(T) * out->numel());
for (auto i = 0; i < non_zero_num; i++) {
int64_t index = 0;
for (int j = 0; j < sparse_dim; j++) {
......
......@@ -61,6 +61,13 @@ void MaskCooGPUKernel(const GPUContext& dev_ctx,
phi::errors::InvalidArgument("the input x and mask must have the shape"));
const DenseTensor& indices = mask.indices();
const DenseTensor& values = mask.values();
DenseTensor out_indices = phi::EmptyLike<IntT>(dev_ctx, indices);
DenseTensor out_values = phi::EmptyLike<T>(dev_ctx, values);
if (mask.nnz() <= 0) {
out->SetMember(out_indices, out_values, dims, true);
return;
}
const int sparse_dim = mask.sparse_dim();
DenseTensor sparse_offsets = phi::Empty<GPUContext>(
dev_ctx,
......@@ -75,9 +82,6 @@ void MaskCooGPUKernel(const GPUContext& dev_ctx,
gpuMemcpyHostToDevice,
dev_ctx.stream());
DenseTensor out_indices = phi::EmptyLike<T>(dev_ctx, indices);
DenseTensor out_values = phi::EmptyLike<T>(dev_ctx, values);
phi::Copy(dev_ctx, indices, dev_ctx.GetPlace(), false, &out_indices);
const IntT* indices_ptr = indices.data<IntT>();
......
......@@ -164,6 +164,7 @@ void DenseToCooKernel(const Context& dev_ctx,
T* sparse_data = dev_ctx.template Alloc<T>(&values);
// 3. calc indices by indexs and get values by indexs
if (non_zero_num > 0) {
config = phi::backends::gpu::GetGpuLaunchConfig1D(dev_ctx, non_zero_num, 1);
GetNonZeroElementsAndIndices<<<config.block_per_grid.x,
config.thread_per_block.x,
......@@ -176,6 +177,7 @@ void DenseToCooKernel(const Context& dev_ctx,
temp_indexs_ptr,
indices_data,
sparse_data);
}
out->SetMember(indices, values, x_dims, true);
}
......@@ -218,6 +220,21 @@ void CsrToCooGPUKernel(const GPUContext& dev_ctx,
SparseCooTensor* out) {
const DDim& x_dims = x.dims();
const int64_t non_zero_num = x.cols().numel();
int64_t sparse_dim = 2;
if (x_dims.size() == 3) {
sparse_dim = 3;
}
if (x.nnz() <= 0) {
#ifdef PADDLE_WITH_HIP
DenseTensor indices = phi::Empty<int>(dev_ctx, {sparse_dim, non_zero_num});
#else
DenseTensor indices = phi::Empty<IntT>(dev_ctx, {sparse_dim, non_zero_num});
#endif
DenseTensor values = phi::EmptyLike<T, GPUContext>(dev_ctx, x.values());
out->SetMember(indices, values, x_dims, true);
return;
}
// rocsparse_csr2coo only support index with type 'rocsparse_int' (aka 'int')
// now
......@@ -235,10 +252,6 @@ void CsrToCooGPUKernel(const GPUContext& dev_ctx,
const auto& csr_values = x.values();
const T* csr_values_data = csr_values.data<T>();
int64_t sparse_dim = 2;
if (x_dims.size() == 3) {
sparse_dim = 3;
}
int batches = x_dims.size() == 2 ? 1 : x_dims[0];
int rows = x_dims.size() == 2 ? x_dims[0] : x_dims[1];
......@@ -395,7 +408,6 @@ void CooToCsrGPUKernel(const GPUContext& dev_ctx,
phi::errors::InvalidArgument(
"SparseCsrTensor only support 2-D or 3-D matrix"));
const int64_t non_zero_num = x.nnz();
if (non_zero_num <= 0) return;
int batchs = x_dims.size() == 2 ? 1 : x_dims[0];
int rows = x_dims.size() == 2 ? x_dims[0] : x_dims[1];
......@@ -403,6 +415,10 @@ void CooToCsrGPUKernel(const GPUContext& dev_ctx,
phi::DenseTensor crows = phi::Empty<IntT>(dev_ctx, {batchs * (rows + 1)});
phi::DenseTensor cols = phi::Empty<IntT>(dev_ctx, {non_zero_num});
phi::DenseTensor values = phi::EmptyLike<T, GPUContext>(dev_ctx, x.values());
if (non_zero_num <= 0) {
out->SetMember(crows, cols, values, x_dims);
return;
}
IntT* csr_crows_data = crows.data<IntT>();
IntT* csr_cols_data = cols.data<IntT>();
T* csr_values_data = values.data<T>();
......@@ -503,10 +519,17 @@ void CooToDenseGPUKernel(const GPUContext& dev_ctx,
const int64_t dense_dim = values.dims().size() - 1;
const auto place = dev_ctx.GetPlace();
const T* x_data = values.data<T>();
dev_ctx.template Alloc<T>(out);
T* out_data = out->data<T>();
phi::backends::gpu::GpuMemsetAsync(
out_data, 0, sizeof(T) * out->numel(), dev_ctx.stream());
if (x.nnz() <= 0) {
return;
}
const T* x_data = values.data<T>();
int64_t base_offset = 1;
for (int64_t i = 0; i < dense_dim; i++) {
base_offset *= dense_dims[sparse_dim + i];
......@@ -525,8 +548,6 @@ void CooToDenseGPUKernel(const GPUContext& dev_ctx,
sparse_dim * sizeof(int64_t),
gpuMemcpyHostToDevice,
dev_ctx.stream());
phi::backends::gpu::GpuMemsetAsync(
out_data, 0, sizeof(T) * out->numel(), dev_ctx.stream());
auto config =
phi::backends::gpu::GetGpuLaunchConfig1D(dev_ctx, non_zero_num, 1);
......
......@@ -351,6 +351,27 @@ class TestSparseConvert(unittest.TestCase):
dense_x[2] = 0
verify(dense_x)
def test_zero_nnz(self):
for device in devices:
if device == 'cpu' or (
device == 'gpu' and paddle.is_compiled_with_cuda()
):
paddle.device.set_device(device)
x1 = paddle.zeros([2, 2, 2])
x2 = paddle.zeros([2, 2, 2])
sp_csr_x = x1.to_sparse_csr()
sp_coo_x = x2.to_sparse_coo(1)
sp_coo_x.stop_gradient = False
out1 = sp_csr_x.to_dense()
out2 = sp_coo_x.to_dense()
out2.backward()
np.testing.assert_allclose(out1.numpy(), x1.numpy())
np.testing.assert_allclose(out2.numpy(), x2.numpy())
np.testing.assert_allclose(
sp_coo_x.grad.to_dense().numpy().sum(), 0.0
)
class TestCooError(unittest.TestCase):
def test_small_shape(self):
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册