diff --git a/paddle/phi/kernels/sparse/convolution_grad_kernel.h b/paddle/phi/kernels/sparse/convolution_grad_kernel.h
index 23e059c72e77615e2c24aed961d22b3154c30449..5a47575141a2d6dc474a5a1abbdf63c77e58098e 100644
--- a/paddle/phi/kernels/sparse/convolution_grad_kernel.h
+++ b/paddle/phi/kernels/sparse/convolution_grad_kernel.h
@@ -25,37 +25,37 @@ namespace sparse {
 template <typename T, typename Context>
 void Conv3dGradKernel(const Context& dev_ctx,
                       const SparseCooTensor& x,
-                      const DenseTensor& rulebook,
                       const DenseTensor& kernel,
-                      const DenseTensor& out_grad,
+                      const DenseTensor& rulebook,
+                      const SparseCooTensor& out_grad,
                       const std::vector<int>& paddings,
                       const std::vector<int>& dilations,
                       const std::vector<int>& strides,
                       const int groups,
                       const bool subm,
-                      DenseTensor* x_grad,
+                      SparseCooTensor* x_grad,
                       DenseTensor* kernel_grad);
 
 template <typename T, typename Context>
-std::vector<DenseTensor> Conv3dGrad(const Context& dev_ctx,
-                                    const SparseCooTensor& x,
-                                    const DenseTensor& rulebook,
-                                    const DenseTensor& kernel,
-                                    const DenseTensor& out_grad,
-                                    const std::vector<int>& paddings,
-                                    const std::vector<int>& dilations,
-                                    const std::vector<int>& strides,
-                                    const int groups,
-                                    const bool subm) {
-  DenseTensor x_grad =
-      phi::Empty<T, Context>(dev_ctx, DenseTensorMeta(x.dtype(), {1}, x.layout()));
+std::tuple<SparseCooTensor, DenseTensor> Conv3dGrad(
+    const Context& dev_ctx,
+    const SparseCooTensor& x,
+    const DenseTensor& kernel,
+    const DenseTensor& rulebook,
+    const SparseCooTensor& out_grad,
+    const std::vector<int>& paddings,
+    const std::vector<int>& dilations,
+    const std::vector<int>& strides,
+    const int groups,
+    const bool subm) {
+  SparseCooTensor x_grad;
   DenseTensor kernel_grad = phi::Empty<T, Context>(
       dev_ctx, DenseTensorMeta(kernel.dtype(), {1}, kernel.layout()));
   // TODO(zhangkaihuo): call InferMeta func here
   Conv3dGradKernel<T, Context>(dev_ctx,
                                x,
-                               rulebook,
                                kernel,
+                               rulebook,
                                out_grad,
                                paddings,
                                dilations,
@@ -64,10 +64,7 @@ std::vector<DenseTensor> Conv3dGrad(const Context& dev_ctx,
                                subm,
                                &x_grad,
                                &kernel_grad);
-  std::vector<DenseTensor> out(2);
-  out[0] = x_grad;
-  out[1] = kernel_grad;
-  return out;
+  return std::make_tuple(x_grad, kernel_grad);
 }
 
 }  // namespace sparse
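Note: the user-facing change in this header is the return type of sparse::Conv3dGrad, which now hands back a std::tuple<SparseCooTensor, DenseTensor> (input gradient with its sparsity preserved, plus the dense kernel gradient) instead of a std::vector<DenseTensor>. A minimal caller-side sketch against the new signature (the context and tensors here are assumed to be prepared elsewhere; this is illustrative, not code from the patch):

    // Unpack the new tuple return value of sparse::Conv3dGrad.
    SparseCooTensor x_grad;
    DenseTensor kernel_grad;
    std::tie(x_grad, kernel_grad) =
        sparse::Conv3dGrad<float>(dev_ctx, x, kernel, rulebook, out_grad,
                                  paddings, dilations, strides,
                                  /*groups=*/1, /*subm=*/false);
    // x_grad reuses x's indices, so only its values are newly computed.
    const DenseTensor& dx_values = x_grad.non_zero_elements();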
diff --git a/paddle/phi/kernels/sparse/cpu/convolution_grad_kernel.cc b/paddle/phi/kernels/sparse/cpu/convolution_grad_kernel.cc
index 3348d81cf6b4bbffe7f6db24dbe12fef24cadf40..29079918cbf86b1821234adfbe731a3889586605 100644
--- a/paddle/phi/kernels/sparse/cpu/convolution_grad_kernel.cc
+++ b/paddle/phi/kernels/sparse/cpu/convolution_grad_kernel.cc
@@ -13,6 +13,7 @@ See the License for the specific language governing permissions and
 limitations under the License. */
 
 #include "paddle/phi/kernels/sparse/convolution_grad_kernel.h"
+#include "paddle/phi/kernels/copy_kernel.h"
 #include "paddle/phi/kernels/funcs/blas/blas.h"
 #include "paddle/phi/kernels/funcs/math_function.h"
 #include "paddle/phi/kernels/sparse/cpu/convolution.h"
@@ -31,15 +32,15 @@ namespace sparse {
 template <typename T, typename Context>
 void Conv3dGradKernel(const Context& dev_ctx,
                       const SparseCooTensor& x,
-                      const DenseTensor& rulebook,
                       const DenseTensor& kernel,
-                      const DenseTensor& out_grad,
+                      const DenseTensor& rulebook,
+                      const SparseCooTensor& out_grad,
                       const std::vector<int>& paddings,
                       const std::vector<int>& dilations,
                       const std::vector<int>& strides,
                       const int groups,
                       const bool subm,
-                      DenseTensor* x_grad,
+                      SparseCooTensor* x_grad,
                       DenseTensor* kernel_grad) {
   const auto& kernel_dims = kernel.dims();
   const int kernel_size = kernel_dims[0] * kernel_dims[1] * kernel_dims[2];
@@ -73,11 +74,18 @@ void Conv3dGradKernel(const Context& dev_ctx,
   int half_kernel_size = kernel_size / 2;
   auto blas = phi::funcs::GetBlas<Context, T>(dev_ctx);
 
-  x_grad->Resize(x.non_zero_elements().dims());
-  dev_ctx.Alloc(x_grad, x_grad->dtype(), sizeof(T) * x_grad->numel());
-  T* x_grad_values_ptr = x_grad->data<T>();
-  memset(x_grad_values_ptr, 0, sizeof(T) * x_grad->numel());
+  DenseTensor x_grad_indices =
+      phi::EmptyLike<int>(dev_ctx, x.non_zero_indices());
+  DenseTensor x_grad_values = phi::EmptyLike<T>(dev_ctx, x.non_zero_elements());
+  T* x_grad_values_ptr = x_grad_values.data<T>();
+  memset(x_grad_values_ptr, 0, sizeof(T) * x_grad_values.numel());
   memset(d_x_features_ptr, 0, sizeof(T) * d_x_features.numel());
+  phi::Copy(dev_ctx,
+            x.non_zero_indices(),
+            dev_ctx.GetPlace(),
+            false,
+            &x_grad_indices);
+  x_grad->SetMember(x_grad_indices, x_grad_values, x.dims(), true);
 
   std::vector<int> offsets(kernel_size + 1), counter(kernel_size, 0);
   for (int i = 0; i < rulebook_len; i++) {
@@ -97,12 +105,12 @@
     phi::funcs::sparse::SubmPreProcess<T, Context>(dev_ctx,
                                                    x,
                                                    kernel,
-                                                   out_grad,
+                                                   out_grad.non_zero_elements(),
                                                    in_channels,
                                                    out_channels,
                                                    half_kernel_size,
                                                    kernel_grad,
-                                                   x_grad);
+                                                   &x_grad_values);
     if (max_count == 0) {
       return;
     }
@@ -113,7 +121,7 @@
             rulebook_len,
             in_channels,
             in_features_ptr);
-  Gather<T>(out_grad.data<T>(),
+  Gather<T>(out_grad.non_zero_elements().data<T>(),
             rulebook_ptr + rulebook_len * 2,
             rulebook_len,
             out_channels,
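Both backends follow the same gather-GEMM-scatter scheme driven by the rulebook: feature rows of x and of out_grad are gathered into dense scratch buffers, multiplied per kernel offset, and the partial input gradients are scattered back. As a rough sketch of the semantics assumed by the Gather<T> calls above (a simplified stand-in, not the actual helper):

    #include <cstring>

    // For each rule i, copy the feature row addressed by index[i] out of
    // src into row i of the dense scratch buffer dst.
    template <typename T>
    void GatherRows(const T* src, const int* index, int n, int channels,
                    T* dst) {
      for (int i = 0; i < n; ++i) {
        std::memcpy(dst + i * channels, src + index[i] * channels,
                    channels * sizeof(T));
      }
    }

Here index corresponds to one column of the rulebook; rulebook_ptr + rulebook_len * 2 addresses the output-index column used for the out_grad gather.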
*/ +#include "glog/logging.h" #include "paddle/phi/backends/gpu/gpu_context.h" #include "paddle/phi/backends/gpu/gpu_info.h" #include "paddle/phi/backends/gpu/gpu_launch_config.h" #include "paddle/phi/core/kernel_registry.h" #include "paddle/phi/core/tensor_meta.h" +#include "paddle/phi/kernels/copy_kernel.h" #include "paddle/phi/kernels/funcs/blas/blas.h" #include "paddle/phi/kernels/funcs/math_function.h" #include "paddle/phi/kernels/sparse/convolution_grad_kernel.h" @@ -36,15 +38,15 @@ namespace sparse { template void Conv3dGradKernel(const Context& dev_ctx, const SparseCooTensor& x, - const DenseTensor& rulebook, const DenseTensor& kernel, - const DenseTensor& out_grad, + const DenseTensor& rulebook, + const SparseCooTensor& out_grad, const std::vector& paddings, const std::vector& dilations, const std::vector& strides, const int groups, const bool subm, - DenseTensor* x_grad, + SparseCooTensor* x_grad, DenseTensor* kernel_grad) { const auto& kernel_dims = kernel.dims(); const int kernel_size = kernel_dims[0] * kernel_dims[1] * kernel_dims[2]; @@ -70,17 +72,25 @@ void Conv3dGradKernel(const Context& dev_ctx, T* in_features_ptr = in_features.data(); T* d_x_features_ptr = d_x_features.data(); T* out_grad_features_ptr = out_grad_features.data(); - kernel_grad->ResizeAndAllocate(kernel_dims); + *kernel_grad = phi::EmptyLike(dev_ctx, kernel); T* d_kernel_ptr = kernel_grad->data(); phi::funcs::SetConstant set_zero; set_zero(dev_ctx, kernel_grad, static_cast(0.0f)); int half_kernel_size = kernel_size / 2; auto blas = phi::funcs::GetBlas(dev_ctx); - x_grad->ResizeAndAllocate(x.non_zero_elements().dims()); - T* x_grad_values_ptr = x_grad->data(); - set_zero(dev_ctx, x_grad, static_cast(0.0f)); + DenseTensor x_grad_indices = + phi::EmptyLike(dev_ctx, x.non_zero_indices()); + DenseTensor x_grad_values = phi::EmptyLike(dev_ctx, x.non_zero_elements()); + T* x_grad_values_ptr = x_grad_values.data(); + set_zero(dev_ctx, &x_grad_values, static_cast(0.0f)); set_zero(dev_ctx, &d_x_features, static_cast(0.0f)); + phi::Copy(dev_ctx, + x.non_zero_indices(), + dev_ctx.GetPlace(), + false, + &x_grad_indices); + x_grad->SetMember(x_grad_indices, x_grad_values, x.dims(), true); std::vector offsets(kernel_size + 1), counter(kernel_size, 0), h_counter(rulebook_len, 0); @@ -113,12 +123,12 @@ void Conv3dGradKernel(const Context& dev_ctx, phi::funcs::sparse::SubmPreProcess(dev_ctx, x, kernel, - out_grad, + out_grad.non_zero_elements(), in_channels, out_channels, half_kernel_size, kernel_grad, - x_grad); + &x_grad_values); if (max_count == 0) { return; } @@ -140,11 +150,12 @@ void Conv3dGradKernel(const Context& dev_ctx, GatherKernel<<>>(out_grad.data(), - rulebook_ptr + rulebook_len * 2, - out_grad_features_ptr, - rulebook_len, - out_channels); + dev_ctx.stream()>>>( + out_grad.non_zero_elements().data(), + rulebook_ptr + rulebook_len * 2, + out_grad_features_ptr, + rulebook_len, + out_channels); const T* kernel_ptr = kernel.data(); for (int i = 0; i < kernel_size; i++) { @@ -189,7 +200,7 @@ void Conv3dGradKernel(const Context& dev_ctx, } // 4. 
diff --git a/paddle/phi/tests/kernels/test_sparse_conv3d_dev_api.cc b/paddle/phi/tests/kernels/test_sparse_conv3d_dev_api.cc
index 33f84db76e78eec6710abea3a93a06f1eaa55408..c22464e538c2118671afa85c5af70dd09ad56927 100644
--- a/paddle/phi/tests/kernels/test_sparse_conv3d_dev_api.cc
+++ b/paddle/phi/tests/kernels/test_sparse_conv3d_dev_api.cc
@@ -71,6 +71,10 @@ void TestConv3dBase(const std::vector<int>& indices,
       paddle::memory::allocation::AllocatorFacade::Instance()
           .GetAllocator(paddle::platform::CPUPlace())
           .get());
+  dev_ctx_cpu.SetHostAllocator(
+      paddle::memory::allocation::AllocatorFacade::Instance()
+          .GetAllocator(paddle::platform::CPUPlace())
+          .get());
   dev_ctx_cpu.Init();
 
   const int in_channels = kernel_dims[3];
@@ -132,19 +136,19 @@
     f_verify(out.non_zero_elements().data<T>(), correct_out_features);
 
     if (backward) {
-      std::vector<DenseTensor> grads =
+      std::tuple<SparseCooTensor, DenseTensor> grads =
           sparse::Conv3dGrad<T>(dev_ctx_cpu,
                                 x_tensor,
-                                rulebook,
                                 kernel_tensor,
-                                out.non_zero_elements(),
+                                rulebook,
+                                out,
                                 paddings,
                                 dilations,
                                 strides,
                                 1,
                                 subm);
-      f_verify(grads[0].data<T>(), features_grad);
-      f_verify(grads[1].data<T>(), kernel_grad);
+      f_verify(std::get<0>(grads).non_zero_elements().data<T>(), features_grad);
+      f_verify(std::get<1>(grads).data<T>(), kernel_grad);
     }
   }
 
@@ -233,23 +237,28 @@
     f_verify(h_features_tensor.data<T>(), correct_out_features);
 
     if (backward) {
-      std::vector<DenseTensor> grads =
+      std::tuple<SparseCooTensor, DenseTensor> grads =
           sparse::Conv3dGrad<T>(dev_ctx_gpu,
                                 d_x_tensor,
-                                d_rulebook,
                                 d_kernel_tensor,
-                                d_out.non_zero_elements(),
+                                d_rulebook,
+                                d_out,
                                 paddings,
                                 dilations,
                                 strides,
                                 1,
                                 subm);
-      DenseTensor h_features_grad = phi::EmptyLike<T>(dev_ctx_cpu, grads[0]);
-      phi::Copy(dev_ctx_gpu, grads[0], phi::CPUPlace(), true, &h_features_grad);
+      DenseTensor d_features_grad = std::get<0>(grads).non_zero_elements();
+      DenseTensor d_kernel_grad = std::get<1>(grads);
+      DenseTensor h_features_grad =
+          phi::EmptyLike<T>(dev_ctx_cpu, d_features_grad);
+      phi::Copy(
+          dev_ctx_gpu, d_features_grad, phi::CPUPlace(), true, &h_features_grad);
       f_verify(h_features_grad.data<T>(), features_grad);
 
-      DenseTensor h_kernel_grad = phi::EmptyLike<T>(dev_ctx_cpu, grads[1]);
-      phi::Copy(dev_ctx_gpu, grads[1], phi::CPUPlace(), true, &h_kernel_grad);
+      DenseTensor h_kernel_grad = phi::EmptyLike<T>(dev_ctx_cpu, d_kernel_grad);
+      phi::Copy(
+          dev_ctx_gpu, std::get<1>(grads), phi::CPUPlace(), true, &h_kernel_grad);
       f_verify(h_kernel_grad.data<T>(), kernel_grad);
     }
 #endif
diff --git a/python/paddle/fluid/tests/unittests/test_sparse_conv_op.py b/python/paddle/fluid/tests/unittests/test_sparse_conv_op.py
new file mode 100644
index 0000000000000000000000000000000000000000..075806a93b07d049b84542af96753d98bcca429a
--- /dev/null
+++ b/python/paddle/fluid/tests/unittests/test_sparse_conv_op.py
@@ -0,0 +1,54 @@
+# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from __future__ import print_function
+import unittest
+import numpy as np
+import paddle
+from paddle import _C_ops
+from paddle.fluid import core
+from paddle.fluid.framework import _test_eager_guard
+
+
+class TestSparseConv(unittest.TestCase):
+    def test_conv3d(self):
+        with _test_eager_guard():
+            kernel = [[[[[1], [1], [1]], [[1], [1], [1]], [[1], [1], [1]]]]]
+            dense_kernel = paddle.to_tensor(
+                kernel, dtype='float32', stop_gradient=False)
+            dense_kernel = paddle.reshape(dense_kernel, [1, 3, 3, 1, 1])
+            paddings = [0, 0, 0]
+            strides = [1, 1, 1]
+            dilations = [1, 1, 1]
+
+            indices = [[0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 1, 2], [1, 3, 2, 3]]
+            values = [1, 2, 3, 4]
+            indices = paddle.to_tensor(indices, dtype='int32')
+            values = paddle.to_tensor(values, dtype='float32')
+            dense_shape = [1, 1, 3, 4, 1]
+            correct_out_values = [[4], [10]]
+            sparse_input = core.eager.sparse_coo_tensor(indices, values,
+                                                        dense_shape, False)
+            out = _C_ops.final_state_sparse_conv3d(sparse_input, dense_kernel,
+                                                   paddings, dilations, strides,
+                                                   1, False)
+            out.backward(out)
+            # At present, only backward can be verified to work normally
+            # TODO(zhangkaihuo): compare the result with dense conv
+            print(sparse_input.grad.non_zero_elements())
+            assert np.array_equal(correct_out_values,
+                                  out.non_zero_elements().numpy())
+
+
+# TODO: Add more test cases
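The expected values in the new Python test can be checked by hand: the input is a [1, 1, 3, 4, 1] tensor with four non-zeros, the kernel is 1x3x3 all-ones with unit strides and no padding, so the output has exactly two spatial sites and each output value is the plain sum of the non-zeros inside its 3x3 window. A standalone check (mirrors the test data; not part of the patch):

    #include <cstdio>

    int main() {
      // (h, w) coordinates and values of the test's four non-zeros.
      const int h[4] = {0, 0, 1, 2};
      const int w[4] = {1, 3, 2, 3};
      const float v[4] = {1.f, 2.f, 3.f, 4.f};
      // Two valid output sites: windows w in [0, 2] and w in [1, 3].
      for (int w0 = 0; w0 < 2; ++w0) {
        float sum = 0.f;
        for (int i = 0; i < 4; ++i) {
          if (h[i] <= 2 && w[i] >= w0 && w[i] <= w0 + 2) sum += v[i];
        }
        std::printf("out[%d] = %g\n", w0, sum);  // prints 4, then 10
      }
      return 0;
    }

This matches correct_out_values = [[4], [10]] in the test above.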
diff --git a/python/paddle/utils/code_gen/sparse_bw_api.yaml b/python/paddle/utils/code_gen/sparse_bw_api.yaml
index 1f474d56a9022c9ee63065b115ba23abcf65eb45..7ffc906b220840ca5ef3188d01334ab8b25639ea 100644
--- a/python/paddle/utils/code_gen/sparse_bw_api.yaml
+++ b/python/paddle/utils/code_gen/sparse_bw_api.yaml
@@ -1,7 +1,7 @@
 - backward_api : conv3d_grad
   forward : conv3d (Tensor x, Tensor kernel, int[] paddings, int[] dilations, int[] strides, int groups, bool subm) -> Tensor(out@SparseCooTensor), Tensor(rulebook@DenseTensor)
   args : (Tensor x, Tensor kernel, Tensor rulebook, Tensor out_grad, int[] paddings, int[] dilations, int[] strides, int groups, bool subm)
-  output : Tensor(x_grad@DenseTensor), Tensor(kernel_grad@DenseTensor)
+  output : Tensor(x_grad@SparseCooTensor), Tensor(kernel_grad@DenseTensor)
   kernel :
     func : sparse_conv3d_grad