Unverified commit 8492d3bb, authored by zhangkaihuo, committed by GitHub

The backward code of Sparse Conv3d (#40054)

Sparse Conv3d backward code
Parent 28795771
/* Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include "paddle/phi/core/dense_tensor.h"
#include "paddle/phi/core/sparse_coo_tensor.h"
#include "paddle/phi/kernels/empty_kernel.h"
namespace phi {
namespace sparse {
template <typename T, typename Context>
void Conv3dGradKernel(const Context& dev_ctx,
                      const SparseCooTensor& x,
                      const DenseTensor& rulebook,
                      const DenseTensor& kernel,
                      const SparseCooTensor& out_grad,
                      const std::vector<int>& paddings,
                      const std::vector<int>& dilations,
                      const std::vector<int>& strides,
                      const int groups,
                      DenseTensor* x_grad,
                      DenseTensor* kernel_grad);
template <typename T, typename Context>
std::vector<DenseTensor> Conv3dGrad(const Context& dev_ctx,
                                    const SparseCooTensor& x,
                                    const DenseTensor& rulebook,
                                    const DenseTensor& kernel,
                                    const SparseCooTensor& out_grad,
                                    const std::vector<int>& paddings,
                                    const std::vector<int>& dilations,
                                    const std::vector<int>& strides,
                                    const int groups) {
  DenseTensor x_grad = phi::Empty<T, Context>(dev_ctx);
  DenseTensor kernel_grad = phi::Empty<T, Context>(dev_ctx);
  Conv3dGradKernel<T, Context>(dev_ctx,
                               x,
                               rulebook,
                               kernel,
                               out_grad,
                               paddings,
                               dilations,
                               strides,
                               groups,
                               &x_grad,
                               &kernel_grad);
  std::vector<DenseTensor> out(2);
  out[0] = x_grad;
  out[1] = kernel_grad;
  return out;
}
} // namespace sparse
} // namespace phi
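For orientation, a minimal sketch of how the Conv3dGrad wrapper above might be invoked; the setup is assumed (dev_ctx, x, rulebook, kernel, and out_grad as produced by a prior forward Conv3dKernel run) and the variable names are illustrative, not part of this commit. The unit test further down exercises the same call path.

// Illustrative call site (assumed context, not part of the commit):
std::vector<phi::DenseTensor> grads =
    phi::sparse::Conv3dGrad<float>(dev_ctx,
                                   x,         // sparse COO input
                                   rulebook,  // 3 x rulebook_len index table
                                   kernel,    // (D, H, W, in_channels, out_channels)
                                   out_grad,  // sparse COO upstream gradient
                                   paddings,
                                   dilations,
                                   strides,
                                   /*groups=*/1);
phi::DenseTensor& x_grad = grads[0];       // gradient of x.non_zero_elements()
phi::DenseTensor& kernel_grad = grads[1];  // gradient of the dense kernel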
@@ -23,6 +23,7 @@ limitations under the License. */
#include "paddle/phi/core/sparse_coo_tensor.h"
#include "paddle/phi/core/tensor_meta.h"
#include "paddle/phi/kernels/funcs/blas/blas.h"
#include "paddle/phi/kernels/sparse/convolution_kernel.h"
namespace phi {
namespace sparse {
/* Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/phi/kernels/sparse/convolution_grad_kernel.h"
#include "paddle/phi/kernels/funcs/blas/blas.h"
#include "paddle/phi/kernels/sparse/cpu/convolution.h"
namespace phi {
namespace sparse {
// rulebook:
//[
//  [kernel_index],
//  [in_i],
//  [out_i],
//]
// x_grad = out_grad * transpose(kernel)
// kernel_grad = transpose(x) * out_grad
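// An illustrative (made-up) rulebook with rulebook_len = 3, stored as a
// 3 x rulebook_len integer tensor:
//   kernel_index: [0, 0, 5]   // which of the kernel_size offsets fired
//   in_i:         [1, 4, 2]   // row of x.non_zero_elements() to gather
//   out_i:        [0, 2, 1]   // row of out_grad.non_zero_elements() to gather
// Entries arrive grouped by kernel_index, so each kernel offset owns one
// contiguous slice of the gathered feature rows.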
template <typename T, typename Context>
void Conv3dGradKernel(const Context& dev_ctx,
                      const SparseCooTensor& x,
                      const DenseTensor& rulebook,
                      const DenseTensor& kernel,
                      const SparseCooTensor& out_grad,
                      const std::vector<int>& paddings,
                      const std::vector<int>& dilations,
                      const std::vector<int>& strides,
                      const int groups,
                      DenseTensor* x_grad,
                      DenseTensor* kernel_grad) {
  const auto& kernel_dims = kernel.dims();
  const int kernel_size = kernel_dims[0] * kernel_dims[1] * kernel_dims[2];
  const int in_channels = kernel_dims[3];
  const int out_channels = kernel_dims[4];
  const int* rulebook_ptr = rulebook.data<int>();

  const int rulebook_len = rulebook.dims()[1];

  DenseTensorMeta in_features_meta(
      x.dtype(), {rulebook_len, in_channels}, DataLayout::NCHW);
  DenseTensorMeta d_x_features_meta(
      x.dtype(), {rulebook_len, in_channels}, DataLayout::NCHW);
  DenseTensorMeta out_grad_features_meta(
      x.dtype(), {rulebook_len, out_channels}, DataLayout::NCHW);
  phi::DenseTensor in_features =
      phi::Empty(dev_ctx, std::move(in_features_meta));
  phi::DenseTensor d_x_features =
      phi::Empty(dev_ctx, std::move(d_x_features_meta));
  phi::DenseTensor out_grad_features =
      phi::Empty(dev_ctx, std::move(out_grad_features_meta));

  dev_ctx.Alloc(
      &in_features, in_features.dtype(), sizeof(T) * in_features.numel());
  T* in_features_ptr = in_features.data<T>();
  dev_ctx.Alloc(
      &d_x_features, d_x_features.dtype(), sizeof(T) * d_x_features.numel());
  T* d_x_features_ptr = d_x_features.data<T>();
  dev_ctx.Alloc(&out_grad_features,
                out_grad_features.dtype(),
                sizeof(T) * out_grad_features.numel());
  T* out_grad_features_ptr = out_grad_features.data<T>();

  kernel_grad->Resize(kernel_dims);
  dev_ctx.Alloc(
      kernel_grad, kernel_grad->dtype(), kernel_grad->numel() * sizeof(T));
  T* d_kernel_ptr = kernel_grad->data<T>();
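  // Gather the participating rows into dense, rulebook-ordered buffers:
  // row j of in_features becomes x.non_zero_elements()[in_i[j]], and row j of
  // out_grad_features becomes out_grad.non_zero_elements()[out_i[j]].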
  Gather<T>(x.non_zero_elements().data<T>(),
            rulebook_ptr + rulebook_len,
            rulebook_len,
            in_channels,
            in_features_ptr);
  Gather<T>(out_grad.non_zero_elements().data<T>(),
            rulebook_ptr + rulebook_len * 2,
            rulebook_len,
            out_channels,
            out_grad_features_ptr);
  auto blas = phi::funcs::GetBlas<Context, T>(dev_ctx);

  std::vector<int> offsets(kernel_size + 1), counter(kernel_size, 0);
  for (int i = 0; i < rulebook_len; i++) {
    counter[rulebook_ptr[i]] += 1;
  }
  int offset = 0;
  for (int i = 0; i < kernel_size; i++) {
    offsets[i] = offset;
    offset += counter[i];
  }
  offsets[kernel_size] = offset;
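  // A worked example of the bookkeeping above (values are made up): with
  // kernel_size = 3 and kernel_index column [0, 0, 2, 2, 2],
  //   counter = [2, 0, 3]     // rulebook entries per kernel offset
  //   offsets = [0, 2, 2, 5]  // exclusive prefix sum; the last entry equals
  //                           // rulebook_len
  // so kernel offset i works on gathered rows [offsets[i], offsets[i] + counter[i]).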
  const T* kernel_ptr = kernel.data<T>();
  for (int i = 0; i < kernel_size; i++) {
    if (counter[i] <= 0) {
      continue;
    }

    const int M = counter[i];
    const int K = in_channels;
    const int N = out_channels;
    T* tmp_in_ptr = in_features_ptr + offsets[i] * in_channels;
    T* tmp_out_grad_ptr = out_grad_features_ptr + offsets[i] * out_channels;
    const T* tmp_kernel_ptr = kernel_ptr + i * in_channels * out_channels;
    // d_x_features has in_channels columns, so the slice offset advances by
    // in_channels per gathered row (an out_channels stride only coincides
    // when in_channels == out_channels, as in the unit test below).
    T* tmp_d_x_ptr = d_x_features_ptr + offsets[i] * in_channels;
    T* tmp_d_kernel_ptr = d_kernel_ptr + i * in_channels * out_channels;

    // call gemm: d_kernel = transpose(x) * out_grad
    // (in_channels, n) * (n, out_channels); the result is K x N, so K is the
    // GEMM M-dimension and M (= counter[i]) is the reduction dimension.
    blas.GEMM(CblasTrans,
              CblasNoTrans,
              K,
              N,
              M,
              static_cast<T>(1),
              tmp_in_ptr,
              tmp_out_grad_ptr,
              static_cast<T>(0),
              tmp_d_kernel_ptr);

    // call gemm: d_x = out_grad * transpose(kernel)
    // (n, out_channels) * (out_channels, in_channels)
    blas.GEMM(CblasNoTrans,
              CblasTrans,
              M,
              K,
              N,
              static_cast<T>(1),
              tmp_out_grad_ptr,
              tmp_kernel_ptr,
              static_cast<T>(0),
              tmp_d_x_ptr);
  }
// 4. scatter
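  // Several rulebook entries can share the same in_i (one input non-zero
  // contributes to multiple outputs), so x_grad is zeroed first and Scatter
  // accumulates the per-entry partial gradients row by row.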
  x_grad->Resize(x.non_zero_elements().dims());
  dev_ctx.Alloc(x_grad, x_grad->dtype(), sizeof(T) * x_grad->numel());
  T* x_grad_values_ptr = x_grad->data<T>();
  memset(x_grad_values_ptr, 0, sizeof(T) * x_grad->numel());

  Scatter<T>(d_x_features_ptr,
             rulebook.data<int>() + rulebook_len,
             rulebook_len,
             in_channels,
             x_grad_values_ptr);
}
} // namespace sparse
} // namespace phi
PD_REGISTER_KERNEL(sparse_conv_grad,
                   CPU,
                   ALL_LAYOUT,
                   phi::sparse::Conv3dGradKernel,
                   float,
                   double) {
  kernel->InputAt(0).SetDataLayout(phi::DataLayout::SPARSE_COO);
  kernel->InputAt(3).SetDataLayout(phi::DataLayout::SPARSE_COO);
}
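The kernel above is an instance of the usual gather-GEMM-scatter scheme for rulebook-based sparse convolution. Below is a self-contained sketch of the same dataflow for a single kernel offset, with a naive triple loop standing in for blas.GEMM; the function name, layout comments, and toy sizes are assumptions for illustration, not part of this commit:

// Toy single-offset backward step: n rulebook entries for one kernel offset.
// x:  nnz_in  x ic, row-major (input non-zero features)
// g:  nnz_out x oc, row-major (output-gradient non-zero features)
// w:  ic x oc, row-major (kernel slice for this offset)
// in_i / out_i: the rows of x / g touched by each rulebook entry.
void ConvGradOneOffset(const float* x, const float* g, const float* w,
                       const int* in_i, const int* out_i, int n, int ic,
                       int oc, float* dw /*ic x oc*/, float* dx /*nnz_in x ic*/) {
  for (int e = 0; e < n; ++e) {
    const float* xr = x + in_i[e] * ic;   // gathered input row
    const float* gr = g + out_i[e] * oc;  // gathered out_grad row
    // dw += transpose(xr) * gr: an outer product accumulated over entries.
    for (int a = 0; a < ic; ++a) {
      for (int b = 0; b < oc; ++b) {
        dw[a * oc + b] += xr[a] * gr[b];
      }
    }
    // dx[in_i[e]] += gr * transpose(w): scatter-accumulate back to the input.
    for (int a = 0; a < ic; ++a) {
      float s = 0;
      for (int b = 0; b < oc; ++b) {
        s += gr[b] * w[a * oc + b];
      }
      dx[in_i[e] * ic + a] += s;
    }
  }
}

Running this once per kernel offset over its contiguous slice of rulebook entries reproduces x_grad and kernel_grad for the single-group case.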
@@ -17,6 +17,7 @@ limitations under the License. */
#include "paddle/phi/common/place.h"
#include "paddle/phi/kernels/copy_kernel.h"
#include "paddle/phi/kernels/sparse/convolution_grad_kernel.h"
#include "paddle/phi/kernels/sparse/convolution_kernel.h"
#include "paddle/fluid/memory/allocation/allocator_facade.h"
@@ -59,7 +60,10 @@ void TestConv3dBase(const std::vector<int>& indices,
                    const std::vector<int>& paddings,
                    const std::vector<int>& strides,
                    const std::vector<int>& dilations,
                    const float diff = 1e-3,
                    const bool backward = false,
                    const std::vector<T> features_grad = {},
                    const std::vector<T> kernel_grad = {}) {
  phi::CPUContext dev_ctx_cpu;
  dev_ctx_cpu.SetAllocator(
      paddle::memory::allocation::AllocatorFacade::Instance()
@@ -122,10 +126,29 @@
                             correct_out_indices.size() * sizeof(int));
    ASSERT_EQ(cmp_indices, 0);

    auto f_verify = [&](const T* real_data,
                        const std::vector<T>& correct_data) {
      for (uint64_t i = 0; i < correct_data.size(); i++) {
        float tmp =
            std::fabs(static_cast<float>(correct_data[i] - real_data[i]));
        ASSERT_LT(tmp, diff);
      }
    };

    f_verify(out.non_zero_elements().data<T>(), correct_out_features);

    if (backward) {
      std::vector<DenseTensor> grads =
          sparse::Conv3dGrad<T>(dev_ctx_cpu,
                                x_tensor,
                                rulebook,
                                kernel_tensor,
                                out,
                                paddings,
                                dilations,
                                strides,
                                1);
      f_verify(grads[0].data<T>(), features_grad);
      f_verify(grads[1].data<T>(), kernel_grad);
    }
  }
}
@@ -141,7 +164,11 @@ void TestConv3d(const std::vector<int>& indices,
                const int non_zero_num,
                const std::vector<int>& paddings,
                const std::vector<int>& strides,
                const std::vector<int>& dilations,
                const float diff = 1e-3,
                const bool backward = false,
                const std::vector<float> features_grad = {},
                const std::vector<float> kernel_grad = {}) {
  // test float
  TestConv3dBase<float>(indices,
                        features,
@@ -154,7 +181,11 @@ void TestConv3d(const std::vector<int>& indices,
                        non_zero_num,
                        paddings,
                        strides,
                        dilations,
                        diff,
                        backward,
                        features_grad,
                        kernel_grad);
  // test double
  TestConv3dBase<double>(indices,
                         cast<float, double>(features),
@@ -167,7 +198,11 @@ void TestConv3d(const std::vector<int>& indices,
                         non_zero_num,
                         paddings,
                         strides,
                         dilations,
                         diff,
                         backward,
                         cast<float, double>(features_grad),
                         cast<float, double>(kernel_grad));
}
TEST(DEV_API, sparse_conv3d) {
@@ -467,5 +502,66 @@ TEST(DEV_API, sparse_conv2d) {
             dilations);
}
TEST(DEV_API, sparse_conv3d_backward) {
  const int in_channels = 1;
  const int out_channels = 1;
  DDim x_dims = {1, 4, 4, 4, in_channels};
  DDim kernel_dims = {3, 3, 3, in_channels, out_channels};
  DDim out_dims = {1, 2, 2, 2, out_channels};
  std::vector<int> paddings = {0, 0, 0};
  std::vector<int> strides = {1, 1, 1};
  std::vector<int> dilations = {1, 1, 1};

  const int non_zero_num = 2;
  std::vector<int> indices_flatten = {0, 0, 0, 2, 3, 2, 3, 2};

  std::vector<float> features = {-0.28833008, 0.0287323};
  // 3*3*3=27
  std::vector<float> kernel = {
      0.64306641, 0.45043945, 0.47216797, 0.22924805, 0.97509766, 0.86181641,
      0.57861328, 0.91796875, 0.87255859, 0.16589355, 0.44555664, 0.01889038,
      0.46459961, 0.44726562, 0.19909668, 0.89697266, 0.37158203, 0.00513077,
      0.69628906, 0.26904297, 0.74707031, 0.54003906, 0.5390625,  0.07958984,
      0.47338867, 0.90966797, 0.17126465};

  std::vector<int> out_indices_flatten = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
                                          0, 1, 1, 1, 1, 0, 0, 1, 1, 0, 0,
                                          1, 1, 0, 1, 0, 1, 0, 1, 0, 1};

  std::vector<float> out_features = {4.9200e-03,
                                     2.6140e-02,
                                     2.2900e-03,
                                     -2.3596e-01,
                                     1.5000e-04,
                                     1.0670e-02,
                                     5.7200e-03,
                                     1.2850e-02};

  std::vector<float> features_grad = {-0.20593, -0.09149};
  std::vector<float> kernel_grad = {
      0.000e+00, 0.000e+00, 0.000e+00, 0.000e+00, 0.000e+00, 0.000e+00,
      0.000e+00, 0.000e+00, 6.805e-02, 0.000e+00, 0.000e+00, 0.000e+00,
      0.000e+00, 3.700e-04, 1.600e-04, 0.000e+00, 3.100e-04, 0.000e+00,
      0.000e+00, 0.000e+00, 0.000e+00, 0.000e+00, -6.780e-03, 7.000e-05,
      0.000e+00, 7.500e-04, 1.400e-04};

  TestConv3d(indices_flatten,
             features,
             x_dims,
             kernel,
             kernel_dims,
             out_indices_flatten,
             out_features,
             out_dims,
             non_zero_num,
             paddings,
             strides,
             dilations,
             1e-3,
             true,
             features_grad,
             kernel_grad);
}
} // namespace tests
} // namespace phi