未验证 提交 2f5fb031 编写于 作者: Z zhangkaihuo 提交者: GitHub

Restructure sparse conv (#40570)

restructure conv
上级 3898080e
...@@ -93,7 +93,7 @@ inline HOSTDEVICE void IndexToPoint( ...@@ -93,7 +93,7 @@ inline HOSTDEVICE void IndexToPoint(
} }
inline void GetOutShape(const DDim& x_dims, inline void GetOutShape(const DDim& x_dims,
const DDim& kernel_dims, const std::vector<int>& kernel_sizes,
const std::vector<int>& paddings, const std::vector<int>& paddings,
const std::vector<int>& dilations, const std::vector<int>& dilations,
const std::vector<int>& strides, const std::vector<int>& strides,
...@@ -102,17 +102,17 @@ inline void GetOutShape(const DDim& x_dims, ...@@ -102,17 +102,17 @@ inline void GetOutShape(const DDim& x_dims,
x_dims.size(), x_dims.size(),
5, 5,
phi::errors::InvalidArgument("the shape of x should be (N, D, H, W, C)")); phi::errors::InvalidArgument("the shape of x should be (N, D, H, W, C)"));
PADDLE_ENFORCE_EQ(kernel_dims.size(), PADDLE_ENFORCE_EQ(kernel_sizes.size(),
5, 5,
phi::errors::InvalidArgument( phi::errors::InvalidArgument(
"the shape of kernel should be (D, H, W, C, OC)")); "the shape of kernel should be (D, H, W, C, OC)"));
// infer out shape // infer out shape
(*out_dims)[0] = x_dims[0]; (*out_dims)[0] = x_dims[0];
(*out_dims)[4] = kernel_dims[4]; (*out_dims)[4] = kernel_sizes[4];
for (int i = 1; i < 4; i++) { for (int i = 1; i < 4; i++) {
(*out_dims)[i] = (x_dims[i] + 2 * paddings[i - 1] - (*out_dims)[i] = (x_dims[i] + 2 * paddings[i - 1] -
dilations[i - 1] * (kernel_dims[i - 1] - 1) - 1) / dilations[i - 1] * (kernel_sizes[i - 1] - 1) - 1) /
strides[i - 1] + strides[i - 1] +
1; 1;
} }
...@@ -131,7 +131,7 @@ template <typename T, typename Context> ...@@ -131,7 +131,7 @@ template <typename T, typename Context>
inline void SubmPreProcess(const Context& dev_ctx, inline void SubmPreProcess(const Context& dev_ctx,
const SparseCooTensor& x, const SparseCooTensor& x,
const DenseTensor& kernel, const DenseTensor& kernel,
const SparseCooTensor& out_grad, const DenseTensor& out_grad,
const int in_channels, const int in_channels,
const int out_channels, const int out_channels,
const int half_kernel_size, const int half_kernel_size,
...@@ -142,11 +142,11 @@ inline void SubmPreProcess(const Context& dev_ctx, ...@@ -142,11 +142,11 @@ inline void SubmPreProcess(const Context& dev_ctx,
blas.GEMM(CblasTrans, blas.GEMM(CblasTrans,
CblasNoTrans, CblasNoTrans,
x.non_zero_elements().dims()[1], x.non_zero_elements().dims()[1],
out_grad.non_zero_elements().dims()[1], out_grad.dims()[1],
x.non_zero_elements().dims()[0], x.non_zero_elements().dims()[0],
static_cast<T>(1), static_cast<T>(1),
x.non_zero_elements().data<T>(), x.non_zero_elements().data<T>(),
out_grad.non_zero_elements().data<T>(), out_grad.data<T>(),
static_cast<T>(0), static_cast<T>(0),
d_kernel_ptr + half_kernel_size * in_channels * out_channels); d_kernel_ptr + half_kernel_size * in_channels * out_channels);
...@@ -155,11 +155,11 @@ inline void SubmPreProcess(const Context& dev_ctx, ...@@ -155,11 +155,11 @@ inline void SubmPreProcess(const Context& dev_ctx,
T* x_grad_ptr = x_grad->data<T>(); T* x_grad_ptr = x_grad->data<T>();
blas.GEMM(CblasNoTrans, blas.GEMM(CblasNoTrans,
CblasTrans, CblasTrans,
out_grad.non_zero_elements().dims()[0], out_grad.dims()[0],
in_channels, in_channels,
out_grad.non_zero_elements().dims()[1], out_grad.dims()[1],
static_cast<T>(1), static_cast<T>(1),
out_grad.non_zero_elements().data<T>(), out_grad.data<T>(),
kernel.data<T>() + half_kernel_size * in_channels * out_channels, kernel.data<T>() + half_kernel_size * in_channels * out_channels,
static_cast<T>(0), static_cast<T>(0),
x_grad_ptr); x_grad_ptr);
......
...@@ -27,7 +27,7 @@ void Conv3dGradKernel(const Context& dev_ctx, ...@@ -27,7 +27,7 @@ void Conv3dGradKernel(const Context& dev_ctx,
const SparseCooTensor& x, const SparseCooTensor& x,
const DenseTensor& rulebook, const DenseTensor& rulebook,
const DenseTensor& kernel, const DenseTensor& kernel,
const SparseCooTensor& out_grad, const DenseTensor& out_grad,
const std::vector<int>& paddings, const std::vector<int>& paddings,
const std::vector<int>& dilations, const std::vector<int>& dilations,
const std::vector<int>& strides, const std::vector<int>& strides,
...@@ -41,7 +41,7 @@ std::vector<DenseTensor> Conv3dGrad(const Context& dev_ctx, ...@@ -41,7 +41,7 @@ std::vector<DenseTensor> Conv3dGrad(const Context& dev_ctx,
const SparseCooTensor& x, const SparseCooTensor& x,
const DenseTensor& rulebook, const DenseTensor& rulebook,
const DenseTensor& kernel, const DenseTensor& kernel,
const SparseCooTensor& out_grad, const DenseTensor& out_grad,
const std::vector<int>& paddings, const std::vector<int>& paddings,
const std::vector<int>& dilations, const std::vector<int>& dilations,
const std::vector<int>& strides, const std::vector<int>& strides,
......
...@@ -34,7 +34,7 @@ using Dims4D = phi::funcs::sparse::Dims4D; ...@@ -34,7 +34,7 @@ using Dims4D = phi::funcs::sparse::Dims4D;
template <typename T, typename Context> template <typename T, typename Context>
void ProductRuleBook(const Context& dev_ctx, void ProductRuleBook(const Context& dev_ctx,
const SparseCooTensor& x, const SparseCooTensor& x,
const DenseTensor& kernel, const std::vector<int>& kernel_sizes,
const std::vector<int>& paddings, const std::vector<int>& paddings,
const std::vector<int>& dilations, const std::vector<int>& dilations,
const std::vector<int>& strides, const std::vector<int>& strides,
...@@ -42,19 +42,19 @@ void ProductRuleBook(const Context& dev_ctx, ...@@ -42,19 +42,19 @@ void ProductRuleBook(const Context& dev_ctx,
const bool subm, const bool subm,
DenseTensor* rulebook, DenseTensor* rulebook,
DenseTensor* counter_per_kernel) { DenseTensor* counter_per_kernel) {
const auto& kernel_dims = kernel.dims();
const int64_t non_zero_num = x.nnz(); const int64_t non_zero_num = x.nnz();
const auto& non_zero_indices = x.non_zero_indices(); const auto& non_zero_indices = x.non_zero_indices();
const int* indices_ptr = non_zero_indices.data<int>(); const int* indices_ptr = non_zero_indices.data<int>();
int* counter_ptr = counter_per_kernel->data<int>(); int* counter_ptr = counter_per_kernel->data<int>();
int kernel_size = kernel_dims[0] * kernel_dims[1] * kernel_dims[2]; int kernel_size = kernel_sizes[0] * kernel_sizes[1] * kernel_sizes[2];
memset(counter_ptr, 0, kernel_size * sizeof(int)); memset(counter_ptr, 0, kernel_size * sizeof(int));
int rulebook_len = 0; int rulebook_len = 0;
// calc the rulebook_len // calc the rulebook_len
const auto& x_dims = x.dims(); const auto& x_dims = x.dims();
const Dims4D c_x_dims(x_dims[0], x_dims[3], x_dims[2], x_dims[1]); const Dims4D c_x_dims(x_dims[0], x_dims[3], x_dims[2], x_dims[1]);
const Dims4D c_kernel_dims(1, kernel_dims[2], kernel_dims[1], kernel_dims[0]); const Dims4D c_kernel_dims(
1, kernel_sizes[2], kernel_sizes[1], kernel_sizes[0]);
const Dims4D c_out_dims(out_dims[0], out_dims[3], out_dims[2], out_dims[1]); const Dims4D c_out_dims(out_dims[0], out_dims[3], out_dims[2], out_dims[1]);
const Dims4D c_paddings(1, paddings[2], paddings[1], paddings[0]); const Dims4D c_paddings(1, paddings[2], paddings[1], paddings[0]);
const Dims4D c_strides(1, strides[2], strides[1], strides[0]); const Dims4D c_strides(1, strides[2], strides[1], strides[0]);
...@@ -75,9 +75,9 @@ void ProductRuleBook(const Context& dev_ctx, ...@@ -75,9 +75,9 @@ void ProductRuleBook(const Context& dev_ctx,
auto f_calc_rulebook = [&](int* rulebook_ptr) { auto f_calc_rulebook = [&](int* rulebook_ptr) {
int kernel_index = 0, rulebook_index = 0; int kernel_index = 0, rulebook_index = 0;
for (int kz = 0; kz < kernel_dims[0]; kz++) { for (int kz = 0; kz < kernel_sizes[0]; kz++) {
for (int ky = 0; ky < kernel_dims[1]; ky++) { for (int ky = 0; ky < kernel_sizes[1]; ky++) {
for (int kx = 0; kx < kernel_dims[2]; kx++) { for (int kx = 0; kx < kernel_sizes[2]; kx++) {
++kernel_index; ++kernel_index;
for (int64_t i = 0; i < non_zero_num; i++) { for (int64_t i = 0; i < non_zero_num; i++) {
int batch = indices_ptr[i]; int batch = indices_ptr[i];
......
...@@ -33,7 +33,7 @@ void Conv3dGradKernel(const Context& dev_ctx, ...@@ -33,7 +33,7 @@ void Conv3dGradKernel(const Context& dev_ctx,
const SparseCooTensor& x, const SparseCooTensor& x,
const DenseTensor& rulebook, const DenseTensor& rulebook,
const DenseTensor& kernel, const DenseTensor& kernel,
const SparseCooTensor& out_grad, const DenseTensor& out_grad,
const std::vector<int>& paddings, const std::vector<int>& paddings,
const std::vector<int>& dilations, const std::vector<int>& dilations,
const std::vector<int>& strides, const std::vector<int>& strides,
...@@ -113,7 +113,7 @@ void Conv3dGradKernel(const Context& dev_ctx, ...@@ -113,7 +113,7 @@ void Conv3dGradKernel(const Context& dev_ctx,
rulebook_len, rulebook_len,
in_channels, in_channels,
in_features_ptr); in_features_ptr);
Gather<T>(out_grad.non_zero_elements().data<T>(), Gather<T>(out_grad.data<T>(),
rulebook_ptr + rulebook_len * 2, rulebook_ptr + rulebook_len * 2,
rulebook_len, rulebook_len,
out_channels, out_channels,
......
...@@ -44,8 +44,13 @@ void Conv3dKernel(const Context& dev_ctx, ...@@ -44,8 +44,13 @@ void Conv3dKernel(const Context& dev_ctx,
const auto& kernel_dims = kernel.dims(); const auto& kernel_dims = kernel.dims();
int kernel_size = kernel_dims[0] * kernel_dims[1] * kernel_dims[2]; int kernel_size = kernel_dims[0] * kernel_dims[1] * kernel_dims[2];
DDim out_dims = {1, 1, 1, 1, 1}; DDim out_dims = {1, 1, 1, 1, 1};
std::vector<int> kernel_sizes(kernel_dims.size());
for (int i = 0; i < kernel_dims.size(); i++) {
kernel_sizes[i] = kernel_dims[i];
}
phi::funcs::sparse::GetOutShape( phi::funcs::sparse::GetOutShape(
x_dims, kernel_dims, paddings, dilations, strides, &out_dims); x_dims, kernel_sizes, paddings, dilations, strides, &out_dims);
const int in_channels = kernel_dims[3]; const int in_channels = kernel_dims[3];
const int out_channels = kernel_dims[4]; const int out_channels = kernel_dims[4];
...@@ -63,7 +68,7 @@ void Conv3dKernel(const Context& dev_ctx, ...@@ -63,7 +68,7 @@ void Conv3dKernel(const Context& dev_ctx,
ProductRuleBook<T, Context>(dev_ctx, ProductRuleBook<T, Context>(dev_ctx,
x, x,
kernel, kernel_sizes,
subm_paddings, subm_paddings,
dilations, dilations,
subm_strides, subm_strides,
......
...@@ -38,7 +38,7 @@ void Conv3dGradKernel(const Context& dev_ctx, ...@@ -38,7 +38,7 @@ void Conv3dGradKernel(const Context& dev_ctx,
const SparseCooTensor& x, const SparseCooTensor& x,
const DenseTensor& rulebook, const DenseTensor& rulebook,
const DenseTensor& kernel, const DenseTensor& kernel,
const SparseCooTensor& out_grad, const DenseTensor& out_grad,
const std::vector<int>& paddings, const std::vector<int>& paddings,
const std::vector<int>& dilations, const std::vector<int>& dilations,
const std::vector<int>& strides, const std::vector<int>& strides,
...@@ -140,12 +140,11 @@ void Conv3dGradKernel(const Context& dev_ctx, ...@@ -140,12 +140,11 @@ void Conv3dGradKernel(const Context& dev_ctx,
GatherKernel<T, int><<<config.block_per_grid.x, GatherKernel<T, int><<<config.block_per_grid.x,
config.thread_per_block.x, config.thread_per_block.x,
0, 0,
dev_ctx.stream()>>>( dev_ctx.stream()>>>(out_grad.data<T>(),
out_grad.non_zero_elements().data<T>(), rulebook_ptr + rulebook_len * 2,
rulebook_ptr + rulebook_len * 2, out_grad_features_ptr,
out_grad_features_ptr, rulebook_len,
rulebook_len, out_channels);
out_channels);
const T* kernel_ptr = kernel.data<T>(); const T* kernel_ptr = kernel.data<T>();
for (int i = 0; i < kernel_size; i++) { for (int i = 0; i < kernel_size; i++) {
......
...@@ -132,16 +132,17 @@ void TestConv3dBase(const std::vector<int>& indices, ...@@ -132,16 +132,17 @@ void TestConv3dBase(const std::vector<int>& indices,
f_verify(out.non_zero_elements().data<T>(), correct_out_features); f_verify(out.non_zero_elements().data<T>(), correct_out_features);
if (backward) { if (backward) {
std::vector<DenseTensor> grads = sparse::Conv3dGrad<T>(dev_ctx_cpu, std::vector<DenseTensor> grads =
x_tensor, sparse::Conv3dGrad<T>(dev_ctx_cpu,
rulebook, x_tensor,
kernel_tensor, rulebook,
out, kernel_tensor,
paddings, out.non_zero_elements(),
dilations, paddings,
strides, dilations,
1, strides,
subm); 1,
subm);
f_verify(grads[0].data<T>(), features_grad); f_verify(grads[0].data<T>(), features_grad);
f_verify(grads[1].data<T>(), kernel_grad); f_verify(grads[1].data<T>(), kernel_grad);
} }
...@@ -231,16 +232,17 @@ void TestConv3dBase(const std::vector<int>& indices, ...@@ -231,16 +232,17 @@ void TestConv3dBase(const std::vector<int>& indices,
f_verify(h_features_tensor.data<T>(), correct_out_features); f_verify(h_features_tensor.data<T>(), correct_out_features);
if (backward) { if (backward) {
std::vector<DenseTensor> grads = sparse::Conv3dGrad<T>(dev_ctx_gpu, std::vector<DenseTensor> grads =
d_x_tensor, sparse::Conv3dGrad<T>(dev_ctx_gpu,
d_rulebook, d_x_tensor,
d_kernel_tensor, d_rulebook,
d_out, d_kernel_tensor,
paddings, d_out.non_zero_elements(),
dilations, paddings,
strides, dilations,
1, strides,
subm); 1,
subm);
DenseTensor h_features_grad = phi::Empty( DenseTensor h_features_grad = phi::Empty(
dev_ctx_cpu, dev_ctx_cpu,
DenseTensorMeta(grads[0].dtype(), grads[0].dims(), grads[0].layout())); DenseTensorMeta(grads[0].dtype(), grads[0].dims(), grads[0].layout()));
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册