Unverified · Commit 2f5fb031, authored by zhangkaihuo, committed by GitHub

Restructure sparse conv (#40570)

restructure conv
Parent 3898080e
@@ -93,7 +93,7 @@ inline HOSTDEVICE void IndexToPoint(
 }
 inline void GetOutShape(const DDim& x_dims,
-                        const DDim& kernel_dims,
+                        const std::vector<int>& kernel_sizes,
                         const std::vector<int>& paddings,
                         const std::vector<int>& dilations,
                         const std::vector<int>& strides,
@@ -102,17 +102,17 @@ inline void GetOutShape(const DDim& x_dims,
       x_dims.size(),
       5,
       phi::errors::InvalidArgument("the shape of x should be (N, D, H, W, C)"));
-  PADDLE_ENFORCE_EQ(kernel_dims.size(),
+  PADDLE_ENFORCE_EQ(kernel_sizes.size(),
                     5,
                     phi::errors::InvalidArgument(
                         "the shape of kernel should be (D, H, W, C, OC)"));
   // infer out shape
   (*out_dims)[0] = x_dims[0];
-  (*out_dims)[4] = kernel_dims[4];
+  (*out_dims)[4] = kernel_sizes[4];
   for (int i = 1; i < 4; i++) {
     (*out_dims)[i] = (x_dims[i] + 2 * paddings[i - 1] -
-                      dilations[i - 1] * (kernel_dims[i - 1] - 1) - 1) /
+                      dilations[i - 1] * (kernel_sizes[i - 1] - 1) - 1) /
                          strides[i - 1] +
                      1;
   }
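The loop above applies the standard dilated-convolution output-size formula, out = (in + 2 * pad - dilation * (kernel - 1) - 1) / stride + 1, to each spatial axis (D, H, W). A minimal standalone sketch of that arithmetic (names here are illustrative, not part of the patch):

#include <cstdio>

// Hypothetical helper mirroring the per-axis formula used in GetOutShape:
// out = (in + 2 * pad - dilation * (kernel - 1) - 1) / stride + 1
int ConvOutSize(int in, int kernel, int pad, int dilation, int stride) {
  return (in + 2 * pad - dilation * (kernel - 1) - 1) / stride + 1;
}

int main() {
  // A 4x4x4 input with a 3x3x3 kernel, no padding, dilation 1, stride 1
  // yields a 2x2x2 output.
  std::printf("%d\n", ConvOutSize(4, 3, 0, 1, 1));  // prints 2
  return 0;
}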
@@ -131,7 +131,7 @@ template <typename T, typename Context>
 inline void SubmPreProcess(const Context& dev_ctx,
                            const SparseCooTensor& x,
                            const DenseTensor& kernel,
-                           const SparseCooTensor& out_grad,
+                           const DenseTensor& out_grad,
                            const int in_channels,
                            const int out_channels,
                            const int half_kernel_size,
@@ -142,11 +142,11 @@ inline void SubmPreProcess(const Context& dev_ctx,
   blas.GEMM(CblasTrans,
             CblasNoTrans,
             x.non_zero_elements().dims()[1],
-            out_grad.non_zero_elements().dims()[1],
+            out_grad.dims()[1],
             x.non_zero_elements().dims()[0],
             static_cast<T>(1),
             x.non_zero_elements().data<T>(),
-            out_grad.non_zero_elements().data<T>(),
+            out_grad.data<T>(),
             static_cast<T>(0),
             d_kernel_ptr + half_kernel_size * in_channels * out_channels);
@@ -155,11 +155,11 @@ inline void SubmPreProcess(const Context& dev_ctx,
   T* x_grad_ptr = x_grad->data<T>();
   blas.GEMM(CblasNoTrans,
             CblasTrans,
-            out_grad.non_zero_elements().dims()[0],
+            out_grad.dims()[0],
             in_channels,
-            out_grad.non_zero_elements().dims()[1],
+            out_grad.dims()[1],
             static_cast<T>(1),
-            out_grad.non_zero_elements().data<T>(),
+            out_grad.data<T>(),
             kernel.data<T>() + half_kernel_size * in_channels * out_channels,
             static_cast<T>(0),
             x_grad_ptr);
......
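For readers tracing the shape change: out_grad is now the dense feature matrix itself, of shape [nnz, out_channels], rather than a SparseCooTensor wrapping it, so out_grad.dims() and out_grad.data<T>() replace the non_zero_elements() indirection. A naive stand-in for the first GEMM above (the center-offset kernel gradient in submanifold convolution; plain C++ for illustration, not the Paddle BLAS API):

#include <vector>

// Naive equivalent of blas.GEMM(CblasTrans, CblasNoTrans, ...) above:
// d_kernel_center[ic][oc] = sum_i x_features[i][ic] * out_grad[i][oc],
// i.e. x^T * out_grad for the kernel's center offset. Illustrative only.
void CenterKernelGrad(
    const std::vector<std::vector<float>>& x_features,   // [nnz][in_c]
    const std::vector<std::vector<float>>& out_grad,     // [nnz][out_c]
    std::vector<std::vector<float>>* d_kernel_center) {  // [in_c][out_c]
  const size_t nnz = x_features.size();
  const size_t in_c = x_features[0].size();
  const size_t out_c = out_grad[0].size();
  d_kernel_center->assign(in_c, std::vector<float>(out_c, 0.0f));
  for (size_t i = 0; i < nnz; ++i)
    for (size_t ic = 0; ic < in_c; ++ic)
      for (size_t oc = 0; oc < out_c; ++oc)
        (*d_kernel_center)[ic][oc] += x_features[i][ic] * out_grad[i][oc];
}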
@@ -27,7 +27,7 @@ void Conv3dGradKernel(const Context& dev_ctx,
                       const SparseCooTensor& x,
                       const DenseTensor& rulebook,
                       const DenseTensor& kernel,
-                      const SparseCooTensor& out_grad,
+                      const DenseTensor& out_grad,
                       const std::vector<int>& paddings,
                       const std::vector<int>& dilations,
                       const std::vector<int>& strides,
@@ -41,7 +41,7 @@ std::vector<DenseTensor> Conv3dGrad(const Context& dev_ctx,
                                     const SparseCooTensor& x,
                                     const DenseTensor& rulebook,
                                     const DenseTensor& kernel,
-                                    const SparseCooTensor& out_grad,
+                                    const DenseTensor& out_grad,
                                     const std::vector<int>& paddings,
                                     const std::vector<int>& dilations,
                                     const std::vector<int>& strides,
......
@@ -34,7 +34,7 @@ using Dims4D = phi::funcs::sparse::Dims4D;
 template <typename T, typename Context>
 void ProductRuleBook(const Context& dev_ctx,
                      const SparseCooTensor& x,
-                     const DenseTensor& kernel,
+                     const std::vector<int>& kernel_sizes,
                      const std::vector<int>& paddings,
                      const std::vector<int>& dilations,
                      const std::vector<int>& strides,
@@ -42,19 +42,19 @@ void ProductRuleBook(const Context& dev_ctx,
                      const bool subm,
                      DenseTensor* rulebook,
                      DenseTensor* counter_per_kernel) {
-  const auto& kernel_dims = kernel.dims();
   const int64_t non_zero_num = x.nnz();
   const auto& non_zero_indices = x.non_zero_indices();
   const int* indices_ptr = non_zero_indices.data<int>();
   int* counter_ptr = counter_per_kernel->data<int>();
-  int kernel_size = kernel_dims[0] * kernel_dims[1] * kernel_dims[2];
+  int kernel_size = kernel_sizes[0] * kernel_sizes[1] * kernel_sizes[2];
   memset(counter_ptr, 0, kernel_size * sizeof(int));
   int rulebook_len = 0;
   // calc the rulebook_len
   const auto& x_dims = x.dims();
   const Dims4D c_x_dims(x_dims[0], x_dims[3], x_dims[2], x_dims[1]);
-  const Dims4D c_kernel_dims(1, kernel_dims[2], kernel_dims[1], kernel_dims[0]);
+  const Dims4D c_kernel_dims(
+      1, kernel_sizes[2], kernel_sizes[1], kernel_sizes[0]);
   const Dims4D c_out_dims(out_dims[0], out_dims[3], out_dims[2], out_dims[1]);
   const Dims4D c_paddings(1, paddings[2], paddings[1], paddings[0]);
   const Dims4D c_strides(1, strides[2], strides[1], strides[0]);
@@ -75,9 +75,9 @@ void ProductRuleBook(const Context& dev_ctx,
   auto f_calc_rulebook = [&](int* rulebook_ptr) {
     int kernel_index = 0, rulebook_index = 0;
-    for (int kz = 0; kz < kernel_dims[0]; kz++) {
-      for (int ky = 0; ky < kernel_dims[1]; ky++) {
-        for (int kx = 0; kx < kernel_dims[2]; kx++) {
+    for (int kz = 0; kz < kernel_sizes[0]; kz++) {
+      for (int ky = 0; ky < kernel_sizes[1]; ky++) {
+        for (int kx = 0; kx < kernel_sizes[2]; kx++) {
           ++kernel_index;
           for (int64_t i = 0; i < non_zero_num; i++) {
             int batch = indices_ptr[i];
......
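The rulebook built by f_calc_rulebook is conceptually a list of (kernel_offset, input_index, output_index) triples, with offsets enumerated in (kz, ky, kx) order over the kernel extents. A simplified sketch of that enumeration under the new kernel_sizes interface (validity checks and the rulebook layout are omitted; names are illustrative):

#include <vector>

// Simplified view of the offset enumeration in f_calc_rulebook: visit every
// kernel offset in (kz, ky, kx) order; the real code then pairs each offset
// with every nonzero input point and keeps only valid input->output mappings.
int CountKernelOffsets(const std::vector<int>& kernel_sizes) {
  int kernel_index = 0;
  for (int kz = 0; kz < kernel_sizes[0]; kz++) {
    for (int ky = 0; ky < kernel_sizes[1]; ky++) {
      for (int kx = 0; kx < kernel_sizes[2]; kx++) {
        ++kernel_index;  // one slot in counter_per_kernel per offset
      }
    }
  }
  // == kernel_sizes[0] * kernel_sizes[1] * kernel_sizes[2] == kernel_size
  return kernel_index;
}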
@@ -33,7 +33,7 @@ void Conv3dGradKernel(const Context& dev_ctx,
                       const SparseCooTensor& x,
                       const DenseTensor& rulebook,
                       const DenseTensor& kernel,
-                      const SparseCooTensor& out_grad,
+                      const DenseTensor& out_grad,
                       const std::vector<int>& paddings,
                       const std::vector<int>& dilations,
                       const std::vector<int>& strides,
@@ -113,7 +113,7 @@ void Conv3dGradKernel(const Context& dev_ctx,
             rulebook_len,
             in_channels,
             in_features_ptr);
-  Gather<T>(out_grad.non_zero_elements().data<T>(),
+  Gather<T>(out_grad.data<T>(),
             rulebook_ptr + rulebook_len * 2,
             rulebook_len,
             out_channels,
......
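The Gather call above now reads straight from the dense out_grad buffer. Its effect is a row gather: for each rulebook entry, copy the out_grad row addressed by the entry's output index into a buffer aligned with the rulebook. A plain-C++ sketch of that access pattern (assumed semantics, not the actual Gather implementation):

// Conceptual CPU equivalent of Gather<T>(out_grad.data<T>(), indices, ...):
// dst row i receives src row indices[i]; channels values are copied per row.
template <typename T>
void GatherRows(const T* src, const int* indices, int n, int channels, T* dst) {
  for (int i = 0; i < n; i++) {
    const T* row = src + indices[i] * channels;
    for (int c = 0; c < channels; c++) {
      dst[i * channels + c] = row[c];
    }
  }
}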
@@ -44,8 +44,13 @@ void Conv3dKernel(const Context& dev_ctx,
   const auto& kernel_dims = kernel.dims();
   int kernel_size = kernel_dims[0] * kernel_dims[1] * kernel_dims[2];
   DDim out_dims = {1, 1, 1, 1, 1};
+  std::vector<int> kernel_sizes(kernel_dims.size());
+  for (int i = 0; i < kernel_dims.size(); i++) {
+    kernel_sizes[i] = kernel_dims[i];
+  }
   phi::funcs::sparse::GetOutShape(
-      x_dims, kernel_dims, paddings, dilations, strides, &out_dims);
+      x_dims, kernel_sizes, paddings, dilations, strides, &out_dims);
   const int in_channels = kernel_dims[3];
   const int out_channels = kernel_dims[4];
@@ -63,7 +68,7 @@ void Conv3dKernel(const Context& dev_ctx,
   ProductRuleBook<T, Context>(dev_ctx,
                               x,
-                              kernel,
+                              kernel_sizes,
                               subm_paddings,
                               dilations,
                               subm_strides,
......
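The net effect of the restructuring is visible here: Conv3dKernel flattens the kernel's DDim into a plain std::vector<int> once, and GetOutShape/ProductRuleBook consume only those extents, so shape inference and rulebook construction no longer depend on the kernel tensor's storage. A self-contained sketch of that extraction step (the DDim stand-in type is illustrative):

#include <cstdint>
#include <vector>

// Stand-in for copying a 5-D kernel DDim {D, H, W, C, OC} into plain ints,
// mirroring the loop added in Conv3dKernel above. Illustrative types only.
std::vector<int> ToKernelSizes(const std::vector<int64_t>& kernel_dims) {
  std::vector<int> kernel_sizes(kernel_dims.size());
  for (size_t i = 0; i < kernel_dims.size(); i++) {
    kernel_sizes[i] = static_cast<int>(kernel_dims[i]);
  }
  return kernel_sizes;
}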
@@ -38,7 +38,7 @@ void Conv3dGradKernel(const Context& dev_ctx,
                       const SparseCooTensor& x,
                       const DenseTensor& rulebook,
                       const DenseTensor& kernel,
-                      const SparseCooTensor& out_grad,
+                      const DenseTensor& out_grad,
                       const std::vector<int>& paddings,
                       const std::vector<int>& dilations,
                       const std::vector<int>& strides,
@@ -140,8 +140,7 @@ void Conv3dGradKernel(const Context& dev_ctx,
   GatherKernel<T, int><<<config.block_per_grid.x,
                          config.thread_per_block.x,
                          0,
-                         dev_ctx.stream()>>>(
-      out_grad.non_zero_elements().data<T>(),
+                         dev_ctx.stream()>>>(out_grad.data<T>(),
                                              rulebook_ptr + rulebook_len * 2,
                                              out_grad_features_ptr,
                                              rulebook_len,
......
@@ -132,11 +132,12 @@ void TestConv3dBase(const std::vector<int>& indices,
   f_verify(out.non_zero_elements().data<T>(), correct_out_features);
   if (backward) {
-    std::vector<DenseTensor> grads = sparse::Conv3dGrad<T>(dev_ctx_cpu,
+    std::vector<DenseTensor> grads =
+        sparse::Conv3dGrad<T>(dev_ctx_cpu,
                               x_tensor,
                               rulebook,
                               kernel_tensor,
-                              out,
+                              out.non_zero_elements(),
                               paddings,
                               dilations,
                               strides,
@@ -231,11 +232,12 @@ void TestConv3dBase(const std::vector<int>& indices,
   f_verify(h_features_tensor.data<T>(), correct_out_features);
   if (backward) {
-    std::vector<DenseTensor> grads = sparse::Conv3dGrad<T>(dev_ctx_gpu,
+    std::vector<DenseTensor> grads =
+        sparse::Conv3dGrad<T>(dev_ctx_gpu,
                               d_x_tensor,
                               d_rulebook,
                               d_kernel_tensor,
-                              d_out,
+                              d_out.non_zero_elements(),
                               paddings,
                               dilations,
                               strides,
......