Unverified commit 3e3f5d90, authored by LUZY0726 and committed by GitHub

[sparse] Add backend conv2d support (#54707)

Parent e05d5c0a
@@ -23,10 +23,31 @@ inline void GetOutShape(const DDim& x_dims,
                         const std::vector<int>& dilations,
                         const std::vector<int>& strides,
                         DDim* out_dims) {
-  PADDLE_ENFORCE_EQ(
-      x_dims.size(),
-      5,
-      phi::errors::InvalidArgument("the shape of x should be (N, D, H, W, C)"));
+  const bool is2D = out_dims->size() == 4 ? true : false;
+  if (is2D) {
+    PADDLE_ENFORCE_EQ(
+        x_dims.size(),
+        4,
+        phi::errors::InvalidArgument("the shape of x should be (N, H, W, C)"));
+    PADDLE_ENFORCE_EQ(kernel_sizes.size(),
+                      4,
+                      phi::errors::InvalidArgument(
+                          "the shape of kernel should be (H, W, C, OC)"));
+    // infer out shape
+    (*out_dims)[0] = x_dims[0];
+    (*out_dims)[3] = kernel_sizes[3];
+    for (int i = 1; i < 3; i++) {
+      (*out_dims)[i] = (x_dims[i] + 2 * paddings[i - 1] -
+                        dilations[i - 1] * (kernel_sizes[i - 1] - 1) - 1) /
+                           strides[i - 1] +
+                       1;
+    }
+  } else {
+    PADDLE_ENFORCE_EQ(x_dims.size(),
+                      5,
+                      phi::errors::InvalidArgument(
+                          "the shape of x should be (N, D, H, W, C)"));
   PADDLE_ENFORCE_EQ(kernel_sizes.size(),
                     5,
                     phi::errors::InvalidArgument(
@@ -41,6 +62,7 @@ inline void GetOutShape(const DDim& x_dims,
                          strides[i - 1] +
                      1;
   }
+  }
 }

 inline void ResetSubmKernelSizeAndStrides(const DDim& kernel_dims,
@@ -64,8 +86,12 @@ void Conv3dInferMeta(const MetaTensor& x,
                      MetaTensor* rulebook,
                      MetaTensor* counter) {
   const auto& x_dims = x.dims();
+  const bool is2D = x_dims.size() == 4 ? true : false;
   const auto& kernel_dims = kernel.dims();
-  DDim out_dims = {1, 1, 1, 1, 1};
+  int rank = is2D ? 4 : 5;
+  std::vector<int> out_dims_vec(rank, 1);
+  DDim out_dims = make_ddim(out_dims_vec);
   std::vector<int> kernel_sizes(kernel_dims.size());
   for (int i = 0; i < kernel_dims.size(); i++) {
......
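Note for reviewers: the 2D branch added to GetOutShape above applies the standard convolution output-size formula, out = (in + 2*pad - dilation*(k-1) - 1) / stride + 1, to H and W only. A quick standalone check of that arithmetic in plain Python (helper name is mine, not part of the patch):

def conv_out_size(in_size, k, pad, dilation, stride):
    # same formula GetOutShape applies to each spatial axis
    return (in_size + 2 * pad - dilation * (k - 1) - 1) // stride + 1

# A 3x4 (H, W) input with a 3x3 kernel, padding 0, dilation 1, stride 1
# shrinks to a 1x2 output, matching the expected values in the tests below.
print(conv_out_size(3, 3, 0, 1, 1), conv_out_size(4, 3, 0, 1, 1))  # 1 2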
@@ -101,10 +101,31 @@ inline void GetOutShape(const DDim& x_dims,
                         const std::vector<int>& dilations,
                         const std::vector<int>& strides,
                         DDim* out_dims) {
-  PADDLE_ENFORCE_EQ(
-      x_dims.size(),
-      5,
-      phi::errors::InvalidArgument("the shape of x should be (N, D, H, W, C)"));
+  const bool is2D = out_dims->size() == 4 ? true : false;
+  if (is2D) {
+    PADDLE_ENFORCE_EQ(
+        x_dims.size(),
+        4,
+        phi::errors::InvalidArgument("the shape of x should be (N, H, W, C)"));
+    PADDLE_ENFORCE_EQ(kernel_sizes.size(),
+                      4,
+                      phi::errors::InvalidArgument(
+                          "the shape of kernel should be (H, W, C, OC)"));
+    // infer out shape
+    (*out_dims)[0] = x_dims[0];
+    (*out_dims)[3] = kernel_sizes[3];
+    for (int i = 1; i < 3; i++) {
+      (*out_dims)[i] = (x_dims[i] + 2 * paddings[i - 1] -
+                        dilations[i - 1] * (kernel_sizes[i - 1] - 1) - 1) /
+                           strides[i - 1] +
+                       1;
+    }
+  } else {
+    PADDLE_ENFORCE_EQ(x_dims.size(),
+                      5,
+                      phi::errors::InvalidArgument(
+                          "the shape of x should be (N, D, H, W, C)"));
   PADDLE_ENFORCE_EQ(kernel_sizes.size(),
                     5,
                     phi::errors::InvalidArgument(
@@ -119,6 +140,7 @@ inline void GetOutShape(const DDim& x_dims,
                          strides[i - 1] +
                      1;
   }
+  }
 }

 inline void ResetSubmKernelSizeAndStrides(const DDim& kernel_dims,
......
@@ -63,6 +63,5 @@ SparseCooTensor Conv3dCoo(const Context& dev_ctx,
                           counter);
   return coo;
 }
-
 }  // namespace sparse
 }  // namespace phi
@@ -42,50 +42,101 @@ void ProductRuleBook(const Context& dev_ctx,
                      const bool subm,
                      DenseTensor* rulebook,
                      int* counter_per_kernel) {
+  const bool is2D = out_dims.size() == 4 ? true : false;
   const int64_t non_zero_num = x.nnz();
   const auto& indices = x.indices();
   const IntT* indices_ptr = indices.data<IntT>();
-  int kernel_size = kernel_sizes[0] * kernel_sizes[1] * kernel_sizes[2];
+  int kernel_size = is2D ? kernel_sizes[0] * kernel_sizes[1]
+                         : kernel_sizes[0] * kernel_sizes[1] * kernel_sizes[2];
   memset(counter_per_kernel, 0, kernel_size * sizeof(int));

   int rulebook_len = 0;
   // calc the rulebook_len
   const auto& x_dims = x.dims();
-  const Dims4D c_x_dims(x_dims[0], x_dims[3], x_dims[2], x_dims[1]);
-  const Dims4D c_kernel_dims(
-      1, kernel_sizes[2], kernel_sizes[1], kernel_sizes[0]);
-  const Dims4D c_out_dims(out_dims[0], out_dims[3], out_dims[2], out_dims[1]);
-  const Dims4D c_paddings(1, paddings[2], paddings[1], paddings[0]);
-  const Dims4D c_strides(1, strides[2], strides[1], strides[0]);
-  const Dims4D c_dilations(1, dilations[2], dilations[1], dilations[0]);
+
+  int xdim0, xdim1, xdim2, xdim3;
+  int kdim0, kdim1, kdim2, kdim3;
+  int odim0, odim1, odim2, odim3;
+  int pdim0, pdim1, pdim2, pdim3;
+  int sdim0, sdim1, sdim2, sdim3;
+  int ddim0, ddim1, ddim2, ddim3;
+
+  xdim0 = x_dims[0];
+  xdim1 = is2D ? x_dims[2] : x_dims[3];
+  xdim2 = is2D ? x_dims[1] : x_dims[2];
+  xdim3 = is2D ? 1 : x_dims[1];
+
+  kdim0 = 1;
+  kdim1 = is2D ? kernel_sizes[1] : kernel_sizes[2];
+  kdim2 = is2D ? kernel_sizes[0] : kernel_sizes[1];
+  kdim3 = is2D ? 1 : kernel_sizes[0];
+
+  odim0 = out_dims[0];
+  odim1 = is2D ? out_dims[2] : out_dims[3];
+  odim2 = is2D ? out_dims[1] : out_dims[2];
+  odim3 = is2D ? 1 : out_dims[1];
+
+  pdim0 = 1;
+  pdim1 = is2D ? paddings[1] : paddings[2];
+  pdim2 = is2D ? paddings[0] : paddings[1];
+  pdim3 = is2D ? 1 : paddings[0];
+
+  sdim0 = 1;
+  sdim1 = is2D ? strides[1] : strides[2];
+  sdim2 = is2D ? strides[0] : strides[1];
+  sdim3 = is2D ? 1 : strides[0];
+
+  ddim0 = 1;
+  ddim1 = is2D ? dilations[1] : dilations[2];
+  ddim2 = is2D ? dilations[0] : dilations[1];
+  ddim3 = is2D ? 1 : dilations[0];
+
+  const Dims4D c_x_dims(xdim0, xdim1, xdim2, xdim3);
+  const Dims4D c_kernel_dims(kdim0, kdim1, kdim2, kdim3);
+  const Dims4D c_out_dims(odim0, odim1, odim2, odim3);
+  const Dims4D c_paddings(pdim0, pdim1, pdim2, pdim3);
+  const Dims4D c_strides(sdim0, sdim1, sdim2, sdim3);
+  const Dims4D c_dilations(ddim0, ddim1, ddim2, ddim3);

   std::set<IntT> hash_in;
   if (subm) {
     for (int i = 0; i < non_zero_num; i++) {
       IntT batch = indices_ptr[i];
-      IntT in_z = indices_ptr[i + non_zero_num];
-      IntT in_y = indices_ptr[i + 2 * non_zero_num];
-      IntT in_x = indices_ptr[i + 3 * non_zero_num];
-      IntT index = phi::funcs::sparse::PointToIndex<DDim>(
-          batch, in_x, in_y, in_z, x_dims);
+      IntT in_z = is2D ? 0 : indices_ptr[i + non_zero_num];
+      IntT in_y = is2D ? indices_ptr[i + non_zero_num]
                        : indices_ptr[i + 2 * non_zero_num];
+      IntT in_x = is2D ? indices_ptr[i + 2 * non_zero_num]
                        : indices_ptr[i + 3 * non_zero_num];
+      IntT index = phi::funcs::sparse::PointToIndex<Dims4D>(
+          batch, in_x, in_y, in_z, c_x_dims);
       hash_in.insert(index);
     }
   }

   auto f_calc_rulebook = [&](IntT* rulebook_ptr) {
     int kernel_index = 0, rulebook_index = 0;
-    for (int kz = 0; kz < kernel_sizes[0]; kz++) {
-      for (int ky = 0; ky < kernel_sizes[1]; ky++) {
-        for (int kx = 0; kx < kernel_sizes[2]; kx++) {
+    int zceil = is2D ? 1 : kernel_sizes[0];
+    int yceil = is2D ? kernel_sizes[0] : kernel_sizes[1];
+    int xceil = is2D ? kernel_sizes[1] : kernel_sizes[2];
+    for (int kz = 0; kz < zceil; kz++) {
+      for (int ky = 0; ky < yceil; ky++) {
+        for (int kx = 0; kx < xceil; kx++) {
           ++kernel_index;
           for (int64_t i = 0; i < non_zero_num; i++) {
             IntT batch = indices_ptr[i];
-            IntT in_z = indices_ptr[i + non_zero_num];
-            IntT in_y = indices_ptr[i + 2 * non_zero_num];
-            IntT in_x = indices_ptr[i + 3 * non_zero_num];
-            IntT out_z = (in_z + paddings[0] - kz * dilations[0]) / strides[0];
-            IntT out_y = (in_y + paddings[1] - ky * dilations[1]) / strides[1];
-            IntT out_x = (in_x + paddings[2] - kx * dilations[2]) / strides[2];
+            IntT in_z = is2D ? 0 : indices_ptr[i + non_zero_num];
+            IntT in_y = is2D ? indices_ptr[i + non_zero_num]
                              : indices_ptr[i + 2 * non_zero_num];
+            IntT in_x = is2D ? indices_ptr[i + 2 * non_zero_num]
                              : indices_ptr[i + 3 * non_zero_num];
+            IntT out_z =
+                is2D ? 0
                     : (in_z + paddings[0] - kz * dilations[0]) / strides[0];
+            IntT out_y =
+                (in_y + c_paddings[2] - ky * c_dilations[2]) / c_strides[2];
+            IntT out_x =
+                (in_x + c_paddings[3] - kx * c_dilations[3]) / c_strides[3];
             if (phi::funcs::sparse::Check(c_x_dims,
                                           c_kernel_dims,
                                           c_paddings,
@@ -98,8 +149,8 @@ void ProductRuleBook(const Context& dev_ctx,
                                           ky,
                                           kz)) {
               if (subm) {
-                IntT out_index = phi::funcs::sparse::PointToIndex<DDim>(
-                    batch, out_x, out_y, out_z, out_dims);
+                IntT out_index = phi::funcs::sparse::PointToIndex<Dims4D>(
+                    batch, out_x, out_y, out_z, c_out_dims);
                 if (hash_in.find(out_index) == hash_in.end()) {
                   continue;
                 }
@@ -112,8 +163,8 @@ void ProductRuleBook(const Context& dev_ctx,
                 rulebook_ptr[rulebook_index] = kernel_index - 1;
                 rulebook_ptr[rulebook_index + rulebook_len] = i;  // in_i
                 rulebook_ptr[rulebook_index + rulebook_len * 2] =
-                    phi::funcs::sparse::PointToIndex<DDim>(
-                        batch, out_x, out_y, out_z, out_dims);  // out_index
+                    phi::funcs::sparse::PointToIndex<Dims4D>(
+                        batch, out_x, out_y, out_z, c_out_dims);  // out_index
                 ++rulebook_index;
               }
             }
@@ -141,6 +192,8 @@ void UpdateRulebookAndOutIndex(const Context& dev_ctx,
                                const DDim& out_dims,
                                DenseTensor* rulebook,
                                SparseCooTensor* out) {
+  const bool is2D = out_dims.size() == 4 ? true : false;
+
   std::set<IntT> out_indexs;
   int n = rulebook->dims()[1];
   IntT* rulebook_ptr = rulebook->data<IntT>();
@@ -149,7 +202,7 @@ void UpdateRulebookAndOutIndex(const Context& dev_ctx,
   }

   int out_non_zero_num = out_indexs.size();
-  const int64_t sparse_dim = 4;
+  const int64_t sparse_dim = is2D ? 3 : 4;
   DenseTensorMeta indices_meta(phi::CppTypeToDataType<IntT>::Type(),
                                {sparse_dim, out_non_zero_num},
                                DataLayout::NCHW);
@@ -159,15 +212,29 @@ void UpdateRulebookAndOutIndex(const Context& dev_ctx,
   phi::DenseTensor out_values = phi::Empty(dev_ctx, std::move(values_meta));
   IntT* out_indices_ptr = out_indices.data<IntT>();
   int i = 0;
+
+  int odim0, odim1, odim2, odim3;
+  odim0 = out_dims[0];
+  odim1 = is2D ? out_dims[2] : out_dims[3];
+  odim2 = is2D ? out_dims[1] : out_dims[2];
+  odim3 = is2D ? 1 : out_dims[1];
+  const Dims4D c_out_dims(odim0, odim1, odim2, odim3);
+
   for (auto it = out_indexs.begin(); it != out_indexs.end(); it++, i++) {
     const IntT index = *it;
     IntT batch, x, y, z;
-    phi::funcs::sparse::IndexToPoint<DDim>(index, out_dims, &batch, &x, &y, &z);
+    phi::funcs::sparse::IndexToPoint<Dims4D>(
+        index, c_out_dims, &batch, &x, &y, &z);
     out_indices_ptr[i] = batch;
+    if (is2D) {
+      out_indices_ptr[i + out_non_zero_num] = y;
+      out_indices_ptr[i + out_non_zero_num * 2] = x;
+    } else {
       out_indices_ptr[i + out_non_zero_num] = z;
       out_indices_ptr[i + out_non_zero_num * 2] = y;
       out_indices_ptr[i + out_non_zero_num * 3] = x;
+    }
   }
   for (i = 0; i < n; i++) {
     IntT out_index = rulebook_ptr[i + n * 2];
     rulebook_ptr[i + n * 2] =
......
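Note for reviewers: the xdim/kdim/odim remapping above is the heart of the change. A 2D (N, H, W, C) problem is embedded into the existing 4D (batch, z, y, x) indexing with a unit z axis, so PointToIndex/IndexToPoint and the rulebook machinery stay shared between conv2d and conv3d. A sketch of the idea in plain Python (helper names are mine, and the flattening is what I believe phi::funcs::sparse::PointToIndex does; treat it as illustration, not Paddle source):

def to_dims4d(dims, is2d):
    # dims: (N, H, W, C) when is2d, else (N, D, H, W, C); channels are dropped.
    # A 2D input gets depth 1, so the 3D flattening degenerates cleanly.
    return (dims[0], 1, dims[1], dims[2]) if is2d else tuple(dims[:4])

def point_to_index(batch, x, y, z, d4):
    # row-major flattening over (batch, z, y, x); when d4[1] == 1 and z == 0
    # this reduces to batch * H * W + y * W + x, i.e. the plain 2D index
    n, dz, dy, dx = d4
    return ((batch * dz + z) * dy + y) * dx + x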
@@ -47,9 +47,12 @@ void Conv3dCooGradCPUKernel(const CPUContext& dev_ctx,
                             SparseCooTensor* x_grad,
                             DenseTensor* kernel_grad) {
   const auto& kernel_dims = kernel.dims();
-  const int kernel_size = kernel_dims[0] * kernel_dims[1] * kernel_dims[2];
-  const int in_channels = kernel_dims[3];
-  const int out_channels = kernel_dims[4];
+  const bool is2D = kernel_dims.size() == 4 ? true : false;
+  const int kernel_size =
+      is2D ? kernel_dims[0] * kernel_dims[1]
           : kernel_dims[0] * kernel_dims[1] * kernel_dims[2];
+  const int in_channels = is2D ? kernel_dims[2] : kernel_dims[3];
+  const int out_channels = is2D ? kernel_dims[3] : kernel_dims[4];
   int rulebook_len = 0;
   const IntT* rulebook_ptr = phi::funcs::sparse::GetRulebookPtr<IntT>(
@@ -210,7 +213,6 @@ void Conv3dCooGradKernel(const Context& dev_ctx,
                             kernel_grad);
       }));
 }
-
 }  // namespace sparse
 }  // namespace phi
......
@@ -45,9 +45,15 @@ void Conv3dCooCPUKernel(const CPUContext& dev_ctx,
   // if x.layout != NDHWC then transpose(x), transpose(weight)
   const auto& x_dims = x.dims();
+  const bool is2D = x_dims.size() == 4 ? true : false;
   const auto& kernel_dims = kernel.dims();
-  int kernel_size = kernel_dims[0] * kernel_dims[1] * kernel_dims[2];
-  DDim out_dims = {1, 1, 1, 1, 1};
+  int kernel_size = is2D ? kernel_dims[0] * kernel_dims[1]
                         : kernel_dims[0] * kernel_dims[1] * kernel_dims[2];
+
+  int count_tmp = is2D ? 4 : 5;
+  std::vector<int> out_dims_vec(count_tmp, 1);
+  DDim out_dims = make_ddim(out_dims_vec);
+
   std::vector<int> kernel_sizes(kernel_dims.size());
   for (int i = 0; i < kernel_dims.size(); i++) {
     kernel_sizes[i] = kernel_dims[i];
@@ -63,8 +69,8 @@ void Conv3dCooCPUKernel(const CPUContext& dev_ctx,
   phi::funcs::sparse::GetOutShape(
       x_dims, kernel_sizes, subm_paddings, dilations, subm_strides, &out_dims);
-  const int in_channels = kernel_dims[3];
-  const int out_channels = kernel_dims[4];
+  const int in_channels = is2D ? kernel_dims[2] : kernel_dims[3];
+  const int out_channels = is2D ? kernel_dims[3] : kernel_dims[4];

   // Second algorithm:
   // https://pdfs.semanticscholar.org/5125/a16039cabc6320c908a4764f32596e018ad3.pdf
@@ -112,7 +118,6 @@ void Conv3dCooCPUKernel(const CPUContext& dev_ctx,
     phi::funcs::sparse::SaveToTable(
         dev_ctx, x, key, tmp_rulebook, h_counter, out, rulebook, counter);
   }
-  // int n = rulebook->dims()[1];
   // 2. gather
   DenseTensorMeta in_features_meta(
@@ -198,7 +203,6 @@ void Conv3dCooKernel(const Context& dev_ctx,
                            counter);
       }));
 }
-
 }  // namespace sparse
 }  // namespace phi
......
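Note for reviewers: the rulebook that drives this kernel is a 3 x len table: row 0 holds the kernel-offset id, row 1 the index of the contributing input nonzero (in_i), and row 2 the output index (flattened at first, then remapped to a compacted output row by UpdateRulebookAndOutIndex). The gather-GEMM-scatter step referenced above then works roughly like this sketch (NumPy, illustrative only; the Paddle implementation batches the GEMMs per kernel offset rather than looping row by row):

import numpy as np

def gather_gemm_scatter(rulebook, in_feats, kernel, out_nnz):
    # rulebook: (3, len); in_feats: (nnz, in_channels);
    # kernel: (kernel_size, in_channels, out_channels)
    out = np.zeros((out_nnz, kernel.shape[-1]), dtype=in_feats.dtype)
    for k_id, in_i, out_i in rulebook.T:
        # gather one input row, apply this offset's kernel slice, scatter-add
        out[out_i] += in_feats[in_i] @ kernel[k_id]
    return out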
@@ -331,6 +331,7 @@ __global__ void ProductRuleBookKernel(const T* x_indices,
                                       const Dims4D paddings,
                                       const Dims4D dilations,
                                       const Dims4D strides,
+                                      const bool is2D,
                                       T* rulebook,
                                       int* counter) {
   int tid = threadIdx.x + blockIdx.x * blockDim.x;
@@ -345,9 +346,11 @@ __global__ void ProductRuleBookKernel(const T* x_indices,
   for (int i = tid; i < non_zero_num; i += gridDim.x * blockDim.x) {
     int kernel_index = 0;
     T batch = x_indices[i];
-    T in_z = x_indices[i + non_zero_num];
-    T in_y = x_indices[i + 2 * non_zero_num];
-    T in_x = x_indices[i + 3 * non_zero_num];
+    T in_z = is2D ? 0 : x_indices[i + non_zero_num];
+    T in_y =
+        is2D ? x_indices[i + non_zero_num] : x_indices[i + 2 * non_zero_num];
+    T in_x = is2D ? x_indices[i + 2 * non_zero_num]
                  : x_indices[i + 3 * non_zero_num];
     for (int kz = 0; kz < kernel_dims[1]; kz++) {
       for (int ky = 0; ky < kernel_dims[2]; ky++) {
         for (int kx = 0; kx < kernel_dims[3]; kx++) {
@@ -363,7 +366,9 @@ __global__ void ProductRuleBookKernel(const T* x_indices,
                                         kx,
                                         ky,
                                         kz)) {
-            T out_z = (in_z + paddings[1] - kz * dilations[1]) / strides[1];
+            T out_z =
+                is2D ? 0
                     : (in_z + paddings[1] - kz * dilations[1]) / strides[1];
             T out_y = (in_y + paddings[2] - ky * dilations[2]) / strides[2];
             T out_x = (in_x + paddings[3] - kx * dilations[3]) / strides[3];
             in_i = i;
@@ -390,12 +395,15 @@ __global__ void GetOutIndexTable1(const IntT* indices,
                                   const IntT non_zero_num,
                                   const Dims4D dims,
                                   int* index_flags,
+                                  const bool is2D,
                                   int* out_index_table) {
   CUDA_KERNEL_LOOP_TYPE(i, non_zero_num, int64_t) {
     IntT batch = indices[i];
-    IntT in_z = indices[i + non_zero_num];
-    IntT in_y = indices[i + 2 * non_zero_num];
-    IntT in_x = indices[i + 3 * non_zero_num];
+    IntT in_z = is2D ? 0 : indices[i + non_zero_num];
+    IntT in_y =
+        is2D ? indices[i + non_zero_num] : indices[i + 2 * non_zero_num];
+    IntT in_x =
+        is2D ? indices[i + 2 * non_zero_num] : indices[i + 3 * non_zero_num];
     IntT index = PointToIndex(batch, in_x, in_y, in_z, dims);
     phi::funcs::sparse::SetBits(index, index_flags);
     out_index_table[index] = i;
@@ -406,6 +414,7 @@ template <typename IntT>
 __global__ void GetOutIndexTable(int* indexs,
                                  const int non_zero_num,
                                  const Dims4D out_dims,
+                                 const bool is2D,
                                  int* out_index_table,
                                  IntT* out_indices) {
   CUDA_KERNEL_LOOP_TYPE(i, non_zero_num, int64_t) {
@@ -416,9 +425,14 @@ __global__ void GetOutIndexTable(int* indexs,
         index, out_dims, &batch, &x, &y, &z);
     // get out indices
     out_indices[i] = batch;
+    if (is2D) {
+      out_indices[i + non_zero_num] = y;
+      out_indices[i + non_zero_num * 2] = x;
+    } else {
       out_indices[i + non_zero_num] = z;
       out_indices[i + non_zero_num * 2] = y;
       out_indices[i + non_zero_num * 3] = x;
+    }
     indexs[i] = 0;
   }
 }
@@ -464,6 +478,7 @@ __global__ void ProductSubmRuleBookKernel(const T* x_indices,
                                           const Dims4D paddings,
                                           const Dims4D dilations,
                                           const Dims4D strides,
+                                          const bool is2D,
                                           const int* index_flags,
                                           const int* out_index_table,
                                           T* rulebook,
@@ -472,7 +487,6 @@ __global__ void ProductSubmRuleBookKernel(const T* x_indices,
   const int kernel_size = kernel_dims[3] * kernel_dims[2] * kernel_dims[1];
   extern __shared__ int counter_buf[];  // kernel_size
   int* counter_buf2 = counter_buf + kernel_size;
-  // length = kernel_size * blockDim.x * 2;
   int* rulebook_buf = counter_buf + kernel_size * 2;

   const int offset = kernel_size * non_zero_num;
@@ -484,9 +498,11 @@ __global__ void ProductSubmRuleBookKernel(const T* x_indices,
   for (int i = tid; i < non_zero_num; i += gridDim.x * blockDim.x) {
     int kernel_index = 0;
     T batch = x_indices[i];
-    T in_z = x_indices[i + non_zero_num];
-    T in_y = x_indices[i + 2 * non_zero_num];
-    T in_x = x_indices[i + 3 * non_zero_num];
+    T in_z = is2D ? 0 : x_indices[i + non_zero_num];
+    T in_y =
+        is2D ? x_indices[i + non_zero_num] : x_indices[i + 2 * non_zero_num];
+    T in_x = is2D ? x_indices[i + 2 * non_zero_num]
                  : x_indices[i + 3 * non_zero_num];
     for (int kz = 0; kz < kernel_dims[1]; kz++) {
       for (int ky = 0; ky < kernel_dims[2]; ky++) {
         for (int kx = 0; kx < kernel_dims[3]; kx++) {
@@ -502,7 +518,9 @@ __global__ void ProductSubmRuleBookKernel(const T* x_indices,
                                         kx,
                                         ky,
                                         kz)) {
-            T out_z = (in_z + paddings[1] - kz * dilations[1]) / strides[1];
+            T out_z =
+                is2D ? 0
                     : (in_z + paddings[1] - kz * dilations[1]) / strides[1];
             T out_y = (in_y + paddings[2] - ky * dilations[2]) / strides[2];
             T out_x = (in_x + paddings[3] - kx * dilations[3]) / strides[3];
             out_index = phi::funcs::sparse::PointToIndex<Dims4D>(
@@ -637,21 +655,62 @@ int ProductRuleBook(const Context& dev_ctx,
                     SparseCooTensor* out,
                     int* h_counter,
                     int* h_offsets) {
+  const bool is2D = out_dims.size() == 4 ? true : false;
   auto indices_dtype = phi::CppTypeToDataType<IntT>::Type();
   const int64_t non_zero_num = x.nnz();
   const auto& indices = x.indices();
   const IntT* indices_ptr = indices.data<IntT>();
   int* counter_ptr = counter_per_kernel->data<int>();
   int* offsets_ptr = offsets_per_kernel->data<int>();
-  int kernel_size = kernel_sizes[0] * kernel_sizes[1] * kernel_sizes[2];
+  int kernel_size = is2D ? kernel_sizes[0] * kernel_sizes[1]
                         : kernel_sizes[0] * kernel_sizes[1] * kernel_sizes[2];
   const auto x_dims = x.dims();
-  Dims4D d_x_dims(x_dims[0], x_dims[3], x_dims[2], x_dims[1]);
-  Dims4D d_kernel_dims(1, kernel_sizes[2], kernel_sizes[1], kernel_sizes[0]);
-  Dims4D d_out_dims(out_dims[0], out_dims[3], out_dims[2], out_dims[1]);
-  Dims4D d_paddings(1, paddings[2], paddings[1], paddings[0]);
-  Dims4D d_strides(1, strides[2], strides[1], strides[0]);
-  Dims4D d_dilations(1, dilations[2], dilations[1], dilations[0]);
+
+  int xdim0, xdim1, xdim2, xdim3;
+  int kdim0, kdim1, kdim2, kdim3;
+  int odim0, odim1, odim2, odim3;
+  int pdim0, pdim1, pdim2, pdim3;
+  int sdim0, sdim1, sdim2, sdim3;
+  int ddim0, ddim1, ddim2, ddim3;
+
+  xdim0 = x_dims[0];
+  xdim1 = is2D ? x_dims[2] : x_dims[3];
+  xdim2 = is2D ? x_dims[1] : x_dims[2];
+  xdim3 = is2D ? 1 : x_dims[1];
+
+  kdim0 = 1;
+  kdim1 = is2D ? kernel_sizes[1] : kernel_sizes[2];
+  kdim2 = is2D ? kernel_sizes[0] : kernel_sizes[1];
+  kdim3 = is2D ? 1 : kernel_sizes[0];
+
+  odim0 = out_dims[0];
+  odim1 = is2D ? out_dims[2] : out_dims[3];
+  odim2 = is2D ? out_dims[1] : out_dims[2];
+  odim3 = is2D ? 1 : out_dims[1];
+
+  pdim0 = 1;
+  pdim1 = is2D ? paddings[1] : paddings[2];
+  pdim2 = is2D ? paddings[0] : paddings[1];
+  pdim3 = is2D ? 1 : paddings[0];
+
+  sdim0 = 1;
+  sdim1 = is2D ? strides[1] : strides[2];
+  sdim2 = is2D ? strides[0] : strides[1];
+  sdim3 = is2D ? 1 : strides[0];
+
+  ddim0 = 1;
+  ddim1 = is2D ? dilations[1] : dilations[2];
+  ddim2 = is2D ? dilations[0] : dilations[1];
+  ddim3 = is2D ? 1 : dilations[0];
+
+  const Dims4D d_x_dims(xdim0, xdim1, xdim2, xdim3);
+  const Dims4D d_kernel_dims(kdim0, kdim1, kdim2, kdim3);
+  const Dims4D d_out_dims(odim0, odim1, odim2, odim3);
+  const Dims4D d_paddings(pdim0, pdim1, pdim2, pdim3);
+  const Dims4D d_strides(sdim0, sdim1, sdim2, sdim3);
+  const Dims4D d_dilations(ddim0, ddim1, ddim2, ddim3);

   // 1. product rule book
   phi::backends::gpu::GpuMemsetAsync(counter_ptr,
                                      0,
@@ -682,7 +741,9 @@ int ProductRuleBook(const Context& dev_ctx,
     DenseTensor tmp_rulebook = phi::Empty(dev_ctx, std::move(rulebook_meta));
     IntT* rulebook_ptr = tmp_rulebook.data<IntT>();
     DenseTensor out_indices = phi::EmptyLike<IntT>(dev_ctx, x.indices());
-    DenseTensor out_values = phi::Empty<T>(dev_ctx, {x.nnz(), kernel_sizes[4]});
+    int tmpidx = is2D ? 3 : 4;
+    DenseTensor out_values =
+        phi::Empty<T>(dev_ctx, {x.nnz(), kernel_sizes[tmpidx]});
     phi::Copy(dev_ctx, x.indices(), dev_ctx.GetPlace(), false, &out_indices);
@@ -695,6 +756,7 @@ int ProductRuleBook(const Context& dev_ctx,
                            non_zero_num,
                            d_x_dims,
                            index_flags_ptr,
+                           is2D,
                            out_index_table_ptr);

     size_t cache_size =
@@ -721,6 +783,7 @@ int ProductRuleBook(const Context& dev_ctx,
             d_paddings,
             d_dilations,
             d_strides,
+            is2D,
             index_flags_ptr,
             out_index_table_ptr,
             rulebook_ptr,
@@ -766,6 +829,7 @@ int ProductRuleBook(const Context& dev_ctx,
                            d_paddings,
                            d_dilations,
                            d_strides,
+                           is2D,
                            rulebook_ptr,
                            counter_ptr);
@@ -833,11 +897,11 @@ int ProductRuleBook(const Context& dev_ctx,
                              out_nnz,
                              out_index_ptr);

-    const int64_t sparse_dim = 4;
+    const int64_t sparse_dim = is2D ? 3 : 4;
     phi::DenseTensor out_indices =
         phi::Empty<IntT>(dev_ctx, {sparse_dim, out_nnz});
     phi::DenseTensor out_values =
-        phi::Empty<T>(dev_ctx, {out_nnz, kernel_sizes[4]});
+        phi::Empty<T>(dev_ctx, {out_nnz, kernel_sizes[sparse_dim]});
     out->SetMember(out_indices, out_values, out_dims, false);

     IntT* out_indices_ptr = out_indices.data<IntT>();
@@ -849,6 +913,7 @@ int ProductRuleBook(const Context& dev_ctx,
                        dev_ctx.stream()>>>(out_index_ptr,
                                            out_nnz,
                                            d_out_dims,
+                                           is2D,
                                            out_index_table_ptr,
                                            out_indices_ptr);
     config = phi::backends::gpu::GetGpuLaunchConfig1D(dev_ctx, rulebook_len, 1);
......
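Note for reviewers: GetOutIndexTable recovers (batch, x, y, z) from a flattened index via IndexToPoint and, in the 2D case, writes only the y and x rows of out_indices. The inverse mapping is, schematically (plain Python, my naming, assuming the row-major flattening sketched earlier):

def index_to_point(index, d4):
    # inverse of ((batch * dz + z) * dy + y) * dx + x
    n, dz, dy, dx = d4
    x = index % dx
    index //= dx
    y = index % dy
    index //= dy
    z = index % dz
    batch = index // dz
    return batch, x, y, z  # z is always 0 when dz == 1 (the 2D case)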
@@ -57,9 +57,12 @@ void Conv3dCooGradGPUKernel(const GPUContext& dev_ctx,
                             SparseCooTensor* x_grad,
                             DenseTensor* kernel_grad) {
   const auto& kernel_dims = kernel.dims();
-  const int kernel_size = kernel_dims[0] * kernel_dims[1] * kernel_dims[2];
-  const int in_channels = kernel_dims[3];
-  const int out_channels = kernel_dims[4];
+  const bool is2D = kernel_dims.size() == 4 ? true : false;
+  const int kernel_size =
+      is2D ? kernel_dims[0] * kernel_dims[1]
           : kernel_dims[0] * kernel_dims[1] * kernel_dims[2];
+  const int in_channels = is2D ? kernel_dims[2] : kernel_dims[3];
+  const int out_channels = is2D ? kernel_dims[3] : kernel_dims[4];
   int rulebook_len = 0;
   const IntT* rulebook_ptr = phi::funcs::sparse::GetRulebookPtr<IntT>(
@@ -324,7 +327,6 @@ void Conv3dCooGradKernel(const Context& dev_ctx,
                             kernel_grad);
       }));
 }
-
 }  // namespace sparse
 }  // namespace phi
......
@@ -85,8 +85,14 @@ void Conv3dCooGPUKernel(const GPUContext& dev_ctx,
   // if x.layout != NDHWC then transpose(x), transpose(weight)
   const auto& x_dims = x.dims();
   const auto& kernel_dims = kernel.dims();
-  int kernel_size = kernel_dims[0] * kernel_dims[1] * kernel_dims[2];
-  DDim out_dims = {1, 1, 1, 1, 1};
+  const bool is2D = x_dims.size() == 4 ? true : false;
+  int kernel_size = is2D ? kernel_dims[0] * kernel_dims[1]
                         : kernel_dims[0] * kernel_dims[1] * kernel_dims[2];
+
+  int rank = is2D ? 4 : 5;
+  std::vector<int> out_dims_vec(rank, 1);
+  DDim out_dims = make_ddim(out_dims_vec);
+
   std::vector<int> kernel_sizes(kernel_dims.size());
   for (int i = 0; i < kernel_dims.size(); i++) {
     kernel_sizes[i] = kernel_dims[i];
@@ -102,8 +108,8 @@ void Conv3dCooGPUKernel(const GPUContext& dev_ctx,
   phi::funcs::sparse::GetOutShape(
       x_dims, kernel_sizes, subm_paddings, dilations, subm_strides, &out_dims);
-  const int in_channels = kernel_dims[3];
-  const int out_channels = kernel_dims[4];
+  const int in_channels = is2D ? kernel_dims[2] : kernel_dims[3];
+  const int out_channels = is2D ? kernel_dims[3] : kernel_dims[4];
   DenseTensor h_counter, h_offsets;
   h_counter.Resize({kernel_size});
   h_offsets.Resize({kernel_size + 1});
@@ -118,7 +124,14 @@ void Conv3dCooGPUKernel(const GPUContext& dev_ctx,
   DenseTensor out_index = phi::Empty<int>(dev_ctx, {1});
   DenseTensor unique_value = phi::Empty<int>(dev_ctx, {1});

-  VLOG(6) << "call SubmConv3D or Conv3D " << subm << " and the key is " << key;
+  if (is2D) {
+    VLOG(6) << "call SubmConv2D or Conv2D " << subm << " and the key is "
+            << key;
+  } else {
+    VLOG(6) << "call SubmConv3D or Conv3D " << subm << " and the key is "
+            << key;
+  }
+
   int rulebook_len = 0;
   const IntT* rulebook_ptr = nullptr;
   bool need_product_rulebook = true;
@@ -313,7 +326,6 @@ void Conv3dCooKernel(const Context& dev_ctx,
                            counter);
       }));
 }
-
 }  // namespace sparse
 }  // namespace phi
......
@@ -14,14 +14,12 @@

 __all__ = []

-import paddle
 from paddle import _C_ops, in_dynamic_mode
 from paddle.fluid.layer_helper import LayerHelper
 from paddle.nn.functional.conv import _update_padding_nd
 from paddle.utils import convert_to_list

 from ...binary import add
-from ...unary import reshape


 def _conv3d(
@@ -148,20 +146,13 @@ def _conv2d(
         )

     channel_last = data_format == "NHWC"
-    n_dim = 0
     channel_dim = -1 if channel_last else 1
-    h_dim = 1 if channel_last else 2
-    w_dim = 2 if channel_last else -1
     if len(x.shape) != 4:
         raise ValueError(
             "Input x should be 4D tensor, but received x with the shape of {}".format(
                 x.shape
             )
         )
-    n = x.shape[n_dim]
-    d = 1
-    h = x.shape[h_dim]
-    w = x.shape[w_dim]
     num_channels = x.shape[channel_dim]
     if num_channels < 0:
         raise ValueError(
@@ -173,16 +164,6 @@ def _conv2d(
     stride = convert_to_list(stride, dims, 'stride')
     dilation = convert_to_list(dilation, dims, 'dilation')

-    padding.insert(0, 0)
-    stride.insert(0, 1)
-    dilation.insert(0, 1)
-    x = reshape(x, [n, d, h, w, num_channels])
-
-    h_filter = weight.shape[0]
-    w_filter = weight.shape[1]
-    c_filter = weight.shape[2]
-    m_filter = weight.shape[3]
-    weight = paddle.reshape(weight, [d, h_filter, w_filter, c_filter, m_filter])
-
     if in_dynamic_mode():
         pre_bias = _C_ops.sparse_conv3d(
             x,
@@ -217,11 +198,6 @@ def _conv2d(
         helper.append_op(
             type=op_type, inputs=inputs, outputs=outputs, attrs=attrs
         )
-        n_out = pre_bias.shape[0]
-        h_out = pre_bias.shape[2]
-        w_out = pre_bias.shape[3]
-        channels_out = pre_bias.shape[4]
-        pre_bias = reshape(pre_bias, [n_out, h_out, w_out, channels_out])
     if bias is not None:
         return add(pre_bias, bias)
     else:
......
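Note for reviewers: with the backend now handling 4D inputs natively, _conv2d no longer needs the old reshape-to-5D round trip; the 4D sparse COO tensor and the (H, W, C, OC) weight go straight into sparse_conv3d. A minimal dynamic-graph usage sketch, mirroring the data used in the new tests (assumes a Paddle build with sparse support; non-default conv2d arguments are omitted):

import paddle
from paddle import sparse

# (batch, H, W) coordinates of four nonzeros in a 1x3x4x1 NHWC input
indices = [[0, 0, 0, 0], [0, 0, 1, 2], [1, 3, 2, 3]]
values = [[1.0], [2.0], [3.0], [4.0]]
x = sparse.sparse_coo_tensor(indices, values, shape=[1, 3, 4, 1])

weight = paddle.ones([3, 3, 1, 1])  # (H, W, C, OC)
out = sparse.nn.functional.conv2d(x, weight, data_format="NHWC")
print(out.shape)  # [1, 1, 2, 1]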
@@ -378,6 +378,75 @@ class TestStatic(unittest.TestCase):
         self.assertTrue(out_indices.dtype == paddle.int32)
         paddle.disable_static()

+    def test_cpu(self):
+        paddle.enable_static()
+        main = paddle.static.Program()
+        with paddle.static.program_guard(main):
+            indices = paddle.static.data(
+                name='indices', shape=[4, 4], dtype='int32'
+            )
+            values = paddle.static.data(
+                name='values', shape=[4, 1], dtype='float32'
+            )
+            dense_shape = [1, 1, 3, 4, 1]
+            sp_x = sparse.sparse_coo_tensor(indices, values, dense_shape)
+
+            weight_shape = [1, 3, 3, 1, 1]
+            weight = paddle.static.data(
+                name='weight', shape=weight_shape, dtype='float32'
+            )
+            bias_shape = [1]
+            bias = paddle.static.data(
+                name='bias', shape=bias_shape, dtype='float32'
+            )
+            out = sparse.nn.functional.conv3d(
+                sp_x,
+                weight,
+                bias,
+                stride=1,
+                padding=0,
+                dilation=1,
+                groups=1,
+                data_format="NDHWC",
+            )
+            sp_out = sparse.nn.functional.relu(out)
+            out_indices = sp_out.indices()
+            out_values = sp_out.values()
+            out = sp_out.to_dense()
+
+            place = paddle.CPUPlace()
+            exe = paddle.static.Executor()
+            indices_data = [
+                [0, 0, 0, 0],
+                [0, 0, 0, 0],
+                [0, 0, 1, 2],
+                [1, 3, 2, 3],
+            ]
+            values_data = [[1.0], [2.0], [3.0], [4.0]]
+            weight_data = np.array(
+                [[[[[1], [1], [1]], [[1], [1], [1]], [[1], [1], [1]]]]]
+            ).astype('float32')
+            weight_data = weight_data.reshape(weight_shape)
+            bias_data = np.array([1]).astype('float32')
+
+            fetch = exe.run(
+                feed={
+                    'indices': indices_data,
+                    'values': values_data,
+                    'weight': weight_data,
+                    'bias': bias_data,
+                },
+                fetch_list=[out, out_indices, out_values],
+                return_numpy=True,
+            )
+            correct_out = np.array([[[[[5.0], [11.0]]]]]).astype('float64')
+            correct_out_values = [[5.0], [11.0]]
+            np.testing.assert_array_equal(correct_out, fetch[0])
+            np.testing.assert_array_equal(correct_out_values, fetch[2])
+            self.assertTrue(out_indices.dtype == paddle.int32)
+        paddle.disable_static()
+
     def test2D(self):
         paddle.enable_static()
         main = paddle.static.Program()
@@ -441,6 +510,70 @@ class TestStatic(unittest.TestCase):
         self.assertTrue(out_indices.dtype == paddle.int32)
         paddle.disable_static()

+    def test2D_cpu(self):
+        paddle.enable_static()
+        main = paddle.static.Program()
+        with paddle.static.program_guard(main):
+            indices = paddle.static.data(
+                name='indices', shape=[3, 4], dtype='int32'
+            )
+            values = paddle.static.data(
+                name='values', shape=[4, 1], dtype='float32'
+            )
+            dense_shape = [1, 3, 4, 1]
+            sp_x = sparse.sparse_coo_tensor(indices, values, dense_shape)
+
+            weight_shape = [3, 3, 1, 1]
+            weight = paddle.static.data(
+                name='weight', shape=weight_shape, dtype='float32'
+            )
+            bias_shape = [1]
+            bias = paddle.static.data(
+                name='bias', shape=bias_shape, dtype='float32'
+            )
+            out = sparse.nn.functional.conv2d(
+                sp_x,
+                weight,
+                bias,
+                stride=1,
+                padding=0,
+                dilation=1,
+                groups=1,
+                data_format="NHWC",
+            )
+            sp_out = sparse.nn.functional.relu(out)
+            out_indices = sp_out.indices()
+            out_values = sp_out.values()
+            out = sp_out.to_dense()
+
+            place = paddle.CPUPlace()
+            exe = paddle.static.Executor()
+            indices_data = [[0, 0, 0, 0], [0, 0, 1, 2], [1, 3, 2, 3]]
+            values_data = [[1.0], [2.0], [3.0], [4.0]]
+            weight_data = np.array(
+                [[[[[1], [1], [1]], [[1], [1], [1]], [[1], [1], [1]]]]]
+            ).astype('float32')
+            weight_data = weight_data.reshape(weight_shape)
+            bias_data = np.array([1]).astype('float32')
+
+            fetch = exe.run(
+                feed={
+                    'indices': indices_data,
+                    'values': values_data,
+                    'weight': weight_data,
+                    'bias': bias_data,
+                },
+                fetch_list=[out, out_indices, out_values],
+                return_numpy=True,
+            )
+            correct_out = np.array([[[[5.0], [11.0]]]]).astype('float64')
+            correct_out_values = [[5.0], [11.0]]
+            np.testing.assert_array_equal(correct_out, fetch[0])
+            np.testing.assert_array_equal(correct_out_values, fetch[2])
+            self.assertTrue(out_indices.dtype == paddle.int32)
+        paddle.disable_static()
+

 if __name__ == "__main__":
     unittest.main()
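Note for reviewers: the expected values in test2D_cpu can be checked by hand. Densifying the input gives a 3x4 grid with 1 at (0,1), 2 at (0,3), 3 at (1,2), and 4 at (2,3); a valid 3x3 all-ones convolution plus bias 1 yields [5, 11]. The same check in NumPy:

import numpy as np

dense = np.zeros((3, 4), dtype=np.float32)
for (h, w), v in zip([(0, 1), (0, 3), (1, 2), (2, 3)], [1.0, 2.0, 3.0, 4.0]):
    dense[h, w] = v

kernel = np.ones((3, 3), dtype=np.float32)
bias = 1.0
# valid 3x3 convolution over a 3x4 input -> 1x2 output
out = [(dense[0:3, j:j + 3] * kernel).sum() + bias for j in range(2)]
print(out)  # [5.0, 11.0]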