From 4a25b60d1fce1cb11b0bcef7d5a311f35f018955 Mon Sep 17 00:00:00 2001 From: Charles-hit <56987902+Charles-hit@users.noreply.github.com> Date: Wed, 31 Aug 2022 10:35:10 +0800 Subject: [PATCH] Fix split api bug (#45396) * fix split bug * solve function redefine * fix fluid.layers.split and add unit test * delete splitInferMeta register in unary.cc * modify test_split_op GPU unit test * modify test_split_op GPU unit test place param * refactor split op and fix infershape bugs * add () in && and || * fix split C++ unit test * fix split infershape --- .../generator/codegen_utils.py | 20 +- paddle/fluid/operators/split_op.cc | 97 +++++--- paddle/phi/api/yaml/legacy_api.yaml | 18 +- paddle/phi/api/yaml/legacy_backward.yaml | 6 + paddle/phi/common/int_array.cc | 1 - paddle/phi/infermeta/unary.cc | 227 +++++++++++------- paddle/phi/infermeta/unary.h | 17 +- paddle/phi/kernels/cpu/split_kernel.cc | 61 ++--- paddle/phi/kernels/gpu/split_kernel.cu | 60 ++--- paddle/phi/kernels/impl/split_kernel_impl.h | 64 +++++ paddle/phi/kernels/split_kernel.h | 54 ++++- paddle/phi/ops/compat/split_sig.cc | 5 +- .../phi/tests/kernels/test_split_dev_api.cc | 48 +++- python/paddle/fluid/layers/nn.py | 5 +- .../fluid/tests/unittests/test_split_op.py | 108 +++++++++ python/paddle/tensor/manipulation.py | 6 +- 16 files changed, 553 insertions(+), 244 deletions(-) create mode 100644 paddle/phi/kernels/impl/split_kernel_impl.h diff --git a/paddle/fluid/eager/auto_code_generator/generator/codegen_utils.py b/paddle/fluid/eager/auto_code_generator/generator/codegen_utils.py index 1dbea53b379..45895791128 100644 --- a/paddle/fluid/eager/auto_code_generator/generator/codegen_utils.py +++ b/paddle/fluid/eager/auto_code_generator/generator/codegen_utils.py @@ -22,16 +22,16 @@ import os ### Global Variables ### ######################## ops_to_fill_zero_for_empty_grads = set([ - "split_grad", "rnn_grad", "matmul_double_grad", "matmul_triple_grad", - "sigmoid_double_grad", "sigmoid_triple_grad", "add_double_grad", - "add_triple_grad", "multiply_grad", "multiply_double_grad", - "multiply_triple_grad", "conv2d_grad_grad", "batch_norm_double_grad", - "tanh_double_grad", "tanh_triple_grad", "subtract_double_grad", - "divide_double_grad", "log_double_grad", "elu_double_grad", - "leaky_relu_double_grad", "sqrt_double_grad", "rsqrt_double_grad", - "square_double_grad", "celu_double_grad", "pad_double_grad", - "pad3d_double_grad", "squeeze_double_grad", "unsqueeze_double_grad", - "instance_norm_double_grad", "conv3d_double_grad", + "split_grad", "split_with_num_grad", "rnn_grad", "matmul_double_grad", + "matmul_triple_grad", "sigmoid_double_grad", "sigmoid_triple_grad", + "add_double_grad", "add_triple_grad", "multiply_grad", + "multiply_double_grad", "multiply_triple_grad", "conv2d_grad_grad", + "batch_norm_double_grad", "tanh_double_grad", "tanh_triple_grad", + "subtract_double_grad", "divide_double_grad", "log_double_grad", + "elu_double_grad", "leaky_relu_double_grad", "sqrt_double_grad", + "rsqrt_double_grad", "square_double_grad", "celu_double_grad", + "pad_double_grad", "pad3d_double_grad", "squeeze_double_grad", + "unsqueeze_double_grad", "instance_norm_double_grad", "conv3d_double_grad", "depthwise_conv2d_grad_grad", "concat_double_grad", "expand_grad", "argsort_grad" ]) diff --git a/paddle/fluid/operators/split_op.cc b/paddle/fluid/operators/split_op.cc index 65d4e6c28b0..e5c59575a74 100644 --- a/paddle/fluid/operators/split_op.cc +++ b/paddle/fluid/operators/split_op.cc @@ -21,7 +21,9 @@ limitations under the License. 
*/ namespace paddle { namespace operators { +using framework::LoDTensor; using framework::Tensor; +using framework::Variable; class SplitOp : public framework::OperatorWithKernel { public: @@ -36,47 +38,72 @@ class SplitOp : public framework::OperatorWithKernel { 1UL, platform::errors::InvalidArgument( "Outputs(Out) of SplitOp should not be empty.")); - auto in_dims = ctx->GetInputDim("X"); - auto outs_names = ctx->Outputs("Out"); - size_t axis = static_cast(ctx->Attrs().Get("axis")); - size_t num = static_cast(ctx->Attrs().Get("num")); + int axis = static_cast(ctx->Attrs().Get("axis")); + int num = static_cast(ctx->Attrs().Get("num")); std::vector sections = static_cast>( ctx->Attrs().Get>("sections")); - const size_t outs_number = outs_names.size(); - - if (sections.size() > 0) { - PADDLE_ENFORCE_EQ( - sections.size(), - outs_number, - platform::errors::InvalidArgument("tensor split sections size " - "should be equal to output size.")); + // Construct MetaTensor for InferMeta Func + using CompatMetaTensor = framework::CompatMetaTensor; + CompatMetaTensor x(ctx->GetInputVarPtrs("X")[0], ctx->IsRuntime()); + std::vector out; + size_t out_size = ctx->GetOutputVarPtrs("Out").size(); + out.reserve(out_size); + for (size_t i = 0; i < out_size; i++) { + out.emplace_back( + CompatMetaTensor(ctx->GetOutputVarPtrs("Out")[i], ctx->IsRuntime())); + } + std::vector out_ptr(out_size); + for (size_t i = 0; i < out_size; i++) { + out_ptr[i] = &out[i]; + } + phi::Scalar axis_final; + phi::IntArray sections_final; + // Construct axis_final + if (ctx->IsRuntime() && ctx->HasInput("AxisTensor")) { + Variable *var = + PADDLE_GET_CONST(Variable *, ctx->GetInputVarPtrs("AxisTensor")[0]); + axis_final = std::move(experimental::MakePhiScalarFromVar(*var)); + } else if (!ctx->IsRuntime() && ctx->HasInput("AxisTensor")) { + axis_final = std::move(phi::Scalar(-1)); + axis_final.SetFromTensor(true); + } else { + axis_final = std::move(phi::Scalar(axis)); } - if (ctx->HasInput("AxisTensor")) { - auto out_dims = phi::make_ddim(std::vector(in_dims.size(), -1)); - std::vector outs_dims(outs_number, out_dims); - ctx->SetOutputsDim("Out", outs_dims); - for (size_t i = 0; i < outs_number; ++i) { - ctx->ShareLoD("X", "Out", 0, i); + // Construct sections_final + if (ctx->IsRuntime() && ctx->HasInputs("SectionsTensorList")) { + int sections_tensor_list_size = + ctx->GetInputVarPtrs("SectionsTensorList").size(); + const paddle::small_vector + §ions_varptr_list = ctx->GetInputVarPtrs("SectionsTensorList"); + std::vector sections_from_tensor; + sections_from_tensor.reserve(sections_tensor_list_size); + for (const auto §ion_varptr : sections_varptr_list) { + Variable *var = PADDLE_GET_CONST(Variable *, section_varptr); + sections_from_tensor.emplace_back(var->Get()); } - return; + sections_final = std::move(phi::IntArray(sections_from_tensor)); + } else if (!ctx->IsRuntime() && ctx->HasInputs("SectionsTensorList")) { + sections_final = std::move(phi::IntArray(std::vector( + ctx->GetInputVarPtrs("SectionsTensorList").size(), -1))); + sections_final.SetFromTensor(true); + } else { + sections_final = std::move(phi::IntArray(sections)); } - - bool each_section_is_known = - (sections.size() > 0 && !ctx->HasInputs("SectionsTensorList")); - - auto outs_dims = UpdateOutsDims(ctx->IsRuntime(), - each_section_is_known, - in_dims, - num, - sections, - axis, - outs_number); - ctx->SetOutputsDim("Out", outs_dims); - if (axis != 0) { - // Only pass LoD when not spliting along the first dim. 
- for (size_t i = 0; i < outs_number; ++i) { - ctx->ShareLoD("X", "Out", 0, i); + if (sections.size() > 0) { + if (ctx->IsRuntime()) { + phi::SplitInferMeta( + x, sections_final, axis_final, out_ptr, {true, false}); + } else { + phi::SplitInferMeta( + x, sections_final, axis_final, out_ptr, {false, false}); + } + } else { + if (ctx->IsRuntime()) { + phi::SplitWithNumInferMeta(x, num, axis_final, out_ptr, {true, false}); + } else { + phi::SplitWithNumInferMeta(x, num, axis_final, out_ptr, {false, false}); } } } diff --git a/paddle/phi/api/yaml/legacy_api.yaml b/paddle/phi/api/yaml/legacy_api.yaml index 0012f8e4265..7b0257f320b 100755 --- a/paddle/phi/api/yaml/legacy_api.yaml +++ b/paddle/phi/api/yaml/legacy_api.yaml @@ -2501,11 +2501,23 @@ backward : spectral_norm_grad - api : split - args : (Tensor x, IntArray num_or_sections, Scalar(int) axis) - output : Tensor[] - invoke : split_impl(x, num_or_sections, axis) + args : (Tensor x, IntArray sections, Scalar(int) axis) + output : Tensor[]{sections.size()} + infer_meta : + func : SplitInferMeta + kernel : + func : split backward : split_grad +- api : split_with_num + args : (Tensor x, int num, Scalar(int) axis) + output : Tensor[]{num} + infer_meta : + func : SplitWithNumInferMeta + kernel : + func : split_with_num + backward : split_with_num_grad + - api : sqrt args : (Tensor x) output : Tensor(out) diff --git a/paddle/phi/api/yaml/legacy_backward.yaml b/paddle/phi/api/yaml/legacy_backward.yaml index 465b43786c1..6cf8fdeba56 100755 --- a/paddle/phi/api/yaml/legacy_backward.yaml +++ b/paddle/phi/api/yaml/legacy_backward.yaml @@ -2271,6 +2271,12 @@ args : (Tensor[] out_grad, Scalar axis = -1) output : Tensor(x_grad) invoke : concat( out_grad, axis) + +- backward_api : split_with_num_grad + forward : split_with_num (Tensor x, int num, Scalar axis) -> Tensor[](out) + args : (Tensor[] out_grad, Scalar axis = -1) + output : Tensor(x_grad) + invoke : concat( out_grad, axis) # TODO(zhangyunfei) The config of double grad and triple grad will be supported in the future. 
- backward_api : sqrt_double_grad diff --git a/paddle/phi/common/int_array.cc b/paddle/phi/common/int_array.cc index 81701ee010c..4aadae48c15 100644 --- a/paddle/phi/common/int_array.cc +++ b/paddle/phi/common/int_array.cc @@ -37,7 +37,6 @@ template <> IntArrayBase::IntArrayBase( const std::vector& tensor_list) { is_from_tensor_ = true; - for (size_t i = 0; i < tensor_list.size(); ++i) { DataType data_type = tensor_list[i].dtype(); switch (data_type) { diff --git a/paddle/phi/infermeta/unary.cc b/paddle/phi/infermeta/unary.cc index b6e2d9eaf57..5583ba37a2e 100644 --- a/paddle/phi/infermeta/unary.cc +++ b/paddle/phi/infermeta/unary.cc @@ -3084,81 +3084,122 @@ void SoftmaxInferMeta(const MetaTensor& x, int axis, MetaTensor* out) { out->share_lod(x); } +int GetSplitAxisValue(const MetaTensor& x, + const Scalar& axis, + MetaConfig config) { + // Tensor has no value in static graph compile time + if (axis.FromTensor() && !config.is_runtime) { + return -1; + } else { + if (axis.dtype() == DataType::FLOAT32 || + axis.dtype() == DataType::FLOAT64) { + PADDLE_THROW( + phi::errors::InvalidArgument("%s(): argument (position 3) must be " + "int, but got %s", + "split", + "float")); // NOLINT + } + int axis_value = axis.to(); + int rank = x.dims().size(); + PADDLE_ENFORCE_EQ( + axis_value >= -rank && axis_value < rank, + true, + phi::errors::InvalidArgument( + "The axis is expected to be in range of [%d, %d), but got %d", + -rank, + rank, + axis_value)); + if (axis_value < 0) { + axis_value = axis_value + rank; + } + return axis_value; + } +} + +void FillSplitOutDims(const MetaTensor& x, + const int axis_value, + const std::vector& sections_vec, + std::vector* out) { + std::vector out_dims(sections_vec.size(), x.dims()); + if (x.dims().at(axis_value) > 0) { + for (size_t i = 0; i < sections_vec.size(); ++i) { + out_dims[i][axis_value] = sections_vec[i]; + } + } else { + for (size_t i = 0; i < sections_vec.size(); ++i) { + out_dims[i][axis_value] = -1; + } + } + for (size_t i = 0; i < sections_vec.size(); ++i) { + if (axis_value != 0) { + // Only pass LoD when not spliting along the first dim. 
+ (*out)[i]->set_dtype(x.dtype()); + (*out)[i]->set_dims(out_dims[i]); + (*out)[i]->set_layout(x.layout()); + } else { + (*out)[i]->set_dtype(x.dtype()); + (*out)[i]->set_dims(out_dims[i]); + (*out)[i]->set_layout(x.layout()); + (*out)[i]->share_lod(x); + } + } +} + void SplitInferMeta(const MetaTensor& x, - const IntArray& num_or_sections, + const IntArray& sections, const Scalar& axis, std::vector out, MetaConfig config) { - if (axis.dtype() == DataType::FLOAT32 || axis.dtype() == DataType::FLOAT64) { - PADDLE_THROW( - phi::errors::InvalidArgument("%s(): argument (position 3) must be " - "int, but got %s", - "split", - "float")); // NOLINT - } - int axis_value = axis.to(); - int rank = x.dims().size(); - PADDLE_ENFORCE_EQ( - axis_value >= -rank && axis_value < rank, - true, - phi::errors::InvalidArgument( - "The axis is expected to be in range of [%d, %d), but got %d", - -rank, - rank, - axis_value)); - if (axis_value < 0) { - axis_value = axis_value + rank; - } - - auto input_axis_dim = x.dims().at(axis_value); - auto num_or_sections_data = num_or_sections.GetData(); - // step1: get formated sections - std::vector sections; - // num_or_sections is a number - if (num_or_sections_data.size() == 1 && num_or_sections_data[0] > 0) { - int num = num_or_sections_data.at(0); - - PADDLE_ENFORCE_EQ(input_axis_dim % num, - 0, - phi::errors::InvalidArgument( - "The input's size along the split dimension " - "must be evenly divisible by Attr(num_or_sections). " - "But received Attr(num_or_sections) " - "= %d, input(X)'s shape = [%s], Attr(dim) = %d.", - num, - x.dims(), - axis_value)); - - for (int i = 0; i < num; ++i) { - sections.push_back(input_axis_dim / num); + // get axis value + int axis_value = GetSplitAxisValue(x, axis, config); + + auto sections_data = sections.GetData(); + // fill out dims with -1 + if ((sections.FromTensor() && !config.is_runtime) || axis_value == -1 || + (axis_value >= 0 && x.dims().at(axis_value) <= 0)) { + std::vector out_dims( + sections_data.size(), + phi::make_ddim(std::vector(x.dims().size(), -1))); + + for (size_t i = 0; i < sections_data.size(); ++i) { + if (axis_value != 0) { + // Only pass LoD when not spliting along the first dim. + out[i]->set_dtype(x.dtype()); + out[i]->set_dims(out_dims[i]); + out[i]->set_layout(x.layout()); + } else { + out[i]->set_dtype(x.dtype()); + out[i]->set_dims(out_dims[i]); + out[i]->set_layout(x.layout()); + out[i]->share_lod(x); + } } } else { - // num_or_sections is a sections + auto input_axis_dim = x.dims().at(axis_value); + std::vector sections_vec; const int unknow_dim_val = -1; int unknow_dim_idx = -1; int num_of_unknow = 0; int sum_of_section = 0; - for (size_t i = 0; i < num_or_sections_data.size(); ++i) { - sections.push_back(num_or_sections_data[i]); + for (size_t i = 0; i < sections_data.size(); ++i) { + sections_vec.push_back(sections_data[i]); - if (num_or_sections_data[i] == unknow_dim_val) { + if (sections_data[i] == unknow_dim_val) { num_of_unknow++; unknow_dim_idx = i; } else { - sum_of_section += num_or_sections_data[i]; + sum_of_section += sections_data[i]; } } - if (config.is_runtime) { - PADDLE_ENFORCE_LE(num_of_unknow, - 1, - phi::errors::InvalidArgument( - "Only one dimension value of Attr(num_or_sections) " - "in SplitOp can be -1. " - "But received Attr(num_or_sections) = [%s].", - phi::make_ddim(num_or_sections_data))); - } + PADDLE_ENFORCE_LE(num_of_unknow, + 1, + phi::errors::InvalidArgument( + "Only one dimension value of Attr(num_or_sections) " + "in SplitOp can be -1. 
" + "But received Attr(num_or_sections) = [%s].", + phi::make_ddim(sections_data))); if (unknow_dim_idx != -1) { // for example, input shape = [4 ,5], axis = 1, sections = [2, 3, -1]. @@ -3173,13 +3214,11 @@ void SplitInferMeta(const MetaTensor& x, "size " "along the split dimension. But received Attr(num_or_sections) " "= [%s], input(X)'s shape = [%s], Attr(dim) = %d.", - phi::make_ddim(num_or_sections_data), + phi::make_ddim(sections_data), x.dims(), axis_value)); - if (config.is_runtime) { - sections[unknow_dim_idx] = input_axis_dim - sum_of_section; - } + sections_vec[unknow_dim_idx] = input_axis_dim - sum_of_section; } else { PADDLE_ENFORCE_EQ( sum_of_section, @@ -3189,36 +3228,59 @@ void SplitInferMeta(const MetaTensor& x, "size " "along the split dimension. But received Attr(num_or_sections)" " = [%s], input(X)'s shape = [%s], Attr(dim) = %d.", - phi::make_ddim(num_or_sections_data), + phi::make_ddim(sections_data), x.dims(), axis_value)); } + // fill out dims + FillSplitOutDims(x, axis_value, sections_vec, &out); } +} + +void SplitWithNumInferMeta(const MetaTensor& x, + int num, + const Scalar& axis, + std::vector out, + MetaConfig config) { + int axis_value = GetSplitAxisValue(x, axis, config); + // fill out dims with -1 + if (axis_value == -1 || (axis_value >= 0 && x.dims().at(axis_value) <= 0)) { + std::vector out_dims( + num, phi::make_ddim(std::vector(x.dims().size(), -1))); - // setp2: fill out dims - std::vector out_dims(sections.size(), x.dims()); - if (config.is_runtime || input_axis_dim > 0) { - for (size_t i = 0; i < sections.size(); ++i) { - out_dims[i][axis_value] = sections[i]; + for (int i = 0; i < num; ++i) { + if (axis_value != 0) { + // Only pass LoD when not spliting along the first dim. + out[i]->set_dtype(x.dtype()); + out[i]->set_dims(out_dims[i]); + out[i]->set_layout(x.layout()); + } else { + out[i]->set_dtype(x.dtype()); + out[i]->set_dims(out_dims[i]); + out[i]->set_layout(x.layout()); + out[i]->share_lod(x); + } } } else { - for (size_t i = 0; i < sections.size(); ++i) { - out_dims[i][axis_value] = -1; - } - } + auto input_axis_dim = x.dims().at(axis_value); + // step1: get formated sections + std::vector sections_vec; + PADDLE_ENFORCE_EQ(input_axis_dim % num, + 0, + phi::errors::InvalidArgument( + "The input's size along the split dimension " + "must be evenly divisible by Attr(num_or_sections). " + "But received Attr(num_or_sections) " + "= %d, input(X)'s shape = [%s], Attr(dim) = %d.", + num, + x.dims(), + axis_value)); - for (size_t i = 0; i < sections.size(); ++i) { - if (axis_value != 0) { - // Only pass LoD when not spliting along the first dim. 
- out[i]->set_dtype(x.dtype()); - out[i]->set_dims(out_dims[i]); - out[i]->set_layout(x.layout()); - } else { - out[i]->set_dtype(x.dtype()); - out[i]->set_dims(out_dims[i]); - out[i]->set_layout(x.layout()); - out[i]->share_lod(x); + for (int i = 0; i < num; ++i) { + sections_vec.push_back(input_axis_dim / num); } + // setp2: fill out dims + FillSplitOutDims(x, axis_value, sections_vec, &out); } } @@ -4623,4 +4685,3 @@ void FoldInferMeta(const MetaTensor& x, } // namespace phi PD_REGISTER_INFER_META_FN(flatten, phi::FlattenInferMeta); -PD_REGISTER_INFER_META_FN(split, phi::SplitInferMeta); diff --git a/paddle/phi/infermeta/unary.h b/paddle/phi/infermeta/unary.h index 1e7a65e9be8..031411f925b 100644 --- a/paddle/phi/infermeta/unary.h +++ b/paddle/phi/infermeta/unary.h @@ -452,12 +452,27 @@ void SliceRawInferMeta(const MetaTensor& input, void SoftmaxInferMeta(const MetaTensor& x, int axis, MetaTensor* out); +int GetSplitAxisValue(const MetaTensor& x, + const Scalar& axis, + MetaConfig config); + +void FillSplitOutDims(const MetaTensor& x, + const int axis_value, + const std::vector& sections_vec, + std::vector* out); + void SplitInferMeta(const MetaTensor& x_meta, - const IntArray& num_or_sections, + const IntArray& sections, const Scalar& axis, std::vector out, MetaConfig config = MetaConfig()); +void SplitWithNumInferMeta(const MetaTensor& x_meta, + int num, + const Scalar& axis, + std::vector out, + MetaConfig config = MetaConfig()); + void SquaredL2NormInferMeta(const MetaTensor& x, MetaTensor* out); void SqueezeInferMeta(const MetaTensor& x, diff --git a/paddle/phi/kernels/cpu/split_kernel.cc b/paddle/phi/kernels/cpu/split_kernel.cc index 6034949cd81..f277e0c39f3 100644 --- a/paddle/phi/kernels/cpu/split_kernel.cc +++ b/paddle/phi/kernels/cpu/split_kernel.cc @@ -14,54 +14,9 @@ #include "paddle/phi/kernels/split_kernel.h" -#include "paddle/fluid/operators/strided_memcpy.h" #include "paddle/phi/common/float16.h" #include "paddle/phi/core/kernel_registry.h" -#include "paddle/phi/infermeta/unary.h" -#include "paddle/phi/kernels/funcs/concat_and_split_functor.h" -namespace phi { - -template -void SplitKernel(const Context& dev_ctx, - const DenseTensor& x, - const IntArray& num_or_sections, - const Scalar& axis_scalar, - std::vector outs) { - // need to infershape output - if (num_or_sections.FromTensor() || axis_scalar.FromTensor()) { - std::vector out_metas; - out_metas.reserve(outs.size()); - std::vector out_metas_ptr; - for (size_t i = 0; i < outs.size(); ++i) { - out_metas.push_back(outs[i]); - out_metas_ptr.push_back(&out_metas.back()); - } - - phi::SplitInferMeta(x, num_or_sections, axis_scalar, out_metas_ptr); - - for (size_t i = 0; i < out_metas.size(); ++i) { - outs[i]->Resize(out_metas[i].dims()); - } - } - - std::vector shape_refer; - for (size_t j = 0; j < outs.size(); ++j) { - dev_ctx.template Alloc(outs[j]); - shape_refer.emplace_back(outs[j]); - } - - int axis = axis_scalar.to(); - // Sometimes direct copies will be faster, this maybe need deeply analysis. 
- if (axis == 0 && outs.size() < 10) { - paddle::operators::StridedMemcpyWithAxis0( - dev_ctx, x, shape_refer, &outs); - } else { - phi::funcs::SplitFunctor functor; - functor(dev_ctx, x, shape_refer, axis, &outs); - } -} - -} // namespace phi +#include "paddle/phi/kernels/impl/split_kernel_impl.h" PD_REGISTER_KERNEL(split, CPU, @@ -76,3 +31,17 @@ PD_REGISTER_KERNEL(split, int8_t, phi::dtype::float16, phi::dtype::bfloat16) {} + +PD_REGISTER_KERNEL(split_with_num, + CPU, + ALL_LAYOUT, + phi::SplitWithNumKernel, + float, + double, + int64_t, + int, + bool, + uint8_t, + int8_t, + phi::dtype::float16, + phi::dtype::bfloat16) {} diff --git a/paddle/phi/kernels/gpu/split_kernel.cu b/paddle/phi/kernels/gpu/split_kernel.cu index 1e855905ae0..13373462136 100644 --- a/paddle/phi/kernels/gpu/split_kernel.cu +++ b/paddle/phi/kernels/gpu/split_kernel.cu @@ -14,53 +14,9 @@ #include "paddle/phi/kernels/split_kernel.h" -#include "paddle/fluid/operators/strided_memcpy.h" #include "paddle/phi/common/float16.h" #include "paddle/phi/core/kernel_registry.h" -#include "paddle/phi/kernels/funcs/concat_and_split_functor.h" -namespace phi { - -template -void SplitKernel(const Context& dev_ctx, - const DenseTensor& x, - const IntArray& num_or_sections, - const Scalar& axis_scalar, - std::vector outs) { - // need to infershape output - if (num_or_sections.FromTensor() || axis_scalar.FromTensor()) { - std::vector out_metas; - out_metas.reserve(outs.size()); - std::vector out_metas_ptr; - for (size_t i = 0; i < outs.size(); ++i) { - out_metas.push_back(outs[i]); - out_metas_ptr.push_back(&out_metas.back()); - } - - phi::SplitInferMeta(x, num_or_sections, axis_scalar, out_metas_ptr); - - for (size_t i = 0; i < out_metas.size(); ++i) { - outs[i]->Resize(out_metas[i].dims()); - } - } - - std::vector shape_refer; - for (size_t j = 0; j < outs.size(); ++j) { - dev_ctx.template Alloc(outs[j]); - shape_refer.emplace_back(outs[j]); - } - - int axis = axis_scalar.to(); - // Sometimes direct copies will be faster, this maybe need deeply analysis. - if (axis == 0 && outs.size() < 10) { - paddle::operators::StridedMemcpyWithAxis0( - dev_ctx, x, shape_refer, &outs); - } else { - phi::funcs::SplitFunctor functor; - functor(dev_ctx, x, shape_refer, axis, &outs); - } -} - -} // namespace phi +#include "paddle/phi/kernels/impl/split_kernel_impl.h" PD_REGISTER_KERNEL(split, GPU, @@ -75,3 +31,17 @@ PD_REGISTER_KERNEL(split, int8_t, phi::dtype::float16, phi::dtype::bfloat16) {} + +PD_REGISTER_KERNEL(split_with_num, + GPU, + ALL_LAYOUT, + phi::SplitWithNumKernel, + float, + double, + int64_t, + int, + bool, + uint8_t, + int8_t, + phi::dtype::float16, + phi::dtype::bfloat16) {} diff --git a/paddle/phi/kernels/impl/split_kernel_impl.h b/paddle/phi/kernels/impl/split_kernel_impl.h new file mode 100644 index 00000000000..6f43e8ea143 --- /dev/null +++ b/paddle/phi/kernels/impl/split_kernel_impl.h @@ -0,0 +1,64 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#pragma once +#include "paddle/phi/kernels/split_kernel.h" + +#include "paddle/fluid/operators/strided_memcpy.h" +#include "paddle/phi/common/int_array.h" +#include "paddle/phi/common/scalar.h" +#include "paddle/phi/core/dense_tensor.h" +#include "paddle/phi/kernels/funcs/concat_and_split_functor.h" + +namespace phi { +template +void SplitKernel(const Context& dev_ctx, + const DenseTensor& x, + const IntArray& sections, + const Scalar& axis_scalar, + std::vector outs) { + std::vector shape_refer; + for (size_t j = 0; j < outs.size(); ++j) { + dev_ctx.template Alloc(outs[j]); + shape_refer.emplace_back(outs[j]); + } + + int axis = axis_scalar.to(); + // Sometimes direct copies will be faster, this maybe need deeply analysis. + if (axis == 0 && outs.size() < 10) { + paddle::operators::StridedMemcpyWithAxis0( + dev_ctx, x, shape_refer, &outs); + } else { + phi::funcs::SplitFunctor functor; + functor(dev_ctx, x, shape_refer, axis, &outs); + } +} + +template +void SplitWithNumKernel(const Context& dev_ctx, + const DenseTensor& x, + int num, + const Scalar& axis_scalar, + std::vector outs) { + int axis_value = axis_scalar.to(); + auto input_axis_dim = x.dims().at(axis_value); + std::vector sections_vec; + for (int i = 0; i < num; ++i) { + sections_vec.push_back(input_axis_dim / num); + } + IntArray sections(sections_vec); + SplitKernel(dev_ctx, x, sections, axis_scalar, outs); +} + +} // namespace phi diff --git a/paddle/phi/kernels/split_kernel.h b/paddle/phi/kernels/split_kernel.h index 1a426472c02..32d6cb4b64d 100644 --- a/paddle/phi/kernels/split_kernel.h +++ b/paddle/phi/kernels/split_kernel.h @@ -18,42 +18,70 @@ #include "paddle/phi/common/scalar.h" #include "paddle/phi/core/dense_tensor.h" #include "paddle/phi/infermeta/unary.h" -#include "paddle/phi/kernels/empty_kernel.h" namespace phi { template void SplitKernel(const Context& dev_ctx, const DenseTensor& x, - const IntArray& num_or_sections, + const IntArray& sections, const Scalar& axis, std::vector out); +template +void SplitWithNumKernel(const Context& dev_ctx, + const DenseTensor& x, + int num, + const Scalar& axis, + std::vector out); + template std::vector Split(const Context& dev_ctx, const DenseTensor& x, - const IntArray& num_or_sections, + const IntArray& sections, const Scalar& axis) { size_t out_number; - if (num_or_sections.GetData().size() == 1) { - out_number = num_or_sections.GetData()[0]; - } else { - out_number = num_or_sections.GetData().size(); + out_number = sections.GetData().size(); + + std::vector out_meta; + std::vector out_meta_ptr; + out_meta.reserve(out_number); + out_meta_ptr.reserve(out_number); + std::vector result(out_number); + + for (size_t i = 0; i < out_number; ++i) { + out_meta.emplace_back(&result[i]); + out_meta_ptr.push_back(&out_meta.back()); } + SplitInferMeta(x, sections, axis, out_meta_ptr); + std::vector outs; + outs.reserve(out_meta.size()); + for (size_t i = 0; i < out_meta.size(); ++i) { + outs.push_back(&result[i]); + } + + SplitKernel(dev_ctx, x, sections, axis, outs); + return result; +} + +template +std::vector SplitWithNum(const Context& dev_ctx, + const DenseTensor& x, + int num, + const Scalar& axis) { + size_t out_number = num; std::vector out_meta; std::vector out_meta_ptr; out_meta.reserve(out_number); out_meta_ptr.reserve(out_number); - std::vector result; - result.reserve(out_number); + std::vector result(out_number); for (size_t i = 0; i < out_number; ++i) { - result.emplace_back(DenseTensor()); - out_meta.emplace_back(&result.back()); + out_meta.emplace_back(&result[i]); 
out_meta_ptr.push_back(&out_meta.back()); } - SplitInferMeta(x, num_or_sections, axis, out_meta_ptr); + SplitWithNumInferMeta(x, num, axis, out_meta_ptr); std::vector outs; outs.reserve(out_meta.size()); @@ -61,7 +89,7 @@ std::vector Split(const Context& dev_ctx, outs.push_back(&result[i]); } - SplitKernel(dev_ctx, x, num_or_sections, axis, outs); + SplitWithNumKernel(dev_ctx, x, num, axis, outs); return result; } diff --git a/paddle/phi/ops/compat/split_sig.cc b/paddle/phi/ops/compat/split_sig.cc index b3a614aab00..3dcbd84d09e 100644 --- a/paddle/phi/ops/compat/split_sig.cc +++ b/paddle/phi/ops/compat/split_sig.cc @@ -21,9 +21,10 @@ KernelSignature SplitOpArgumentMapping(const ArgumentMappingContext& ctx) { // priority: AxisTensor > axis if (paddle::any_cast(ctx.Attr("num")) > 0) { if (ctx.HasInput("AxisTensor")) { - return KernelSignature("split", {"X"}, {"num", "AxisTensor"}, {"Out"}); + return KernelSignature( + "split_with_num", {"X"}, {"num", "AxisTensor"}, {"Out"}); } else { - return KernelSignature("split", {"X"}, {"num", "axis"}, {"Out"}); + return KernelSignature("split_with_num", {"X"}, {"num", "axis"}, {"Out"}); } } diff --git a/paddle/phi/tests/kernels/test_split_dev_api.cc b/paddle/phi/tests/kernels/test_split_dev_api.cc index 0389ab7afba..f0e16e214d2 100644 --- a/paddle/phi/tests/kernels/test_split_dev_api.cc +++ b/paddle/phi/tests/kernels/test_split_dev_api.cc @@ -22,6 +22,7 @@ limitations under the License. */ #include "paddle/phi/core/dense_tensor.h" #include "paddle/phi/core/kernel_registry.h" #include "paddle/phi/kernels/split_kernel.h" + namespace phi { namespace tests { @@ -40,14 +41,12 @@ TEST(DEV_API, split) { dev_ctx.SetAllocator(paddle::memory::allocation::AllocatorFacade::Instance() .GetAllocator(paddle::platform::CPUPlace()) .get()); - auto* dense_x_data = dev_ctx.Alloc(&dense_x); for (size_t i = 0; i < 4; ++i) { for (size_t j = 0; j < 10; ++j) { dense_x_data[i * 10 + j] = (i * 10 + j) * 1.0; } } - // 2. test API auto out = phi::Split(dev_ctx, dense_x, {2, 2}, 0); @@ -76,5 +75,50 @@ TEST(DEV_API, split) { } } +TEST(DEV_API, split_with_num) { + // 1. create tensor + const auto alloc = + std::make_unique(phi::CPUPlace()); + phi::DenseTensor dense_x(alloc.get(), + phi::DenseTensorMeta(phi::DataType::FLOAT32, + phi::make_ddim({4, 10}), + phi::DataLayout::NCHW)); + phi::CPUContext dev_ctx; + dev_ctx.SetAllocator(paddle::memory::allocation::AllocatorFacade::Instance() + .GetAllocator(paddle::platform::CPUPlace()) + .get()); + auto* dense_x_data = dev_ctx.Alloc(&dense_x); + for (size_t i = 0; i < 4; ++i) { + for (size_t j = 0; j < 10; ++j) { + dense_x_data[i * 10 + j] = (i * 10 + j) * 1.0; + } + } + // 2. test API + auto out = phi::SplitWithNum(dev_ctx, dense_x, 2, 0); + // 3. 
check result + ASSERT_EQ(out.size(), static_cast(2)); + ASSERT_EQ(out[0].dims().size(), 2); + ASSERT_EQ(out[0].dims()[0], 2); + ASSERT_EQ(out[0].dims()[1], 10); + ASSERT_EQ(out[0].meta().dtype, phi::DataType::FLOAT32); + ASSERT_EQ(out[0].meta().layout, phi::DataLayout::NCHW); + + ASSERT_EQ(out[1].dims().size(), 2); + ASSERT_EQ(out[1].dims()[0], 2); + ASSERT_EQ(out[1].dims()[1], 10); + ASSERT_EQ(out[1].meta().dtype, phi::DataType::FLOAT32); + ASSERT_EQ(out[1].meta().layout, phi::DataLayout::NCHW); + + auto out_data_0 = out[0].data(); + auto out_data_1 = out[1].data(); + for (size_t i = 0; i < 4; ++i) { + if (i < 20) { + ASSERT_NEAR(dense_x_data[i], out_data_0[i], 1e-6); + } else { + ASSERT_NEAR(dense_x_data[i], out_data_1[i - 20], 1e-6); + } + } +} + } // namespace tests } // namespace phi diff --git a/python/paddle/fluid/layers/nn.py b/python/paddle/fluid/layers/nn.py index 939767a7a5e..8474ff1379a 100755 --- a/python/paddle/fluid/layers/nn.py +++ b/python/paddle/fluid/layers/nn.py @@ -5180,7 +5180,10 @@ def split(input, num_or_sections, dim=-1, name=None): "The type of 'num_or_sections' in split must be int, list or tuple in imperative mode, but " "received %s." % (type(num_or_sections))) if in_dygraph_mode(): - return _C_ops.split(input, [num], dim) + if isinstance(num_or_sections, int): + return _C_ops.split_with_num(input, num_or_sections, dim) + else: + return _C_ops.split(input, num_or_sections, dim) elif _in_legacy_dygraph(): out = [_varbase_creator() for n in range(num)] _legacy_C_ops.split(input, out, *attrs) diff --git a/python/paddle/fluid/tests/unittests/test_split_op.py b/python/paddle/fluid/tests/unittests/test_split_op.py index b9e92913c11..c31169feedb 100644 --- a/python/paddle/fluid/tests/unittests/test_split_op.py +++ b/python/paddle/fluid/tests/unittests/test_split_op.py @@ -421,6 +421,95 @@ class API_TestSplit4(unittest.TestCase): np.testing.assert_allclose(ex_x1, r1, rtol=1e-05) +class API_TestSplit5(unittest.TestCase): + + def test_out(self): + for use_cuda in ([False, True] + if core.is_compiled_with_cuda() else [False]): + place = paddle.CUDAPlace(0) if use_cuda else paddle.CPUPlace() + with fluid.program_guard(fluid.Program(), fluid.Program()): + input_1 = np.random.random([5, 4]).astype("int32") + # input is a variable which shape is [5, 4] + input = paddle.to_tensor(input_1) + n = paddle.full([1], 5, dtype='int32') + out = paddle.split(input, [n]) + exe = paddle.static.Executor(place=place) + re = exe.run(fetch_list=[out]) + re = re[0] + ex_out = np.split(input_1, [5]) + ex_out = ex_out[0] + np.testing.assert_allclose(ex_out, re, rtol=1e-05) + + +class API_TestDygraphFluidSplit(unittest.TestCase): + + def test_out1(self): + with fluid.dygraph.guard(): + input_1 = np.random.random([4, 6, 6]).astype("int32") + # input is a variable which shape is [4, 6, 6] + input = paddle.to_tensor(input_1) + x0, x1, x2 = fluid.layers.split(input, num_or_sections=3, dim=1) + x0_out = x0.numpy() + x1_out = x1.numpy() + x2_out = x2.numpy() + ex_x0, ex_x1, ex_x2 = np.split(input_1, 3, axis=1) + with _test_eager_guard(): + # input is a variable which shape is [4, 6, 6] + input = paddle.to_tensor(input_1) + input.stop_gradient = False + x0, x1, x2 = fluid.layers.split(input, num_or_sections=3, dim=1) + eager_x0_out = x0.numpy() + eager_x1_out = x1.numpy() + eager_x2_out = x2.numpy() + loss = x0.sum() + loss.backward() + manul_grad = np.zeros_like(input_1) + manul_grad[:, :2, :] = 1 + np.testing.assert_allclose(input.gradient(), + manul_grad, + rtol=1e-05) + 
np.testing.assert_allclose(ex_x0, eager_x0_out, rtol=1e-05) + np.testing.assert_allclose(ex_x1, eager_x1_out, rtol=1e-05) + np.testing.assert_allclose(ex_x2, eager_x2_out, rtol=1e-05) + + np.testing.assert_allclose(ex_x0, x0_out, rtol=1e-05) + np.testing.assert_allclose(ex_x1, x1_out, rtol=1e-05) + np.testing.assert_allclose(ex_x2, x2_out, rtol=1e-05) + + def test_out2(self): + with fluid.dygraph.guard(): + input_1 = np.random.random([4, 6, 6]).astype("int32") + # input is a variable which shape is [4, 6, 6] + input = paddle.to_tensor(input_1) + x0, x1, x2 = fluid.layers.split(input, [2, 2, 2], dim=1) + x0_out = x0.numpy() + x1_out = x1.numpy() + x2_out = x2.numpy() + ex_x0, ex_x1, ex_x2 = np.split(input_1, 3, axis=1) + with _test_eager_guard(): + # input is a variable which shape is [4, 6, 6] + input = paddle.to_tensor(input_1) + input.stop_gradient = False + x0, x1, x2 = fluid.layers.split(input, [2, 2, 2], dim=1) + eager_x0_out = x0.numpy() + eager_x1_out = x1.numpy() + eager_x2_out = x2.numpy() + loss = x0.sum() + loss.backward() + manul_grad = np.zeros_like(input_1) + manul_grad[:, :2, :] = 1 + np.testing.assert_allclose(input.gradient(), + manul_grad, + rtol=1e-05) + np.testing.assert_allclose(ex_x0, eager_x0_out, rtol=1e-05) + np.testing.assert_allclose(ex_x1, eager_x1_out, rtol=1e-05) + np.testing.assert_allclose(ex_x2, eager_x2_out, rtol=1e-05) + + np.testing.assert_allclose(ex_x0, x0_out, rtol=1e-05) + np.testing.assert_allclose(ex_x1, x1_out, rtol=1e-05) + np.testing.assert_allclose(ex_x2, x2_out, rtol=1e-05) + + class API_TestDygraphSplit(unittest.TestCase): def test_out1(self): @@ -471,6 +560,25 @@ class API_TestDygraphSplit(unittest.TestCase): np.testing.assert_allclose(ex_x1, x1_out, rtol=1e-05) np.testing.assert_allclose(ex_x2, x2_out, rtol=1e-05) + def test_out3(self): + with fluid.dygraph.guard(): + np.random.seed(2021) + input_1 = np.random.random([4, 6, 6]).astype("int32") + # input is a variable which shape is [4, 6, 6] + input = paddle.to_tensor(input_1) + out_dy = paddle.split(input, [6], axis=1) + out_dy = out_dy[0] + out_dy_np = out_dy.numpy() + ex_out = np.split(input_1, [6], axis=1) + ex_out = ex_out[0] + with _test_eager_guard(): + input = paddle.to_tensor(input_1) + out_eager = paddle.split(input, [6], axis=1) + out_eager = out_eager[0] + out_eager_np = out_dy.numpy() + np.testing.assert_allclose(ex_out, out_eager_np, rtol=1e-05) + np.testing.assert_allclose(ex_out, out_dy_np, rtol=1e-05) + def test_out_tensor_input(self): with fluid.dygraph.guard(): input_1 = np.random.random([4, 6, 6]).astype("int32") diff --git a/python/paddle/tensor/manipulation.py b/python/paddle/tensor/manipulation.py index ef7a20911da..42e3bc9039f 100755 --- a/python/paddle/tensor/manipulation.py +++ b/python/paddle/tensor/manipulation.py @@ -1839,8 +1839,10 @@ def split(x, num_or_sections, axis=0, name=None): "The type of 'num_or_sections' in split must be int, list or tuple in imperative mode, but " "received %s." % (type(num_or_sections))) if in_dygraph_mode(): - return _C_ops.split(input, [num_or_sections] if isinstance( - num_or_sections, int) else num_or_sections, dim) + if isinstance(num_or_sections, int): + return _C_ops.split_with_num(input, num_or_sections, dim) + else: + return _C_ops.split(input, num_or_sections, dim) elif _in_legacy_dygraph(): out = [_varbase_creator() for n in range(num)] _legacy_C_ops.split(input, out, *attrs) -- GitLab
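Usage note (illustrative, not part of the patch): the user-visible effect of the dispatch change in python/paddle/tensor/manipulation.py and python/paddle/fluid/layers/nn.py is that an integer num_or_sections now routes to the new split_with_num op, while a list or tuple of sections still routes to the sections-based split op. A minimal Python sketch follows, assuming a PaddlePaddle build that already contains this patch; the tensor shape and values are only an example:

    import numpy as np
    import paddle

    # A [4, 6] input tensor used only to illustrate the two dispatch paths.
    x = paddle.to_tensor(np.arange(24, dtype="float32").reshape([4, 6]))

    # An int routes to _C_ops.split_with_num: three equal pieces of shape [4, 2].
    a, b, c = paddle.split(x, num_or_sections=3, axis=1)

    # A list routes to _C_ops.split; one -1 entry is inferred from the remaining
    # size along the axis, so the pieces have shapes [4, 1], [4, 2], [4, 3].
    p, q, r = paddle.split(x, num_or_sections=[1, 2, -1], axis=1)

    print(a.shape, b.shape, c.shape)  # [4, 2] [4, 2] [4, 2]
    print(p.shape, q.shape, r.shape)  # [4, 1] [4, 2] [4, 3]

The sections list may also mix in scalar int Tensors (as exercised by test_out_tensor_input in test_split_op.py); in that case the kernel receives an IntArray built from those tensors at runtime, and the compile-time InferMeta fills the affected output dimensions with -1 until the values are known.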