Unverified commit 4a25b60d, authored by Charles-hit and committed by GitHub

Fix split api bug (#45396)

* fix split bug

* resolve function redefinition

* fix fluid.layers.split and add unit test

* delete splitInferMeta register in unary.cc

* modify test_split_op GPU unit test

* modify test_split_op GPU unit test place param

* refactor split op and fix infershape bugs

* add () in && and ||

* fix split C++ unit test

* fix split infershape
Parent df7600ab
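The user-visible change in this patch: in dygraph mode, `paddle.split` (and `fluid.layers.split`) now dispatches on the type of `num_or_sections`, calling the new `split_with_num` op for an integer count and the reworked `split` op for an explicit list of sections. A minimal usage sketch of the two paths (the tensor shapes below are illustrative, not taken from this patch):

import paddle

x = paddle.rand([4, 6, 6])

# integer count -> dispatched to split_with_num in dygraph mode
a0, a1, a2 = paddle.split(x, num_or_sections=3, axis=1)

# explicit sections -> dispatched to split
b0, b1, b2 = paddle.split(x, num_or_sections=[2, 2, 2], axis=1)

print(a0.shape, b0.shape)  # both [4, 2, 6]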
@@ -22,16 +22,16 @@ import os
### Global Variables ###
########################
ops_to_fill_zero_for_empty_grads = set([
    "split_grad", "split_with_num_grad", "rnn_grad", "matmul_double_grad",
    "matmul_triple_grad", "sigmoid_double_grad", "sigmoid_triple_grad",
    "add_double_grad", "add_triple_grad", "multiply_grad",
    "multiply_double_grad", "multiply_triple_grad", "conv2d_grad_grad",
    "batch_norm_double_grad", "tanh_double_grad", "tanh_triple_grad",
    "subtract_double_grad", "divide_double_grad", "log_double_grad",
    "elu_double_grad", "leaky_relu_double_grad", "sqrt_double_grad",
    "rsqrt_double_grad", "square_double_grad", "celu_double_grad",
    "pad_double_grad", "pad3d_double_grad", "squeeze_double_grad",
    "unsqueeze_double_grad", "instance_norm_double_grad", "conv3d_double_grad",
    "depthwise_conv2d_grad_grad", "concat_double_grad", "expand_grad",
    "argsort_grad"
])
...
@@ -21,7 +21,9 @@ limitations under the License. */
namespace paddle {
namespace operators {

using framework::LoDTensor;
using framework::Tensor;
using framework::Variable;

class SplitOp : public framework::OperatorWithKernel {
 public:
@@ -36,47 +38,72 @@ class SplitOp : public framework::OperatorWithKernel {
                      1UL,
                      platform::errors::InvalidArgument(
                          "Outputs(Out) of SplitOp should not be empty."));
    int axis = static_cast<int>(ctx->Attrs().Get<int>("axis"));
    int num = static_cast<int>(ctx->Attrs().Get<int>("num"));
    std::vector<int> sections = static_cast<std::vector<int>>(
        ctx->Attrs().Get<std::vector<int>>("sections"));
    // Construct MetaTensor for InferMeta Func
    using CompatMetaTensor = framework::CompatMetaTensor;
    CompatMetaTensor x(ctx->GetInputVarPtrs("X")[0], ctx->IsRuntime());
    std::vector<CompatMetaTensor> out;
    size_t out_size = ctx->GetOutputVarPtrs("Out").size();
    out.reserve(out_size);
    for (size_t i = 0; i < out_size; i++) {
      out.emplace_back(
          CompatMetaTensor(ctx->GetOutputVarPtrs("Out")[i], ctx->IsRuntime()));
    }
    std::vector<phi::MetaTensor *> out_ptr(out_size);
    for (size_t i = 0; i < out_size; i++) {
      out_ptr[i] = &out[i];
    }
    phi::Scalar axis_final;
    phi::IntArray sections_final;
    // Construct axis_final
    if (ctx->IsRuntime() && ctx->HasInput("AxisTensor")) {
      Variable *var =
          PADDLE_GET_CONST(Variable *, ctx->GetInputVarPtrs("AxisTensor")[0]);
      axis_final = std::move(experimental::MakePhiScalarFromVar(*var));
    } else if (!ctx->IsRuntime() && ctx->HasInput("AxisTensor")) {
      axis_final = std::move(phi::Scalar(-1));
      axis_final.SetFromTensor(true);
    } else {
      axis_final = std::move(phi::Scalar(axis));
    }
    // Construct sections_final
    if (ctx->IsRuntime() && ctx->HasInputs("SectionsTensorList")) {
      int sections_tensor_list_size =
          ctx->GetInputVarPtrs("SectionsTensorList").size();
      const paddle::small_vector<framework::InferShapeVarPtr,
                                 phi::kInputSmallVectorSize>
          &sections_varptr_list = ctx->GetInputVarPtrs("SectionsTensorList");
      std::vector<LoDTensor> sections_from_tensor;
      sections_from_tensor.reserve(sections_tensor_list_size);
      for (const auto &section_varptr : sections_varptr_list) {
        Variable *var = PADDLE_GET_CONST(Variable *, section_varptr);
        sections_from_tensor.emplace_back(var->Get<LoDTensor>());
      }
      sections_final = std::move(phi::IntArray(sections_from_tensor));
    } else if (!ctx->IsRuntime() && ctx->HasInputs("SectionsTensorList")) {
      sections_final = std::move(phi::IntArray(std::vector<int>(
          ctx->GetInputVarPtrs("SectionsTensorList").size(), -1)));
      sections_final.SetFromTensor(true);
    } else {
      sections_final = std::move(phi::IntArray(sections));
    }
    if (sections.size() > 0) {
      if (ctx->IsRuntime()) {
        phi::SplitInferMeta(
            x, sections_final, axis_final, out_ptr, {true, false});
      } else {
        phi::SplitInferMeta(
            x, sections_final, axis_final, out_ptr, {false, false});
      }
    } else {
      if (ctx->IsRuntime()) {
        phi::SplitWithNumInferMeta(x, num, axis_final, out_ptr, {true, false});
      } else {
        phi::SplitWithNumInferMeta(x, num, axis_final, out_ptr, {false, false});
      }
    }
  }
...
@@ -2501,11 +2501,23 @@
  backward : spectral_norm_grad

- api : split
  args : (Tensor x, IntArray sections, Scalar(int) axis)
  output : Tensor[]{sections.size()}
  infer_meta :
    func : SplitInferMeta
  kernel :
    func : split
  backward : split_grad

- api : split_with_num
  args : (Tensor x, int num, Scalar(int) axis)
  output : Tensor[]{num}
  infer_meta :
    func : SplitWithNumInferMeta
  kernel :
    func : split_with_num
  backward : split_with_num_grad

- api : sqrt
  args : (Tensor x)
  output : Tensor(out)
...
@@ -2271,6 +2271,12 @@
  args : (Tensor[] out_grad, Scalar axis = -1)
  output : Tensor(x_grad)
  invoke : concat( out_grad, axis)

- backward_api : split_with_num_grad
  forward : split_with_num (Tensor x, int num, Scalar axis) -> Tensor[](out)
  args : (Tensor[] out_grad, Scalar axis = -1)
  output : Tensor(x_grad)
  invoke : concat( out_grad, axis)

# TODO(zhangyunfei) The config of double grad and triple grad will be supported in the future.

- backward_api : sqrt_double_grad
...
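Both `split_grad` and the new `split_with_num_grad` are implemented by invoking `concat` on the output gradients along the split axis; outputs with no incoming gradient are filled with zeros first (hence the additions to `ops_to_fill_zero_for_empty_grads` above). A small dygraph sketch of the behavior the new unit tests rely on (shapes are illustrative):

import numpy as np
import paddle

x = paddle.rand([4, 6, 6])
x.stop_gradient = False

x0, x1, x2 = paddle.split(x, num_or_sections=3, axis=1)
loss = x0.sum()
loss.backward()

# Only the slice that fed the loss gets gradient 1; the other slices' empty
# gradients are filled with zeros before the concat that forms x's gradient.
expected = np.zeros([4, 6, 6], dtype="float32")
expected[:, :2, :] = 1
np.testing.assert_allclose(x.gradient(), expected)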
@@ -37,7 +37,6 @@ template <>
IntArrayBase<phi::DenseTensor>::IntArrayBase(
    const std::vector<phi::DenseTensor>& tensor_list) {
  is_from_tensor_ = true;
  for (size_t i = 0; i < tensor_list.size(); ++i) {
    DataType data_type = tensor_list[i].dtype();
    switch (data_type) {
...
@@ -3084,81 +3084,122 @@ void SoftmaxInferMeta(const MetaTensor& x, int axis, MetaTensor* out) {
  out->share_lod(x);
}

int GetSplitAxisValue(const MetaTensor& x,
                      const Scalar& axis,
                      MetaConfig config) {
  // Tensor has no value in static graph compile time
  if (axis.FromTensor() && !config.is_runtime) {
    return -1;
  } else {
    if (axis.dtype() == DataType::FLOAT32 ||
        axis.dtype() == DataType::FLOAT64) {
      PADDLE_THROW(
          phi::errors::InvalidArgument("%s(): argument (position 3) must be "
                                       "int, but got %s",
                                       "split",
                                       "float"));  // NOLINT
    }
    int axis_value = axis.to<int>();
    int rank = x.dims().size();
    PADDLE_ENFORCE_EQ(
        axis_value >= -rank && axis_value < rank,
        true,
        phi::errors::InvalidArgument(
            "The axis is expected to be in range of [%d, %d), but got %d",
            -rank,
            rank,
            axis_value));
    if (axis_value < 0) {
      axis_value = axis_value + rank;
    }
    return axis_value;
  }
}

void FillSplitOutDims(const MetaTensor& x,
                      const int axis_value,
                      const std::vector<int64_t>& sections_vec,
                      std::vector<MetaTensor*>* out) {
  std::vector<phi::DDim> out_dims(sections_vec.size(), x.dims());
  if (x.dims().at(axis_value) > 0) {
    for (size_t i = 0; i < sections_vec.size(); ++i) {
      out_dims[i][axis_value] = sections_vec[i];
    }
  } else {
    for (size_t i = 0; i < sections_vec.size(); ++i) {
      out_dims[i][axis_value] = -1;
    }
  }
  for (size_t i = 0; i < sections_vec.size(); ++i) {
    if (axis_value != 0) {
      // Only pass LoD when not spliting along the first dim.
      (*out)[i]->set_dtype(x.dtype());
      (*out)[i]->set_dims(out_dims[i]);
      (*out)[i]->set_layout(x.layout());
    } else {
      (*out)[i]->set_dtype(x.dtype());
      (*out)[i]->set_dims(out_dims[i]);
      (*out)[i]->set_layout(x.layout());
      (*out)[i]->share_lod(x);
    }
  }
}

void SplitInferMeta(const MetaTensor& x,
                    const IntArray& sections,
                    const Scalar& axis,
                    std::vector<MetaTensor*> out,
                    MetaConfig config) {
  // get axis value
  int axis_value = GetSplitAxisValue(x, axis, config);

  auto sections_data = sections.GetData();
  // fill out dims with -1
  if ((sections.FromTensor() && !config.is_runtime) || axis_value == -1 ||
      (axis_value >= 0 && x.dims().at(axis_value) <= 0)) {
    std::vector<phi::DDim> out_dims(
        sections_data.size(),
        phi::make_ddim(std::vector<int>(x.dims().size(), -1)));

    for (size_t i = 0; i < sections_data.size(); ++i) {
      if (axis_value != 0) {
        // Only pass LoD when not spliting along the first dim.
        out[i]->set_dtype(x.dtype());
        out[i]->set_dims(out_dims[i]);
        out[i]->set_layout(x.layout());
      } else {
        out[i]->set_dtype(x.dtype());
        out[i]->set_dims(out_dims[i]);
        out[i]->set_layout(x.layout());
        out[i]->share_lod(x);
      }
    }
  } else {
    auto input_axis_dim = x.dims().at(axis_value);
    std::vector<int64_t> sections_vec;
    const int unknow_dim_val = -1;
    int unknow_dim_idx = -1;
    int num_of_unknow = 0;
    int sum_of_section = 0;

    for (size_t i = 0; i < sections_data.size(); ++i) {
      sections_vec.push_back(sections_data[i]);
      if (sections_data[i] == unknow_dim_val) {
        num_of_unknow++;
        unknow_dim_idx = i;
      } else {
        sum_of_section += sections_data[i];
      }
    }

    PADDLE_ENFORCE_LE(num_of_unknow,
                      1,
                      phi::errors::InvalidArgument(
                          "Only one dimension value of Attr(num_or_sections) "
                          "in SplitOp can be -1. "
                          "But received Attr(num_or_sections) = [%s].",
                          phi::make_ddim(sections_data)));

    if (unknow_dim_idx != -1) {
      // for example, input shape = [4 ,5], axis = 1, sections = [2, 3, -1].
@@ -3173,13 +3214,11 @@ void SplitInferMeta(const MetaTensor& x,
              "size "
              "along the split dimension. But received Attr(num_or_sections) "
              "= [%s], input(X)'s shape = [%s], Attr(dim) = %d.",
              phi::make_ddim(sections_data),
              x.dims(),
              axis_value));

      sections_vec[unknow_dim_idx] = input_axis_dim - sum_of_section;
    } else {
      PADDLE_ENFORCE_EQ(
          sum_of_section,
@@ -3189,36 +3228,59 @@ void SplitInferMeta(const MetaTensor& x,
              "size "
              "along the split dimension. But received Attr(num_or_sections)"
              " = [%s], input(X)'s shape = [%s], Attr(dim) = %d.",
              phi::make_ddim(sections_data),
              x.dims(),
              axis_value));
    }
    // fill out dims
    FillSplitOutDims(x, axis_value, sections_vec, &out);
  }
}

void SplitWithNumInferMeta(const MetaTensor& x,
                           int num,
                           const Scalar& axis,
                           std::vector<MetaTensor*> out,
                           MetaConfig config) {
  int axis_value = GetSplitAxisValue(x, axis, config);
  // fill out dims with -1
  if (axis_value == -1 || (axis_value >= 0 && x.dims().at(axis_value) <= 0)) {
    std::vector<phi::DDim> out_dims(
        num, phi::make_ddim(std::vector<int>(x.dims().size(), -1)));

    for (int i = 0; i < num; ++i) {
      if (axis_value != 0) {
        // Only pass LoD when not spliting along the first dim.
        out[i]->set_dtype(x.dtype());
        out[i]->set_dims(out_dims[i]);
        out[i]->set_layout(x.layout());
      } else {
        out[i]->set_dtype(x.dtype());
        out[i]->set_dims(out_dims[i]);
        out[i]->set_layout(x.layout());
        out[i]->share_lod(x);
      }
    }
  } else {
    auto input_axis_dim = x.dims().at(axis_value);
    // step1: get formated sections
    std::vector<int64_t> sections_vec;
    PADDLE_ENFORCE_EQ(input_axis_dim % num,
                      0,
                      phi::errors::InvalidArgument(
                          "The input's size along the split dimension "
                          "must be evenly divisible by Attr(num_or_sections). "
                          "But received Attr(num_or_sections) "
                          "= %d, input(X)'s shape = [%s], Attr(dim) = %d.",
                          num,
                          x.dims(),
                          axis_value));

    for (int i = 0; i < num; ++i) {
      sections_vec.push_back(input_axis_dim / num);
    }
    // step2: fill out dims
    FillSplitOutDims(x, axis_value, sections_vec, &out);
  }
}
@@ -4623,4 +4685,3 @@ void FoldInferMeta(const MetaTensor& x,
}  // namespace phi

PD_REGISTER_INFER_META_FN(flatten, phi::FlattenInferMeta);
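These two infer-meta routines encode the shape rules the rest of the patch relies on: `SplitInferMeta` allows at most one `-1` entry in `sections` and infers it from the remaining extent, while `SplitWithNumInferMeta` requires the axis extent to be evenly divisible by `num`. A quick Python-level illustration (shapes chosen only for the example):

import paddle

x = paddle.rand([4, 9])

# one -1 section: 2 + 3 + (-1) on an extent of 9 -> the last piece is inferred as 4
p0, p1, p2 = paddle.split(x, num_or_sections=[2, 3, -1], axis=1)
print(p0.shape, p1.shape, p2.shape)  # [4, 2] [4, 3] [4, 4]

# an integer count must divide the extent: 9 % 3 == 0 works, 9 % 2 != 0 raises an error
q0, q1, q2 = paddle.split(x, num_or_sections=3, axis=1)  # each piece is [4, 3]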
@@ -452,12 +452,27 @@ void SliceRawInferMeta(const MetaTensor& input,
void SoftmaxInferMeta(const MetaTensor& x, int axis, MetaTensor* out);

int GetSplitAxisValue(const MetaTensor& x,
                      const Scalar& axis,
                      MetaConfig config);

void FillSplitOutDims(const MetaTensor& x,
                      const int axis_value,
                      const std::vector<int64_t>& sections_vec,
                      std::vector<MetaTensor*>* out);

void SplitInferMeta(const MetaTensor& x_meta,
                    const IntArray& sections,
                    const Scalar& axis,
                    std::vector<MetaTensor*> out,
                    MetaConfig config = MetaConfig());

void SplitWithNumInferMeta(const MetaTensor& x_meta,
                           int num,
                           const Scalar& axis,
                           std::vector<MetaTensor*> out,
                           MetaConfig config = MetaConfig());

void SquaredL2NormInferMeta(const MetaTensor& x, MetaTensor* out);

void SqueezeInferMeta(const MetaTensor& x,
...
@@ -14,54 +14,9 @@
#include "paddle/phi/kernels/split_kernel.h"

#include "paddle/phi/common/float16.h"
#include "paddle/phi/core/kernel_registry.h"
#include "paddle/phi/kernels/impl/split_kernel_impl.h"

PD_REGISTER_KERNEL(split,
                   CPU,
@@ -76,3 +31,17 @@ PD_REGISTER_KERNEL(split,
                   int8_t,
                   phi::dtype::float16,
                   phi::dtype::bfloat16) {}

PD_REGISTER_KERNEL(split_with_num,
                   CPU,
                   ALL_LAYOUT,
                   phi::SplitWithNumKernel,
                   float,
                   double,
                   int64_t,
                   int,
                   bool,
                   uint8_t,
                   int8_t,
                   phi::dtype::float16,
                   phi::dtype::bfloat16) {}
@@ -14,53 +14,9 @@
#include "paddle/phi/kernels/split_kernel.h"

#include "paddle/phi/common/float16.h"
#include "paddle/phi/core/kernel_registry.h"
#include "paddle/phi/kernels/impl/split_kernel_impl.h"

PD_REGISTER_KERNEL(split,
                   GPU,
@@ -75,3 +31,17 @@ PD_REGISTER_KERNEL(split,
                   int8_t,
                   phi::dtype::float16,
                   phi::dtype::bfloat16) {}

PD_REGISTER_KERNEL(split_with_num,
                   GPU,
                   ALL_LAYOUT,
                   phi::SplitWithNumKernel,
                   float,
                   double,
                   int64_t,
                   int,
                   bool,
                   uint8_t,
                   int8_t,
                   phi::dtype::float16,
                   phi::dtype::bfloat16) {}
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include "paddle/phi/kernels/split_kernel.h"
#include "paddle/fluid/operators/strided_memcpy.h"
#include "paddle/phi/common/int_array.h"
#include "paddle/phi/common/scalar.h"
#include "paddle/phi/core/dense_tensor.h"
#include "paddle/phi/kernels/funcs/concat_and_split_functor.h"
namespace phi {

template <typename T, typename Context>
void SplitKernel(const Context& dev_ctx,
                 const DenseTensor& x,
                 const IntArray& sections,
                 const Scalar& axis_scalar,
                 std::vector<DenseTensor*> outs) {
  std::vector<const DenseTensor*> shape_refer;
  for (size_t j = 0; j < outs.size(); ++j) {
    dev_ctx.template Alloc<T>(outs[j]);
    shape_refer.emplace_back(outs[j]);
  }

  int axis = axis_scalar.to<int>();
  // Sometimes direct copies will be faster, this maybe need deeply analysis.
  if (axis == 0 && outs.size() < 10) {
    paddle::operators::StridedMemcpyWithAxis0<T>(
        dev_ctx, x, shape_refer, &outs);
  } else {
    phi::funcs::SplitFunctor<Context, T> functor;
    functor(dev_ctx, x, shape_refer, axis, &outs);
  }
}

template <typename T, typename Context>
void SplitWithNumKernel(const Context& dev_ctx,
                        const DenseTensor& x,
                        int num,
                        const Scalar& axis_scalar,
                        std::vector<DenseTensor*> outs) {
  int axis_value = axis_scalar.to<int>();
  auto input_axis_dim = x.dims().at(axis_value);
  std::vector<int64_t> sections_vec;
  for (int i = 0; i < num; ++i) {
    sections_vec.push_back(input_axis_dim / num);
  }
  IntArray sections(sections_vec);
  SplitKernel<T, Context>(dev_ctx, x, sections, axis_scalar, outs);
}

}  // namespace phi
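`SplitWithNumKernel` simply expands `num` into `num` equal sections and forwards to `SplitKernel`, so splitting by a count and splitting by the equivalent uniform sections should produce identical tensors. A small equivalence check at the Python level (shapes are illustrative):

import numpy as np
import paddle

x = paddle.rand([4, 10])
num = 2

by_num = paddle.split(x, num_or_sections=num, axis=1)
sections = [x.shape[1] // num] * num   # mirrors what the kernel computes internally
by_sections = paddle.split(x, num_or_sections=sections, axis=1)

for a, b in zip(by_num, by_sections):
    np.testing.assert_allclose(a.numpy(), b.numpy())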
@@ -18,42 +18,70 @@
#include "paddle/phi/common/scalar.h"
#include "paddle/phi/core/dense_tensor.h"
#include "paddle/phi/infermeta/unary.h"
#include "paddle/phi/kernels/empty_kernel.h"

namespace phi {

template <typename T, typename Context>
void SplitKernel(const Context& dev_ctx,
                 const DenseTensor& x,
                 const IntArray& sections,
                 const Scalar& axis,
                 std::vector<DenseTensor*> out);

template <typename T, typename Context>
void SplitWithNumKernel(const Context& dev_ctx,
                        const DenseTensor& x,
                        int num,
                        const Scalar& axis,
                        std::vector<DenseTensor*> out);

template <typename T, typename Context>
std::vector<DenseTensor> Split(const Context& dev_ctx,
                               const DenseTensor& x,
                               const IntArray& sections,
                               const Scalar& axis) {
  size_t out_number;
  out_number = sections.GetData().size();

  std::vector<MetaTensor> out_meta;
  std::vector<MetaTensor*> out_meta_ptr;
  out_meta.reserve(out_number);
  out_meta_ptr.reserve(out_number);
  std::vector<DenseTensor> result(out_number);

  for (size_t i = 0; i < out_number; ++i) {
    out_meta.emplace_back(&result[i]);
    out_meta_ptr.push_back(&out_meta.back());
  }
  SplitInferMeta(x, sections, axis, out_meta_ptr);

  std::vector<DenseTensor*> outs;
  outs.reserve(out_meta.size());
  for (size_t i = 0; i < out_meta.size(); ++i) {
    outs.push_back(&result[i]);
  }

  SplitKernel<T, Context>(dev_ctx, x, sections, axis, outs);

  return result;
}

template <typename T, typename Context>
std::vector<DenseTensor> SplitWithNum(const Context& dev_ctx,
                                      const DenseTensor& x,
                                      int num,
                                      const Scalar& axis) {
  size_t out_number = num;

  std::vector<MetaTensor> out_meta;
  std::vector<MetaTensor*> out_meta_ptr;
  out_meta.reserve(out_number);
  out_meta_ptr.reserve(out_number);
  std::vector<DenseTensor> result(out_number);

  for (size_t i = 0; i < out_number; ++i) {
    out_meta.emplace_back(&result[i]);
    out_meta_ptr.push_back(&out_meta.back());
  }
  SplitWithNumInferMeta(x, num, axis, out_meta_ptr);

  std::vector<DenseTensor*> outs;
  outs.reserve(out_meta.size());
@@ -61,7 +89,7 @@ std::vector<DenseTensor> Split(const Context& dev_ctx,
    outs.push_back(&result[i]);
  }

  SplitWithNumKernel<T, Context>(dev_ctx, x, num, axis, outs);

  return result;
}
...
@@ -21,9 +21,10 @@ KernelSignature SplitOpArgumentMapping(const ArgumentMappingContext& ctx) {
  // priority: AxisTensor > axis
  if (paddle::any_cast<int>(ctx.Attr("num")) > 0) {
    if (ctx.HasInput("AxisTensor")) {
      return KernelSignature(
          "split_with_num", {"X"}, {"num", "AxisTensor"}, {"Out"});
    } else {
      return KernelSignature("split_with_num", {"X"}, {"num", "axis"}, {"Out"});
    }
  }
...
@@ -22,6 +22,7 @@ limitations under the License. */
#include "paddle/phi/core/dense_tensor.h"
#include "paddle/phi/core/kernel_registry.h"
#include "paddle/phi/kernels/split_kernel.h"

namespace phi {
namespace tests {
@@ -40,14 +41,12 @@ TEST(DEV_API, split) {
  dev_ctx.SetAllocator(paddle::memory::allocation::AllocatorFacade::Instance()
                           .GetAllocator(paddle::platform::CPUPlace())
                           .get());
  auto* dense_x_data = dev_ctx.Alloc<float>(&dense_x);

  for (size_t i = 0; i < 4; ++i) {
    for (size_t j = 0; j < 10; ++j) {
      dense_x_data[i * 10 + j] = (i * 10 + j) * 1.0;
    }
  }

  // 2. test API
  auto out = phi::Split<float>(dev_ctx, dense_x, {2, 2}, 0);
@@ -76,5 +75,50 @@
  }
}

TEST(DEV_API, split_with_num) {
  // 1. create tensor
  const auto alloc =
      std::make_unique<paddle::experimental::DefaultAllocator>(phi::CPUPlace());
  phi::DenseTensor dense_x(alloc.get(),
                           phi::DenseTensorMeta(phi::DataType::FLOAT32,
                                                phi::make_ddim({4, 10}),
                                                phi::DataLayout::NCHW));

  phi::CPUContext dev_ctx;
  dev_ctx.SetAllocator(paddle::memory::allocation::AllocatorFacade::Instance()
                           .GetAllocator(paddle::platform::CPUPlace())
                           .get());
  auto* dense_x_data = dev_ctx.Alloc<float>(&dense_x);

  for (size_t i = 0; i < 4; ++i) {
    for (size_t j = 0; j < 10; ++j) {
      dense_x_data[i * 10 + j] = (i * 10 + j) * 1.0;
    }
  }

  // 2. test API
  auto out = phi::SplitWithNum<float>(dev_ctx, dense_x, 2, 0);

  // 3. check result
  ASSERT_EQ(out.size(), static_cast<size_t>(2));
  ASSERT_EQ(out[0].dims().size(), 2);
  ASSERT_EQ(out[0].dims()[0], 2);
  ASSERT_EQ(out[0].dims()[1], 10);
  ASSERT_EQ(out[0].meta().dtype, phi::DataType::FLOAT32);
  ASSERT_EQ(out[0].meta().layout, phi::DataLayout::NCHW);

  ASSERT_EQ(out[1].dims().size(), 2);
  ASSERT_EQ(out[1].dims()[0], 2);
  ASSERT_EQ(out[1].dims()[1], 10);
  ASSERT_EQ(out[1].meta().dtype, phi::DataType::FLOAT32);
  ASSERT_EQ(out[1].meta().layout, phi::DataLayout::NCHW);

  auto out_data_0 = out[0].data<float>();
  auto out_data_1 = out[1].data<float>();
  // Check every element of the input: the first 20 land in out[0], the rest in out[1].
  for (size_t i = 0; i < 40; ++i) {
    if (i < 20) {
      ASSERT_NEAR(dense_x_data[i], out_data_0[i], 1e-6);
    } else {
      ASSERT_NEAR(dense_x_data[i], out_data_1[i - 20], 1e-6);
    }
  }
}

}  // namespace tests
}  // namespace phi
@@ -5180,7 +5180,10 @@ def split(input, num_or_sections, dim=-1, name=None):
            "The type of 'num_or_sections' in split must be int, list or tuple in imperative mode, but "
            "received %s." % (type(num_or_sections)))
    if in_dygraph_mode():
        if isinstance(num_or_sections, int):
            return _C_ops.split_with_num(input, num_or_sections, dim)
        else:
            return _C_ops.split(input, num_or_sections, dim)
    elif _in_legacy_dygraph():
        out = [_varbase_creator() for n in range(num)]
        _legacy_C_ops.split(input, out, *attrs)
...
@@ -421,6 +421,95 @@ class API_TestSplit4(unittest.TestCase):
            np.testing.assert_allclose(ex_x1, r1, rtol=1e-05)


class API_TestSplit5(unittest.TestCase):

    def test_out(self):
        for use_cuda in ([False, True]
                         if core.is_compiled_with_cuda() else [False]):
            place = paddle.CUDAPlace(0) if use_cuda else paddle.CPUPlace()
            with fluid.program_guard(fluid.Program(), fluid.Program()):
                input_1 = np.random.random([5, 4]).astype("int32")
                # input is a variable which shape is [5, 4]
                input = paddle.to_tensor(input_1)
                n = paddle.full([1], 5, dtype='int32')
                out = paddle.split(input, [n])
                exe = paddle.static.Executor(place=place)
                re = exe.run(fetch_list=[out])
                re = re[0]
                ex_out = np.split(input_1, [5])
                ex_out = ex_out[0]
                np.testing.assert_allclose(ex_out, re, rtol=1e-05)


class API_TestDygraphFluidSplit(unittest.TestCase):

    def test_out1(self):
        with fluid.dygraph.guard():
            input_1 = np.random.random([4, 6, 6]).astype("int32")
            # input is a variable which shape is [4, 6, 6]
            input = paddle.to_tensor(input_1)
            x0, x1, x2 = fluid.layers.split(input, num_or_sections=3, dim=1)
            x0_out = x0.numpy()
            x1_out = x1.numpy()
            x2_out = x2.numpy()
            ex_x0, ex_x1, ex_x2 = np.split(input_1, 3, axis=1)
            with _test_eager_guard():
                # input is a variable which shape is [4, 6, 6]
                input = paddle.to_tensor(input_1)
                input.stop_gradient = False
                x0, x1, x2 = fluid.layers.split(input, num_or_sections=3, dim=1)
                eager_x0_out = x0.numpy()
                eager_x1_out = x1.numpy()
                eager_x2_out = x2.numpy()
                loss = x0.sum()
                loss.backward()
                manul_grad = np.zeros_like(input_1)
                manul_grad[:, :2, :] = 1
                np.testing.assert_allclose(input.gradient(),
                                           manul_grad,
                                           rtol=1e-05)
                np.testing.assert_allclose(ex_x0, eager_x0_out, rtol=1e-05)
                np.testing.assert_allclose(ex_x1, eager_x1_out, rtol=1e-05)
                np.testing.assert_allclose(ex_x2, eager_x2_out, rtol=1e-05)

            np.testing.assert_allclose(ex_x0, x0_out, rtol=1e-05)
            np.testing.assert_allclose(ex_x1, x1_out, rtol=1e-05)
            np.testing.assert_allclose(ex_x2, x2_out, rtol=1e-05)

    def test_out2(self):
        with fluid.dygraph.guard():
            input_1 = np.random.random([4, 6, 6]).astype("int32")
            # input is a variable which shape is [4, 6, 6]
            input = paddle.to_tensor(input_1)
            x0, x1, x2 = fluid.layers.split(input, [2, 2, 2], dim=1)
            x0_out = x0.numpy()
            x1_out = x1.numpy()
            x2_out = x2.numpy()
            ex_x0, ex_x1, ex_x2 = np.split(input_1, 3, axis=1)
            with _test_eager_guard():
                # input is a variable which shape is [4, 6, 6]
                input = paddle.to_tensor(input_1)
                input.stop_gradient = False
                x0, x1, x2 = fluid.layers.split(input, [2, 2, 2], dim=1)
                eager_x0_out = x0.numpy()
                eager_x1_out = x1.numpy()
                eager_x2_out = x2.numpy()
                loss = x0.sum()
                loss.backward()
                manul_grad = np.zeros_like(input_1)
                manul_grad[:, :2, :] = 1
                np.testing.assert_allclose(input.gradient(),
                                           manul_grad,
                                           rtol=1e-05)
                np.testing.assert_allclose(ex_x0, eager_x0_out, rtol=1e-05)
                np.testing.assert_allclose(ex_x1, eager_x1_out, rtol=1e-05)
                np.testing.assert_allclose(ex_x2, eager_x2_out, rtol=1e-05)

            np.testing.assert_allclose(ex_x0, x0_out, rtol=1e-05)
            np.testing.assert_allclose(ex_x1, x1_out, rtol=1e-05)
            np.testing.assert_allclose(ex_x2, x2_out, rtol=1e-05)


class API_TestDygraphSplit(unittest.TestCase):

    def test_out1(self):
@@ -471,6 +560,25 @@ class API_TestDygraphSplit(unittest.TestCase):
        np.testing.assert_allclose(ex_x1, x1_out, rtol=1e-05)
        np.testing.assert_allclose(ex_x2, x2_out, rtol=1e-05)

    def test_out3(self):
        with fluid.dygraph.guard():
            np.random.seed(2021)
            input_1 = np.random.random([4, 6, 6]).astype("int32")
            # input is a variable which shape is [4, 6, 6]
            input = paddle.to_tensor(input_1)
            out_dy = paddle.split(input, [6], axis=1)
            out_dy = out_dy[0]
            out_dy_np = out_dy.numpy()
            ex_out = np.split(input_1, [6], axis=1)
            ex_out = ex_out[0]
            with _test_eager_guard():
                input = paddle.to_tensor(input_1)
                out_eager = paddle.split(input, [6], axis=1)
                out_eager = out_eager[0]
                out_eager_np = out_eager.numpy()
                np.testing.assert_allclose(ex_out, out_eager_np, rtol=1e-05)
            np.testing.assert_allclose(ex_out, out_dy_np, rtol=1e-05)

    def test_out_tensor_input(self):
        with fluid.dygraph.guard():
            input_1 = np.random.random([4, 6, 6]).astype("int32")
...
@@ -1839,8 +1839,10 @@ def split(x, num_or_sections, axis=0, name=None):
            "The type of 'num_or_sections' in split must be int, list or tuple in imperative mode, but "
            "received %s." % (type(num_or_sections)))
    if in_dygraph_mode():
        if isinstance(num_or_sections, int):
            return _C_ops.split_with_num(input, num_or_sections, dim)
        else:
            return _C_ops.split(input, num_or_sections, dim)
    elif _in_legacy_dygraph():
        out = [_varbase_creator() for n in range(num)]
        _legacy_C_ops.split(input, out, *attrs)
...