Unverified commit 4a25b60d authored by Charles-hit, committed by GitHub

Fix split api bug (#45396)

* fix split bug

* solve function redefine

* fix fluid.layers.split and add unit test

* delete splitInferMeta register in unary.cc

* modify test_split_op GPU unit test

* modify test_split_op GPU unit test place param

* refactor split op and fix infershape bugs

* add () in && and ||

* fix split C++ unit test

* fix split infershape
Parent df7600ab
......@@ -22,16 +22,16 @@ import os
### Global Variables ###
########################
ops_to_fill_zero_for_empty_grads = set([
"split_grad", "rnn_grad", "matmul_double_grad", "matmul_triple_grad",
"sigmoid_double_grad", "sigmoid_triple_grad", "add_double_grad",
"add_triple_grad", "multiply_grad", "multiply_double_grad",
"multiply_triple_grad", "conv2d_grad_grad", "batch_norm_double_grad",
"tanh_double_grad", "tanh_triple_grad", "subtract_double_grad",
"divide_double_grad", "log_double_grad", "elu_double_grad",
"leaky_relu_double_grad", "sqrt_double_grad", "rsqrt_double_grad",
"square_double_grad", "celu_double_grad", "pad_double_grad",
"pad3d_double_grad", "squeeze_double_grad", "unsqueeze_double_grad",
"instance_norm_double_grad", "conv3d_double_grad",
"split_grad", "split_with_num_grad", "rnn_grad", "matmul_double_grad",
"matmul_triple_grad", "sigmoid_double_grad", "sigmoid_triple_grad",
"add_double_grad", "add_triple_grad", "multiply_grad",
"multiply_double_grad", "multiply_triple_grad", "conv2d_grad_grad",
"batch_norm_double_grad", "tanh_double_grad", "tanh_triple_grad",
"subtract_double_grad", "divide_double_grad", "log_double_grad",
"elu_double_grad", "leaky_relu_double_grad", "sqrt_double_grad",
"rsqrt_double_grad", "square_double_grad", "celu_double_grad",
"pad_double_grad", "pad3d_double_grad", "squeeze_double_grad",
"unsqueeze_double_grad", "instance_norm_double_grad", "conv3d_double_grad",
"depthwise_conv2d_grad_grad", "concat_double_grad", "expand_grad",
"argsort_grad"
])
......
......@@ -21,7 +21,9 @@ limitations under the License. */
namespace paddle {
namespace operators {
using framework::LoDTensor;
using framework::Tensor;
using framework::Variable;
class SplitOp : public framework::OperatorWithKernel {
public:
......@@ -36,47 +38,72 @@ class SplitOp : public framework::OperatorWithKernel {
1UL,
platform::errors::InvalidArgument(
"Outputs(Out) of SplitOp should not be empty."));
auto in_dims = ctx->GetInputDim("X");
auto outs_names = ctx->Outputs("Out");
size_t axis = static_cast<size_t>(ctx->Attrs().Get<int>("axis"));
size_t num = static_cast<size_t>(ctx->Attrs().Get<int>("num"));
int axis = static_cast<int>(ctx->Attrs().Get<int>("axis"));
int num = static_cast<int>(ctx->Attrs().Get<int>("num"));
std::vector<int> sections = static_cast<std::vector<int>>(
ctx->Attrs().Get<std::vector<int>>("sections"));
const size_t outs_number = outs_names.size();
if (sections.size() > 0) {
PADDLE_ENFORCE_EQ(
sections.size(),
outs_number,
platform::errors::InvalidArgument("tensor split sections size "
"should be equal to output size."));
// Construct MetaTensor for InferMeta Func
using CompatMetaTensor = framework::CompatMetaTensor;
CompatMetaTensor x(ctx->GetInputVarPtrs("X")[0], ctx->IsRuntime());
std::vector<CompatMetaTensor> out;
size_t out_size = ctx->GetOutputVarPtrs("Out").size();
out.reserve(out_size);
for (size_t i = 0; i < out_size; i++) {
out.emplace_back(
CompatMetaTensor(ctx->GetOutputVarPtrs("Out")[i], ctx->IsRuntime()));
}
std::vector<phi::MetaTensor *> out_ptr(out_size);
for (size_t i = 0; i < out_size; i++) {
out_ptr[i] = &out[i];
}
phi::Scalar axis_final;
phi::IntArray sections_final;
// Construct axis_final
if (ctx->IsRuntime() && ctx->HasInput("AxisTensor")) {
Variable *var =
PADDLE_GET_CONST(Variable *, ctx->GetInputVarPtrs("AxisTensor")[0]);
axis_final = std::move(experimental::MakePhiScalarFromVar(*var));
} else if (!ctx->IsRuntime() && ctx->HasInput("AxisTensor")) {
axis_final = std::move(phi::Scalar(-1));
axis_final.SetFromTensor(true);
} else {
axis_final = std::move(phi::Scalar(axis));
}
if (ctx->HasInput("AxisTensor")) {
auto out_dims = phi::make_ddim(std::vector<int>(in_dims.size(), -1));
std::vector<framework::DDim> outs_dims(outs_number, out_dims);
ctx->SetOutputsDim("Out", outs_dims);
for (size_t i = 0; i < outs_number; ++i) {
ctx->ShareLoD("X", "Out", 0, i);
// Construct sections_final
if (ctx->IsRuntime() && ctx->HasInputs("SectionsTensorList")) {
int sections_tensor_list_size =
ctx->GetInputVarPtrs("SectionsTensorList").size();
const paddle::small_vector<framework::InferShapeVarPtr,
phi::kInputSmallVectorSize>
&sections_varptr_list = ctx->GetInputVarPtrs("SectionsTensorList");
std::vector<LoDTensor> sections_from_tensor;
sections_from_tensor.reserve(sections_tensor_list_size);
for (const auto &section_varptr : sections_varptr_list) {
Variable *var = PADDLE_GET_CONST(Variable *, section_varptr);
sections_from_tensor.emplace_back(var->Get<LoDTensor>());
}
return;
sections_final = std::move(phi::IntArray(sections_from_tensor));
} else if (!ctx->IsRuntime() && ctx->HasInputs("SectionsTensorList")) {
sections_final = std::move(phi::IntArray(std::vector<int>(
ctx->GetInputVarPtrs("SectionsTensorList").size(), -1)));
sections_final.SetFromTensor(true);
} else {
sections_final = std::move(phi::IntArray(sections));
}
bool each_section_is_known =
(sections.size() > 0 && !ctx->HasInputs("SectionsTensorList"));
auto outs_dims = UpdateOutsDims(ctx->IsRuntime(),
each_section_is_known,
in_dims,
num,
sections,
axis,
outs_number);
ctx->SetOutputsDim("Out", outs_dims);
if (axis != 0) {
// Only pass LoD when not splitting along the first dim.
for (size_t i = 0; i < outs_number; ++i) {
ctx->ShareLoD("X", "Out", 0, i);
if (sections.size() > 0) {
if (ctx->IsRuntime()) {
phi::SplitInferMeta(
x, sections_final, axis_final, out_ptr, {true, false});
} else {
phi::SplitInferMeta(
x, sections_final, axis_final, out_ptr, {false, false});
}
} else {
if (ctx->IsRuntime()) {
phi::SplitWithNumInferMeta(x, num, axis_final, out_ptr, {true, false});
} else {
phi::SplitWithNumInferMeta(x, num, axis_final, out_ptr, {false, false});
}
}
}
......
......@@ -2501,11 +2501,23 @@
backward : spectral_norm_grad
- api : split
args : (Tensor x, IntArray num_or_sections, Scalar(int) axis)
output : Tensor[]
invoke : split_impl(x, num_or_sections, axis)
args : (Tensor x, IntArray sections, Scalar(int) axis)
output : Tensor[]{sections.size()}
infer_meta :
func : SplitInferMeta
kernel :
func : split
backward : split_grad
- api : split_with_num
args : (Tensor x, int num, Scalar(int) axis)
output : Tensor[]{num}
infer_meta :
func : SplitWithNumInferMeta
kernel :
func : split_with_num
backward : split_with_num_grad
- api : sqrt
args : (Tensor x)
output : Tensor(out)
......
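For context, a minimal dygraph usage sketch of how these two yaml entries are reached from Python, matching the `_C_ops.split` / `_C_ops.split_with_num` dispatch updated later in this diff; the shapes are illustrative only:

import paddle

x = paddle.rand([4, 6])

# split now takes explicit sections (IntArray); the number of outputs is sections.size()
a, b, c = paddle.split(x, [2, 2, 2], axis=1)   # roughly lowers to _C_ops.split(x, [2, 2, 2], 1)

# split_with_num takes an integer count; the number of outputs is num
p, q, r = paddle.split(x, 3, axis=1)           # roughly lowers to _C_ops.split_with_num(x, 3, 1)

assert a.shape == [4, 2] and p.shape == [4, 2]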
......@@ -2271,6 +2271,12 @@
args : (Tensor[] out_grad, Scalar axis = -1)
output : Tensor(x_grad)
invoke : concat( out_grad, axis)
- backward_api : split_with_num_grad
forward : split_with_num (Tensor x, int num, Scalar axis) -> Tensor[](out)
args : (Tensor[] out_grad, Scalar axis = -1)
output : Tensor(x_grad)
invoke : concat( out_grad, axis)
# TODO(zhangyunfei) The config of double grad and triple grad will be supported in the future.
- backward_api : sqrt_double_grad
......
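The `invoke : concat(out_grad, axis)` rule above is what the new dygraph unit tests later in this diff exercise; a condensed sketch of the resulting gradient, with numbers taken from the tests' [4, 6, 6] case:

import numpy as np
import paddle

x = paddle.ones([4, 6, 6])
x.stop_gradient = False
x0, x1, x2 = paddle.split(x, 3, axis=1)
x0.sum().backward()

# only the slice that produced x0 receives gradient; the other two grads are
# filled with zeros (see ops_to_fill_zero_for_empty_grads) and concatenated back
expected = np.zeros([4, 6, 6])
expected[:, :2, :] = 1.0
np.testing.assert_allclose(x.gradient(), expected)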
......@@ -37,7 +37,6 @@ template <>
IntArrayBase<phi::DenseTensor>::IntArrayBase(
const std::vector<phi::DenseTensor>& tensor_list) {
is_from_tensor_ = true;
for (size_t i = 0; i < tensor_list.size(); ++i) {
DataType data_type = tensor_list[i].dtype();
switch (data_type) {
......
......@@ -3084,81 +3084,122 @@ void SoftmaxInferMeta(const MetaTensor& x, int axis, MetaTensor* out) {
out->share_lod(x);
}
int GetSplitAxisValue(const MetaTensor& x,
const Scalar& axis,
MetaConfig config) {
// The axis tensor has no value at static-graph compile time
if (axis.FromTensor() && !config.is_runtime) {
return -1;
} else {
if (axis.dtype() == DataType::FLOAT32 ||
axis.dtype() == DataType::FLOAT64) {
PADDLE_THROW(
phi::errors::InvalidArgument("%s(): argument (position 3) must be "
"int, but got %s",
"split",
"float")); // NOLINT
}
int axis_value = axis.to<int>();
int rank = x.dims().size();
PADDLE_ENFORCE_EQ(
axis_value >= -rank && axis_value < rank,
true,
phi::errors::InvalidArgument(
"The axis is expected to be in range of [%d, %d), but got %d",
-rank,
rank,
axis_value));
if (axis_value < 0) {
axis_value = axis_value + rank;
}
return axis_value;
}
}
void FillSplitOutDims(const MetaTensor& x,
const int axis_value,
const std::vector<int64_t>& sections_vec,
std::vector<MetaTensor*>* out) {
std::vector<phi::DDim> out_dims(sections_vec.size(), x.dims());
if (x.dims().at(axis_value) > 0) {
for (size_t i = 0; i < sections_vec.size(); ++i) {
out_dims[i][axis_value] = sections_vec[i];
}
} else {
for (size_t i = 0; i < sections_vec.size(); ++i) {
out_dims[i][axis_value] = -1;
}
}
for (size_t i = 0; i < sections_vec.size(); ++i) {
if (axis_value != 0) {
// Only pass LoD when not splitting along the first dim.
(*out)[i]->set_dtype(x.dtype());
(*out)[i]->set_dims(out_dims[i]);
(*out)[i]->set_layout(x.layout());
} else {
(*out)[i]->set_dtype(x.dtype());
(*out)[i]->set_dims(out_dims[i]);
(*out)[i]->set_layout(x.layout());
(*out)[i]->share_lod(x);
}
}
}
void SplitInferMeta(const MetaTensor& x,
const IntArray& num_or_sections,
const IntArray& sections,
const Scalar& axis,
std::vector<MetaTensor*> out,
MetaConfig config) {
if (axis.dtype() == DataType::FLOAT32 || axis.dtype() == DataType::FLOAT64) {
PADDLE_THROW(
phi::errors::InvalidArgument("%s(): argument (position 3) must be "
"int, but got %s",
"split",
"float")); // NOLINT
}
int axis_value = axis.to<int>();
int rank = x.dims().size();
PADDLE_ENFORCE_EQ(
axis_value >= -rank && axis_value < rank,
true,
phi::errors::InvalidArgument(
"The axis is expected to be in range of [%d, %d), but got %d",
-rank,
rank,
axis_value));
if (axis_value < 0) {
axis_value = axis_value + rank;
}
auto input_axis_dim = x.dims().at(axis_value);
auto num_or_sections_data = num_or_sections.GetData();
// step1: get formatted sections
std::vector<int64_t> sections;
// num_or_sections is a number
if (num_or_sections_data.size() == 1 && num_or_sections_data[0] > 0) {
int num = num_or_sections_data.at(0);
PADDLE_ENFORCE_EQ(input_axis_dim % num,
0,
phi::errors::InvalidArgument(
"The input's size along the split dimension "
"must be evenly divisible by Attr(num_or_sections). "
"But received Attr(num_or_sections) "
"= %d, input(X)'s shape = [%s], Attr(dim) = %d.",
num,
x.dims(),
axis_value));
for (int i = 0; i < num; ++i) {
sections.push_back(input_axis_dim / num);
// get axis value
int axis_value = GetSplitAxisValue(x, axis, config);
auto sections_data = sections.GetData();
// fill out dims with -1
if ((sections.FromTensor() && !config.is_runtime) || axis_value == -1 ||
(axis_value >= 0 && x.dims().at(axis_value) <= 0)) {
std::vector<phi::DDim> out_dims(
sections_data.size(),
phi::make_ddim(std::vector<int>(x.dims().size(), -1)));
for (size_t i = 0; i < sections_data.size(); ++i) {
if (axis_value != 0) {
// Only pass LoD when not splitting along the first dim.
out[i]->set_dtype(x.dtype());
out[i]->set_dims(out_dims[i]);
out[i]->set_layout(x.layout());
} else {
out[i]->set_dtype(x.dtype());
out[i]->set_dims(out_dims[i]);
out[i]->set_layout(x.layout());
out[i]->share_lod(x);
}
}
} else {
// num_or_sections is a list of sections
auto input_axis_dim = x.dims().at(axis_value);
std::vector<int64_t> sections_vec;
const int unknow_dim_val = -1;
int unknow_dim_idx = -1;
int num_of_unknow = 0;
int sum_of_section = 0;
for (size_t i = 0; i < num_or_sections_data.size(); ++i) {
sections.push_back(num_or_sections_data[i]);
for (size_t i = 0; i < sections_data.size(); ++i) {
sections_vec.push_back(sections_data[i]);
if (num_or_sections_data[i] == unknow_dim_val) {
if (sections_data[i] == unknow_dim_val) {
num_of_unknow++;
unknow_dim_idx = i;
} else {
sum_of_section += num_or_sections_data[i];
sum_of_section += sections_data[i];
}
}
if (config.is_runtime) {
PADDLE_ENFORCE_LE(num_of_unknow,
1,
phi::errors::InvalidArgument(
"Only one dimension value of Attr(num_or_sections) "
"in SplitOp can be -1. "
"But received Attr(num_or_sections) = [%s].",
phi::make_ddim(num_or_sections_data)));
}
PADDLE_ENFORCE_LE(num_of_unknow,
1,
phi::errors::InvalidArgument(
"Only one dimension value of Attr(num_or_sections) "
"in SplitOp can be -1. "
"But received Attr(num_or_sections) = [%s].",
phi::make_ddim(sections_data)));
if (unknow_dim_idx != -1) {
// for example, input shape = [4 ,5], axis = 1, sections = [2, 3, -1].
......@@ -3173,13 +3214,11 @@ void SplitInferMeta(const MetaTensor& x,
"size "
"along the split dimension. But received Attr(num_or_sections) "
"= [%s], input(X)'s shape = [%s], Attr(dim) = %d.",
phi::make_ddim(num_or_sections_data),
phi::make_ddim(sections_data),
x.dims(),
axis_value));
if (config.is_runtime) {
sections[unknow_dim_idx] = input_axis_dim - sum_of_section;
}
sections_vec[unknow_dim_idx] = input_axis_dim - sum_of_section;
} else {
PADDLE_ENFORCE_EQ(
sum_of_section,
......@@ -3189,36 +3228,59 @@ void SplitInferMeta(const MetaTensor& x,
"size "
"along the split dimension. But received Attr(num_or_sections)"
" = [%s], input(X)'s shape = [%s], Attr(dim) = %d.",
phi::make_ddim(num_or_sections_data),
phi::make_ddim(sections_data),
x.dims(),
axis_value));
}
// fill out dims
FillSplitOutDims(x, axis_value, sections_vec, &out);
}
}
void SplitWithNumInferMeta(const MetaTensor& x,
int num,
const Scalar& axis,
std::vector<MetaTensor*> out,
MetaConfig config) {
int axis_value = GetSplitAxisValue(x, axis, config);
// fill out dims with -1
if (axis_value == -1 || (axis_value >= 0 && x.dims().at(axis_value) <= 0)) {
std::vector<phi::DDim> out_dims(
num, phi::make_ddim(std::vector<int>(x.dims().size(), -1)));
// step2: fill out dims
std::vector<phi::DDim> out_dims(sections.size(), x.dims());
if (config.is_runtime || input_axis_dim > 0) {
for (size_t i = 0; i < sections.size(); ++i) {
out_dims[i][axis_value] = sections[i];
for (int i = 0; i < num; ++i) {
if (axis_value != 0) {
// Only pass LoD when not splitting along the first dim.
out[i]->set_dtype(x.dtype());
out[i]->set_dims(out_dims[i]);
out[i]->set_layout(x.layout());
} else {
out[i]->set_dtype(x.dtype());
out[i]->set_dims(out_dims[i]);
out[i]->set_layout(x.layout());
out[i]->share_lod(x);
}
}
} else {
for (size_t i = 0; i < sections.size(); ++i) {
out_dims[i][axis_value] = -1;
}
}
auto input_axis_dim = x.dims().at(axis_value);
// step1: get formatted sections
std::vector<int64_t> sections_vec;
PADDLE_ENFORCE_EQ(input_axis_dim % num,
0,
phi::errors::InvalidArgument(
"The input's size along the split dimension "
"must be evenly divisible by Attr(num_or_sections). "
"But received Attr(num_or_sections) "
"= %d, input(X)'s shape = [%s], Attr(dim) = %d.",
num,
x.dims(),
axis_value));
for (size_t i = 0; i < sections.size(); ++i) {
if (axis_value != 0) {
// Only pass LoD when not splitting along the first dim.
out[i]->set_dtype(x.dtype());
out[i]->set_dims(out_dims[i]);
out[i]->set_layout(x.layout());
} else {
out[i]->set_dtype(x.dtype());
out[i]->set_dims(out_dims[i]);
out[i]->set_layout(x.layout());
out[i]->share_lod(x);
for (int i = 0; i < num; ++i) {
sections_vec.push_back(input_axis_dim / num);
}
// step2: fill out dims
FillSplitOutDims(x, axis_value, sections_vec, &out);
}
}
......@@ -4623,4 +4685,3 @@ void FoldInferMeta(const MetaTensor& x,
} // namespace phi
PD_REGISTER_INFER_META_FN(flatten, phi::FlattenInferMeta);
PD_REGISTER_INFER_META_FN(split, phi::SplitInferMeta);
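A quick numeric illustration of the `-1` handling in `SplitInferMeta` above (at most one section may be -1; at runtime it is filled with the remaining extent along the split axis, while at compile time it stays unknown):

import paddle

x = paddle.rand([4, 10])
a, b, c = paddle.split(x, [2, 3, -1], axis=1)  # -1 resolves to 10 - 2 - 3 = 5
assert [a.shape[1], b.shape[1], c.shape[1]] == [2, 3, 5]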
......@@ -452,12 +452,27 @@ void SliceRawInferMeta(const MetaTensor& input,
void SoftmaxInferMeta(const MetaTensor& x, int axis, MetaTensor* out);
int GetSplitAxisValue(const MetaTensor& x,
const Scalar& axis,
MetaConfig config);
void FillSplitOutDims(const MetaTensor& x,
const int axis_value,
const std::vector<int64_t>& sections_vec,
std::vector<MetaTensor*>* out);
void SplitInferMeta(const MetaTensor& x_meta,
const IntArray& num_or_sections,
const IntArray& sections,
const Scalar& axis,
std::vector<MetaTensor*> out,
MetaConfig config = MetaConfig());
void SplitWithNumInferMeta(const MetaTensor& x_meta,
int num,
const Scalar& axis,
std::vector<MetaTensor*> out,
MetaConfig config = MetaConfig());
void SquaredL2NormInferMeta(const MetaTensor& x, MetaTensor* out);
void SqueezeInferMeta(const MetaTensor& x,
......
......@@ -14,54 +14,9 @@
#include "paddle/phi/kernels/split_kernel.h"
#include "paddle/fluid/operators/strided_memcpy.h"
#include "paddle/phi/common/float16.h"
#include "paddle/phi/core/kernel_registry.h"
#include "paddle/phi/infermeta/unary.h"
#include "paddle/phi/kernels/funcs/concat_and_split_functor.h"
namespace phi {
template <typename T, typename Context>
void SplitKernel(const Context& dev_ctx,
const DenseTensor& x,
const IntArray& num_or_sections,
const Scalar& axis_scalar,
std::vector<DenseTensor*> outs) {
// need to infershape output
if (num_or_sections.FromTensor() || axis_scalar.FromTensor()) {
std::vector<MetaTensor> out_metas;
out_metas.reserve(outs.size());
std::vector<MetaTensor*> out_metas_ptr;
for (size_t i = 0; i < outs.size(); ++i) {
out_metas.push_back(outs[i]);
out_metas_ptr.push_back(&out_metas.back());
}
phi::SplitInferMeta(x, num_or_sections, axis_scalar, out_metas_ptr);
for (size_t i = 0; i < out_metas.size(); ++i) {
outs[i]->Resize(out_metas[i].dims());
}
}
std::vector<const DenseTensor*> shape_refer;
for (size_t j = 0; j < outs.size(); ++j) {
dev_ctx.template Alloc<T>(outs[j]);
shape_refer.emplace_back(outs[j]);
}
int axis = axis_scalar.to<int>();
// Sometimes direct copies will be faster; this may need deeper analysis.
if (axis == 0 && outs.size() < 10) {
paddle::operators::StridedMemcpyWithAxis0<T>(
dev_ctx, x, shape_refer, &outs);
} else {
phi::funcs::SplitFunctor<Context, T> functor;
functor(dev_ctx, x, shape_refer, axis, &outs);
}
}
} // namespace phi
#include "paddle/phi/kernels/impl/split_kernel_impl.h"
PD_REGISTER_KERNEL(split,
CPU,
......@@ -76,3 +31,17 @@ PD_REGISTER_KERNEL(split,
int8_t,
phi::dtype::float16,
phi::dtype::bfloat16) {}
PD_REGISTER_KERNEL(split_with_num,
CPU,
ALL_LAYOUT,
phi::SplitWithNumKernel,
float,
double,
int64_t,
int,
bool,
uint8_t,
int8_t,
phi::dtype::float16,
phi::dtype::bfloat16) {}
......@@ -14,53 +14,9 @@
#include "paddle/phi/kernels/split_kernel.h"
#include "paddle/fluid/operators/strided_memcpy.h"
#include "paddle/phi/common/float16.h"
#include "paddle/phi/core/kernel_registry.h"
#include "paddle/phi/kernels/funcs/concat_and_split_functor.h"
namespace phi {
template <typename T, typename Context>
void SplitKernel(const Context& dev_ctx,
const DenseTensor& x,
const IntArray& num_or_sections,
const Scalar& axis_scalar,
std::vector<DenseTensor*> outs) {
// need to infershape output
if (num_or_sections.FromTensor() || axis_scalar.FromTensor()) {
std::vector<MetaTensor> out_metas;
out_metas.reserve(outs.size());
std::vector<MetaTensor*> out_metas_ptr;
for (size_t i = 0; i < outs.size(); ++i) {
out_metas.push_back(outs[i]);
out_metas_ptr.push_back(&out_metas.back());
}
phi::SplitInferMeta(x, num_or_sections, axis_scalar, out_metas_ptr);
for (size_t i = 0; i < out_metas.size(); ++i) {
outs[i]->Resize(out_metas[i].dims());
}
}
std::vector<const DenseTensor*> shape_refer;
for (size_t j = 0; j < outs.size(); ++j) {
dev_ctx.template Alloc<T>(outs[j]);
shape_refer.emplace_back(outs[j]);
}
int axis = axis_scalar.to<int>();
// Sometimes direct copies will be faster; this may need deeper analysis.
if (axis == 0 && outs.size() < 10) {
paddle::operators::StridedMemcpyWithAxis0<T>(
dev_ctx, x, shape_refer, &outs);
} else {
phi::funcs::SplitFunctor<Context, T> functor;
functor(dev_ctx, x, shape_refer, axis, &outs);
}
}
} // namespace phi
#include "paddle/phi/kernels/impl/split_kernel_impl.h"
PD_REGISTER_KERNEL(split,
GPU,
......@@ -75,3 +31,17 @@ PD_REGISTER_KERNEL(split,
int8_t,
phi::dtype::float16,
phi::dtype::bfloat16) {}
PD_REGISTER_KERNEL(split_with_num,
GPU,
ALL_LAYOUT,
phi::SplitWithNumKernel,
float,
double,
int64_t,
int,
bool,
uint8_t,
int8_t,
phi::dtype::float16,
phi::dtype::bfloat16) {}
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include "paddle/phi/kernels/split_kernel.h"
#include "paddle/fluid/operators/strided_memcpy.h"
#include "paddle/phi/common/int_array.h"
#include "paddle/phi/common/scalar.h"
#include "paddle/phi/core/dense_tensor.h"
#include "paddle/phi/kernels/funcs/concat_and_split_functor.h"
namespace phi {
template <typename T, typename Context>
void SplitKernel(const Context& dev_ctx,
const DenseTensor& x,
const IntArray& sections,
const Scalar& axis_scalar,
std::vector<DenseTensor*> outs) {
std::vector<const DenseTensor*> shape_refer;
for (size_t j = 0; j < outs.size(); ++j) {
dev_ctx.template Alloc<T>(outs[j]);
shape_refer.emplace_back(outs[j]);
}
int axis = axis_scalar.to<int>();
// Sometimes direct copies will be faster; this may need deeper analysis.
if (axis == 0 && outs.size() < 10) {
paddle::operators::StridedMemcpyWithAxis0<T>(
dev_ctx, x, shape_refer, &outs);
} else {
phi::funcs::SplitFunctor<Context, T> functor;
functor(dev_ctx, x, shape_refer, axis, &outs);
}
}
template <typename T, typename Context>
void SplitWithNumKernel(const Context& dev_ctx,
const DenseTensor& x,
int num,
const Scalar& axis_scalar,
std::vector<DenseTensor*> outs) {
int axis_value = axis_scalar.to<int>();
auto input_axis_dim = x.dims().at(axis_value);
std::vector<int64_t> sections_vec;
for (int i = 0; i < num; ++i) {
sections_vec.push_back(input_axis_dim / num);
}
IntArray sections(sections_vec);
SplitKernel<T, Context>(dev_ctx, x, sections, axis_scalar, outs);
}
} // namespace phi
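`SplitWithNumKernel` above is a thin wrapper: it expands `num` into equal-sized sections and forwards to `SplitKernel`. A small sanity sketch of that equivalence from the Python side (illustrative only):

import numpy as np
import paddle

x = paddle.arange(40, dtype='float32').reshape([4, 10])
by_num = paddle.split(x, 2, axis=0)            # num = 2
by_sections = paddle.split(x, [2, 2], axis=0)  # sections = [dim / num] * num
for t_num, t_sec in zip(by_num, by_sections):
    np.testing.assert_array_equal(t_num.numpy(), t_sec.numpy())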
......@@ -18,42 +18,70 @@
#include "paddle/phi/common/scalar.h"
#include "paddle/phi/core/dense_tensor.h"
#include "paddle/phi/infermeta/unary.h"
#include "paddle/phi/kernels/empty_kernel.h"
namespace phi {
template <typename T, typename Context>
void SplitKernel(const Context& dev_ctx,
const DenseTensor& x,
const IntArray& num_or_sections,
const IntArray& sections,
const Scalar& axis,
std::vector<DenseTensor*> out);
template <typename T, typename Context>
void SplitWithNumKernel(const Context& dev_ctx,
const DenseTensor& x,
int num,
const Scalar& axis,
std::vector<DenseTensor*> out);
template <typename T, typename Context>
std::vector<DenseTensor> Split(const Context& dev_ctx,
const DenseTensor& x,
const IntArray& num_or_sections,
const IntArray& sections,
const Scalar& axis) {
size_t out_number;
if (num_or_sections.GetData().size() == 1) {
out_number = num_or_sections.GetData()[0];
} else {
out_number = num_or_sections.GetData().size();
out_number = sections.GetData().size();
std::vector<MetaTensor> out_meta;
std::vector<MetaTensor*> out_meta_ptr;
out_meta.reserve(out_number);
out_meta_ptr.reserve(out_number);
std::vector<DenseTensor> result(out_number);
for (size_t i = 0; i < out_number; ++i) {
out_meta.emplace_back(&result[i]);
out_meta_ptr.push_back(&out_meta.back());
}
SplitInferMeta(x, sections, axis, out_meta_ptr);
std::vector<DenseTensor*> outs;
outs.reserve(out_meta.size());
for (size_t i = 0; i < out_meta.size(); ++i) {
outs.push_back(&result[i]);
}
SplitKernel<T, Context>(dev_ctx, x, sections, axis, outs);
return result;
}
template <typename T, typename Context>
std::vector<DenseTensor> SplitWithNum(const Context& dev_ctx,
const DenseTensor& x,
int num,
const Scalar& axis) {
size_t out_number = num;
std::vector<MetaTensor> out_meta;
std::vector<MetaTensor*> out_meta_ptr;
out_meta.reserve(out_number);
out_meta_ptr.reserve(out_number);
std::vector<DenseTensor> result;
result.reserve(out_number);
std::vector<DenseTensor> result(out_number);
for (size_t i = 0; i < out_number; ++i) {
result.emplace_back(DenseTensor());
out_meta.emplace_back(&result.back());
out_meta.emplace_back(&result[i]);
out_meta_ptr.push_back(&out_meta.back());
}
SplitInferMeta(x, num_or_sections, axis, out_meta_ptr);
SplitWithNumInferMeta(x, num, axis, out_meta_ptr);
std::vector<DenseTensor*> outs;
outs.reserve(out_meta.size());
......@@ -61,7 +89,7 @@ std::vector<DenseTensor> Split(const Context& dev_ctx,
outs.push_back(&result[i]);
}
SplitKernel<T, Context>(dev_ctx, x, num_or_sections, axis, outs);
SplitWithNumKernel<T, Context>(dev_ctx, x, num, axis, outs);
return result;
}
......
......@@ -21,9 +21,10 @@ KernelSignature SplitOpArgumentMapping(const ArgumentMappingContext& ctx) {
// priority: AxisTensor > axis
if (paddle::any_cast<int>(ctx.Attr("num")) > 0) {
if (ctx.HasInput("AxisTensor")) {
return KernelSignature("split", {"X"}, {"num", "AxisTensor"}, {"Out"});
return KernelSignature(
"split_with_num", {"X"}, {"num", "AxisTensor"}, {"Out"});
} else {
return KernelSignature("split", {"X"}, {"num", "axis"}, {"Out"});
return KernelSignature("split_with_num", {"X"}, {"num", "axis"}, {"Out"});
}
}
......
......@@ -22,6 +22,7 @@ limitations under the License. */
#include "paddle/phi/core/dense_tensor.h"
#include "paddle/phi/core/kernel_registry.h"
#include "paddle/phi/kernels/split_kernel.h"
namespace phi {
namespace tests {
......@@ -40,14 +41,12 @@ TEST(DEV_API, split) {
dev_ctx.SetAllocator(paddle::memory::allocation::AllocatorFacade::Instance()
.GetAllocator(paddle::platform::CPUPlace())
.get());
auto* dense_x_data = dev_ctx.Alloc<float>(&dense_x);
for (size_t i = 0; i < 4; ++i) {
for (size_t j = 0; j < 10; ++j) {
dense_x_data[i * 10 + j] = (i * 10 + j) * 1.0;
}
}
// 2. test API
auto out = phi::Split<float>(dev_ctx, dense_x, {2, 2}, 0);
......@@ -76,5 +75,50 @@ TEST(DEV_API, split) {
}
}
TEST(DEV_API, split_with_num) {
// 1. create tensor
const auto alloc =
std::make_unique<paddle::experimental::DefaultAllocator>(phi::CPUPlace());
phi::DenseTensor dense_x(alloc.get(),
phi::DenseTensorMeta(phi::DataType::FLOAT32,
phi::make_ddim({4, 10}),
phi::DataLayout::NCHW));
phi::CPUContext dev_ctx;
dev_ctx.SetAllocator(paddle::memory::allocation::AllocatorFacade::Instance()
.GetAllocator(paddle::platform::CPUPlace())
.get());
auto* dense_x_data = dev_ctx.Alloc<float>(&dense_x);
for (size_t i = 0; i < 4; ++i) {
for (size_t j = 0; j < 10; ++j) {
dense_x_data[i * 10 + j] = (i * 10 + j) * 1.0;
}
}
// 2. test API
auto out = phi::SplitWithNum<float>(dev_ctx, dense_x, 2, 0);
// 3. check result
ASSERT_EQ(out.size(), static_cast<size_t>(2));
ASSERT_EQ(out[0].dims().size(), 2);
ASSERT_EQ(out[0].dims()[0], 2);
ASSERT_EQ(out[0].dims()[1], 10);
ASSERT_EQ(out[0].meta().dtype, phi::DataType::FLOAT32);
ASSERT_EQ(out[0].meta().layout, phi::DataLayout::NCHW);
ASSERT_EQ(out[1].dims().size(), 2);
ASSERT_EQ(out[1].dims()[0], 2);
ASSERT_EQ(out[1].dims()[1], 10);
ASSERT_EQ(out[1].meta().dtype, phi::DataType::FLOAT32);
ASSERT_EQ(out[1].meta().layout, phi::DataLayout::NCHW);
auto out_data_0 = out[0].data<float>();
auto out_data_1 = out[1].data<float>();
// check all 40 elements: the first 20 map to out[0], the last 20 to out[1]
for (size_t i = 0; i < 40; ++i) {
if (i < 20) {
ASSERT_NEAR(dense_x_data[i], out_data_0[i], 1e-6);
} else {
ASSERT_NEAR(dense_x_data[i], out_data_1[i - 20], 1e-6);
}
}
}
} // namespace tests
} // namespace phi
......@@ -5180,7 +5180,10 @@ def split(input, num_or_sections, dim=-1, name=None):
"The type of 'num_or_sections' in split must be int, list or tuple in imperative mode, but "
"received %s." % (type(num_or_sections)))
if in_dygraph_mode():
return _C_ops.split(input, [num], dim)
if isinstance(num_or_sections, int):
return _C_ops.split_with_num(input, num_or_sections, dim)
else:
return _C_ops.split(input, num_or_sections, dim)
elif _in_legacy_dygraph():
out = [_varbase_creator() for n in range(num)]
_legacy_C_ops.split(input, out, *attrs)
......
......@@ -421,6 +421,95 @@ class API_TestSplit4(unittest.TestCase):
np.testing.assert_allclose(ex_x1, r1, rtol=1e-05)
class API_TestSplit5(unittest.TestCase):
def test_out(self):
for use_cuda in ([False, True]
if core.is_compiled_with_cuda() else [False]):
place = paddle.CUDAPlace(0) if use_cuda else paddle.CPUPlace()
with fluid.program_guard(fluid.Program(), fluid.Program()):
input_1 = np.random.random([5, 4]).astype("int32")
# input is a variable whose shape is [5, 4]
input = paddle.to_tensor(input_1)
n = paddle.full([1], 5, dtype='int32')
out = paddle.split(input, [n])
exe = paddle.static.Executor(place=place)
re = exe.run(fetch_list=[out])
re = re[0]
ex_out = np.split(input_1, [5])
ex_out = ex_out[0]
np.testing.assert_allclose(ex_out, re, rtol=1e-05)
class API_TestDygraphFluidSplit(unittest.TestCase):
def test_out1(self):
with fluid.dygraph.guard():
input_1 = np.random.random([4, 6, 6]).astype("int32")
# input is a variable whose shape is [4, 6, 6]
input = paddle.to_tensor(input_1)
x0, x1, x2 = fluid.layers.split(input, num_or_sections=3, dim=1)
x0_out = x0.numpy()
x1_out = x1.numpy()
x2_out = x2.numpy()
ex_x0, ex_x1, ex_x2 = np.split(input_1, 3, axis=1)
with _test_eager_guard():
# input is a variable whose shape is [4, 6, 6]
input = paddle.to_tensor(input_1)
input.stop_gradient = False
x0, x1, x2 = fluid.layers.split(input, num_or_sections=3, dim=1)
eager_x0_out = x0.numpy()
eager_x1_out = x1.numpy()
eager_x2_out = x2.numpy()
loss = x0.sum()
loss.backward()
manul_grad = np.zeros_like(input_1)
manul_grad[:, :2, :] = 1
np.testing.assert_allclose(input.gradient(),
manul_grad,
rtol=1e-05)
np.testing.assert_allclose(ex_x0, eager_x0_out, rtol=1e-05)
np.testing.assert_allclose(ex_x1, eager_x1_out, rtol=1e-05)
np.testing.assert_allclose(ex_x2, eager_x2_out, rtol=1e-05)
np.testing.assert_allclose(ex_x0, x0_out, rtol=1e-05)
np.testing.assert_allclose(ex_x1, x1_out, rtol=1e-05)
np.testing.assert_allclose(ex_x2, x2_out, rtol=1e-05)
def test_out2(self):
with fluid.dygraph.guard():
input_1 = np.random.random([4, 6, 6]).astype("int32")
# input is a variable whose shape is [4, 6, 6]
input = paddle.to_tensor(input_1)
x0, x1, x2 = fluid.layers.split(input, [2, 2, 2], dim=1)
x0_out = x0.numpy()
x1_out = x1.numpy()
x2_out = x2.numpy()
ex_x0, ex_x1, ex_x2 = np.split(input_1, 3, axis=1)
with _test_eager_guard():
# input is a variable whose shape is [4, 6, 6]
input = paddle.to_tensor(input_1)
input.stop_gradient = False
x0, x1, x2 = fluid.layers.split(input, [2, 2, 2], dim=1)
eager_x0_out = x0.numpy()
eager_x1_out = x1.numpy()
eager_x2_out = x2.numpy()
loss = x0.sum()
loss.backward()
manul_grad = np.zeros_like(input_1)
manul_grad[:, :2, :] = 1
np.testing.assert_allclose(input.gradient(),
manul_grad,
rtol=1e-05)
np.testing.assert_allclose(ex_x0, eager_x0_out, rtol=1e-05)
np.testing.assert_allclose(ex_x1, eager_x1_out, rtol=1e-05)
np.testing.assert_allclose(ex_x2, eager_x2_out, rtol=1e-05)
np.testing.assert_allclose(ex_x0, x0_out, rtol=1e-05)
np.testing.assert_allclose(ex_x1, x1_out, rtol=1e-05)
np.testing.assert_allclose(ex_x2, x2_out, rtol=1e-05)
class API_TestDygraphSplit(unittest.TestCase):
def test_out1(self):
......@@ -471,6 +560,25 @@ class API_TestDygraphSplit(unittest.TestCase):
np.testing.assert_allclose(ex_x1, x1_out, rtol=1e-05)
np.testing.assert_allclose(ex_x2, x2_out, rtol=1e-05)
def test_out3(self):
with fluid.dygraph.guard():
np.random.seed(2021)
input_1 = np.random.random([4, 6, 6]).astype("int32")
# input is a variable whose shape is [4, 6, 6]
input = paddle.to_tensor(input_1)
out_dy = paddle.split(input, [6], axis=1)
out_dy = out_dy[0]
out_dy_np = out_dy.numpy()
ex_out = np.split(input_1, [6], axis=1)
ex_out = ex_out[0]
with _test_eager_guard():
input = paddle.to_tensor(input_1)
out_eager = paddle.split(input, [6], axis=1)
out_eager = out_eager[0]
out_eager_np = out_eager.numpy()
np.testing.assert_allclose(ex_out, out_eager_np, rtol=1e-05)
np.testing.assert_allclose(ex_out, out_dy_np, rtol=1e-05)
def test_out_tensor_input(self):
with fluid.dygraph.guard():
input_1 = np.random.random([4, 6, 6]).astype("int32")
......
......@@ -1839,8 +1839,10 @@ def split(x, num_or_sections, axis=0, name=None):
"The type of 'num_or_sections' in split must be int, list or tuple in imperative mode, but "
"received %s." % (type(num_or_sections)))
if in_dygraph_mode():
return _C_ops.split(input, [num_or_sections] if isinstance(
num_or_sections, int) else num_or_sections, dim)
if isinstance(num_or_sections, int):
return _C_ops.split_with_num(input, num_or_sections, dim)
else:
return _C_ops.split(input, num_or_sections, dim)
elif _in_legacy_dygraph():
out = [_varbase_creator() for n in range(num)]
_legacy_C_ops.split(input, out, *attrs)
......