Unverified commit 7613129e authored by YuanRisheng, committed by GitHub

change infermeta and remove MakePtenDenseTensor in reshape (#39186)

Parent 09104d02
......@@ -557,7 +557,7 @@ class ReduceOp : public framework::OperatorWithKernel {
if (ctx.InputVar("X")->IsType<framework::LoDTensor>()) {
if (!reduce_all) {
return framework::KernelSignature(
"sum", {"X"}, {"dim", "keep_dim", "out_dtype"}, {"Out"});
"sum", {"X"}, {"dim", "out_dtype", "keep_dim"}, {"Out"});
}
return framework::KernelSignature(
"sum_raw", {"X"}, {"dim", "keep_dim", "reduce_all", "out_dtype"},
......
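This reorder is not cosmetic: the attribute list in a KernelSignature is consumed positionally when binding to the pten kernel, so it must follow the kernel's new parameter order. A minimal sketch of the correspondence, using the sum signature this commit introduces further down (the mapping comments are editorial):

// Sketch: attributes map positionally onto the pten kernel's parameters,
// so "out_dtype" must now precede "keep_dim" in the KernelSignature:
//   "sum", {"X"}, {"dim", "out_dtype", "keep_dim"}, {"Out"}
template <typename T, typename Context>
void SumKernel(const Context& dev_ctx,            // injected device context
               const DenseTensor& x,              // input  "X"
               const std::vector<int64_t>& dims,  // attr   "dim"
               DataType out_dtype,                // attr   "out_dtype"
               bool keep_dim,                     // attr   "keep_dim"
               DenseTensor* out);                 // output "Out"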
......@@ -38,33 +38,6 @@ namespace operators {
using Tensor = framework::Tensor;
-inline std::vector<int> get_new_shape(
-    const std::vector<const Tensor *> &list_new_shape_tensor) {
-  // get tensor from
-  std::vector<int> vec_new_shape;
-  for (size_t i = 0; i < list_new_shape_tensor.size(); ++i) {
-    auto tensor = list_new_shape_tensor[i];
-    PADDLE_ENFORCE_EQ(
-        tensor->dims(), framework::make_ddim({1}),
-        platform::errors::InvalidArgument(
-            "If the element type of 'shape' in ReshapeOp is Tensor, "
-            "the element's shape must be [1]. But received the element's shape "
-            "is [%s]",
-            tensor->dims()));
-    if (platform::is_gpu_place(tensor->place()) ||
-        platform::is_xpu_place(tensor->place())) {
-      framework::Tensor temp;
-      paddle::framework::TensorCopySync(*tensor, platform::CPUPlace(), &temp);
-      vec_new_shape.push_back(static_cast<int32_t>(*temp.data<int32_t>()));
-    } else {
-      vec_new_shape.push_back(static_cast<int32_t>(*tensor->data<int32_t>()));
-    }
-  }
-  return vec_new_shape;
-}
class ReshapeOp : public framework::OperatorWithKernel {
public:
ReshapeOp(const std::string &type, const framework::VariableNameMap &inputs,
......@@ -370,30 +343,6 @@ class ReshapeKernel {
void operator()(const framework::ExecutionContext &ctx) const {
auto *out = ctx.Output<framework::LoDTensor>("Out");
auto *in = ctx.Input<framework::LoDTensor>("X");
-    // framework::DDim out_dims = out->dims();
-    auto pt_x = paddle::experimental::MakePtenDenseTensor(*in);
-    // we can't MakePtenDenseTensor by out, because the out of reshape may have
-    // multiple states, some can MakePtenDenseTensor but other's cannot:
-    //    1. out tensor is not initialized
-    //    2. out tensor is input (complete inplace)
-    //    3. out tensor is view of input
-    // We can't MakePtenDenseTensor for case 2, so we solve this case by
-    // creating a temporary tensor here:
-    pten::DenseTensorMeta meta{pten::TransToPtenDataType(in->type()),
-                               in->dims(), in->layout()};
-    auto pt_out_tmp = std::make_shared<pten::DenseTensor>(
-        pten::make_intrusive<paddle::experimental::SharedStorage>(
-            ctx.GetPlace()),
-        std::move(meta));
-    pten::DenseTensor *pt_out = nullptr;
-    if (in != nullptr && out != nullptr && in->Holder() != nullptr &&
-        out->Holder() != nullptr &&
-        in->Holder()->ptr() == out->Holder()->ptr()) {
-      pt_out = pt_x.get();
-    } else {
-      pt_out = pt_out_tmp.get();
-    }
auto list_new_shape_tensor =
ctx.MultiInput<framework::Tensor>("ShapeTensor");
......@@ -410,55 +359,46 @@ class ReshapeKernel {
framework::Tensor temp;
paddle::framework::TensorCopySync(*tensor, platform::CPUPlace(),
&temp);
-          pt_vec_shape.push_back(
-              std::move(*(paddle::experimental::MakePtenDenseTensor(temp))));
+          pt_vec_shape.push_back(std::move(temp));
} else {
-          pt_vec_shape.push_back(
-              std::move(*(paddle::experimental::MakePtenDenseTensor(*tensor))));
+          pt_vec_shape.push_back(*tensor);
}
}
pt_scalar_shape = pten::ScalarArray(pt_vec_shape);
} else if (shape_tensor) {
-      std::unique_ptr<pten::DenseTensor> pt_shape;
+      pten::DenseTensor pt_shape;
if (platform::is_gpu_place(shape_tensor->place()) ||
platform::is_xpu_place(shape_tensor->place())) {
framework::Tensor temp;
paddle::framework::TensorCopySync(*shape_tensor, platform::CPUPlace(),
&temp);
-        pt_shape = paddle::experimental::MakePtenDenseTensor(temp);
+        pt_shape = std::move(temp);
} else {
-        pt_shape = paddle::experimental::MakePtenDenseTensor(*shape_tensor);
+        pt_shape = *shape_tensor;
}
-      pt_scalar_shape = pten::ScalarArray(*pt_shape.get());
+      pt_scalar_shape = pten::ScalarArray(pt_shape);
} else {
auto &shape_attr = ctx.Attr<std::vector<int>>("shape");
pt_scalar_shape = pten::ScalarArray(shape_attr);
}
if (platform::is_cpu_place(ctx.GetPlace())) {
auto &dev_ctx = ctx.device_context<platform::CPUDeviceContext>();
-      pten::ReshapeKernel(static_cast<const pten::CPUContext &>(dev_ctx),
-                          *pt_x.get(), pt_scalar_shape, pt_out);
+      pten::ReshapeKernel(static_cast<const pten::CPUContext &>(dev_ctx), *in,
+                          pt_scalar_shape, out);
}
#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
if (platform::is_gpu_place(ctx.GetPlace())) {
auto &dev_ctx = ctx.device_context<platform::CUDADeviceContext>();
-      pten::ReshapeKernel(dev_ctx, *pt_x.get(), pt_scalar_shape, pt_out);
+      pten::ReshapeKernel(dev_ctx, *in, pt_scalar_shape, out);
}
#endif
#ifdef PADDLE_WITH_XPU
if (platform::is_xpu_place(ctx.GetPlace())) {
auto &dev_ctx = ctx.device_context<platform::XPUDeviceContext>();
-      pten::ReshapeKernel(static_cast<const pten::XPUContext &>(dev_ctx),
-                          *pt_x.get(), pt_scalar_shape, pt_out);
+      pten::ReshapeKernel(static_cast<const pten::XPUContext &>(dev_ctx), *in,
+                          pt_scalar_shape, out);
}
#endif
-    // non-inplace need move all result from pt_out to out, inplace need set
-    // result dims.
-    if (in != out) {
-      paddle::experimental::SharesStorage(pt_out, static_cast<Tensor *>(out));
-    } else {
-      out->Resize(pt_out->dims());
-    }
}
};
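With the MakePtenDenseTensor round-trip gone, the kernel gathers the target shape from one of three sources and hands the framework tensors to pten::ReshapeKernel directly; the inplace bookkeeping (SharesStorage / Resize on pt_out) moves into the pten kernel itself, visible in the pten/kernels hunk further down. A condensed sketch of the three shape paths, assuming framework::Tensor is now accepted wherever a pten::DenseTensor is expected:

// Condensed sketch: how the target shape reaches pten::ReshapeKernel.
pten::ScalarArray pt_scalar_shape;
if (!list_new_shape_tensor.empty()) {      // 1. "ShapeTensor" list input
  pt_scalar_shape = pten::ScalarArray(pt_vec_shape);  // one value per tensor
} else if (shape_tensor) {                 // 2. single "Shape" tensor input
  pt_scalar_shape = pten::ScalarArray(pt_shape);      // values read on host
} else {                                   // 3. plain "shape" attribute
  pt_scalar_shape = pten::ScalarArray(ctx.Attr<std::vector<int>>("shape"));
}
pten::ReshapeKernel(dev_ctx, *in, pt_scalar_shape, out);  // no pt_x / pt_out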
......@@ -469,25 +409,22 @@ class ReshapeGradKernel {
auto *d_x = ctx.Output<framework::Tensor>(framework::GradVarName("X"));
d_x->mutable_data(ctx.GetPlace(), d_out->type());
-    auto pt_d_x = paddle::experimental::MakePtenDenseTensor(*d_x);
-    auto pt_d_out = paddle::experimental::MakePtenDenseTensor(*d_out);
if (platform::is_cpu_place(ctx.GetPlace())) {
auto &dev_ctx = ctx.device_context<platform::CPUDeviceContext>();
pten::ReshapeGradKernel(static_cast<const pten::CPUContext &>(dev_ctx),
-                              *pt_d_out.get(), pt_d_x.get());
+                              *d_out, d_x);
}
#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
if (platform::is_gpu_place(ctx.GetPlace())) {
auto &dev_ctx = ctx.device_context<platform::CUDADeviceContext>();
-      pten::ReshapeGradKernel(dev_ctx, *pt_d_out.get(), pt_d_x.get());
+      pten::ReshapeGradKernel(dev_ctx, *d_out, d_x);
}
#endif
#ifdef PADDLE_WITH_XPU
if (platform::is_xpu_place(ctx.GetPlace())) {
auto &dev_ctx = ctx.device_context<platform::XPUDeviceContext>();
pten::ReshapeGradKernel(static_cast<const pten::XPUContext &>(dev_ctx),
-                              *pt_d_out.get(), pt_d_x.get());
+                              *d_out, d_x);
}
#endif
}
......@@ -500,27 +437,22 @@ class ReshapeDoubleGradKernel {
auto *dd_out = ctx.Output<framework::Tensor>("DDOut");
dd_out->mutable_data(ctx.GetPlace(), dd_x->type());
-    auto pt_dd_x = paddle::experimental::MakePtenDenseTensor(*dd_x);
-    auto pt_dd_out = paddle::experimental::MakePtenDenseTensor(*dd_out);
if (platform::is_cpu_place(ctx.GetPlace())) {
auto &dev_ctx = ctx.device_context<platform::CPUDeviceContext>();
pten::ReshapeDoubleGradKernel(
-          static_cast<const pten::CPUContext &>(dev_ctx), *pt_dd_x.get(),
-          pt_dd_out.get());
+          static_cast<const pten::CPUContext &>(dev_ctx), *dd_x, dd_out);
}
#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
if (platform::is_gpu_place(ctx.GetPlace())) {
auto &dev_ctx = ctx.device_context<platform::CUDADeviceContext>();
-      pten::ReshapeDoubleGradKernel(dev_ctx, *pt_dd_x.get(), pt_dd_out.get());
+      pten::ReshapeDoubleGradKernel(dev_ctx, *dd_x, dd_out);
}
#endif
#ifdef PADDLE_WITH_XPU
if (platform::is_xpu_place(ctx.GetPlace())) {
auto &dev_ctx = ctx.device_context<platform::XPUDeviceContext>();
pten::ReshapeDoubleGradKernel(
-          static_cast<const pten::XPUContext &>(dev_ctx), *pt_dd_x.get(),
-          pt_dd_out.get());
+          static_cast<const pten::XPUContext &>(dev_ctx), *dd_x, dd_out);
}
#endif
}
......
......@@ -102,8 +102,8 @@ using scale_kernel = void (*)(const DeviceContext&,
using sum_kernel = void (*)(const DeviceContext&,
const DenseTensor&,
const std::vector<int64_t>&,
-                            bool,
                             DataType,
+                            bool,
DenseTensor*);
using subtract_kernel = void (*)(const DeviceContext&,
......
......@@ -126,6 +126,19 @@ void DenseTensor::set_meta(DenseTensorMeta&& meta) {
meta_ = std::move(meta);
}
+void DenseTensor::set_meta(const DenseTensorMeta& meta) {
+  PADDLE_ENFORCE(
+      meta.valid(),
+      paddle::platform::errors::InvalidArgument(
+          "Input meta is invalid, please check the meta attribute."));
+  meta_.dims = meta.dims;
+  meta_.dtype = meta.dtype;
+  meta_.is_scalar = meta.is_scalar;
+  meta_.layout = meta.layout;
+  meta_.lod = meta.lod;
+  meta_.offset = meta.offset;
+}
/* @jim19930609: This interface will be further modified util we finalized the
design for Allocator - Allocation
For now, we have to temporarily accommodate two independent use cases:
......
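Unlike the rvalue overload above it, the new const-ref overload validates the incoming meta and then copies it field by field (dims, dtype, is_scalar, layout, lod, offset). A hypothetical usage sketch; the make_ddim helper and the enum spellings are assumed from the surrounding codebase:

pten::DenseTensorMeta meta(pten::DataType::FLOAT32,
                           paddle::framework::make_ddim({2, 3}),
                           pten::DataLayout::NCHW);
dense_out->set_meta(meta);  // copies every field; PADDLE_ENFORCE rejects
                            // a meta for which meta.valid() is false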
......@@ -131,6 +131,8 @@ class DenseTensor : public TensorBase,
/// \param meta The meta information of the tensor.
void set_meta(DenseTensorMeta&& meta);
+  void set_meta(const DenseTensorMeta& meta);
/// \brief Test whether the metadata is valid.
/// \return Whether the metadata is valid.
bool valid() const noexcept override { return meta_.valid(); }
......
......@@ -131,8 +131,13 @@ DenseTensorMeta MatmulInferMeta(const DenseTensorMeta& x_meta,
}
DenseTensorMeta ElementwiseInferMeta(const DenseTensorMeta& x_meta,
-                                     const DenseTensorMeta& y_meta,
-                                     int axis) {
+                                     const DenseTensorMeta& y_meta) {
+  return ElementwiseRawInferMeta(x_meta, y_meta, -1);
+}
+
+DenseTensorMeta ElementwiseRawInferMeta(const DenseTensorMeta& x_meta,
+                                        const DenseTensorMeta& y_meta,
+                                        int axis) {
DenseTensorMeta return_meta(x_meta.dtype, x_meta.dims, x_meta.layout);
if (x_meta.dims != y_meta.dims) {
auto x_dims = x_meta.dims;
......
......@@ -42,6 +42,10 @@ DenseTensorMeta MatmulInferMeta(const DenseTensorMeta& x_meta,
bool trans_y);
DenseTensorMeta ElementwiseInferMeta(const DenseTensorMeta& x_meta,
-                                     const DenseTensorMeta& y_meta,
-                                     int axis);
+                                     const DenseTensorMeta& y_meta);
+
+DenseTensorMeta ElementwiseRawInferMeta(const DenseTensorMeta& x_meta,
+                                        const DenseTensorMeta& y_meta,
+                                        int axis);
} // namespace pten
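The split keeps a two-argument ElementwiseInferMeta whose parameter list is exactly what api.yaml can express as param : [x, y], while callers that need explicit broadcast control move to the new Raw variant. A small call-site sketch (the tensor objects are assumed):

auto m1 = pten::ElementwiseInferMeta(x.meta(), y.meta());  // axis fixed at -1
auto m2 = pten::ElementwiseRawInferMeta(x.meta(), y.meta(), /*axis=*/0);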
......@@ -232,6 +232,16 @@ DenseTensorMeta ReshapeInferMeta(const DenseTensorMeta& x_meta,
return InferMetaFromVecValue(x_meta, shape.GetData());
}
+/* Why not use ReduceInferMeta directly?
+   Because we need to make InferMetaFunction's args follow the design of
+   api.yaml */
+DenseTensorMeta SumInferMeta(const DenseTensorMeta& x_meta,
+                             const std::vector<int64_t>& axis,
+                             DataType dtype,
+                             bool keep_dim) {
+  return ReduceInferMeta(x_meta, axis, keep_dim, dtype);
+}
DenseTensorMeta ReduceInferMeta(const DenseTensorMeta& x_meta,
const std::vector<int64_t>& axis,
bool keep_dim,
......
......@@ -58,4 +58,9 @@ DenseTensorMeta ReduceInferMeta(const DenseTensorMeta& x_meta,
const std::vector<int64_t>& axis,
bool keep_dim,
DataType dtype = DataType::UNDEFINED);
+DenseTensorMeta SumInferMeta(const DenseTensorMeta& x_meta,
+                             const std::vector<int64_t>& axis,
+                             DataType dtype,
+                             bool keep_dim);
} // namespace pten
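SumInferMeta computes nothing new; it only re-exposes ReduceInferMeta with the (axis, dtype, keep_dim) parameter order that the generated sum API passes positionally. The two calls in this sketch should therefore produce identical metas (the tensor x and axis vector are assumed):

auto a = pten::SumInferMeta(x.meta(), axis, DataType::FLOAT64, /*keep_dim=*/false);
auto b = pten::ReduceInferMeta(x.meta(), axis, /*keep_dim=*/false, DataType::FLOAT64);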
......@@ -33,8 +33,8 @@ template <typename T, typename Context>
void SumKernel(const Context& dev_ctx,
const DenseTensor& x,
const std::vector<int64_t>& dims,
-               bool keep_dim,
                DataType out_dtype,
+               bool keep_dim,
DenseTensor* out) {
bool reduce_all = false;
SumRawKernel<T>(dev_ctx, x, dims, keep_dim, reduce_all, out_dtype, out);
......
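Note that only the public sum kernel changes order here; sum_raw keeps (dims, keep_dim, reduce_all, out_dtype), as both the forwarding call above and the "sum_raw" KernelSignature at the top of this diff show. Side by side (sketch, names from this diff):

SumKernel<T>(dev_ctx, x, dims, out_dtype, keep_dim, out);                 // new order
SumRawKernel<T>(dev_ctx, x, dims, keep_dim, reduce_all, out_dtype, out);  // unchanged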
......@@ -50,8 +50,8 @@ template <typename T, typename Context>
void SumKernel(const Context& dev_ctx,
const DenseTensor& x,
const std::vector<int64_t>& dims,
-               bool keep_dim,
                DataType out_dtype,
+               bool keep_dim,
DenseTensor* out);
template <typename T, typename Context>
......@@ -110,7 +110,7 @@ template <typename T, typename Context>
DenseTensor Add(const Context& dev_ctx,
const DenseTensor& x,
const DenseTensor& y) {
-  auto out_meta = ElementwiseInferMeta(x.meta(), y.meta(), -1);
+  auto out_meta = ElementwiseRawInferMeta(x.meta(), y.meta(), -1);
auto dense_out = pten::Empty<T, Context>(dev_ctx, std::move(out_meta));
AddKernel<T, Context>(dev_ctx, x, y, &dense_out);
return dense_out;
......@@ -120,7 +120,7 @@ template <typename T, typename Context>
DenseTensor Subtract(const Context& dev_ctx,
const DenseTensor& x,
const DenseTensor& y) {
-  auto out_meta = ElementwiseInferMeta(x.meta(), y.meta(), -1);
+  auto out_meta = ElementwiseRawInferMeta(x.meta(), y.meta(), -1);
auto dense_out = pten::Empty<T, Context>(dev_ctx, std::move(out_meta));
SubtractKernel<T, Context>(dev_ctx, x, y, &dense_out);
return dense_out;
......@@ -130,7 +130,7 @@ template <typename T, typename Context>
DenseTensor Divide(const Context& dev_ctx,
const DenseTensor& x,
const DenseTensor& y) {
-  auto out_meta = ElementwiseInferMeta(x.meta(), y.meta(), -1);
+  auto out_meta = ElementwiseRawInferMeta(x.meta(), y.meta(), -1);
auto dense_out = pten::Empty<T, Context>(dev_ctx, std::move(out_meta));
DivideKernel<T, Context>(dev_ctx, x, y, &dense_out);
return dense_out;
......@@ -140,7 +140,7 @@ template <typename T, typename Context>
DenseTensor Multiply(const Context& dev_ctx,
const DenseTensor& x,
const DenseTensor& y) {
-  auto out_meta = ElementwiseInferMeta(x.meta(), y.meta(), -1);
+  auto out_meta = ElementwiseRawInferMeta(x.meta(), y.meta(), -1);
auto dense_out = pten::Empty<T, Context>(dev_ctx, std::move(out_meta));
MultiplyKernel<T, Context>(dev_ctx, x, y, &dense_out);
return dense_out;
......@@ -163,10 +163,10 @@ DenseTensor Sum(const Context& dev_ctx,
const std::vector<int64_t>& axis,
DataType dtype,
bool keep_dim) {
-  auto out_meta = ReduceInferMeta(x.meta(), axis, keep_dim, dtype);
+  auto out_meta = SumInferMeta(x.meta(), axis, dtype, keep_dim);
auto dense_out = pten::Empty<T, Context>(dev_ctx, std::move(out_meta));
-  SumKernel<T, Context>(dev_ctx, x, axis, keep_dim, dtype, &dense_out);
+  SumKernel<T, Context>(dev_ctx, x, axis, dtype, keep_dim, &dense_out);
return dense_out;
}
......
......@@ -31,9 +31,8 @@ void ReshapeKernel(const Context& dev_ctx,
out->ResizeAndAllocate(out_meta.dims);
return;
}
-  out->Resize(x.dims());
-  out->mutable_data(x.place());
+  out->set_meta(out_meta);
+  out->mutable_data(dev_ctx.GetPlace());
pten::Copy(dev_ctx, x, false, out);
out->Resize(out_meta.dims);
out->ResetLoD(x.lod());
......
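The new non-aliased path sets the full meta up front and allocates on the kernel's own place instead of the input's. An annotated sketch of the sequence; the comments, including the assumption that Copy aligns the destination's dims with the source before Resize re-applies the inferred ones, are editorial:

out->set_meta(out_meta);                // full meta, not just dims
out->mutable_data(dev_ctx.GetPlace()); // allocate on the kernel's place
pten::Copy(dev_ctx, x, false, out);    // blocking = false; dims follow x
out->Resize(out_meta.dims);            // restore the inferred, reshaped dims
out->ResetLoD(x.lod());                // reshape preserves the input's LoD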
......@@ -3,7 +3,7 @@
output : Tensor
infer_meta :
func : ElementwiseInferMeta
-    param : [x, y, -1]
+    param : [x, y]
kernel :
func : add
......@@ -40,7 +40,7 @@
output : Tensor
infer_meta :
func : ElementwiseInferMeta
-    param : [x, y, -1]
+    param : [x, y]
kernel :
func : divide
......@@ -135,7 +135,7 @@
output : Tensor
infer_meta :
func : ElementwiseInferMeta
-    param : [x, y, -1]
+    param : [x, y]
kernel :
func : multiply
......@@ -166,19 +166,19 @@
output : Tensor
infer_meta :
func : ElementwiseInferMeta
-    param : [x, y, -1]
+    param : [x, y]
kernel :
func : subtract
- api : sum
args : (const Tensor& x, const std::vector<int64_t>& axis={}, DataType dtype=DataType::UNDEFINED, bool keep_dim=false)
output : Tensor
-  infer_meta :
-    func : ReduceInferMeta
-    param: [x, axis, keep_dim, dtype]
-  kernel :
+  infer_meta :
+    func : SumInferMeta
+    param: [x, axis, dtype, keep_dim]
+  kernel :
func : sum
-    param : [x, axis, keep_dim, dtype]
+    param : [x, axis, dtype, keep_dim]
data_type : x
- api : zeros_like
......