Unverified commit 7613129e, authored by YuanRisheng, committed by GitHub

change infermeta and remove MakePtenDenseTensor in reshape (#39186)

Parent 09104d02
@@ -557,7 +557,7 @@ class ReduceOp : public framework::OperatorWithKernel {
     if (ctx.InputVar("X")->IsType<framework::LoDTensor>()) {
       if (!reduce_all) {
         return framework::KernelSignature(
-            "sum", {"X"}, {"dim", "keep_dim", "out_dtype"}, {"Out"});
+            "sum", {"X"}, {"dim", "out_dtype", "keep_dim"}, {"Out"});
       }
       return framework::KernelSignature(
           "sum_raw", {"X"}, {"dim", "keep_dim", "reduce_all", "out_dtype"},
......
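Why this reorder matters: attributes listed in a KernelSignature are bound to the pten kernel's parameters by position, not by name, so the list must track the new (dims, out_dtype, keep_dim) parameter order of SumKernel further down in this commit. A minimal standalone sketch of that invariant, using a simplified stand-in for framework::KernelSignature:

#include <cassert>
#include <string>
#include <vector>

// Simplified stand-in: attribute names are matched to kernel parameters by
// position, not by name.
struct KernelSignature {
  std::string name;
  std::vector<std::string> inputs;
  std::vector<std::string> attrs;
  std::vector<std::string> outputs;
};

int main() {
  // After this commit SumKernel takes (dims, out_dtype, keep_dim), so the
  // signature must list the attributes in exactly that order.
  KernelSignature sum_sig{"sum", {"X"}, {"dim", "out_dtype", "keep_dim"}, {"Out"}};
  assert(sum_sig.attrs[1] == "out_dtype");  // dtype now precedes keep_dim
  return 0;
}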
@@ -38,33 +38,6 @@ namespace operators {
 using Tensor = framework::Tensor;
-inline std::vector<int> get_new_shape(
-    const std::vector<const Tensor *> &list_new_shape_tensor) {
-  // get tensor from
-  std::vector<int> vec_new_shape;
-  for (size_t i = 0; i < list_new_shape_tensor.size(); ++i) {
-    auto tensor = list_new_shape_tensor[i];
-    PADDLE_ENFORCE_EQ(
-        tensor->dims(), framework::make_ddim({1}),
-        platform::errors::InvalidArgument(
-            "If the element type of 'shape' in ReshapeOp is Tensor, "
-            "the element's shape must be [1]. But received the element's shape "
-            "is [%s]",
-            tensor->dims()));
-    if (platform::is_gpu_place(tensor->place()) ||
-        platform::is_xpu_place(tensor->place())) {
-      framework::Tensor temp;
-      paddle::framework::TensorCopySync(*tensor, platform::CPUPlace(), &temp);
-      vec_new_shape.push_back(static_cast<int32_t>(*temp.data<int32_t>()));
-    } else {
-      vec_new_shape.push_back(static_cast<int32_t>(*tensor->data<int32_t>()));
-    }
-  }
-  return vec_new_shape;
-}
-
 class ReshapeOp : public framework::OperatorWithKernel {
  public:
  ReshapeOp(const std::string &type, const framework::VariableNameMap &inputs,
@@ -370,30 +343,6 @@ class ReshapeKernel {
   void operator()(const framework::ExecutionContext &ctx) const {
     auto *out = ctx.Output<framework::LoDTensor>("Out");
     auto *in = ctx.Input<framework::LoDTensor>("X");
-    // framework::DDim out_dims = out->dims();
-
-    auto pt_x = paddle::experimental::MakePtenDenseTensor(*in);
-
-    // we can't MakePtenDenseTensor by out, because the out of reshape may have
-    // multiple states, some can MakePtenDenseTensor but other's cannot:
-    //    1. out tensor is not initialized
-    //    2. out tensor is input (complete inplace)
-    //    3. out tensor is view of input
-    // We can't MakePtenDenseTensor for case 2, so we solve this case by
-    // creating a temporary tensor here:
-    pten::DenseTensorMeta meta{pten::TransToPtenDataType(in->type()),
-                               in->dims(), in->layout()};
-    auto pt_out_tmp = std::make_shared<pten::DenseTensor>(
-        pten::make_intrusive<paddle::experimental::SharedStorage>(
-            ctx.GetPlace()),
-        std::move(meta));
-    pten::DenseTensor *pt_out = nullptr;
-    if (in != nullptr && out != nullptr && in->Holder() != nullptr &&
-        out->Holder() != nullptr &&
-        in->Holder()->ptr() == out->Holder()->ptr()) {
-      pt_out = pt_x.get();
-    } else {
-      pt_out = pt_out_tmp.get();
-    }
-
     auto list_new_shape_tensor =
         ctx.MultiInput<framework::Tensor>("ShapeTensor");
@@ -410,55 +359,46 @@ class ReshapeKernel {
           framework::Tensor temp;
           paddle::framework::TensorCopySync(*tensor, platform::CPUPlace(),
                                             &temp);
-          pt_vec_shape.push_back(
-              std::move(*(paddle::experimental::MakePtenDenseTensor(temp))));
+          pt_vec_shape.push_back(std::move(temp));
         } else {
-          pt_vec_shape.push_back(
-              std::move(*(paddle::experimental::MakePtenDenseTensor(*tensor))));
+          pt_vec_shape.push_back(*tensor);
         }
       }
       pt_scalar_shape = pten::ScalarArray(pt_vec_shape);
     } else if (shape_tensor) {
-      std::unique_ptr<pten::DenseTensor> pt_shape;
+      pten::DenseTensor pt_shape;
       if (platform::is_gpu_place(shape_tensor->place()) ||
           platform::is_xpu_place(shape_tensor->place())) {
         framework::Tensor temp;
         paddle::framework::TensorCopySync(*shape_tensor, platform::CPUPlace(),
                                           &temp);
-        pt_shape = paddle::experimental::MakePtenDenseTensor(temp);
+        pt_shape = std::move(temp);
       } else {
-        pt_shape = paddle::experimental::MakePtenDenseTensor(*shape_tensor);
+        pt_shape = *shape_tensor;
       }
-      pt_scalar_shape = pten::ScalarArray(*pt_shape.get());
+      pt_scalar_shape = pten::ScalarArray(pt_shape);
     } else {
       auto &shape_attr = ctx.Attr<std::vector<int>>("shape");
       pt_scalar_shape = pten::ScalarArray(shape_attr);
     }
     if (platform::is_cpu_place(ctx.GetPlace())) {
       auto &dev_ctx = ctx.device_context<platform::CPUDeviceContext>();
-      pten::ReshapeKernel(static_cast<const pten::CPUContext &>(dev_ctx),
-                          *pt_x.get(), pt_scalar_shape, pt_out);
+      pten::ReshapeKernel(static_cast<const pten::CPUContext &>(dev_ctx), *in,
+                          pt_scalar_shape, out);
     }
 #if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
     if (platform::is_gpu_place(ctx.GetPlace())) {
       auto &dev_ctx = ctx.device_context<platform::CUDADeviceContext>();
-      pten::ReshapeKernel(dev_ctx, *pt_x.get(), pt_scalar_shape, pt_out);
+      pten::ReshapeKernel(dev_ctx, *in, pt_scalar_shape, out);
     }
 #endif
 #ifdef PADDLE_WITH_XPU
     if (platform::is_xpu_place(ctx.GetPlace())) {
       auto &dev_ctx = ctx.device_context<platform::XPUDeviceContext>();
-      pten::ReshapeKernel(static_cast<const pten::XPUContext &>(dev_ctx),
-                          *pt_x.get(), pt_scalar_shape, pt_out);
+      pten::ReshapeKernel(static_cast<const pten::XPUContext &>(dev_ctx), *in,
+                          pt_scalar_shape, out);
     }
 #endif
-
-    // non-inplace need move all result from pt_out to out, inplace need set
-    // result dims.
-    if (in != out) {
-      paddle::experimental::SharesStorage(pt_out, static_cast<Tensor *>(out));
-    } else {
-      out->Resize(pt_out->dims());
-    }
   }
 };
@@ -469,25 +409,22 @@ class ReshapeGradKernel {
     auto *d_x = ctx.Output<framework::Tensor>(framework::GradVarName("X"));
     d_x->mutable_data(ctx.GetPlace(), d_out->type());
-    auto pt_d_x = paddle::experimental::MakePtenDenseTensor(*d_x);
-    auto pt_d_out = paddle::experimental::MakePtenDenseTensor(*d_out);
-
     if (platform::is_cpu_place(ctx.GetPlace())) {
       auto &dev_ctx = ctx.device_context<platform::CPUDeviceContext>();
       pten::ReshapeGradKernel(static_cast<const pten::CPUContext &>(dev_ctx),
-                              *pt_d_out.get(), pt_d_x.get());
+                              *d_out, d_x);
     }
 #if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
     if (platform::is_gpu_place(ctx.GetPlace())) {
       auto &dev_ctx = ctx.device_context<platform::CUDADeviceContext>();
-      pten::ReshapeGradKernel(dev_ctx, *pt_d_out.get(), pt_d_x.get());
+      pten::ReshapeGradKernel(dev_ctx, *d_out, d_x);
     }
 #endif
 #ifdef PADDLE_WITH_XPU
     if (platform::is_xpu_place(ctx.GetPlace())) {
       auto &dev_ctx = ctx.device_context<platform::XPUDeviceContext>();
       pten::ReshapeGradKernel(static_cast<const pten::XPUContext &>(dev_ctx),
-                              *pt_d_out.get(), pt_d_x.get());
+                              *d_out, d_x);
     }
 #endif
   }
@@ -500,27 +437,22 @@ class ReshapeDoubleGradKernel {
     auto *dd_out = ctx.Output<framework::Tensor>("DDOut");
     dd_out->mutable_data(ctx.GetPlace(), dd_x->type());
-    auto pt_dd_x = paddle::experimental::MakePtenDenseTensor(*dd_x);
-    auto pt_dd_out = paddle::experimental::MakePtenDenseTensor(*dd_out);
-
     if (platform::is_cpu_place(ctx.GetPlace())) {
       auto &dev_ctx = ctx.device_context<platform::CPUDeviceContext>();
       pten::ReshapeDoubleGradKernel(
-          static_cast<const pten::CPUContext &>(dev_ctx), *pt_dd_x.get(),
-          pt_dd_out.get());
+          static_cast<const pten::CPUContext &>(dev_ctx), *dd_x, dd_out);
     }
 #if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
     if (platform::is_gpu_place(ctx.GetPlace())) {
       auto &dev_ctx = ctx.device_context<platform::CUDADeviceContext>();
-      pten::ReshapeDoubleGradKernel(dev_ctx, *pt_dd_x.get(), pt_dd_out.get());
+      pten::ReshapeDoubleGradKernel(dev_ctx, *dd_x, dd_out);
     }
 #endif
 #ifdef PADDLE_WITH_XPU
     if (platform::is_xpu_place(ctx.GetPlace())) {
       auto &dev_ctx = ctx.device_context<platform::XPUDeviceContext>();
       pten::ReshapeDoubleGradKernel(
-          static_cast<const pten::XPUContext &>(dev_ctx), *pt_dd_x.get(),
-          pt_dd_out.get());
+          static_cast<const pten::XPUContext &>(dev_ctx), *dd_x, dd_out);
     }
 #endif
   }
......
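All three kernels above now follow the same shape: choose the device context for the current place, then pass the fluid tensors straight into the pten kernel, with no MakePtenDenseTensor bridging and no post-call SharesStorage fix-up. A condensed sketch of that shared dispatch (the DispatchByPlace helper is hypothetical, not part of this commit):

// Hypothetical helper condensing the per-place branches repeated above.
template <typename Functor>
void DispatchByPlace(const framework::ExecutionContext &ctx, Functor &&fn) {
  if (platform::is_cpu_place(ctx.GetPlace())) {
    fn(static_cast<const pten::CPUContext &>(
        ctx.device_context<platform::CPUDeviceContext>()));
  }
#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
  if (platform::is_gpu_place(ctx.GetPlace())) {
    fn(ctx.device_context<platform::CUDADeviceContext>());
  }
#endif
#ifdef PADDLE_WITH_XPU
  if (platform::is_xpu_place(ctx.GetPlace())) {
    fn(static_cast<const pten::XPUContext &>(
        ctx.device_context<platform::XPUDeviceContext>()));
  }
#endif
}

// Usage, e.g. in ReshapeGradKernel:
//   DispatchByPlace(ctx, [&](const auto &dev_ctx) {
//     pten::ReshapeGradKernel(dev_ctx, *d_out, d_x);
//   });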
@@ -102,8 +102,8 @@ using scale_kernel = void (*)(const DeviceContext&,
 using sum_kernel = void (*)(const DeviceContext&,
                             const DenseTensor&,
                             const std::vector<int64_t>&,
-                            bool,
                             DataType,
+                            bool,
                             DenseTensor*);
 using subtract_kernel = void (*)(const DeviceContext&,
......
@@ -126,6 +126,19 @@ void DenseTensor::set_meta(DenseTensorMeta&& meta) {
   meta_ = std::move(meta);
 }
+
+void DenseTensor::set_meta(const DenseTensorMeta& meta) {
+  PADDLE_ENFORCE(
+      meta.valid(),
+      paddle::platform::errors::InvalidArgument(
+          "Input meta is invalid, please check the meta attribute."));
+  meta_.dims = meta.dims;
+  meta_.dtype = meta.dtype;
+  meta_.is_scalar = meta.is_scalar;
+  meta_.layout = meta.layout;
+  meta_.lod = meta.lod;
+  meta_.offset = meta.offset;
+}
+
 /* @jim19930609: This interface will be further modified util we finalized the
    design for Allocator - Allocation
    For now, we have to temporarily accommodate two independent use cases:
......
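A short usage sketch for the new copy overload (assuming out is a pten::DenseTensor* and out_meta comes from an InferMeta call, as in the reshape kernel later in this diff): unlike the rvalue overload above, it leaves the meta object intact for further use.

pten::DenseTensorMeta out_meta = InferMetaFromVecValue(x.meta(), shape_vec);
out->set_meta(out_meta);        // copies dims/dtype/layout/lod/offset
out->Resize(out_meta.dims);     // out_meta is still valid after the copy
// out->set_meta(std::move(out_meta));  // the rvalue overload would consume it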
@@ -131,6 +131,8 @@ class DenseTensor : public TensorBase,
   /// \param meta The meta information of the tensor.
   void set_meta(DenseTensorMeta&& meta);
+
+  void set_meta(const DenseTensorMeta& meta);
+
   /// \brief Test whether the metadata is valid.
   /// \return Whether the metadata is valid.
   bool valid() const noexcept override { return meta_.valid(); }
......
@@ -131,8 +131,13 @@ DenseTensorMeta MatmulInferMeta(const DenseTensorMeta& x_meta,
 }
 DenseTensorMeta ElementwiseInferMeta(const DenseTensorMeta& x_meta,
-                                     const DenseTensorMeta& y_meta,
-                                     int axis) {
+                                     const DenseTensorMeta& y_meta) {
+  return ElementwiseRawInferMeta(x_meta, y_meta, -1);
+}
+
+DenseTensorMeta ElementwiseRawInferMeta(const DenseTensorMeta& x_meta,
+                                        const DenseTensorMeta& y_meta,
+                                        int axis) {
   DenseTensorMeta return_meta(x_meta.dtype, x_meta.dims, x_meta.layout);
   if (x_meta.dims != y_meta.dims) {
     auto x_dims = x_meta.dims;
......
@@ -42,6 +42,10 @@ DenseTensorMeta MatmulInferMeta(const DenseTensorMeta& x_meta,
                                 bool trans_y);
 DenseTensorMeta ElementwiseInferMeta(const DenseTensorMeta& x_meta,
-                                     const DenseTensorMeta& y_meta,
-                                     int axis);
+                                     const DenseTensorMeta& y_meta);
+
+DenseTensorMeta ElementwiseRawInferMeta(const DenseTensorMeta& x_meta,
+                                        const DenseTensorMeta& y_meta,
+                                        int axis);
 }  // namespace pten
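Call-site effect of the split (assuming x and y are pten::DenseTensors): existing two-tensor callers keep the default trailing broadcast, while axis-aware callers name the Raw variant explicitly.

// Default broadcast: ElementwiseInferMeta now forwards axis = -1 internally.
auto meta_default = pten::ElementwiseInferMeta(x.meta(), y.meta());
// An explicit broadcast axis goes through the Raw variant.
auto meta_axis0 = pten::ElementwiseRawInferMeta(x.meta(), y.meta(), /*axis=*/0);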
@@ -232,6 +232,16 @@ DenseTensorMeta ReshapeInferMeta(const DenseTensorMeta& x_meta,
   return InferMetaFromVecValue(x_meta, shape.GetData());
 }
+
+/* Why not use ReduceInferMeta directly?
+   Because we need to make InferMetaFunction's args follow the design of
+   api.yaml */
+DenseTensorMeta SumInferMeta(const DenseTensorMeta& x_meta,
+                             const std::vector<int64_t>& axis,
+                             DataType dtype,
+                             bool keep_dim) {
+  return ReduceInferMeta(x_meta, axis, keep_dim, dtype);
+}
+
 DenseTensorMeta ReduceInferMeta(const DenseTensorMeta& x_meta,
                                 const std::vector<int64_t>& axis,
                                 bool keep_dim,
......
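In other words, SumInferMeta is a pure argument-order adapter: api.yaml declares sum as (x, axis, dtype, keep_dim), while ReduceInferMeta takes (x, axis, keep_dim, dtype). A call-site sketch (assuming a pten::DenseTensor x):

// Both calls produce the same meta; only the parameter order differs.
auto m1 = pten::SumInferMeta(x.meta(), {0}, DataType::UNDEFINED, /*keep_dim=*/false);
auto m2 = pten::ReduceInferMeta(x.meta(), {0}, /*keep_dim=*/false, DataType::UNDEFINED);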
@@ -58,4 +58,9 @@ DenseTensorMeta ReduceInferMeta(const DenseTensorMeta& x_meta,
                                 const std::vector<int64_t>& axis,
                                 bool keep_dim,
                                 DataType dtype = DataType::UNDEFINED);
+
+DenseTensorMeta SumInferMeta(const DenseTensorMeta& x_meta,
+                             const std::vector<int64_t>& axis,
+                             DataType dtype,
+                             bool keep_dim);
 }  // namespace pten
@@ -33,8 +33,8 @@ template <typename T, typename Context>
 void SumKernel(const Context& dev_ctx,
                const DenseTensor& x,
                const std::vector<int64_t>& dims,
-               bool keep_dim,
                DataType out_dtype,
+               bool keep_dim,
                DenseTensor* out) {
   bool reduce_all = false;
   SumRawKernel<T>(dev_ctx, x, dims, keep_dim, reduce_all, out_dtype, out);
......
@@ -50,8 +50,8 @@ template <typename T, typename Context>
 void SumKernel(const Context& dev_ctx,
                const DenseTensor& x,
                const std::vector<int64_t>& dims,
-               bool keep_dim,
                DataType out_dtype,
+               bool keep_dim,
                DenseTensor* out);

 template <typename T, typename Context>
@@ -110,7 +110,7 @@ template <typename T, typename Context>
 DenseTensor Add(const Context& dev_ctx,
                 const DenseTensor& x,
                 const DenseTensor& y) {
-  auto out_meta = ElementwiseInferMeta(x.meta(), y.meta(), -1);
+  auto out_meta = ElementwiseRawInferMeta(x.meta(), y.meta(), -1);
   auto dense_out = pten::Empty<T, Context>(dev_ctx, std::move(out_meta));
   AddKernel<T, Context>(dev_ctx, x, y, &dense_out);
   return dense_out;
@@ -120,7 +120,7 @@ template <typename T, typename Context>
 DenseTensor Subtract(const Context& dev_ctx,
                      const DenseTensor& x,
                      const DenseTensor& y) {
-  auto out_meta = ElementwiseInferMeta(x.meta(), y.meta(), -1);
+  auto out_meta = ElementwiseRawInferMeta(x.meta(), y.meta(), -1);
   auto dense_out = pten::Empty<T, Context>(dev_ctx, std::move(out_meta));
   SubtractKernel<T, Context>(dev_ctx, x, y, &dense_out);
   return dense_out;
@@ -130,7 +130,7 @@ template <typename T, typename Context>
 DenseTensor Divide(const Context& dev_ctx,
                    const DenseTensor& x,
                    const DenseTensor& y) {
-  auto out_meta = ElementwiseInferMeta(x.meta(), y.meta(), -1);
+  auto out_meta = ElementwiseRawInferMeta(x.meta(), y.meta(), -1);
   auto dense_out = pten::Empty<T, Context>(dev_ctx, std::move(out_meta));
   DivideKernel<T, Context>(dev_ctx, x, y, &dense_out);
   return dense_out;
@@ -140,7 +140,7 @@ template <typename T, typename Context>
 DenseTensor Multiply(const Context& dev_ctx,
                      const DenseTensor& x,
                      const DenseTensor& y) {
-  auto out_meta = ElementwiseInferMeta(x.meta(), y.meta(), -1);
+  auto out_meta = ElementwiseRawInferMeta(x.meta(), y.meta(), -1);
   auto dense_out = pten::Empty<T, Context>(dev_ctx, std::move(out_meta));
   MultiplyKernel<T, Context>(dev_ctx, x, y, &dense_out);
   return dense_out;
@@ -163,10 +163,10 @@ DenseTensor Sum(const Context& dev_ctx,
                 const std::vector<int64_t>& axis,
                 DataType dtype,
                 bool keep_dim) {
-  auto out_meta = ReduceInferMeta(x.meta(), axis, keep_dim, dtype);
+  auto out_meta = SumInferMeta(x.meta(), axis, dtype, keep_dim);
   auto dense_out = pten::Empty<T, Context>(dev_ctx, std::move(out_meta));
-  SumKernel<T, Context>(dev_ctx, x, axis, keep_dim, dtype, &dense_out);
+  SumKernel<T, Context>(dev_ctx, x, axis, dtype, keep_dim, &dense_out);
   return dense_out;
 }
......
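For callers, the visible effect of this file is a swap of the last two value arguments at every sum call site, matching the new (dims, out_dtype, keep_dim) order. A before/after sketch (assuming dev_ctx, x, and out from the surrounding code):

// Before: pten::SumKernel<float>(dev_ctx, x, {1}, /*keep_dim=*/false, DataType::UNDEFINED, &out);
// After this commit:
pten::SumKernel<float>(dev_ctx, x, /*dims=*/{1}, DataType::UNDEFINED,
                       /*keep_dim=*/false, &out);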
@@ -31,9 +31,8 @@ void ReshapeKernel(const Context& dev_ctx,
     out->ResizeAndAllocate(out_meta.dims);
     return;
   }
-  out->Resize(x.dims());
-  out->mutable_data(x.place());
+  out->set_meta(out_meta);
+  out->mutable_data(dev_ctx.GetPlace());
   pten::Copy(dev_ctx, x, false, out);
   out->Resize(out_meta.dims);
   out->ResetLoD(x.lod());
......
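Design note on the new non-inplace branch: set_meta(out_meta), the copy overload added in this commit, stamps dtype and layout onto out before allocation, so mutable_data can allocate on the kernel's own place instead of reading x.place(). A commented walk-through of the branch (a sketch of the lines above, not the full kernel):

out->set_meta(out_meta);                // dtype/layout/dims from InferMeta
out->mutable_data(dev_ctx.GetPlace());  // allocate on the kernel's place
pten::Copy(dev_ctx, x, false, out);     // Copy can reset dims to x's shape
out->Resize(out_meta.dims);             // so restore the reshaped dims here
out->ResetLoD(x.lod());                 // and carry the input LoD over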
@@ -3,7 +3,7 @@
   output : Tensor
   infer_meta :
     func : ElementwiseInferMeta
-    param : [x, y, -1]
+    param : [x, y]
   kernel :
     func : add
@@ -40,7 +40,7 @@
   output : Tensor
   infer_meta :
     func : ElementwiseInferMeta
-    param : [x, y, -1]
+    param : [x, y]
   kernel :
     func : divide
@@ -135,7 +135,7 @@
   output : Tensor
   infer_meta :
     func : ElementwiseInferMeta
-    param : [x, y, -1]
+    param : [x, y]
   kernel :
     func : multiply
@@ -166,19 +166,19 @@
   output : Tensor
   infer_meta :
     func : ElementwiseInferMeta
-    param : [x, y, -1]
+    param : [x, y]
   kernel :
     func : subtract

 - api : sum
   args : (const Tensor& x, const std::vector<int64_t>& axis={}, DataType dtype=DataType::UNDEFINED, bool keep_dim=false)
   output : Tensor
   infer_meta :
-    func : ReduceInferMeta
-    param: [x, axis, keep_dim, dtype]
+    func : SumInferMeta
+    param: [x, axis, dtype, keep_dim]
   kernel :
     func : sum
-    param : [x, axis, keep_dim, dtype]
+    param : [x, axis, dtype, keep_dim]
     data_type : x

 - api : zeros_like
......
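From the API side these yaml edits only change which InferMeta function the code generator wires up and the order in which arguments are forwarded; the declared sum signature itself is unchanged. A caller-side sketch, assuming the generator exposes these entries under paddle::experimental as with the other generated APIs:

// Sum over axes {0, 1}, keeping the input dtype, dropping reduced dims.
paddle::experimental::Tensor out =
    paddle::experimental::sum(x, {0, 1}, DataType::UNDEFINED, /*keep_dim=*/false);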