Unverified commit bc1c3e3e authored by zyfncg, committed by GitHub

Create Tensor by paddle::empty in custom operator (#41840)

* create tensor by empty in custom op

* fix some bugs
Parent 694ac20f
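The diff below makes two mechanical changes: custom-operator code now creates output tensors with the `paddle::empty(shape, dtype, place)` API instead of the `paddle::Tensor(PlaceType, shape)` constructor, and `Place` is referenced as `paddle::Place` rather than `paddle::experimental::Place` (the `experimental` aliases for `Place`, `CPUPlace`, and `GPUPlace` are removed). A minimal sketch of the new allocation pattern, assuming the public extension header `paddle/extension.h`; `IdentityForward` is a hypothetical op used only for illustration:

#include <vector>

#include "paddle/extension.h"

// Hypothetical forward function. Previously an output was created with
//   auto out = paddle::Tensor(paddle::PlaceType::kCPU, x.shape());
// which hard-codes the place and leaves the dtype implicit. With
// paddle::empty, the shape, dtype, and place are all explicit, so the
// output simply follows the input tensor:
std::vector<paddle::Tensor> IdentityForward(const paddle::Tensor& x) {
  auto out = paddle::empty(x.shape(), x.dtype(), x.place());
  // ... fill `out` via a PD_DISPATCH_* kernel, as in the samples below ...
  return {out};
}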
@@ -45,7 +45,7 @@ yaml_types_mapping = {
'int' : 'int', 'int32_t' : 'int32_t', 'int64_t' : 'int64_t', 'size_t' : 'size_t', \
'float' : 'float', 'double' : 'double', 'bool' : 'bool', \
'str' : 'std::string', \
- 'Place' : 'paddle::experimental::Place', 'DataLayout' : 'paddle::experimental::DataLayout', 'DataType' : 'paddle::experimental::DataType', \
+ 'Place' : 'paddle::Place', 'DataLayout' : 'paddle::experimental::DataLayout', 'DataType' : 'paddle::experimental::DataType', \
'int64_t[]' : 'std::vector<int64_t>', 'int[]' : 'std::vector<int>',
'Tensor' : 'Tensor',
'Tensor[]' : 'std::vector<Tensor>',
@@ -46,7 +46,7 @@ atype_to_parsing_function = {
"std::vector<std::string>": "CastPyArg2Strings",
"paddle::experimental::Scalar": "CastPyArg2Scalar",
"paddle::experimental::IntArray": "CastPyArg2IntArray",
"paddle::experimental::Place": "CastPyArg2Place",
"paddle::Place": "CastPyArg2Place",
"paddle::experimental::DataType": "CastPyArg2DataType",
}
@@ -1151,15 +1151,13 @@ std::vector<paddle::framework::Scope*> GetScopePtrListFromArgs(
return result;
}
- paddle::experimental::Place CastPyArg2Place(PyObject* obj,
-                                             const std::string& op_type,
-                                             ssize_t arg_pos) {
+ paddle::Place CastPyArg2Place(PyObject* obj, const std::string& op_type,
+                               ssize_t arg_pos) {
return CastPyArg2Place(obj, arg_pos);
}
- paddle::experimental::DataType CastPyArg2DataType(PyObject* obj,
-                                                   const std::string& op_type,
-                                                   ssize_t arg_pos) {
+ paddle::DataType CastPyArg2DataType(PyObject* obj, const std::string& op_type,
+                                     ssize_t arg_pos) {
if (obj == Py_None) {
PADDLE_THROW(platform::errors::InvalidArgument(
"%s(): argument (position %d) must be "
@@ -162,13 +162,11 @@ paddle::experimental::IntArray CastPyArg2IntArray(PyObject* obj,
const std::string& op_type,
ssize_t arg_pos);
- paddle::experimental::Place CastPyArg2Place(PyObject* obj,
-                                             const std::string& op_type,
-                                             ssize_t arg_pos);
+ paddle::Place CastPyArg2Place(PyObject* obj, const std::string& op_type,
+                               ssize_t arg_pos);
- paddle::experimental::DataType CastPyArg2DataType(PyObject* obj,
-                                                   const std::string& op_type,
-                                                   ssize_t arg_pos);
+ paddle::DataType CastPyArg2DataType(PyObject* obj, const std::string& op_type,
+                                     ssize_t arg_pos);
paddle::optional<const paddle::experimental::Tensor&> GetOptionalTensorFromArgs(
const std::string& op_type, const std::string& arg_name, PyObject* args,
@@ -213,9 +213,6 @@ std::ostream& operator<<(std::ostream&, const Place&);
namespace paddle {
namespace experimental {
using AllocationType = phi::AllocationType;
- using Place = phi::Place;
- using CPUPlace = phi::CPUPlace;
- using GPUPlace = phi::GPUPlace;
using GPUPinnedPlace = phi::GPUPinnedPlace;
using XPUPlace = phi::XPUPlace;
using NPUPlace = phi::NPUPlace;
@@ -37,13 +37,11 @@ namespace tests {
// TODO(chenweihang): Remove this test after the API is used in the dygraph
TEST(API, data_transform_same_place) {
// 1. create tensor
- auto x = paddle::experimental::full({3, 3},
-                                     1.0,
-                                     experimental::DataType::COMPLEX128,
-                                     experimental::CPUPlace());
+ auto x =
+     paddle::experimental::full({3, 3}, 1.0, DataType::COMPLEX128, CPUPlace());
- auto y = paddle::experimental::full(
-     {3, 3}, 2.0, experimental::DataType::FLOAT32, experimental::CPUPlace());
+ auto y =
+     paddle::experimental::full({3, 3}, 2.0, DataType::FLOAT32, CPUPlace());
std::vector<phi::dtype::complex<double>> sum(9, 6.0);
@@ -75,10 +73,10 @@ TEST(API, data_transform_same_place) {
TEST(Tensor, data_transform_diff_place) {
// 1. create tensor
auto x = paddle::experimental::full(
-     {3, 3}, 1.0, experimental::DataType::FLOAT64, experimental::CPUPlace());
+     {3, 3}, 1.0, experimental::DataType::FLOAT64, CPUPlace());
auto y = paddle::experimental::full(
-     {3, 3}, 2.0, experimental::DataType::FLOAT64, experimental::GPUPlace());
+     {3, 3}, 2.0, experimental::DataType::FLOAT64, GPUPlace());
std::vector<float> sum(9, 6.0);
@@ -93,10 +91,9 @@ TEST(Tensor, data_transform_diff_place) {
ASSERT_EQ(out.dtype(), phi::DataType::FLOAT64);
ASSERT_EQ(out.layout(), phi::DataLayout::NCHW);
ASSERT_EQ(out.initialized(), true);
- ASSERT_EQ(out.impl()->place(),
-           phi::TransToPhiPlace(experimental::Backend::GPU));
+ ASSERT_EQ(out.impl()->place(), phi::TransToPhiPlace(phi::Backend::GPU));
- auto ref_out = experimental::copy_to(out, experimental::CPUPlace(), true);
+ auto ref_out = experimental::copy_to(out, CPUPlace(), true);
auto dense_out = std::dynamic_pointer_cast<phi::DenseTensor>(ref_out.impl());
for (size_t i = 0; i < 9; i++) {
@@ -30,7 +30,7 @@ namespace tests {
TEST(API, scale) {
auto x = experimental::full(
-     {3, 4}, 1.0, experimental::DataType::FLOAT32, experimental::CPUPlace());
+     {3, 4}, 1.0, experimental::DataType::FLOAT32, CPUPlace());
const size_t cycles = 300;
phi::tests::Timer timer;
@@ -22,8 +22,7 @@
std::vector<paddle::Tensor> ContextPoolTest(const paddle::Tensor& x) {
// 1. test cpu context
- paddle::experimental::Place cpu_place(
-     paddle::experimental::AllocationType::CPU);
+ paddle::Place cpu_place(paddle::experimental::AllocationType::CPU);
auto* cpu_ctx =
paddle::experimental::DeviceContextPool::Instance()
.Get<paddle::experimental::AllocationType::CPU>(cpu_place);
@@ -34,8 +33,7 @@ std::vector<paddle::Tensor> ContextPoolTest(const paddle::Tensor& x) {
#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
// 2. test gpu context
- paddle::experimental::Place gpu_place(
-     paddle::experimental::AllocationType::GPU);
+ paddle::Place gpu_place(paddle::experimental::AllocationType::GPU);
auto* gpu_ctx =
paddle::experimental::DeviceContextPool::Instance()
.Get<paddle::experimental::AllocationType::GPU>(gpu_place);
@@ -75,7 +75,7 @@ std::vector<paddle::Tensor> ConcatForwardDynamicAxis(
auto out_shape = ComputeOutShape(in_shapes, axis);
// create output
- auto out = paddle::Tensor(paddle::PlaceType::kCPU, out_shape);
+ auto out = paddle::empty(out_shape, inputs[0].type(), paddle::CPUPlace());
// calc
PD_DISPATCH_FLOATING_AND_INTEGRAL_TYPES(
@@ -106,7 +106,7 @@ std::vector<paddle::Tensor> ConcatBackwardDynamicAxis(
// create outputs
std::vector<paddle::Tensor> grad_inputs;
for (auto& t : inputs) {
-   auto grad = paddle::Tensor(paddle::PlaceType::kCPU, t.shape());
+   auto grad = paddle::empty(t.shape(), t.dtype(), t.place());
grad_inputs.emplace_back(grad);
}
@@ -161,7 +161,7 @@ std::vector<paddle::Tensor> ConcatForwardStaticAxis(
auto out_shape = ComputeOutShape(in_shapes, final_axis);
// create output
- auto out = paddle::Tensor(paddle::PlaceType::kCPU, out_shape);
+ auto out = paddle::empty(out_shape, inputs[0].type(), paddle::CPUPlace());
// calc
PD_DISPATCH_FLOATING_AND_INTEGRAL_TYPES(
@@ -190,7 +190,7 @@ std::vector<paddle::Tensor> ConcatBackwardStaticAxis(
// create outputs
std::vector<paddle::Tensor> grad_inputs;
for (auto& t : inputs) {
-   auto grad = paddle::Tensor(paddle::PlaceType::kCPU, t.shape());
+   auto grad = paddle::empty(t.shape(), t.dtype(), t.place());
grad_inputs.emplace_back(grad);
}
@@ -71,7 +71,7 @@ void ConjCPUKernel(const data_t* x_data, int64_t numel, data_t* out_data) {
std::vector<paddle::Tensor> ConjFunction(const paddle::Tensor& x) {
CHECK_INPUT(x);
- paddle::Tensor out(x.place(), x.shape());
+ paddle::Tensor out = paddle::empty(x.shape(), x.dtype(), x.place());
PD_DISPATCH_FLOATING_AND_COMPLEX_TYPES(
x.type(), "ConjCPUKernel", ([&] {
@@ -54,7 +54,7 @@ void relu_cpu_double_backward_kernel(const data_t* out_data,
}
std::vector<paddle::Tensor> relu_cpu_forward(const paddle::Tensor& x) {
- auto out = paddle::Tensor(paddle::PlaceType::kCPU, x.shape());
+ auto out = paddle::empty(x.shape(), x.dtype(), x.place());
PD_DISPATCH_FLOATING_TYPES(
x.type(), "relu_cpu_forward", ([&] {
@@ -68,7 +68,7 @@ std::vector<paddle::Tensor> relu_cpu_forward(const paddle::Tensor& x) {
std::vector<paddle::Tensor> relu_cpu_backward(const paddle::Tensor& x,
const paddle::Tensor& out,
const paddle::Tensor& grad_out) {
- auto grad_x = paddle::Tensor(paddle::PlaceType::kCPU, x.shape());
+ auto grad_x = paddle::empty(x.shape(), x.dtype(), x.place());
PD_DISPATCH_FLOATING_TYPES(out.type(), "relu_cpu_backward", ([&] {
relu_cpu_backward_kernel<data_t>(
@@ -85,7 +85,7 @@ std::vector<paddle::Tensor> relu_cpu_double_backward(
const paddle::Tensor& out, const paddle::Tensor& ddx) {
CHECK_CPU_INPUT(out);
CHECK_CPU_INPUT(ddx);
- auto ddout = paddle::Tensor(paddle::PlaceType::kCPU, out.shape());
+ auto ddout = paddle::empty(out.shape(), out.dtype(), out.place());
PD_DISPATCH_FLOATING_TYPES(out.type(), "relu_cpu_double_backward", ([&] {
relu_cpu_double_backward_kernel<data_t>(
@@ -165,7 +165,7 @@ PD_BUILD_DOUBLE_GRAD_OP(custom_relu)
std::vector<paddle::Tensor> relu_cpu_backward_without_x(
const paddle::Tensor& out, const paddle::Tensor& grad_out) {
- auto grad_x = paddle::Tensor(paddle::PlaceType::kCPU, out.shape());
+ auto grad_x = paddle::empty(out.shape(), out.dtype(), out.place());
PD_DISPATCH_FLOATING_TYPES(out.type(), "relu_cpu_backward", ([&] {
relu_cpu_backward_kernel<data_t>(
@@ -54,7 +54,7 @@ __global__ void relu_cuda_double_backward_kernel(const data_t* out_data,
std::vector<paddle::Tensor> relu_cuda_forward(const paddle::Tensor& x) {
CHECK_GPU_INPUT(x);
- auto out = paddle::Tensor(paddle::PlaceType::kGPU, x.shape());
+ auto out = paddle::empty(x.shape(), x.dtype(), x.place());
int numel = x.size();
int block = 512;
@@ -74,7 +74,7 @@ std::vector<paddle::Tensor> relu_cuda_backward(const paddle::Tensor& x,
CHECK_GPU_INPUT(x);
CHECK_GPU_INPUT(out);
CHECK_GPU_INPUT(grad_out);
- auto grad_x = paddle::Tensor(paddle::PlaceType::kGPU, x.shape());
+ auto grad_x = paddle::empty(x.shape(), x.dtype(), x.place());
int numel = out.size();
int block = 512;
@@ -95,7 +95,7 @@ std::vector<paddle::Tensor> relu_cuda_double_backward(
const paddle::Tensor& out, const paddle::Tensor& ddx) {
CHECK_GPU_INPUT(out);
CHECK_GPU_INPUT(ddx);
- auto ddout = paddle::Tensor(paddle::PlaceType::kGPU, out.shape());
+ auto ddout = paddle::empty(out.shape(), out.dtype(), out.place());
int64_t numel = out.size();
int64_t block = 512;
@@ -117,7 +117,7 @@ std::vector<paddle::Tensor> relu_cuda_double_backward(
std::vector<paddle::Tensor> relu_cuda_backward_without_x(
const paddle::Tensor& out, const paddle::Tensor& grad_out) {
- auto grad_x = paddle::Tensor(paddle::PlaceType::kGPU, out.shape());
+ auto grad_x = paddle::empty(out.shape(), out.dtype(), out.place());
int numel = out.size();
int block = 512;
@@ -68,7 +68,7 @@ void tanh_cpu_double_backward_kernel(const data_t* out_data,
std::vector<paddle::Tensor> TanhForward(const paddle::Tensor& x) {
CHECK_CPU_INPUT(x);
- auto out = paddle::Tensor(paddle::PlaceType::kCPU, x.shape());
+ auto out = paddle::empty(x.shape(), x.dtype(), x.place());
PD_DISPATCH_FLOATING_TYPES(
x.dtype(), "tanh_cpu_forward", ([&] {
@@ -82,7 +82,7 @@ std::vector<paddle::Tensor> TanhForward(const paddle::Tensor& x) {
std::vector<paddle::Tensor> TanhBackward(const paddle::Tensor& out,
const paddle::Tensor& grad_out) {
CHECK_CPU_INPUT(out);
- auto grad_x = paddle::Tensor(paddle::PlaceType::kCPU, out.shape());
+ auto grad_x = paddle::empty(out.shape(), out.dtype(), out.place());
PD_DISPATCH_FLOATING_TYPES(out.dtype(), "tanh_cpu_backward", ([&] {
tanh_cpu_backward_kernel<data_t>(
@@ -101,8 +101,8 @@ std::vector<paddle::Tensor> TanhDoubleBackward(const paddle::Tensor& out,
CHECK_CPU_INPUT(out);
CHECK_CPU_INPUT(ddx);
CHECK_CPU_INPUT(dout);
- auto dout_new = paddle::Tensor(paddle::PlaceType::kCPU, out.shape());
- auto ddout = paddle::Tensor(paddle::PlaceType::kCPU, out.shape());
+ auto dout_new = paddle::empty(out.shape(), out.dtype(), out.place());
+ auto ddout = paddle::empty(out.shape(), out.dtype(), out.place());
PD_DISPATCH_FLOATING_TYPES(out.dtype(), "tanh_cpu_double_backward", ([&] {
tanh_cpu_double_backward_kernel<data_t>(