【PTen】Add empty and empty_like kernel in pten (#38334)

* add empty and empty_like kernel in pten
* add empty dev_api

【PTen】Add empty and empty_like kernel in pten (#38334)
* add empty and empty_like kernel in pten
* add empty dev_api
4221cd33 · zyfncg · GitHub · 15ad7ee4 · 4221cd33 · 4221cd33
12 changed files
--- a/paddle/fluid/operators/empty_op.cc
+++ b/paddle/fluid/operators/empty_op.cc
@@ -109,6 +109,20 @@ class EmptyOp : public framework::OperatorWithKernel {
        framework::proto::VarType::Type(context.Attr<int>("dtype")),
        context.GetPlace());
  }
+  framework::KernelSignature GetExpectedPtenKernelArgs(  // Returns the signature used to dispatch this op to the "empty" kernel.
+      const framework::ExecutionContext& ctx) const override {
+    std::string shape;  // name of whichever source will supply the output shape
+    if (ctx.HasInput("ShapeTensor")) {  // checked first: a "ShapeTensor" input takes priority
+      shape = "ShapeTensor";
+    } else if (ctx.MultiInput<framework::Tensor>("ShapeTensorList").size()) {  // next: a non-empty "ShapeTensorList"
+      shape = "ShapeTensorList";
+    } else {  // neither tensor form is present
+      shape = "shape";  // presumably the op's `shape` attribute -- confirm against the op maker
+    }
+    return framework::KernelSignature("empty", {}, {shape}, {"Out"});  // second list is empty; third holds only the chosen shape name; "Out" is the last list
+  }
 };
 class EmptyOpVarTypeInference : public framework::VarTypeInference {

--- a/paddle/pten/api/include/kernel_signature.h
+++ b/paddle/pten/api/include/kernel_signature.h
@@ -50,6 +50,11 @@ using dot_kernel = void (*)(const DeviceContext&,
 using flatten_kernel =
    void (*)(const DeviceContext&, const DenseTensor&, int, int, DenseTensor*);
+using empty_kernel = void (*)(const DeviceContext&,  // function-pointer type: (context, shape, output tensor)
+                              const ScalarArray&,
+                              DenseTensor*);
+using empty_like_kernel = void (*)(const DeviceContext&, DenseTensor*);  // function-pointer type: (context, output tensor); takes no input tensor
 using full_kernel = void (*)(const DeviceContext&,
                             const ScalarArray&,
                             const Scalar&,

--- a/paddle/pten/include/creation.h
+++ b/paddle/pten/include/creation.h
@@ -16,12 +16,60 @@
 #include "paddle/pten/api/lib/utils/storage.h"
 #include "paddle/pten/include/infermeta.h"
+#include "paddle/pten/kernels/empty_kernel.h"
 #include "paddle/pten/kernels/full_kernel.h"
 namespace pten {
 // TODO(YuanRisheng) This function name should be same as User API name.
 // TODO(zyfncg) Automatic code generation
+template <typename T, typename ContextT>  // Empty: builds a DenseTensor from shape/dtype/layout, runs the Empty kernel on it, returns it by value.
+DenseTensor Empty(const ContextT& dev_ctx,
+                  const ScalarArray& shape,
+                  DataType dtype = DataType::FLOAT32,
+                  Backend backend = Backend::CPU,  // Is backend needed here?
+                  DataLayout layout = DataLayout::NCHW) {
+  auto out_meta = CreateInferMeta(shape, dtype, layout);  // meta comes from shape, dtype and layout; `backend` is never read in this body
+  pten::DenseTensor dense_out(  // storage is created for the place reported by dev_ctx
+      pten::make_intrusive<paddle::experimental::SharedStorage>(
+          dev_ctx.GetPlace()),
+      std::move(out_meta));
+  Empty<T, ContextT>(dev_ctx, shape, &dense_out);  // three-argument kernel form declared in empty_kernel.h
+  return dense_out;
+}
+template <typename T, typename ContextT>  // EmptyLike: builds a DenseTensor whose meta is derived from x, runs the EmptyLike kernel on it, returns it by value.
+DenseTensor EmptyLike(
+    const ContextT& dev_ctx,
+    const DenseTensor& x,
+    DataType dtype = DataType::UNDEFINED,
+    Backend backend = Backend::UNDEFINED,  // Is backend needed here?
+    DataLayout layout = DataLayout::UNDEFINED) {
+  auto out_meta = CreateLikeInferMeta(x.meta(), dtype, layout);  // only x's meta is read, not its data; `backend` is never read in this body
+  pten::DenseTensor dense_out(  // storage is created for the place reported by dev_ctx
+      pten::make_intrusive<paddle::experimental::SharedStorage>(
+          dev_ctx.GetPlace()),
+      std::move(out_meta));
+  EmptyLike<T, ContextT>(dev_ctx, &dense_out);  // two-argument kernel form declared in empty_kernel.h; x is not passed on
+  return dense_out;
+}
+template <typename T, typename ContextT>  // Full: builds a DenseTensor from shape/dtype/layout, runs the Full kernel with `val` on it, returns it by value.
+DenseTensor Full(const ContextT& dev_ctx,
+                 const ScalarArray& shape,
+                 const Scalar& val,
+                 DataType dtype = DataType::FLOAT32,
+                 Backend backend = Backend::CPU,  // Is backend needed here?
+                 DataLayout layout = DataLayout::NCHW) {
+  auto out_meta = CreateInferMeta(shape, dtype, layout);  // `backend` is never read in this body
+  pten::DenseTensor dense_out(  // storage is created for the place reported by dev_ctx
+      pten::make_intrusive<paddle::experimental::SharedStorage>(
+          dev_ctx.GetPlace()),
+      std::move(out_meta));
+  Full<T, ContextT>(dev_ctx, shape, val, &dense_out);  // four-argument kernel form; presumably declared in full_kernel.h -- confirm
+  return dense_out;
+}
 template <typename T, typename ContextT>
 DenseTensor FullLike(
    const ContextT& dev_ctx,

--- a/paddle/pten/kernels/empty_kernel.cc
+++ b/paddle/pten/kernels/empty_kernel.cc
+/* Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+    http://www.apache.org/licenses/LICENSE-2.0
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+#include "paddle/pten/kernels/empty_kernel.h"
+#include "paddle/pten/backends/all_context.h"
+#include "paddle/pten/core/kernel_registry.h"
+namespace pten {
+template <typename T, typename ContextT>  // Empty kernel: resizes `out` to the dims given by `shape`; T and dev_ctx are not used in the body.
+void Empty(const ContextT& dev_ctx,
+           const ScalarArray& shape,
+           DenseTensor* out) {
+  out->Resize(paddle::framework::make_ddim(shape.GetData()));  // NOTE(review): no explicit mutable_data<T>() here, unlike EmptyLike -- presumably Resize takes care of allocation; confirm
+}
+template <typename T, typename ContextT>  // EmptyLike kernel: requests T-typed data on `out` and does not resize it.
+void EmptyLike(const ContextT& dev_ctx, DenseTensor* out) {
+  out->mutable_data<T>();  // returned pointer is discarded; dev_ctx is not used
+}
+}  // namespace pten
+PT_REGISTER_CTX_KERNEL(empty,  // registers pten::Empty under the name "empty" for CPU
+                       CPU,
+                       ALL_LAYOUT,
+                       pten::Empty,
+                       bool,  // element types covered by this registration start here
+                       int,
+                       int64_t,
+                       float,
+                       double,
+                       paddle::platform::float16) {}  // trailing body is intentionally empty
+PT_REGISTER_CTX_KERNEL(empty_like,  // registers pten::EmptyLike under the name "empty_like" for CPU
+                       CPU,
+                       ALL_LAYOUT,
+                       pten::EmptyLike,
+                       bool,  // same element-type list as the "empty" registration
+                       int,
+                       int64_t,
+                       float,
+                       double,
+                       paddle::platform::float16) {}  // trailing body is intentionally empty
+#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)  // GPU registrations exist only in CUDA or HIP builds
+PT_REGISTER_CTX_KERNEL(empty,  // same pten::Empty template as the CPU registration, registered for GPU
+                       GPU,
+                       ALL_LAYOUT,
+                       pten::Empty,
+                       bool,  // same element-type list as the CPU registration
+                       int,
+                       int64_t,
+                       float,
+                       double,
+                       paddle::platform::float16) {}  // trailing body is intentionally empty
+PT_REGISTER_CTX_KERNEL(empty_like,  // same pten::EmptyLike template as the CPU registration, registered for GPU
+                       GPU,
+                       ALL_LAYOUT,
+                       pten::EmptyLike,
+                       bool,  // same element-type list as the CPU registration
+                       int,
+                       int64_t,
+                       float,
+                       double,
+                       paddle::platform::float16) {}  // trailing body is intentionally empty
+#endif  // PADDLE_WITH_CUDA || PADDLE_WITH_HIP
--- a/paddle/pten/kernels/empty_kernel.h
+++ b/paddle/pten/kernels/empty_kernel.h
+// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+#pragma once
+#include "paddle/pten/common/scalar_array.h"
+#include "paddle/pten/core/dense_tensor.h"
+namespace pten {
+template <typename T, typename ContextT>  // Empty kernel declaration: writes into `out` using `shape`; defined in empty_kernel.cc
+void Empty(const ContextT& dev_ctx, const ScalarArray& shape, DenseTensor* out);
+template <typename T, typename ContextT>  // EmptyLike kernel declaration: operates on `out` only; defined in empty_kernel.cc
+void EmptyLike(const ContextT& dev_ctx, DenseTensor* out);
+}  // namespace pten
--- a/paddle/pten/kernels/full_kernel.h
+++ b/paddle/pten/kernels/full_kernel.h
@@ -14,7 +14,6 @@
 #pragma once
-#include "paddle/pten/backends/cpu/cpu_context.h"
 #include "paddle/pten/common/scalar.h"
 #include "paddle/pten/common/scalar_array.h"
 #include "paddle/pten/core/dense_tensor.h"

--- a/paddle/pten/tests/api/CMakeLists.txt
+++ b/paddle/pten/tests/api/CMakeLists.txt
@@ -12,6 +12,7 @@ cc_test(test_framework_place_utils storage SRCS test_place_utils.cc DEPS pten_ap
 cc_test(test_mean_api SRCS test_mean_api.cc DEPS pten_tensor pten_api pten_api_utils)
 cc_test(test_dot_api SRCS test_dot_api.cc DEPS pten_tensor pten_api pten_api_utils)
 cc_test(test_matmul_api SRCS test_matmul_api.cc DEPS pten_tensor pten_api pten_api_utils)
+cc_test(test_empty_api SRCS test_empty_api.cc DEPS pten_tensor pten_api pten_api_utils)
 cc_test(test_fill_api SRCS test_fill_api.cc DEPS pten_tensor pten_api pten_api_utils)
 cc_test(test_flatten_api SRCS test_flatten_api.cc DEPS pten_tensor pten_api pten_api_utils)
 cc_test(test_elementwise_api SRCS test_elementwise_api.cc DEPS pten_tensor pten_api pten_api_utils)

--- a/paddle/pten/tests/api/test_empty_api.cc
+++ b/paddle/pten/tests/api/test_empty_api.cc
+/* Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+    http://www.apache.org/licenses/LICENSE-2.0
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+#include <gtest/gtest.h>
+#include <memory>
+#include "paddle/pten/api/include/api.h"
+#include "paddle/pten/api/lib/utils/allocator.h"
+#include "paddle/pten/core/dense_tensor.h"
+#include "paddle/pten/core/kernel_registry.h"
+namespace paddle {
+namespace tests {
+namespace framework = paddle::framework;
+using DDim = paddle::framework::DDim;
+// TODO(chenweihang): Remove this test after the API is used in the dygraph
+TEST(API, empty_like) {  // empty_like on a 3x2 FLOAT32 NCHW CPU tensor must report the same dims, dtype and layout
+  // 1. create tensor
+  const auto alloc = std::make_shared<paddle::experimental::DefaultAllocator>(
+      paddle::platform::CPUPlace());
+  auto dense_x = std::make_shared<pten::DenseTensor>(  // x's data is never written; only its meta is set here
+      alloc,
+      pten::DenseTensorMeta(pten::DataType::FLOAT32,
+                            framework::make_ddim({3, 2}),
+                            pten::DataLayout::NCHW));
+  paddle::experimental::Tensor x(dense_x);
+  // 2. test API
+  auto out = paddle::experimental::empty_like(x, pten::DataType::FLOAT32);  // dtype passed explicitly; other arguments left at their defaults
+  // 3. check result
+  ASSERT_EQ(out.dims().size(), 2);
+  ASSERT_EQ(out.dims()[0], 3);  // dim 1 is not checked directly; rank 2 plus numel() == 6 pins it to 2
+  ASSERT_EQ(out.numel(), 6);
+  ASSERT_EQ(out.is_cpu(), true);
+  ASSERT_EQ(out.type(), pten::DataType::FLOAT32);
+  ASSERT_EQ(out.layout(), pten::DataLayout::NCHW);
+  ASSERT_EQ(out.initialized(), true);  // element values themselves are not checked
+}
+TEST(API, empty1) {  // empty() with the shape supplied as a single INT64 tensor holding {2, 3}
+  // 1. create tensor
+  const auto alloc = std::make_shared<paddle::experimental::DefaultAllocator>(
+      paddle::platform::CPUPlace());
+  auto dense_shape = std::make_shared<pten::DenseTensor>(  // 1-D tensor with two INT64 elements
+      alloc,
+      pten::DenseTensorMeta(pten::DataType::INT64,
+                            framework::make_ddim({2}),
+                            pten::DataLayout::NCHW));
+  auto* shape_data = dense_shape->mutable_data<int64_t>();
+  shape_data[0] = 2;  // requested output shape is {2, 3}
+  shape_data[1] = 3;
+  paddle::experimental::Tensor tensor_shape(dense_shape);
+  // 2. test API
+  auto out = paddle::experimental::empty(tensor_shape, pten::DataType::FLOAT32);
+  // 3. check result
+  ASSERT_EQ(out.shape().size(), 2UL);
+  ASSERT_EQ(out.shape()[0], 2);  // dim 1 is not checked directly; rank 2 plus numel() == 6 pins it to 3
+  ASSERT_EQ(out.numel(), 6);
+  ASSERT_EQ(out.is_cpu(), true);
+  ASSERT_EQ(out.type(), pten::DataType::FLOAT32);
+  ASSERT_EQ(out.layout(), pten::DataLayout::NCHW);
+  ASSERT_EQ(out.initialized(), true);  // element values themselves are not checked
+}
+TEST(API, empty2) {  // empty() with the shape supplied as a list of one-element INT32 tensors
+  const auto alloc = std::make_shared<paddle::experimental::DefaultAllocator>(
+      paddle::platform::CPUPlace());
+  auto dense_scalar = std::make_shared<pten::DenseTensor>(  // single-element INT32 tensor
+      alloc,
+      pten::DenseTensorMeta(pten::DataType::INT32,
+                            framework::make_ddim({1}),
+                            pten::DataLayout::NCHW));
+  dense_scalar->mutable_data<int32_t>()[0] = 2;
+  paddle::experimental::Tensor shape_scalar1(dense_scalar);  // both list entries wrap the same dense tensor (value 2)
+  paddle::experimental::Tensor shape_scalar2(dense_scalar);
+  std::vector<paddle::experimental::Tensor> list_shape{shape_scalar1,
+                                                       shape_scalar2};
+  auto out = paddle::experimental::empty(list_shape, pten::DataType::FLOAT32);
+  ASSERT_EQ(out.shape().size(), 2UL);  // two list entries, so a rank-2 result is expected
+  ASSERT_EQ(out.shape()[0], 2);
+  ASSERT_EQ(out.numel(), 4);  // 2 x 2
+  ASSERT_EQ(out.is_cpu(), true);
+  ASSERT_EQ(out.type(), pten::DataType::FLOAT32);
+  ASSERT_EQ(out.layout(), pten::DataLayout::NCHW);
+  ASSERT_EQ(out.initialized(), true);  // element values themselves are not checked
+}
+TEST(API, empty3) {  // empty() with the shape supplied as a std::vector<int64_t>, and an INT32 result dtype
+  std::vector<int64_t> vector_shape{2, 3};
+  auto out = paddle::experimental::empty(vector_shape, pten::DataType::INT32);
+  ASSERT_EQ(out.shape().size(), 2UL);
+  ASSERT_EQ(out.shape()[0], 2);  // dim 1 is not checked directly; rank 2 plus numel() == 6 pins it to 3
+  ASSERT_EQ(out.numel(), 6);
+  ASSERT_EQ(out.is_cpu(), true);
+  ASSERT_EQ(out.type(), pten::DataType::INT32);
+  ASSERT_EQ(out.layout(), pten::DataLayout::NCHW);
+  ASSERT_EQ(out.initialized(), true);  // element values themselves are not checked
+}
+}  // namespace tests
+}  // namespace paddle
--- a/paddle/pten/tests/kernels/CMakeLists.txt
+++ b/paddle/pten/tests/kernels/CMakeLists.txt
 cc_test(test_copy_dev_api SRCS test_copy_dev_api.cc DEPS pten pten_api_utils)
 cc_test(test_dot_dev_api SRCS test_dot_dev_api.cc DEPS pten pten_api_utils)
-cc_test(test_fill_dev_api SRCS test_fill_dev_api.cc DEPS pten pten_api_utils)
+cc_test(test_creation_dev_api SRCS test_creation_dev_api.cc DEPS pten pten_api_utils)
 cc_test(test_flatten_dev_api SRCS test_flatten_dev_api.cc DEPS pten pten_api_utils)
 cc_test(test_mean_dev_api SRCS test_mean_dev_api.cc DEPS pten pten_api_utils)
 cc_test(test_scale_dev_api SRCS test_scale_dev_api.cc DEPS pten pten_api_utils)

--- a/paddle/pten/tests/kernels/test_fill_dev_api.cc
+++ b/paddle/pten/tests/kernels/test_fill_dev_api.cc
@@ -27,7 +27,82 @@ namespace tests {
 namespace framework = paddle::framework;
 using DDim = paddle::framework::DDim;
-TEST(DEV_API, fill_any_like) {
+TEST(DEV_API, empty) {  // pten::Empty on the CPU context must return a 3x2 INT32 NCHW tensor
+  // 1. create input
+  paddle::platform::DeviceContextPool& pool =
+      paddle::platform::DeviceContextPool::Instance();
+  auto* dev_ctx = pool.Get(paddle::platform::CPUPlace());
+  // 2. test API
+  auto out = pten::Empty<int>(  // element type now agrees with the INT32 dtype requested and asserted below
+      *(static_cast<paddle::platform::CPUDeviceContext*>(dev_ctx)),
+      {3, 2},
+      pten::DataType::INT32);
+  // 3. check result
+  ASSERT_EQ(out.dims().size(), 2);
+  ASSERT_EQ(out.dims()[0], 3);  // dim 1 is not checked directly; rank 2 plus numel() == 6 pins it to 2
+  ASSERT_EQ(out.numel(), 6);
+  ASSERT_EQ(out.meta().dtype, pten::DataType::INT32);
+  ASSERT_EQ(out.meta().layout, pten::DataLayout::NCHW);  // layout was left at the wrapper's default
+}
+TEST(DEV_API, empty_like) {  // pten::EmptyLike on a 3x2 FLOAT32 tensor must report the same dims, dtype and layout
+  // 1. create tensor
+  const auto alloc = std::make_shared<paddle::experimental::DefaultAllocator>(
+      paddle::platform::CPUPlace());
+  pten::DenseTensor dense_x(alloc,
+                            pten::DenseTensorMeta(pten::DataType::FLOAT32,
+                                                  framework::make_ddim({3, 2}),
+                                                  pten::DataLayout::NCHW));
+  auto* dense_x_data = dense_x.mutable_data<float>();
+  dense_x_data[0] = 0;  // only the first element is written; the rest of x is left as allocated
+  paddle::platform::DeviceContextPool& pool =
+      paddle::platform::DeviceContextPool::Instance();
+  auto* dev_ctx = pool.Get(paddle::platform::CPUPlace());
+  // 2. test API
+  auto out = pten::EmptyLike<float>(  // dtype/backend/layout arguments are left at their UNDEFINED defaults
+      *(static_cast<paddle::platform::CPUDeviceContext*>(dev_ctx)), dense_x);
+  // 3. check result
+  ASSERT_EQ(out.dims().size(), 2);
+  ASSERT_EQ(out.dims()[0], 3);  // dim 1 is not checked directly; rank 2 plus numel() == 6 pins it to 2
+  ASSERT_EQ(out.numel(), 6);
+  ASSERT_EQ(out.meta().dtype, pten::DataType::FLOAT32);  // expected to match x's dtype
+  ASSERT_EQ(out.meta().layout, pten::DataLayout::NCHW);  // expected to match x's layout
+}
+TEST(DEV_API, full) {  // pten::Full on the CPU context must return a 3x2 FLOAT32 tensor whose elements all equal val
+  // 1. create input
+  float val = 1.0;
+  paddle::platform::DeviceContextPool& pool =
+      paddle::platform::DeviceContextPool::Instance();
+  auto* dev_ctx = pool.Get(paddle::platform::CPUPlace());
+  // 2. test API
+  auto out = pten::Full<float>(
+      *(static_cast<paddle::platform::CPUDeviceContext*>(dev_ctx)),
+      {3, 2},
+      val,
+      pten::DataType::FLOAT32);
+  // 3. check result
+  ASSERT_EQ(out.dims().size(), 2);
+  ASSERT_EQ(out.dims()[0], 3);  // dim 1 is not checked directly; rank 2 plus numel() == 6 pins it to 2
+  ASSERT_EQ(out.numel(), 6);
+  ASSERT_EQ(out.meta().dtype, pten::DataType::FLOAT32);
+  ASSERT_EQ(out.meta().layout, pten::DataLayout::NCHW);
+  auto* actual_result = out.data<float>();
+  for (auto i = 0; i < 6; i++) {  // 6 matches the numel() asserted above
+    ASSERT_NEAR(actual_result[i], val, 1e-6f);  // every element is compared against val within 1e-6
+  }
+}
+TEST(DEV_API, full_like) {
  // 1. create tensor
  const auto alloc = std::make_shared<paddle::experimental::DefaultAllocator>(
      paddle::platform::CPUPlace());

--- a/python/paddle/utils/code_gen/api.yaml
+++ b/python/paddle/utils/code_gen/api.yaml
@@ -36,6 +36,32 @@
  kernel : 
    func : dot
+- api : empty  # API entry for the "empty" kernel
+  args : (const ScalarArray& shape, DataType dtype=DataType::FLOAT32, Backend place=Backend::CPU, DataLayout layout=DataLayout::NCHW)
+  output: Tensor
+  infer_meta :  # output meta comes from CreateInferMeta(shape, dtype, layout)
+    func : CreateInferMeta
+    param : [shape, dtype, layout]
+  kernel :  # kernel "empty" receives only `shape` from the args above
+    func : empty
+    param : [shape]
+    data_type : dtype  # the three keys below name the args used for kernel selection -- presumably; confirm in api_gen.py
+    backend : place
+    layout : layout
+- api : empty_like  # API entry for the "empty_like" kernel
+  args : (const Tensor& x, DataType dtype = DataType::UNDEFINED, Backend place = Backend::UNDEFINED, DataLayout layout = DataLayout::UNDEFINED)
+  output: Tensor
+  infer_meta :  # output meta comes from CreateLikeInferMeta(x, dtype, layout)
+    func : CreateLikeInferMeta
+    param : [x, dtype, layout]
+  kernel :  # kernel "empty_like" receives none of the args above
+    func : empty_like
+    param : []  # explicitly empty; the api_gen.py change in this commit stops treating an empty list as missing
+    data_type : dtype > x  # presumably "use dtype if set, otherwise take it from x" -- confirm in api_gen.py
+    backend : place > x
+    layout : layout > x
 - api : flatten
  args : (const Tensor& x, int start_axis, int stop_axis)
  output : Tensor

--- a/python/paddle/utils/code_gen/api_gen.py
+++ b/python/paddle/utils/code_gen/api_gen.py
@@ -43,12 +43,11 @@ class API:
            if 'data_type' not in self.kernel or len(self.kernel[
                    'data_type']) == 0:
                self.kernel['data_type'] = None
-            if 'param' not in self.kernel or len(self.kernel['param']) == 0:
+            if 'param' not in self.kernel:
                self.kernel['param'] = None
            self.infer_meta = api_item_yaml['infer_meta']
-            if 'param' not in self.infer_meta or len(self.infer_meta[
+            if 'param' not in self.infer_meta:
-                    'param']) == 0:
                self.infer_meta['param'] = None
    def parse_args(self, args_str):