未验证 提交 0c8b9994 编写于 作者: C Chen Weihang 提交者: GitHub

add scale api and test (#37683)

上级 5f916c37
......@@ -75,14 +75,14 @@ class ScaleOp : public framework::OperatorWithKernel {
const framework::ExecutionContext &ctx) const override {
if (ctx.InputVar("X")->IsType<framework::LoDTensor>() ||
ctx.InputVar("X")->IsType<framework::Tensor>()) {
std::string scale_attr;
if (ctx.HasInput("ScaleTensor")) {
return framework::KernelSignature("scale.host", {"X", "ScaleTensor"},
{"bias", "bias_after_scale"},
{"Out"});
scale_attr = "ScaleTensor";
} else {
return framework::KernelSignature(
"scale", {"X"}, {"scale", "bias", "bias_after_scale"}, {"Out"});
scale_attr = "scale";
}
return framework::KernelSignature(
"scale", {"X"}, {scale_attr, "bias", "bias_after_scale"}, {"Out"});
}
// TODO(chenweihang): support other cases after selected rows added
return framework::KernelSignature("scale.unregistered", {}, {}, {});
......
......@@ -15,16 +15,11 @@ limitations under the License. */
#pragma once
#include "paddle/pten/api/include/tensor.h"
#include "paddle/pten/common/scalar.h"
namespace paddle {
namespace experimental {
// TODO(chenweihang): add scale API
// TODO(chenweihang): move mean API into stat.h/cc
PD_DLL_DECL Tensor mean(const Tensor& x,
const std::vector<int64_t>& axis,
bool keep_dim);
PD_DLL_DECL Tensor add(const Tensor& x, const Tensor& y);
PD_DLL_DECL Tensor subtract(const Tensor& x, const Tensor& y);
......@@ -33,10 +28,21 @@ PD_DLL_DECL Tensor divide(const Tensor& x, const Tensor& y);
PD_DLL_DECL Tensor multiply(const Tensor& x, const Tensor& y);
// TODO(chenweihang): move mean API into stat.h/cc
PD_DLL_DECL Tensor mean(const Tensor& x,
const std::vector<int64_t>& axis,
bool keep_dim);
PD_DLL_DECL Tensor sum(const Tensor& x,
const std::vector<int64_t>& axis,
DataType dtype,
bool keep_dim);
// TODO(chenweihang): Follow-up discussion on the handling of `act` argument
PD_DLL_DECL Tensor scale(const Tensor& x,
const Scalar& scale,
float bias,
bool bias_after_scale);
} // namespace experimental
} // namespace paddle
......@@ -274,6 +274,45 @@ PD_DLL_DECL Tensor multiply(const Tensor& x, const Tensor& y) {
return out;
}
// Element-wise scale of `x`: with bias_after_scale == true this computes
// scale * x + bias (see test_scale_api); the flag controls whether the
// bias is applied before or after the multiply. `scale` accepts either a
// literal value or a 1-element tensor through the Scalar wrapper.
PD_DLL_DECL Tensor scale(const Tensor& x,
                         const Scalar& scale,
                         float bias,
                         bool bias_after_scale) {
  // Resolve the "scale" kernel from the input's backend/layout/dtype.
  auto key_set = ParseKernelKeyByInputArgs(x);
  auto key = key_set.GetHigestPriorityKernelKey();
  auto selected_kernel =
      pten::KernelFactory::Instance().SelectKernelOrThrowError("scale", key);

  // Build a kernel context bound to the matching device.
  auto* ctx = GetDeviceContextByBackend(key.backend());
  auto kctx = pten::KernelContext(ctx);

  // Feed the input and the attributes in kernel-signature order.
  auto x_impl = std::dynamic_pointer_cast<pten::DenseTensor>(x.impl());
  kctx.EmplaceBackInput(x_impl);
  kctx.EmplaceBackAttr(pten::Scalar(scale));
  kctx.EmplaceBackAttr(bias);
  kctx.EmplaceBackAttr(bias_after_scale);

  // scale leaves shape/dtype unchanged, so the output reuses x's meta.
  auto result_meta = UnchangedInferMeta(x_impl->meta());
  Tensor result;
  const auto alloc = std::make_shared<DefaultAllocator>(
      pten::TransToFluidPlace(key.backend()));
  auto result_impl = std::make_shared<pten::DenseTensor>(alloc, result_meta);
  kctx.EmplaceBackOutput(result_impl);
  result.set_impl(result_impl);

  // Run the kernel; it writes into result_impl.
  selected_kernel(&kctx);
  return result;
}
} // namespace experimental
} // namespace paddle
......
......@@ -78,7 +78,7 @@ DenseTensor Sum(const ContextT& dev_ctx,
template <typename T, typename ContextT>
DenseTensor Scale(const ContextT& dev_ctx,
const DenseTensor& x,
float scale,
const Scalar& scale,
float bias,
bool bias_after_scale) {
auto out_meta = UnchangedInferMeta(x.meta());
......@@ -90,21 +90,6 @@ DenseTensor Scale(const ContextT& dev_ctx,
return dense_out;
}
// Scale overload taking the scaling factor as a (host) DenseTensor.
// Allocates the output with x's unchanged meta and delegates the
// computation to ScaleHost<T>, which reads the factor from `scale`.
template <typename T, typename ContextT>
DenseTensor Scale(const ContextT& dev_ctx,
                  const DenseTensor& x,
                  const DenseTensor& scale,
                  float bias,
                  bool bias_after_scale) {
  const auto alloc =
      std::make_shared<paddle::experimental::DefaultAllocator>(
          dev_ctx.GetPlace());
  pten::DenseTensor result(alloc, UnchangedInferMeta(x.meta()));
  ScaleHost<T>(dev_ctx, x, scale, bias, bias_after_scale, &result);
  return result;
}
template <typename T, typename ContextT>
DenseTensor Add(const ContextT& dev_ctx,
const DenseTensor& x,
......
......@@ -50,28 +50,12 @@ void Mean(const CPUContext& dev_ctx,
template <typename T>
void Scale(const CPUContext& dev_ctx,
const DenseTensor& x,
float scale,
const Scalar& scale,
float bias,
bool bias_after_scale,
DenseTensor* out) {
eigen::Scale<CPUContext, T>(dev_ctx, x, scale, bias, bias_after_scale, out);
}
// TODO(chenweihang): now the ScaleTensor's dtype are same as x, so we cannot
// register its dtype def
template <typename T>
void ScaleHost(const CPUContext& dev_ctx,
const DenseTensor& x,
const DenseTensor& scale,
float bias,
bool bias_after_scale,
DenseTensor* out) {
eigen::Scale<CPUContext, T>(dev_ctx,
x,
static_cast<float>(*scale.data<T>()),
bias,
bias_after_scale,
out);
eigen::Scale<CPUContext, T>(
dev_ctx, x, scale.to<float>(), bias, bias_after_scale, out);
}
template <typename T>
......@@ -145,20 +129,7 @@ PT_REGISTER_KERNEL("scale",
int16_t,
int,
int64_t) {}
PT_REGISTER_KERNEL("scale.host",
CPU,
ANY,
pten::ScaleHost,
float,
double,
paddle::platform::bfloat16,
uint8_t,
int8_t,
int16_t,
int,
int64_t) {
kernel->InputAt(1).SetBackend(pten::Backend::CPU);
}
PT_REGISTER_KERNEL("elementwise_add",
CPU,
ANY,
......
......@@ -14,6 +14,7 @@ limitations under the License. */
#pragma once
#include "paddle/pten/common/scalar.h"
#include "paddle/pten/core/dense_tensor.h"
#include "paddle/pten/core/kernel_registry.h"
......@@ -40,19 +41,11 @@ void Mean(const CPUContext& dev_ctx,
template <typename T>
void Scale(const CPUContext& dev_ctx,
const DenseTensor& x,
float scale,
const Scalar& scale,
float bias,
bool bias_after_scale,
DenseTensor* out);
template <typename T>
void ScaleHost(const CPUContext& dev_ctx,
const DenseTensor& x,
const DenseTensor& scale,
float bias,
bool bias_after_scale,
DenseTensor* out);
template <typename T>
void ElementwiseAdd(const CPUContext& dev_ctx,
const DenseTensor& x,
......
......@@ -79,30 +79,12 @@ void Mean(const CUDAContext& dev_ctx,
template <typename T>
void Scale(const CUDAContext& dev_ctx,
const DenseTensor& x,
float scale,
const Scalar& scale,
float bias,
bool bias_after_scale,
DenseTensor* out) {
eigen::Scale<CUDAContext, T>(dev_ctx, x, scale, bias, bias_after_scale, out);
}
template <typename T>
void ScaleHost(const CUDAContext& dev_ctx,
const DenseTensor& x,
const DenseTensor& scale,
float bias,
bool bias_after_scale,
DenseTensor* out) {
PADDLE_ENFORCE_EQ(paddle::platform::is_gpu_place(scale.place()),
false,
paddle::platform::errors::InvalidArgument(
"Scale argument isn't a host tensor."));
eigen::Scale<CUDAContext, T>(dev_ctx,
x,
static_cast<float>(*scale.data<T>()),
bias,
bias_after_scale,
out);
eigen::Scale<CUDAContext, T>(
dev_ctx, x, scale.to<float>(), bias, bias_after_scale, out);
}
// Create the definition of ElementwiseAdd
......@@ -150,20 +132,6 @@ PT_REGISTER_KERNEL("scale",
int16_t,
int,
int64_t) {}
PT_REGISTER_KERNEL("scale.host",
CUDA,
ANY,
pten::ScaleHost,
float,
double,
float16,
uint8_t,
int8_t,
int16_t,
int,
int64_t) {
kernel->InputAt(1).SetBackend(pten::Backend::CPU);
}
PT_REGISTER_KERNEL("elementwise_add",
CUDA,
ANY,
......
......@@ -17,6 +17,7 @@ limitations under the License. */
// CUDA and HIP use same api
#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
#include "paddle/pten/common/scalar.h"
#include "paddle/pten/core/dense_tensor.h"
// See Note [ Why still include the fluid headers? ]
......@@ -42,19 +43,11 @@ void Mean(const CUDAContext& dev_ctx,
template <typename T>
void Scale(const CUDAContext& dev_ctx,
const DenseTensor& x,
float scale,
const Scalar& scale,
float bias,
bool bias_after_scale,
DenseTensor* out);
template <typename T>
void ScaleHost(const CUDAContext& dev_ctx,
const DenseTensor& x,
const DenseTensor& scale,
float bias,
bool bias_after_scale,
DenseTensor* out);
template <typename T>
void ElementwiseAdd(const CUDAContext& dev_ctx,
const DenseTensor& x,
......
......@@ -20,3 +20,4 @@ cc_test(test_reshape_api SRCS test_reshape_api.cc DEPS pten_tensor pten_api pten
cc_test(test_to_api SRCS test_to_api.cc DEPS pten_tensor pten_api pten_api_utils)
cc_test(test_slice_api SRCS test_slice_api.cc DEPS pten_tensor pten_api pten_api_utils)
cc_test(test_sum_api SRCS test_sum_api.cc DEPS pten_tensor pten_api pten_api_utils)
cc_test(test_scale_api SRCS test_scale_api.cc DEPS pten_tensor pten_api pten_api_utils)
/* Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include <gtest/gtest.h>
#include <memory>
#include "paddle/pten/api/include/creation.h"
#include "paddle/pten/api/include/math.h"
#include "paddle/pten/api/lib/utils/allocator.h"
#include "paddle/pten/core/dense_tensor.h"
namespace paddle {
namespace tests {
namespace framework = paddle::framework;
using DDim = paddle::framework::DDim;
// Asserts that `out` is an initialized 3x4 CPU float32 NCHW tensor in
// which every element equals 3.0 (i.e. 1.0 * 2.0 + 1.0, the value the
// scale test produces).
void CheckScaleResult(Tensor* out) {
  ASSERT_EQ(out->dims().size(), 2);
  ASSERT_EQ(out->dims()[0], 3);
  ASSERT_EQ(out->dims()[1], 4);
  ASSERT_EQ(out->numel(), 12);
  ASSERT_EQ(out->is_cpu(), true);
  ASSERT_EQ(out->type(), pten::DataType::FLOAT32);
  ASSERT_EQ(out->layout(), pten::DataLayout::NCHW);
  ASSERT_EQ(out->initialized(), true);
  // Check every element against the expected scalar result.
  auto* data = out->mutable_data<float>();
  for (int64_t idx = 0; idx < out->numel(); ++idx) {
    ASSERT_EQ(data[idx], 3.0);
  }
}
// Exercises the scale API with both forms the Scalar argument accepts:
// a plain float literal and a single-element tensor.
TEST(API, scale) {
  auto input = experimental::full({3, 4}, 1.0, pten::DataType::FLOAT32);

  // Case 1: `scale` given as a float value.
  auto float_out = experimental::scale(input, 2.0, 1.0, true);
  CheckScaleResult(&float_out);

  // Case 2: `scale` given as a tensor of shape [1].
  auto factor = experimental::full({1}, 2.0, pten::DataType::FLOAT32);
  auto tensor_out = experimental::scale(input, factor, 1.0, true);
  CheckScaleResult(&tensor_out);
}
} // namespace tests
} // namespace paddle
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册