Unverified commit b03ef424 authored by Chen Weihang, committed by GitHub

[Phi] Move fill_constant_batch_size_like op kernel into phi (#40784)

* add full_batch_size_like phi kernel

* remove fill constant bs like

* update year
Parent 7a78aec7
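For context, the kernel being moved backs the legacy 2.x fluid API fill_constant_batch_size_like. A minimal usage sketch (not part of this commit; assumes a Paddle 2.x install with static graph mode):

import paddle
import paddle.fluid as fluid

paddle.enable_static()

# Only the batch dimension of 'x' is read by the kernel; its values are never touched.
x = paddle.static.data(name='x', shape=[None, 8], dtype='float32')

# The value at output_dim_idx (0 by default) is replaced by the batch size of
# 'x', so the result is a [batch_size, 10] tensor filled with 0.0. After this
# commit the op dispatches to the new phi full_batch_size_like kernel through
# the argument mapping added below.
out = fluid.layers.fill_constant_batch_size_like(
    input=x, shape=[1, 10], dtype='float32', value=0.0)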
@@ -12,7 +12,6 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/fluid/operators/fill_constant_batch_size_like_op.h"
#include "paddle/fluid/operators/batch_size_like.h"
namespace paddle {
@@ -23,9 +22,13 @@ class FillConstantBatchSizeLikeOp : public BatchSizeLikeOp {
using BatchSizeLikeOp::BatchSizeLikeOp;
framework::OpKernelType GetExpectedKernelType(
const framework::ExecutionContext &ctx) const override {
return framework::OpKernelType(
framework::OpKernelType kernel_type = framework::OpKernelType(
static_cast<framework::proto::VarType::Type>(ctx.Attr<int>("dtype")),
ctx.device_context());
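// When force_cpu is set, pin the kernel to CPUPlace even if the op is
// running under a GPU/XPU device context.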
if (ctx.Attr<bool>("force_cpu")) {
kernel_type.place_ = platform::CPUPlace();
}
return kernel_type;
}
};
@@ -64,15 +67,3 @@ REGISTER_OPERATOR(
paddle::framework::EmptyGradOpMaker<paddle::imperative::OpBase>,
ops::FillConstantBatchSizeLikeOpMaker,
ops::BatchSizeLikeNoNeedBufferVarsInferer);
REGISTER_OP_CPU_KERNEL(
fill_constant_batch_size_like,
ops::FillConstantBatchSizeLikeOpKernel<paddle::platform::CPUDeviceContext,
float>,
ops::FillConstantBatchSizeLikeOpKernel<paddle::platform::CPUDeviceContext,
double>,
ops::FillConstantBatchSizeLikeOpKernel<paddle::platform::CPUDeviceContext,
int>,
ops::FillConstantBatchSizeLikeOpKernel<paddle::platform::CPUDeviceContext,
int64_t>,
ops::FillConstantBatchSizeLikeOpKernel<paddle::platform::CPUDeviceContext,
bool>);
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include <string>
#include "paddle/fluid/framework/convert_utils.h"
#include "paddle/fluid/framework/op_registry.h"
#include "paddle/phi/kernels/funcs/math_function.h"
namespace paddle {
namespace operators {
template <typename DeviceContext, typename T>
class FillConstantBatchSizeLikeOpKernel : public framework::OpKernel<T> {
public:
void Compute(const framework::ExecutionContext &ctx) const override {
auto data_type =
static_cast<framework::proto::VarType::Type>(ctx.Attr<int>("dtype"));
auto float_value = ctx.Attr<float>("value");
auto str_value = ctx.Attr<std::string>("str_value");
auto force_cpu = ctx.Attr<bool>("force_cpu");
auto *out = ctx.Output<framework::Tensor>("Out");
auto *in = ctx.Input<framework::LoDTensor>("Input");
if (in->lod().size() && ctx.Attr<int>("input_dim_idx") == 0) {
// set the correct batch size for the LoDTensor.
auto odims = out->dims();
int output_dim_idx = ctx.Attr<int>("output_dim_idx");
odims[output_dim_idx] = static_cast<int>(in->lod().back().size()) - 1;
out->mutable_data<T>(odims, ctx.GetPlace());
}
T value;
if (str_value.empty()) {
value = static_cast<T>(float_value);
} else {
std::stringstream convert_stream(str_value);
if (std::is_same<int64_t, T>::value) {
int64_t tmp_value;
convert_stream >> tmp_value;
value = static_cast<T>(tmp_value);
} else {
double tmp_value;
convert_stream >> tmp_value;
value = static_cast<T>(tmp_value);
}
}
platform::DeviceContextPool &pool = platform::DeviceContextPool::Instance();
bool cpu_place = force_cpu || ctx.GetPlace() == platform::CPUPlace();
if (cpu_place) {
auto &dev_ctx = *pool.Get(platform::CPUPlace());
phi::funcs::SetConstant<platform::CPUDeviceContext, T> functor;
out->mutable_data(platform::CPUPlace(),
framework::TransToPhiDataType(data_type));
functor(reinterpret_cast<const platform::CPUDeviceContext &>(dev_ctx),
out, static_cast<T>(value));
}
#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
if (!cpu_place) {
auto &dev_ctx = *pool.Get(ctx.GetPlace());
phi::funcs::SetConstant<platform::CUDADeviceContext, T> functor;
out->mutable_data(ctx.GetPlace(),
framework::TransToPhiDataType(data_type));
functor(reinterpret_cast<const platform::CUDADeviceContext &>(dev_ctx),
out, static_cast<T>(value));
}
#endif
}
};
} // namespace operators
} // namespace paddle
/* Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/phi/kernels/full_kernel.h"
#include "paddle/phi/core/kernel_registry.h"
namespace phi {
template <typename T, typename Context>
void FullBatchSizeLikeKernel(const Context& dev_ctx,
const DenseTensor& x,
const std::vector<int>& shape,
const Scalar& val,
DataType dtype,
int x_batch_size_dim,
int out_batch_size_dim,
DenseTensor* out) {
if (x.lod().size() && x_batch_size_dim == 0) {
// set the correct batch size for the LoDTensor.
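// For a LoDTensor input, the batch size is the number of sequences at the
// finest LoD level, i.e. lod.back().size() - 1.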
auto odims = out->dims();
odims[out_batch_size_dim] = static_cast<int>(x.lod().back().size()) - 1;
FullKernel<T, Context>(dev_ctx, phi::vectorize(odims), val, dtype, out);
}
FullLikeKernel<T, Context>(dev_ctx, x, val, dtype, out);
}
} // namespace phi
PD_REGISTER_KERNEL(full_batch_size_like,
CPU,
ALL_LAYOUT,
phi::FullBatchSizeLikeKernel,
float,
double,
int,
int64_t,
bool) {
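// The input tensor only supplies its shape and LoD, so it is accepted on
// any backend and no device transform is forced before the kernel runs.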
kernel->InputAt(0).SetBackend(phi::Backend::ALL_BACKEND);
}
#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
PD_REGISTER_KERNEL(full_batch_size_like,
GPU,
ALL_LAYOUT,
phi::FullBatchSizeLikeKernel,
float,
double,
int,
int64_t,
bool,
phi::dtype::float16) {
kernel->InputAt(0).SetBackend(phi::Backend::ALL_BACKEND);
}
#endif
@@ -14,6 +14,8 @@
#pragma once
#include <vector>
#include "paddle/phi/common/scalar.h"
#include "paddle/phi/common/scalar_array.h"
#include "paddle/phi/core/dense_tensor.h"
@@ -37,6 +39,18 @@ void FullLikeKernel(const Context& dev_ctx,
DataType dtype,
DenseTensor* out);
// In order to be compatible with the fill_constant_batch_size_like op
// that is still used in the 2.x APIs
template <typename T, typename Context>
void FullBatchSizeLikeKernel(const Context& dev_ctx,
const DenseTensor& x,
const std::vector<int>& shape,
const Scalar& val,
DataType dtype,
int x_batch_size_dim,
int out_batch_size_dim,
DenseTensor* out);
template <typename T, typename Context>
void Full(const Context& dev_ctx,
const ScalarArray& shape,
...
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
/* Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
@@ -12,21 +12,32 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/fluid/operators/fill_constant_batch_size_like_op.h"
#include "paddle/fluid/framework/op_registry.h"
namespace ops = paddle::operators;
REGISTER_OP_CUDA_KERNEL(
fill_constant_batch_size_like,
ops::FillConstantBatchSizeLikeOpKernel<paddle::platform::CUDADeviceContext,
paddle::platform::float16>,
ops::FillConstantBatchSizeLikeOpKernel<paddle::platform::CUDADeviceContext,
float>,
ops::FillConstantBatchSizeLikeOpKernel<paddle::platform::CUDADeviceContext,
double>,
ops::FillConstantBatchSizeLikeOpKernel<paddle::platform::CUDADeviceContext,
int>,
ops::FillConstantBatchSizeLikeOpKernel<paddle::platform::CUDADeviceContext,
int64_t>,
ops::FillConstantBatchSizeLikeOpKernel<paddle::platform::CUDADeviceContext,
bool>);
#include "paddle/phi/core/compat/op_utils.h"
namespace phi {
KernelSignature FillConstantBatchSizeLikeOpArgumentMapping(
const ArgumentMappingContext& ctx) {
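// The fluid op stores the fill value in both a float "value" attribute and a
// string "str_value" attribute; forward whichever one actually carries the
// value (a non-empty str_value takes precedence).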
const auto& str_value = paddle::any_cast<std::string>(ctx.Attr("str_value"));
if (str_value.empty()) {
return KernelSignature(
"full_batch_size_like",
{"Input"},
{"shape", "value", "dtype", "input_dim_idx", "output_dim_idx"},
{"Out"});
} else {
return KernelSignature(
"full_batch_size_like",
{"Input"},
{"shape", "str_value", "dtype", "input_dim_idx", "output_dim_idx"},
{"Out"});
}
}
} // namespace phi
PD_REGISTER_BASE_KERNEL_NAME(fill_constant_batch_size_like,
full_batch_size_like);
PD_REGISTER_ARG_MAPPING_FN(fill_constant_batch_size_like,
phi::FillConstantBatchSizeLikeOpArgumentMapping);