diff --git a/paddle/fluid/operators/scale_op.cc b/paddle/fluid/operators/scale_op.cc
index c24f924313fb90d33b17f727260578271f67ae88..4e9c84ef4c9503dc81d1258202ac4b37f867cfcf 100644
--- a/paddle/fluid/operators/scale_op.cc
+++ b/paddle/fluid/operators/scale_op.cc
@@ -75,14 +75,14 @@ class ScaleOp : public framework::OperatorWithKernel {
       const framework::ExecutionContext &ctx) const override {
     if (ctx.InputVar("X")->IsType<framework::LoDTensor>() ||
         ctx.InputVar("X")->IsType<framework::Tensor>()) {
+      std::string scale_attr;
       if (ctx.HasInput("ScaleTensor")) {
-        return framework::KernelSignature("scale.host", {"X", "ScaleTensor"},
-                                          {"bias", "bias_after_scale"},
-                                          {"Out"});
+        scale_attr = "ScaleTensor";
       } else {
-        return framework::KernelSignature(
-            "scale", {"X"}, {"scale", "bias", "bias_after_scale"}, {"Out"});
+        scale_attr = "scale";
       }
+      return framework::KernelSignature(
+          "scale", {"X"}, {scale_attr, "bias", "bias_after_scale"}, {"Out"});
     }
     // TODO(chenweihang): support other cases after selected rows added
     return framework::KernelSignature("scale.unregistered", {}, {}, {});
diff --git a/paddle/pten/api/include/math.h b/paddle/pten/api/include/math.h
index 149500c546dfd3db4f1623ac9bbc565d9d71b6eb..700af6d2d591162ecc2ec5fbfd00d9f39d128dc8 100644
--- a/paddle/pten/api/include/math.h
+++ b/paddle/pten/api/include/math.h
@@ -15,16 +15,11 @@ limitations under the License. */
 #pragma once
 
 #include "paddle/pten/api/include/tensor.h"
+#include "paddle/pten/common/scalar.h"
 
 namespace paddle {
 namespace experimental {
 
-// TODO(chenweihang): add scale API
-// TODO(chenweihang): move mean API into stat.h/cc
-PD_DLL_DECL Tensor mean(const Tensor& x,
-                        const std::vector<int64_t>& axis,
-                        bool keep_dim);
-
 PD_DLL_DECL Tensor add(const Tensor& x, const Tensor& y);
 
 PD_DLL_DECL Tensor subtract(const Tensor& x, const Tensor& y);
@@ -33,10 +28,21 @@ PD_DLL_DECL Tensor divide(const Tensor& x, const Tensor& y);
 
 PD_DLL_DECL Tensor multiply(const Tensor& x, const Tensor& y);
 
+// TODO(chenweihang): move mean API into stat.h/cc
+PD_DLL_DECL Tensor mean(const Tensor& x,
+                        const std::vector<int64_t>& axis,
+                        bool keep_dim);
+
 PD_DLL_DECL Tensor sum(const Tensor& x,
                        const std::vector<int64_t>& axis,
                        DataType dtype,
                        bool keep_dim);
 
+// TODO(chenweihang): Follow-up discussion on the handling of `act` argument
+PD_DLL_DECL Tensor scale(const Tensor& x,
+                         const Scalar& scale,
+                         float bias,
+                         bool bias_after_scale);
+
 }  // namespace experimental
 }  // namespace paddle
diff --git a/paddle/pten/api/lib/math.cc b/paddle/pten/api/lib/math.cc
index bd2567ddb15063fb4aade7f4a8ef0956a2a2a410..a97d78b5a9d6f07fafeba39fab8a80dc4f6e4a3d 100644
--- a/paddle/pten/api/lib/math.cc
+++ b/paddle/pten/api/lib/math.cc
@@ -274,6 +274,45 @@ PD_DLL_DECL Tensor multiply(const Tensor& x, const Tensor& y) {
   return out;
 }
 
+PD_DLL_DECL Tensor scale(const Tensor& x,
+                         const Scalar& scale,
+                         float bias,
+                         bool bias_after_scale) {
+  // 1. Get kernel signature and kernel
+  auto kernel_key_set = ParseKernelKeyByInputArgs(x);
+  auto kernel_key = kernel_key_set.GetHigestPriorityKernelKey();
+  auto kernel = pten::KernelFactory::Instance().SelectKernelOrThrowError(
+      "scale", kernel_key);
+
+  // 2. Get Device Context
+  auto* dev_ctx = GetDeviceContextByBackend(kernel_key.backend());
+  auto kernel_context = pten::KernelContext(dev_ctx);
+
+  // 3. Auto data transform
+  auto dense_x = std::dynamic_pointer_cast<pten::DenseTensor>(x.impl());
+  kernel_context.EmplaceBackInput(dense_x);
+  kernel_context.EmplaceBackAttr(pten::Scalar(scale));
+  kernel_context.EmplaceBackAttr(bias);
+  kernel_context.EmplaceBackAttr(bias_after_scale);
+
+  // 4. InferMeta
+  auto out_meta = UnchangedInferMeta(dense_x->meta());
+
+  // 5. Prepare outputs
+  Tensor out;
+  const auto allocator =
+      std::make_shared<paddle::experimental::DefaultAllocator>(
+          pten::TransToFluidPlace(kernel_key.backend()));
+  auto dense_out = std::make_shared<pten::DenseTensor>(allocator, out_meta);
+  kernel_context.EmplaceBackOutput(dense_out);
+  out.set_impl(dense_out);
+
+  // 6. Call kernel
+  kernel(&kernel_context);
+
+  return out;
+}
+
 }  // namespace experimental
 }  // namespace paddle
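With the six steps above in place, callers no longer choose between two kernels: the `Scalar` argument accepts either an immediate value or a 1-element tensor. A minimal usage sketch, mirroring the new test_scale_api.cc added at the end of this diff (`full` comes from paddle/pten/api/include/creation.h; the function name here is illustrative):

#include "paddle/pten/api/include/creation.h"
#include "paddle/pten/api/include/math.h"

void ScaleUsageSketch() {
  auto x = paddle::experimental::full({3, 4}, 1.0, pten::DataType::FLOAT32);
  // scale given as an immediate float value...
  auto out1 = paddle::experimental::scale(x, 2.0, /*bias=*/1.0,
                                          /*bias_after_scale=*/true);
  // ...or as a 1-element tensor; the Scalar parameter absorbs both forms,
  // so both calls dispatch to the same "scale" kernel.
  auto s = paddle::experimental::full({1}, 2.0, pten::DataType::FLOAT32);
  auto out2 = paddle::experimental::scale(x, s, 1.0, true);
}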
diff --git a/paddle/pten/include/math.h b/paddle/pten/include/math.h
index c6528d85c27cc03bdd8dee82ae83469cda64701d..c2b9f75bda0449d00adf1962db425bf493886e1c 100644
--- a/paddle/pten/include/math.h
+++ b/paddle/pten/include/math.h
@@ -78,7 +78,7 @@ DenseTensor Sum(const ContextT& dev_ctx,
 template <typename T, typename ContextT>
 DenseTensor Scale(const ContextT& dev_ctx,
                   const DenseTensor& x,
-                  float scale,
+                  const Scalar& scale,
                   float bias,
                   bool bias_after_scale) {
   auto out_meta = UnchangedInferMeta(x.meta());
@@ -90,21 +90,6 @@ DenseTensor Scale(const ContextT& dev_ctx,
   return dense_out;
 }
 
-template <typename T, typename ContextT>
-DenseTensor Scale(const ContextT& dev_ctx,
-                  const DenseTensor& x,
-                  const DenseTensor& scale,
-                  float bias,
-                  bool bias_after_scale) {
-  auto out_meta = UnchangedInferMeta(x.meta());
-  const auto allocator =
-      std::make_shared<paddle::experimental::DefaultAllocator>(
-          dev_ctx.GetPlace());
-  pten::DenseTensor dense_out(allocator, out_meta);
-  ScaleHost<T>(dev_ctx, x, scale, bias, bias_after_scale, &dense_out);
-  return dense_out;
-}
-
 template <typename T, typename ContextT>
 DenseTensor Add(const ContextT& dev_ctx,
                 const DenseTensor& x,
diff --git a/paddle/pten/kernels/cpu/math.cc b/paddle/pten/kernels/cpu/math.cc
index 634b5231da266d56eb3ecbc1ebcb9c9e61ab9333..05ca7a3ae52446eb0024ece502a4ed33479d1ac1 100644
--- a/paddle/pten/kernels/cpu/math.cc
+++ b/paddle/pten/kernels/cpu/math.cc
@@ -50,28 +50,12 @@ void Mean(const CPUContext& dev_ctx,
 
 template <typename T>
 void Scale(const CPUContext& dev_ctx,
            const DenseTensor& x,
-           float scale,
+           const Scalar& scale,
            float bias,
            bool bias_after_scale,
            DenseTensor* out) {
-  eigen::Scale<CPUContext, T>(dev_ctx, x, scale, bias, bias_after_scale, out);
-}
-
-// TODO(chenweihang): now the ScaleTensor's dtype are same as x, so we cannot
-// register its dtype def
-template <typename T>
-void ScaleHost(const CPUContext& dev_ctx,
-               const DenseTensor& x,
-               const DenseTensor& scale,
-               float bias,
-               bool bias_after_scale,
-               DenseTensor* out) {
-  eigen::Scale<CPUContext, T>(dev_ctx,
-                              x,
-                              static_cast<float>(*scale.data<T>()),
-                              bias,
-                              bias_after_scale,
-                              out);
+  eigen::Scale<CPUContext, T>(
+      dev_ctx, x, scale.to<float>(), bias, bias_after_scale, out);
 }
 
 template <typename T>
@@ -145,20 +129,7 @@ PT_REGISTER_KERNEL("scale",
                    int16_t,
                    int,
                    int64_t) {}
-PT_REGISTER_KERNEL("scale.host",
-                   CPU,
-                   ANY,
-                   pten::ScaleHost,
-                   float,
-                   double,
-                   paddle::platform::bfloat16,
-                   uint8_t,
-                   int8_t,
-                   int16_t,
-                   int,
-                   int64_t) {
-  kernel->InputAt(1).SetBackend(pten::Backend::CPU);
-}
+
 PT_REGISTER_KERNEL("elementwise_add",
                    CPU,
                    ANY,
diff --git a/paddle/pten/kernels/cpu/math.h b/paddle/pten/kernels/cpu/math.h
index c06d40e57799fb2e7b4825229eadbb2a07271752..31532f38f6e49f754944e69c06dedd0fc4baaaca 100644
--- a/paddle/pten/kernels/cpu/math.h
+++ b/paddle/pten/kernels/cpu/math.h
@@ -14,6 +14,7 @@ limitations under the License. */
 
 #pragma once
 
+#include "paddle/pten/common/scalar.h"
 #include "paddle/pten/core/dense_tensor.h"
 #include "paddle/pten/core/kernel_registry.h"
 
@@ -40,19 +41,11 @@ void Mean(const CPUContext& dev_ctx,
 template <typename T>
 void Scale(const CPUContext& dev_ctx,
            const DenseTensor& x,
-           float scale,
+           const Scalar& scale,
            float bias,
            bool bias_after_scale,
            DenseTensor* out);
 
-template <typename T>
-void ScaleHost(const CPUContext& dev_ctx,
-               const DenseTensor& x,
-               const DenseTensor& scale,
-               float bias,
-               bool bias_after_scale,
-               DenseTensor* out);
-
 template <typename T>
 void ElementwiseAdd(const CPUContext& dev_ctx,
                     const DenseTensor& x,
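For context on the `bias` and `bias_after_scale` attributes that the kernel keeps: the scale op computes `Out = scale * X + bias` when `bias_after_scale` is true, and `Out = scale * (X + bias)` otherwise. A scalar reference version of that formula, as a hedged sketch (the real elementwise work is done by `eigen::Scale`; this helper is only illustrative):

// Reference semantics for a single element of the scale kernel.
template <typename T>
T ScaleOneElement(T x, float scale, float bias, bool bias_after_scale) {
  // bias_after_scale == true:  out = scale * x + bias
  // bias_after_scale == false: out = scale * (x + bias)
  return bias_after_scale ? static_cast<T>(scale * x + bias)
                          : static_cast<T>(scale * (x + bias));
}

This is also why the new test at the end of the diff expects every element to be 3.0: with x = 1, scale = 2, bias = 1 and bias_after_scale = true, each element becomes 2 * 1 + 1 = 3.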
diff --git a/paddle/pten/kernels/cuda/math.cu b/paddle/pten/kernels/cuda/math.cu
index bc5582926a40068d5cf8fe8a9b30117569276b3a..8d6abc92855305efabad347c81d09b0a67fbc011 100644
--- a/paddle/pten/kernels/cuda/math.cu
+++ b/paddle/pten/kernels/cuda/math.cu
@@ -79,30 +79,12 @@ void Mean(const CUDAContext& dev_ctx,
 
 template <typename T>
 void Scale(const CUDAContext& dev_ctx,
            const DenseTensor& x,
-           float scale,
+           const Scalar& scale,
            float bias,
            bool bias_after_scale,
            DenseTensor* out) {
-  eigen::Scale<CUDAContext, T>(dev_ctx, x, scale, bias, bias_after_scale, out);
-}
-
-template <typename T>
-void ScaleHost(const CUDAContext& dev_ctx,
-               const DenseTensor& x,
-               const DenseTensor& scale,
-               float bias,
-               bool bias_after_scale,
-               DenseTensor* out) {
-  PADDLE_ENFORCE_EQ(paddle::platform::is_gpu_place(scale.place()),
-                    false,
-                    paddle::platform::errors::InvalidArgument(
-                        "Scale argument isn't a host tensor."));
-  eigen::Scale<CUDAContext, T>(dev_ctx,
-                               x,
-                               static_cast<float>(*scale.data<T>()),
-                               bias,
-                               bias_after_scale,
-                               out);
+  eigen::Scale<CUDAContext, T>(
+      dev_ctx, x, scale.to<float>(), bias, bias_after_scale, out);
 }
 
 // Create the definition of ElementwiseAdd
@@ -150,20 +132,6 @@ PT_REGISTER_KERNEL("scale",
                    int16_t,
                    int,
                    int64_t) {}
-PT_REGISTER_KERNEL("scale.host",
-                   CUDA,
-                   ANY,
-                   pten::ScaleHost,
-                   float,
-                   double,
-                   float16,
-                   uint8_t,
-                   int8_t,
-                   int16_t,
-                   int,
-                   int64_t) {
-  kernel->InputAt(1).SetBackend(pten::Backend::CPU);
-}
 PT_REGISTER_KERNEL("elementwise_add",
                    CUDA,
                    ANY,
diff --git a/paddle/pten/kernels/cuda/math.h b/paddle/pten/kernels/cuda/math.h
index dcee649d7d82d59e363b05c512cad40432cf1ed5..0ac55f1f8795070eb3cd1163b98de0b555ab9e46 100644
--- a/paddle/pten/kernels/cuda/math.h
+++ b/paddle/pten/kernels/cuda/math.h
@@ -17,6 +17,7 @@ limitations under the License. */
 // CUDA and HIP use same api
 #if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
 
+#include "paddle/pten/common/scalar.h"
 #include "paddle/pten/core/dense_tensor.h"
 
 // See Note [ Why still include the fluid headers? ]
@@ -42,19 +43,11 @@ void Mean(const CUDAContext& dev_ctx,
 template <typename T>
 void Scale(const CUDAContext& dev_ctx,
            const DenseTensor& x,
-           float scale,
+           const Scalar& scale,
            float bias,
            bool bias_after_scale,
            DenseTensor* out);
 
-template <typename T>
-void ScaleHost(const CUDAContext& dev_ctx,
-               const DenseTensor& x,
-               const DenseTensor& scale,
-               float bias,
-               bool bias_after_scale,
-               DenseTensor* out);
-
 template <typename T>
 void ElementwiseAdd(const CUDAContext& dev_ctx,
                     const DenseTensor& x,
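Why `ScaleHost` can be deleted on both backends: the `Scalar` captures the scale value on the host at the API boundary, so the kernel only ever receives a plain value via `scale.to<float>()` and never needs a host-placed `DenseTensor` input. That is what makes both the removed `is_gpu_place` guard and the removed `kernel->InputAt(1).SetBackend(pten::Backend::CPU)` registration hook unnecessary. A toy sketch of the idea only; this is not the actual paddle/pten/common/scalar.h implementation, and every name below is made up:

// Toy stand-in for pten::Scalar: one numeric value, captured either from an
// immediate float or read once, on the host, from a 1-element tensor buffer.
class ToyScalar {
 public:
  /* implicit */ ToyScalar(float v) : value_(v) {}  // from a float attribute
  explicit ToyScalar(const float* host_data)        // from a 1-element tensor
      : value_(*host_data) {}                       // host-side read
  template <typename T>
  T to() const {                                    // what kernels call
    return static_cast<T>(value_);
  }

 private:
  float value_;
};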
diff --git a/paddle/pten/tests/api/CMakeLists.txt b/paddle/pten/tests/api/CMakeLists.txt
index 207d8f35b4c453e0963434e052fff9f704bc73a8..c670d094810198bb2108971e9f6d1ee8340579d7 100644
--- a/paddle/pten/tests/api/CMakeLists.txt
+++ b/paddle/pten/tests/api/CMakeLists.txt
@@ -20,3 +20,4 @@ cc_test(test_reshape_api SRCS test_reshape_api.cc DEPS pten_tensor pten_api pten
 cc_test(test_to_api SRCS test_to_api.cc DEPS pten_tensor pten_api pten_api_utils)
 cc_test(test_slice_api SRCS test_slice_api.cc DEPS pten_tensor pten_api pten_api_utils)
 cc_test(test_sum_api SRCS test_sum_api.cc DEPS pten_tensor pten_api pten_api_utils)
+cc_test(test_scale_api SRCS test_scale_api.cc DEPS pten_tensor pten_api pten_api_utils)
diff --git a/paddle/pten/tests/api/test_scale_api.cc b/paddle/pten/tests/api/test_scale_api.cc
new file mode 100644
index 0000000000000000000000000000000000000000..2c0cd5cc71d8ee4c6aa918d983d86b10a99ec669
--- /dev/null
+++ b/paddle/pten/tests/api/test_scale_api.cc
@@ -0,0 +1,57 @@
+/* Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+#include <gtest/gtest.h>
+#include <memory>
+
+#include "paddle/pten/api/include/creation.h"
+#include "paddle/pten/api/include/math.h"
+
+#include "paddle/pten/api/lib/utils/allocator.h"
+#include "paddle/pten/core/dense_tensor.h"
+
+namespace paddle {
+namespace tests {
+
+namespace framework = paddle::framework;
+using DDim = paddle::framework::DDim;
+
+void CheckScaleResult(Tensor* out) {
+  ASSERT_EQ(out->dims().size(), 2);
+  ASSERT_EQ(out->dims()[0], 3);
+  ASSERT_EQ(out->dims()[1], 4);
+  ASSERT_EQ(out->numel(), 12);
+  ASSERT_EQ(out->is_cpu(), true);
+  ASSERT_EQ(out->type(), pten::DataType::FLOAT32);
+  ASSERT_EQ(out->layout(), pten::DataLayout::NCHW);
+  ASSERT_EQ(out->initialized(), true);
+  for (int64_t i = 0; i < out->numel(); ++i) {
+    ASSERT_EQ(out->mutable_data<float>()[i], 3.0);
+  }
+}
+
+TEST(API, scale) {
+  // 1. check `scale` is float value
+  auto x = experimental::full({3, 4}, 1.0, pten::DataType::FLOAT32);
+  auto out1 = experimental::scale(x, 2.0, 1.0, true);
+  CheckScaleResult(&out1);
+
+  // 2. check `scale` is Tensor with shape [1]
+  auto scale = experimental::full({1}, 2.0, pten::DataType::FLOAT32);
+  auto out2 = experimental::scale(x, scale, 1.0, true);
+  CheckScaleResult(&out2);
+}
+
+}  // namespace tests
+}  // namespace paddle
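One possible follow-up, not part of this PR: the new test only exercises `bias_after_scale = true`. A hypothetical extra case in the same style would pin down the other branch; with x = 1, scale = 2 and bias = 1 it should produce 2 * (1 + 1) = 4 per element:

// Hypothetical addition to test_scale_api.cc, same conventions as above.
TEST(API, scale_bias_before_scale) {
  auto x = paddle::experimental::full({3, 4}, 1.0, pten::DataType::FLOAT32);
  auto out = paddle::experimental::scale(x, 2.0, 1.0,
                                         /*bias_after_scale=*/false);
  for (int64_t i = 0; i < out.numel(); ++i) {
    ASSERT_EQ(out.mutable_data<float>()[i], 4.0);
  }
}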