Commit 0c8b9994 (unverified), authored by: Chen Weihang, committed by: GitHub

add scale api and test (#37683)

Parent: 5f916c37
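This commit collapses the separate "scale"/"scale.host" kernels into a single "scale" kernel whose scale argument is a Scalar, and exposes the operation through the C++ API as paddle::experimental::scale. A minimal usage sketch, mirroring the new test at the bottom of this diff (only headers and functions that the diff itself touches):

    #include "paddle/pten/api/include/creation.h"  // experimental::full
    #include "paddle/pten/api/include/math.h"      // experimental::scale

    void ScaleDemo() {
      // A 3x4 tensor filled with 1.0.
      auto x = paddle::experimental::full({3, 4}, 1.0, pten::DataType::FLOAT32);
      // `scale` accepts a plain float ...
      auto out1 = paddle::experimental::scale(x, 2.0, /*bias=*/1.0,
                                              /*bias_after_scale=*/true);
      // ... or a 1-element tensor (the case the old "scale.host" kernel served).
      auto s = paddle::experimental::full({1}, 2.0, pten::DataType::FLOAT32);
      auto out2 = paddle::experimental::scale(x, s, 1.0, true);
      // Both compute out = x * 2.0 + 1.0 = 3.0 elementwise.
    }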
...
@@ -75,14 +75,14 @@ class ScaleOp : public framework::OperatorWithKernel {
       const framework::ExecutionContext &ctx) const override {
     if (ctx.InputVar("X")->IsType<framework::LoDTensor>() ||
         ctx.InputVar("X")->IsType<framework::Tensor>()) {
+      std::string scale_attr;
       if (ctx.HasInput("ScaleTensor")) {
-        return framework::KernelSignature("scale.host", {"X", "ScaleTensor"},
-                                          {"bias", "bias_after_scale"},
-                                          {"Out"});
+        scale_attr = "ScaleTensor";
       } else {
-        return framework::KernelSignature(
-            "scale", {"X"}, {"scale", "bias", "bias_after_scale"}, {"Out"});
+        scale_attr = "scale";
       }
+      return framework::KernelSignature(
+          "scale", {"X"}, {scale_attr, "bias", "bias_after_scale"}, {"Out"});
     }
     // TODO(chenweihang): support other cases after selected rows added
     return framework::KernelSignature("scale.unregistered", {}, {}, {});
...
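The rewritten branch above routes both cases to the single "scale" kernel; only the name placed in the attribute slot differs. For illustration (using the same KernelSignature constructor this hunk itself uses), the two signatures it can now return are:

    // `scale` supplied as a host tensor input:
    auto sig_tensor = framework::KernelSignature(
        "scale", {"X"}, {"ScaleTensor", "bias", "bias_after_scale"}, {"Out"});
    // `scale` supplied as a float attribute:
    auto sig_attr = framework::KernelSignature(
        "scale", {"X"}, {"scale", "bias", "bias_after_scale"}, {"Out"});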
...
@@ -15,16 +15,11 @@ limitations under the License. */
 #pragma once

 #include "paddle/pten/api/include/tensor.h"
+#include "paddle/pten/common/scalar.h"

 namespace paddle {
 namespace experimental {

-// TODO(chenweihang): add scale API
-// TODO(chenweihang): move mean API into stat.h/cc
-PD_DLL_DECL Tensor mean(const Tensor& x,
-                        const std::vector<int64_t>& axis,
-                        bool keep_dim);
-
 PD_DLL_DECL Tensor add(const Tensor& x, const Tensor& y);

 PD_DLL_DECL Tensor subtract(const Tensor& x, const Tensor& y);
...
@@ -33,10 +28,21 @@ PD_DLL_DECL Tensor divide(const Tensor& x, const Tensor& y);
 PD_DLL_DECL Tensor multiply(const Tensor& x, const Tensor& y);

+// TODO(chenweihang): move mean API into stat.h/cc
+PD_DLL_DECL Tensor mean(const Tensor& x,
+                        const std::vector<int64_t>& axis,
+                        bool keep_dim);
+
 PD_DLL_DECL Tensor sum(const Tensor& x,
                        const std::vector<int64_t>& axis,
                        DataType dtype,
                        bool keep_dim);

+// TODO(chenweihang): Follow-up discussion on the handling of `act` argument
+PD_DLL_DECL Tensor scale(const Tensor& x,
+                         const Scalar& scale,
+                         float bias,
+                         bool bias_after_scale);
+
 } // namespace experimental
 } // namespace paddle
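For reference, bias_after_scale keeps the meaning documented for the fluid scale op; a scalar reference sketch of the two orderings (reference only, not the Eigen kernel):

    float ScaleRef(float x, float scale, float bias, bool bias_after_scale) {
      // true:  out = x * scale + bias
      // false: out = (x + bias) * scale
      return bias_after_scale ? x * scale + bias : (x + bias) * scale;
    }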
...
@@ -274,6 +274,45 @@ PD_DLL_DECL Tensor multiply(const Tensor& x, const Tensor& y) {
   return out;
 }

+PD_DLL_DECL Tensor scale(const Tensor& x,
+                         const Scalar& scale,
+                         float bias,
+                         bool bias_after_scale) {
+  // 1. Get kernel signature and kernel
+  auto kernel_key_set = ParseKernelKeyByInputArgs(x);
+  auto kernel_key = kernel_key_set.GetHigestPriorityKernelKey();
+  auto kernel = pten::KernelFactory::Instance().SelectKernelOrThrowError(
+      "scale", kernel_key);
+
+  // 2. Get Device Context
+  auto* dev_ctx = GetDeviceContextByBackend(kernel_key.backend());
+  auto kernel_context = pten::KernelContext(dev_ctx);
+
+  // 3. Auto data transform
+  auto dense_x = std::dynamic_pointer_cast<pten::DenseTensor>(x.impl());
+  kernel_context.EmplaceBackInput(dense_x);
+  kernel_context.EmplaceBackAttr(pten::Scalar(scale));
+  kernel_context.EmplaceBackAttr(bias);
+  kernel_context.EmplaceBackAttr(bias_after_scale);
+
+  // 4. InferMeta
+  auto out_meta = UnchangedInferMeta(dense_x->meta());
+
+  // 5. Prepare outputs
+  Tensor out;
+  const auto allocator = std::make_shared<DefaultAllocator>(
+      pten::TransToFluidPlace(kernel_key.backend()));
+  auto dense_out = std::make_shared<pten::DenseTensor>(allocator, out_meta);
+  kernel_context.EmplaceBackOutput(dense_out);
+  out.set_impl(dense_out);
+
+  // 6. Call kernel
+  kernel(&kernel_context);
+
+  return out;
+}
+
 } // namespace experimental
 } // namespace paddle
...
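A note on the ordering above: InferMeta (step 4) runs before output preparation (step 5) because the output DenseTensor is constructed directly from the inferred meta; for scale the shape, dtype, and layout are unchanged from the input, so UnchangedInferMeta(dense_x->meta()) suffices. The pten::Scalar(scale) conversion in step 3 is what lets a single kernel serve both the float-attribute and host-tensor cases.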
...
@@ -78,7 +78,7 @@ DenseTensor Sum(const ContextT& dev_ctx,
 template <typename T, typename ContextT>
 DenseTensor Scale(const ContextT& dev_ctx,
                   const DenseTensor& x,
-                  float scale,
+                  const Scalar& scale,
                   float bias,
                   bool bias_after_scale) {
   auto out_meta = UnchangedInferMeta(x.meta());
...
@@ -90,21 +90,6 @@ DenseTensor Scale(const ContextT& dev_ctx,
   return dense_out;
 }

-template <typename T, typename ContextT>
-DenseTensor Scale(const ContextT& dev_ctx,
-                  const DenseTensor& x,
-                  const DenseTensor& scale,
-                  float bias,
-                  bool bias_after_scale) {
-  auto out_meta = UnchangedInferMeta(x.meta());
-  const auto allocator =
-      std::make_shared<paddle::experimental::DefaultAllocator>(
-          dev_ctx.GetPlace());
-  pten::DenseTensor dense_out(allocator, out_meta);
-  ScaleHost<T>(dev_ctx, x, scale, bias, bias_after_scale, &dense_out);
-  return dense_out;
-}
-
 template <typename T, typename ContextT>
 DenseTensor Add(const ContextT& dev_ctx,
                 const DenseTensor& x,
...
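The templated helper above can also be invoked directly on the kernel side. A hedged sketch (assuming a CPU device context named dev_ctx and a pten::DenseTensor named dense_x are in scope, and that pten::Scalar converts from float as the public API's implicit conversion suggests):

    // Computes dense_out = dense_x * 2.0 + 1.0 elementwise; ContextT is deduced.
    auto dense_out = pten::Scale<float>(dev_ctx, dense_x, pten::Scalar(2.0f),
                                        /*bias=*/1.0f, /*bias_after_scale=*/true);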
...
@@ -50,28 +50,12 @@ void Mean(const CPUContext& dev_ctx,
 template <typename T>
 void Scale(const CPUContext& dev_ctx,
            const DenseTensor& x,
-           float scale,
+           const Scalar& scale,
            float bias,
            bool bias_after_scale,
            DenseTensor* out) {
-  eigen::Scale<CPUContext, T>(dev_ctx, x, scale, bias, bias_after_scale, out);
-}
-
-// TODO(chenweihang): now the ScaleTensor's dtype are same as x, so we cannot
-// register its dtype def
-template <typename T>
-void ScaleHost(const CPUContext& dev_ctx,
-               const DenseTensor& x,
-               const DenseTensor& scale,
-               float bias,
-               bool bias_after_scale,
-               DenseTensor* out) {
-  eigen::Scale<CPUContext, T>(dev_ctx,
-                              x,
-                              static_cast<float>(*scale.data<T>()),
-                              bias,
-                              bias_after_scale,
-                              out);
+  eigen::Scale<CPUContext, T>(
+      dev_ctx, x, scale.to<float>(), bias, bias_after_scale, out);
 }
...
@@ -145,20 +129,7 @@ PT_REGISTER_KERNEL("scale",
                    int16_t,
                    int,
                    int64_t) {}
-PT_REGISTER_KERNEL("scale.host",
-                   CPU,
-                   ANY,
-                   pten::ScaleHost,
-                   float,
-                   double,
-                   paddle::platform::bfloat16,
-                   uint8_t,
-                   int8_t,
-                   int16_t,
-                   int,
-                   int64_t) {
-  kernel->InputAt(1).SetBackend(pten::Backend::CPU);
-}
+
 PT_REGISTER_KERNEL("elementwise_add",
                    CPU,
                    ANY,
...
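With the 1-element tensor case absorbed into the Scalar attribute, the separate ScaleHost kernel and its "scale.host" registration, including the kernel->InputAt(1).SetBackend(pten::Backend::CPU) pin that forced the scale tensor onto the host, are deleted outright. The CUDA files below get the same treatment.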
...
@@ -14,6 +14,7 @@ limitations under the License. */
 #pragma once

+#include "paddle/pten/common/scalar.h"
 #include "paddle/pten/core/dense_tensor.h"
 #include "paddle/pten/core/kernel_registry.h"
...
@@ -40,15 +41,7 @@ void Mean(const CPUContext& dev_ctx,
 template <typename T>
 void Scale(const CPUContext& dev_ctx,
            const DenseTensor& x,
-           float scale,
-           float bias,
-           bool bias_after_scale,
-           DenseTensor* out);
-
-template <typename T>
-void ScaleHost(const CPUContext& dev_ctx,
-               const DenseTensor& x,
-               const DenseTensor& scale,
+           const Scalar& scale,
            float bias,
            bool bias_after_scale,
            DenseTensor* out);
...
...
@@ -79,30 +79,12 @@ void Mean(const CUDAContext& dev_ctx,
 template <typename T>
 void Scale(const CUDAContext& dev_ctx,
            const DenseTensor& x,
-           float scale,
+           const Scalar& scale,
            float bias,
            bool bias_after_scale,
            DenseTensor* out) {
-  eigen::Scale<CUDAContext, T>(dev_ctx, x, scale, bias, bias_after_scale, out);
-}
-
-template <typename T>
-void ScaleHost(const CUDAContext& dev_ctx,
-               const DenseTensor& x,
-               const DenseTensor& scale,
-               float bias,
-               bool bias_after_scale,
-               DenseTensor* out) {
-  PADDLE_ENFORCE_EQ(paddle::platform::is_gpu_place(scale.place()),
-                    false,
-                    paddle::platform::errors::InvalidArgument(
-                        "Scale argument isn't a host tensor."));
-  eigen::Scale<CUDAContext, T>(dev_ctx,
-                               x,
-                               static_cast<float>(*scale.data<T>()),
-                               bias,
-                               bias_after_scale,
-                               out);
+  eigen::Scale<CUDAContext, T>(
+      dev_ctx, x, scale.to<float>(), bias, bias_after_scale, out);
 }

 // Create the definition of ElementwiseAdd
...
@@ -150,20 +132,6 @@ PT_REGISTER_KERNEL("scale",
                    int16_t,
                    int,
                    int64_t) {}
-PT_REGISTER_KERNEL("scale.host",
-                   CUDA,
-                   ANY,
-                   pten::ScaleHost,
-                   float,
-                   double,
-                   float16,
-                   uint8_t,
-                   int8_t,
-                   int16_t,
-                   int,
-                   int64_t) {
-  kernel->InputAt(1).SetBackend(pten::Backend::CPU);
-}
 PT_REGISTER_KERNEL("elementwise_add",
                    CUDA,
                    ANY,
...
...
@@ -17,6 +17,7 @@ limitations under the License. */
 // CUDA and HIP use same api
 #if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)

+#include "paddle/pten/common/scalar.h"
 #include "paddle/pten/core/dense_tensor.h"

 // See Note [ Why still include the fluid headers? ]
...
@@ -42,15 +43,7 @@ void Mean(const CUDAContext& dev_ctx,
 template <typename T>
 void Scale(const CUDAContext& dev_ctx,
            const DenseTensor& x,
-           float scale,
-           float bias,
-           bool bias_after_scale,
-           DenseTensor* out);
-
-template <typename T>
-void ScaleHost(const CUDAContext& dev_ctx,
-               const DenseTensor& x,
-               const DenseTensor& scale,
+           const Scalar& scale,
            float bias,
            bool bias_after_scale,
            DenseTensor* out);
...
...
@@ -20,3 +20,4 @@ cc_test(test_reshape_api SRCS test_reshape_api.cc DEPS pten_tensor pten_api pten_api_utils)
 cc_test(test_to_api SRCS test_to_api.cc DEPS pten_tensor pten_api pten_api_utils)
 cc_test(test_slice_api SRCS test_slice_api.cc DEPS pten_tensor pten_api pten_api_utils)
 cc_test(test_sum_api SRCS test_sum_api.cc DEPS pten_tensor pten_api pten_api_utils)
+cc_test(test_scale_api SRCS test_scale_api.cc DEPS pten_tensor pten_api pten_api_utils)
New file: test_scale_api.cc (registered in the CMakeLists.txt hunk above)

/* Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */

#include <gtest/gtest.h>
#include <memory>

#include "paddle/pten/api/include/creation.h"
#include "paddle/pten/api/include/math.h"

#include "paddle/pten/api/lib/utils/allocator.h"
#include "paddle/pten/core/dense_tensor.h"

namespace paddle {
namespace tests {

namespace framework = paddle::framework;
using DDim = paddle::framework::DDim;

void CheckScaleResult(Tensor* out) {
  ASSERT_EQ(out->dims().size(), 2);
  ASSERT_EQ(out->dims()[0], 3);
  ASSERT_EQ(out->dims()[1], 4);
  ASSERT_EQ(out->numel(), 12);
  ASSERT_EQ(out->is_cpu(), true);
  ASSERT_EQ(out->type(), pten::DataType::FLOAT32);
  ASSERT_EQ(out->layout(), pten::DataLayout::NCHW);
  ASSERT_EQ(out->initialized(), true);
  // x is all 1.0, scale is 2.0, and bias 1.0 is applied after the scale,
  // so every element should be 1.0 * 2.0 + 1.0 = 3.0.
  for (int64_t i = 0; i < out->numel(); ++i) {
    ASSERT_EQ(out->mutable_data<float>()[i], 3.0);
  }
}

TEST(API, scale) {
  // 1. check `scale` passed as a float value
  auto x = experimental::full({3, 4}, 1.0, pten::DataType::FLOAT32);
  auto out1 = experimental::scale(x, 2.0, 1.0, true);
  CheckScaleResult(&out1);

  // 2. check `scale` passed as a Tensor with shape [1]
  auto scale = experimental::full({1}, 2.0, pten::DataType::FLOAT32);
  auto out2 = experimental::scale(x, scale, 1.0, true);
  CheckScaleResult(&out2);
}

} // namespace tests
} // namespace paddle