未验证 提交 6dd13152 编写于 作者: Z zhangyikun02 提交者: GitHub

Move prior_box, softmax and softmax_grad kernel to phi, test=kunlun (#45510)

上级 6fc15986
......@@ -41,7 +41,7 @@ endif()
if(WITH_XPU)
detection_library(iou_similarity_op SRCS iou_similarity_op.cc
iou_similarity_op_xpu.cc)
detection_library(prior_box_op SRCS prior_box_op.cc prior_box_op_xpu.cc)
detection_library(prior_box_op SRCS prior_box_op.cc)
detection_library(generate_proposals_v2_op SRCS generate_proposals_v2_op.cc)
elseif(WITH_MLU)
detection_library(iou_similarity_op SRCS iou_similarity_op.cc
......
/* Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#ifdef PADDLE_WITH_XPU
#include "paddle/fluid/operators/detection/prior_box_op.h"
#include "paddle/fluid/platform/device/device_wrapper.h"
namespace paddle {
namespace operators {
// XPU kernel for the prior_box operator (fluid OpKernel interface).
//
// T is the OpKernel element type. K is the element type used for the
// "Boxes" / "Variances" outputs and for the step and offset values.
template <typename T, typename K>
class PriorBoxOpXPUKernel : public framework::OpKernel<T> {
 public:
  // Reads the "Input" and "Image" tensors plus the prior-box attributes,
  // writes generated boxes to "Boxes" through xpu::gen_prior_box, and fills
  // "Variances" with the `variances` attribute repeated once per box.
  void Compute(const framework::ExecutionContext& ctx) const override {
    using FluidTensor = paddle::framework::Tensor;

    auto* feature_map = ctx.Input<FluidTensor>("Input");
    auto* image_tensor = ctx.Input<FluidTensor>("Image");
    auto* boxes_out = ctx.Output<FluidTensor>("Boxes");
    auto* vars_out = ctx.Output<FluidTensor>("Variances");

    auto min_sizes = ctx.Attr<std::vector<float>>("min_sizes");
    auto max_sizes = ctx.Attr<std::vector<float>>("max_sizes");
    auto raw_ratios = ctx.Attr<std::vector<float>>("aspect_ratios");
    auto variances = ctx.Attr<std::vector<float>>("variances");
    const bool flip = ctx.Attr<bool>("flip");
    const bool clip = ctx.Attr<bool>("clip");
    const bool min_max_aspect_ratios_order =
        ctx.Attr<bool>("min_max_aspect_ratios_order");

    // Expand the user-supplied ratios (taking `flip` into account).
    std::vector<float> expanded_ratios;
    ExpandAspectRatios(raw_ratios, flip, &expanded_ratios);

    const K attr_step_w = static_cast<K>(ctx.Attr<float>("step_w"));
    const K attr_step_h = static_cast<K>(ctx.Attr<float>("step_h"));
    const K offset = static_cast<K>(ctx.Attr<float>("offset"));

    // dims()[3] is used as the width and dims()[2] as the height.
    const auto image_w = image_tensor->dims()[3];
    const auto image_h = image_tensor->dims()[2];
    const auto feat_w = feature_map->dims()[3];
    const auto feat_h = feature_map->dims()[2];

    // If either step attribute is zero, both steps are derived from the
    // image size divided by the feature-map size.
    const bool derive_steps = (attr_step_w == 0 || attr_step_h == 0);
    const K step_width =
        derive_steps ? static_cast<K>(image_w) / feat_w : attr_step_w;
    const K step_height =
        derive_steps ? static_cast<K>(image_h) / feat_h : attr_step_h;

    // Priors per feature-map cell: one per (ratio, min_size) pair plus one
    // per max_size.
    int priors_per_cell = expanded_ratios.size() * min_sizes.size();
    if (!max_sizes.empty()) {
      priors_per_cell += max_sizes.size();
    }

    boxes_out->mutable_data<K>(ctx.GetPlace());
    vars_out->mutable_data<K>(ctx.GetPlace());
    const auto& dev_ctx =
        ctx.template device_context<paddle::platform::XPUDeviceContext>();
    auto boxes_ptr = boxes_out->data<K>();
    auto vars_ptr = vars_out->data<K>();

    // Host-side parameter vectors; the third field is passed as nullptr.
    xpu::VectorParam<float> ratios_param{
        expanded_ratios.data(),
        static_cast<int>(expanded_ratios.size()),
        nullptr};
    xpu::VectorParam<float> min_param{
        min_sizes.data(), static_cast<int>(min_sizes.size()), nullptr};
    xpu::VectorParam<float> max_param{
        max_sizes.data(), static_cast<int>(max_sizes.size()), nullptr};

    int status = xpu::gen_prior_box(dev_ctx.x_context(),
                                    boxes_ptr,
                                    ratios_param,
                                    min_param,
                                    max_param,
                                    feat_h,
                                    feat_w,
                                    image_h,
                                    image_w,
                                    offset,
                                    step_height,
                                    step_width,
                                    clip,
                                    min_max_aspect_ratios_order);
    PADDLE_ENFORCE_XDNN_SUCCESS(status, "gen_prior_box");

    // Build the variance buffer on the host: `variances` repeated per box.
    const int total_boxes = feat_h * feat_w * priors_per_cell;
    const int var_len = variances.size();
    std::vector<K> host_vars(var_len * total_boxes);
    auto dst = host_vars.begin();
    for (int b = 0; b < total_boxes; ++b) {
      dst = std::copy(variances.begin(), variances.end(), dst);
    }

    // Copy the host buffer into the "Variances" output.
    status = xpu_memcpy(vars_ptr,
                        host_vars.data(),
                        host_vars.size() * sizeof(K),
                        XPUMemcpyKind::XPU_HOST_TO_DEVICE);
    PADDLE_ENFORCE_XPU_SUCCESS(status);
  }
};
} // namespace operators
} // namespace paddle
// Short alias for the operators namespace, used by the registration below.
namespace ops = paddle::operators;
// Register PriorBoxOpXPUKernel<float, float> as the XPU kernel of the
// prior_box operator; float is the only instantiation listed.
REGISTER_OP_XPU_KERNEL(prior_box, ops::PriorBoxOpXPUKernel<float, float>);
#endif
/* Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#ifdef PADDLE_WITH_XPU
#include "paddle/fluid/framework/op_registry.h"
#include "paddle/phi/kernels/funcs/axis_utils.h"
namespace paddle {
namespace operators {
using Tensor = framework::Tensor;
using DDim = framework::DDim;
// XPU kernel for the softmax operator (fluid OpKernel interface).
//
// Reads tensor "X" and the integer attribute "axis", and writes the result
// of xpu::softmax along that axis to tensor "Out".
template <typename DeviceContext, typename T>
class SoftmaxXPUKernel : public framework::OpKernel<T> {
  using XPUType = typename XPUTypeTrait<T>::Type;

 public:
  void Compute(const framework::ExecutionContext& context) const override {
    auto* x = context.Input<Tensor>("X");
    auto* out = context.Output<Tensor>("Out");
    const int rank = x->dims().size();
    // CanonicalAxis is responsible for normalizing a negative axis, so no
    // additional `axis += rank` adjustment is applied afterwards.
    const int axis =
        phi::funcs::CanonicalAxis(context.Attr<int>("axis"), rank);

    // allocate memory on device.
    out->mutable_data<T>(context.GetPlace());
    // Nothing to compute for an empty tensor; skip the device calls.
    if (out->numel() == 0) {
      return;
    }

    std::vector<int> x_dims;
    x_dims.reserve(rank);
    for (int i = 0; i < rank; i++) {
      x_dims.push_back(x->dims()[i]);
    }

    auto& dev_ctx = context.template device_context<DeviceContext>();
    int r = XPU_SUCCESS;
    auto version = platform::get_xpu_version(context.GetPlace().GetDeviceId());
    if (version == phi::backends::xpu::XPUVersion::XPU1) {
      // On XPU1 the input is first clipped to [-1e20, 1e20] into a scratch
      // buffer owned by the guard, and softmax then reads that buffer.
      xpu::ctx_guard RAII_GUARD(dev_ctx.x_context());
      XPUType* clip_x_data_l3 = RAII_GUARD.alloc_l3_or_gm<XPUType>(x->numel());
      r = xpu::clip_v2(dev_ctx.x_context(),
                       reinterpret_cast<const XPUType*>(x->data<T>()),
                       clip_x_data_l3,
                       x->numel(),
                       static_cast<XPUType>(-1e20),
                       static_cast<XPUType>(1e20));
      PADDLE_ENFORCE_EQ(r,
                        XPU_SUCCESS,
                        platform::errors::External(
                            "XPU API(clip_v2) return wrong value[%d %s]",
                            r,
                            XPUAPIErrorMsg[r]));
      r = xpu::softmax<XPUType>(dev_ctx.x_context(),
                                clip_x_data_l3,
                                reinterpret_cast<XPUType*>(out->data<T>()),
                                x_dims,
                                axis);
      // The message names the API that was actually invoked (xpu::softmax).
      PADDLE_ENFORCE_EQ(
          r,
          XPU_SUCCESS,
          platform::errors::External("XPU API(softmax) return wrong "
                                     "value[%d %s]",
                                     r,
                                     XPUAPIErrorMsg[r]));
    } else {
      // Other XPU versions run softmax directly on the input.
      r = xpu::softmax<XPUType>(dev_ctx.x_context(),
                                reinterpret_cast<const XPUType*>(x->data<T>()),
                                reinterpret_cast<XPUType*>(out->data<T>()),
                                x_dims,
                                axis);
      PADDLE_ENFORCE_EQ(
          r,
          XPU_SUCCESS,
          platform::errors::External("XPU API(softmax) return wrong "
                                     "value[%d %s]",
                                     r,
                                     XPUAPIErrorMsg[r]));
    }
  }
};
// XPU kernel for the softmax_grad operator (fluid OpKernel interface).
//
// Reads "Out" and the gradient of "Out", and writes the gradient of "X"
// computed by xpu::softmax_grad along the "axis" attribute.
template <typename DeviceContext, typename T>
class SoftmaxGradXPUKernel : public framework::OpKernel<T> {
  using XPUType = typename XPUTypeTrait<T>::Type;

 public:
  void Compute(const framework::ExecutionContext& context) const override {
    auto* out = context.Input<Tensor>("Out");
    auto* dout = context.Input<Tensor>(framework::GradVarName("Out"));
    auto* dx = context.Output<Tensor>(framework::GradVarName("X"));
    const int rank = dx->dims().size();
    // CanonicalAxis is responsible for normalizing a negative axis, so no
    // additional `axis += rank` adjustment is applied afterwards.
    const int axis =
        phi::funcs::CanonicalAxis(context.Attr<int>("axis"), rank);

    // allocate memory on device.
    dx->mutable_data<T>(context.GetPlace());
    // Nothing to compute for an empty tensor; skip the device call.
    if (dx->numel() == 0) {
      return;
    }

    std::vector<int> x_dims;
    x_dims.reserve(rank);
    for (int i = 0; i < rank; i++) {
      x_dims.push_back(dx->dims()[i]);
    }

    auto& dev_ctx = context.template device_context<DeviceContext>();
    int r = xpu::softmax_grad<XPUType>(
        dev_ctx.x_context(),
        reinterpret_cast<const XPUType*>(out->data<T>()),
        reinterpret_cast<const XPUType*>(dout->data<T>()),
        reinterpret_cast<XPUType*>(dx->data<T>()),
        x_dims,
        axis);
    // The message names the API that was actually invoked
    // (xpu::softmax_grad).
    PADDLE_ENFORCE_EQ(
        r,
        XPU_SUCCESS,
        platform::errors::External("XPU API(softmax_grad) return wrong "
                                   "value[%d %s]",
                                   r,
                                   XPUAPIErrorMsg[r]));
  }
};
} // namespace operators
} // namespace paddle
// Short alias for the operators namespace, used by the registrations below.
namespace ops = paddle::operators;
// Register the forward softmax XPU kernel for float and float16.
REGISTER_OP_XPU_KERNEL(
softmax,
ops::SoftmaxXPUKernel<paddle::platform::XPUDeviceContext, float>,
ops::SoftmaxXPUKernel<paddle::platform::XPUDeviceContext,
paddle::platform::float16>);
// Register the softmax gradient XPU kernel for the same two element types.
REGISTER_OP_XPU_KERNEL(
softmax_grad,
ops::SoftmaxGradXPUKernel<paddle::platform::XPUDeviceContext, float>,
ops::SoftmaxGradXPUKernel<paddle::platform::XPUDeviceContext,
paddle::platform::float16>);
#endif // PADDLE_WITH_XPU
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/phi/kernels/prior_box_kernel.h"
#include "paddle/phi/backends/xpu/enforce_xpu.h"
#include "paddle/phi/core/kernel_registry.h"
#include "paddle/phi/kernels/funcs/eigen/common.h"
namespace phi {
template <typename T, typename Context>
void PriorBoxKernel(const Context& ctx,
const DenseTensor& input,
const DenseTensor& image,
const std::vector<float>& min_sizes,
const std::vector<float>& aspect_ratios,
const std::vector<float>& variances,
const std::vector<float>& max_sizes,
bool flip,
bool clip,
float step_w,
float step_h,
float offset,
bool min_max_aspect_ratios_order,
DenseTensor* out,
DenseTensor* var) {
std::vector<float> new_aspect_ratios;
ExpandAspectRatios(aspect_ratios, flip, &new_aspect_ratios);
T new_step_w = static_cast<T>(step_w);
T new_step_h = static_cast<T>(step_h);
T new_offset = static_cast<T>(offset);
auto img_width = image.dims()[3];
auto img_height = image.dims()[2];
auto feature_width = input.dims()[3];
auto feature_height = input.dims()[2];
T step_width, step_height;
if (new_step_w == 0 || new_step_h == 0) {
step_width = static_cast<T>(img_width) / feature_width;
step_height = static_cast<T>(img_height) / feature_height;
} else {
step_width = new_step_w;
step_height = new_step_h;
}
int num_priors = new_aspect_ratios.size() * min_sizes.size();
if (max_sizes.size() > 0) {
num_priors += max_sizes.size();
}
ctx.template Alloc<T>(out);
ctx.template Alloc<T>(var);
auto boxes_data = out->data<T>();
auto var_data = var->data<T>();
xpu::VectorParam<float> aspect_ratios_param{
new_aspect_ratios.data(),
static_cast<int>(new_aspect_ratios.size()),
nullptr};
xpu::VectorParam<float> min_sizes_param{
min_sizes.data(), static_cast<int>(min_sizes.size()), nullptr};
xpu::VectorParam<float> max_sizes_param{
max_sizes.data(), static_cast<int>(max_sizes.size()), nullptr};
int ret = xpu::gen_prior_box(ctx.x_context(),
boxes_data,
aspect_ratios_param,
min_sizes_param,
max_sizes_param,
feature_height,
feature_width,
img_height,
img_width,
new_offset,
step_height,
step_width,
clip,
min_max_aspect_ratios_order);
PADDLE_ENFORCE_XDNN_SUCCESS(ret, "gen_prior_box");
int box_num = feature_height * feature_width * num_priors;
int vlen = variances.size();
std::vector<T> var_cpu(vlen * box_num);
for (int i = 0; i < box_num; ++i) {
std::copy(variances.begin(), variances.end(), var_cpu.begin() + i * vlen);
}
ctx.Wait();
ret = xpu_memcpy(var_data,
var_cpu.data(),
var_cpu.size() * sizeof(T),
XPUMemcpyKind::XPU_HOST_TO_DEVICE);
PADDLE_ENFORCE_XPU_SUCCESS(ret);
}
} // namespace phi
// Register phi::PriorBoxKernel as the prior_box kernel for the XPU backend
// (ALL_LAYOUT); float is the only data type listed. The trailing braces are
// the macro's body, left empty here.
PD_REGISTER_KERNEL(prior_box, XPU, ALL_LAYOUT, phi::PriorBoxKernel, float) {}
/* Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/phi/kernels/softmax_grad_kernel.h"
#include "paddle/phi/backends/xpu/enforce_xpu.h"
#include "paddle/phi/core/kernel_registry.h"
#include "paddle/phi/kernels/funcs/axis_utils.h"
namespace phi {
// XPU implementation of the softmax gradient kernel.
//
// Allocates `x_grad` and, unless it is empty, fills it by calling
// xpu::softmax_grad with `out`, `out_grad`, the dimensions of `x_grad` and
// the canonicalized `axis`.
template <typename T, typename Context>
void SoftmaxGradKernel(const Context& dev_ctx,
                       const DenseTensor& out,
                       const DenseTensor& out_grad,
                       int axis,
                       DenseTensor* x_grad) {
  using XPUType = typename XPUTypeTrait<T>::Type;

  const int ndim = x_grad->dims().size();
  const int softmax_axis = phi::funcs::CanonicalAxis(axis, ndim);

  // Allocate the output on the device, then bail out early if it is empty.
  dev_ctx.template Alloc<T>(x_grad);
  if (x_grad->numel() == 0) {
    return;
  }

  // The XPU API takes the shape as a vector of int.
  std::vector<int> shape;
  for (int d = 0; d < ndim; ++d) {
    shape.push_back(x_grad->dims()[d]);
  }

  const auto* out_ptr = reinterpret_cast<const XPUType*>(out.data<T>());
  const auto* out_grad_ptr =
      reinterpret_cast<const XPUType*>(out_grad.data<T>());
  auto* x_grad_ptr = reinterpret_cast<XPUType*>(x_grad->data<T>());

  const int status = xpu::softmax_grad<XPUType>(dev_ctx.x_context(),
                                                out_ptr,
                                                out_grad_ptr,
                                                x_grad_ptr,
                                                shape,
                                                softmax_axis);
  PADDLE_ENFORCE_XDNN_SUCCESS(status, "softmax_grad");
}
} // namespace phi
// Register phi::SoftmaxGradKernel as the softmax_grad kernel for the XPU
// backend (ALL_LAYOUT) with float and float16 as the listed data types. The
// trailing braces are the macro's body, left empty here.
PD_REGISTER_KERNEL(softmax_grad,
XPU,
ALL_LAYOUT,
phi::SoftmaxGradKernel,
float,
phi::dtype::float16) {}
/* Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/phi/kernels/softmax_kernel.h"
#include "paddle/phi/backends/xpu/enforce_xpu.h"
#include "paddle/phi/core/kernel_registry.h"
#include "paddle/phi/kernels/funcs/axis_utils.h"
namespace phi {
template <typename T, typename Context>
void SoftmaxKernel(const Context& dev_ctx,
const DenseTensor& x,
int axis,
DenseTensor* out) {
using XPUType = typename XPUTypeTrait<T>::Type;
const int rank = x.dims().size();
const int calc_axis = phi::funcs::CanonicalAxis(axis, rank);
// allocate memory on device.
dev_ctx.template Alloc<T>(out);
if (out->numel() == 0) {
return;
}
std::vector<int> x_dims;
for (int i = 0; i < rank; i++) {
x_dims.push_back(x.dims()[i]);
}
int r = XPU_SUCCESS;
auto version =
phi::backends::xpu::get_xpu_version(dev_ctx.GetPlace().GetDeviceId());
if (version == phi::backends::xpu::XPUVersion::XPU1) {
xpu::ctx_guard RAII_GUARD(dev_ctx.x_context());
XPUType* clip_x_data_l3 = RAII_GUARD.alloc_l3_or_gm<XPUType>(x.numel());
r = xpu::clip_v2(dev_ctx.x_context(),
reinterpret_cast<const XPUType*>(x.data<T>()),
clip_x_data_l3,
x.numel(),
static_cast<XPUType>(-1e20),
static_cast<XPUType>(1e20));
PADDLE_ENFORCE_XDNN_SUCCESS(r, "clip_v2");
r = xpu::softmax<XPUType>(dev_ctx.x_context(),
clip_x_data_l3,
reinterpret_cast<XPUType*>(out->data<T>()),
x_dims,
calc_axis);
PADDLE_ENFORCE_XDNN_SUCCESS(r, "softmax");
} else {
r = xpu::softmax<XPUType>(dev_ctx.x_context(),
reinterpret_cast<const XPUType*>(x.data<T>()),
reinterpret_cast<XPUType*>(out->data<T>()),
x_dims,
calc_axis);
PADDLE_ENFORCE_XDNN_SUCCESS(r, "softmax");
}
}
} // namespace phi
// Register phi::SoftmaxKernel as the softmax kernel for the XPU backend
// (ALL_LAYOUT) with float and float16 as the listed data types. The trailing
// braces are the macro's body, left empty here.
PD_REGISTER_KERNEL(
softmax, XPU, ALL_LAYOUT, phi::SoftmaxKernel, float, phi::dtype::float16) {}
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册