Unverified commit b305629c, authored by 陈沧夜, committed by GitHub

remove *npu.cc (#53342)

Parent cf6ed7cb
/* Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include <string>
#include <vector>
#include "paddle/fluid/framework/op_registry.h"
#include "paddle/phi/kernels/impl/box_coder.h"
namespace paddle {
namespace operators {
template <typename T>
struct BoxCoderFunction {
public:
explicit BoxCoderFunction(const framework::ExecutionContext& ctx) : ctx(ctx) {
place = ctx.GetPlace();
stream = ctx.template device_context<paddle::platform::NPUDeviceContext>()
.stream();
}
phi::DenseTensor Adds(const phi::DenseTensor& x, float scalar) {
phi::DenseTensor y;
y.mutable_data<T>(x.dims(), place);
const auto& runner = NpuOpRunner("Adds", {x}, {y}, {{"value", scalar}});
runner.Run(stream);
return y;
}
phi::DenseTensor Muls(const phi::DenseTensor& x, float scalar) {
phi::DenseTensor y;
y.mutable_data<T>(x.dims(), place);
const auto& runner = NpuOpRunner("Muls", {x}, {y}, {{"value", scalar}});
runner.Run(stream);
return y;
}
phi::DenseTensor Mul(const phi::DenseTensor& x, const phi::DenseTensor& y) {
phi::DenseTensor z;
z.mutable_data<T>(x.dims(), place);
const auto& runner = NpuOpRunner("Mul", {x, y}, {z}, {});
runner.Run(stream);
return z;
}
phi::DenseTensor SubWithBroadCast(const phi::DenseTensor& x,
const phi::DenseTensor& y,
const framework::DDim& shape) {
phi::DenseTensor z;
z.mutable_data<T>(shape, place);
const auto& runner = NpuOpRunner("Sub", {x, y}, {z}, {});
runner.Run(stream);
return z;
}
void DivWithBroadCastVoid(const phi::DenseTensor& x,
const phi::DenseTensor& y,
const framework::DDim& shape,
phi::DenseTensor* z) {
z->mutable_data<T>(shape, place);
const auto& runner = NpuOpRunner("Div", {x, y}, {*z}, {});
runner.Run(stream);
}
phi::DenseTensor DivWithBroadCast(const phi::DenseTensor& x,
const phi::DenseTensor& y,
const framework::DDim& shape) {
phi::DenseTensor z;
DivWithBroadCastVoid(x, y, shape, &z);
return z;
}
void MulWithBroadCastVoid(const phi::DenseTensor& x,
const phi::DenseTensor& y,
const framework::DDim& shape,
phi::DenseTensor* z) {
z->mutable_data<T>(shape, place);
const auto& runner = NpuOpRunner("Mul", {x, y}, {*z}, {});
runner.Run(stream);
}
phi::DenseTensor MulWithBroadCast(const phi::DenseTensor& x,
const phi::DenseTensor& y,
const framework::DDim& shape) {
phi::DenseTensor z;
MulWithBroadCastVoid(x, y, shape, &z);
return z;
}
void AddWithBroadCastVoid(const phi::DenseTensor& x,
const phi::DenseTensor& y,
const framework::DDim& shape,
phi::DenseTensor* z) {
z->mutable_data<T>(shape, place);
const auto& runner = NpuOpRunner("AddV2", {x, y}, {*z}, {});
runner.Run(stream);
}
phi::DenseTensor AddWithBroadCast(const phi::DenseTensor& x,
const phi::DenseTensor& y,
const framework::DDim& shape) {
phi::DenseTensor z;
AddWithBroadCastVoid(x, y, shape, &z);
return z;
}
phi::DenseTensor Abs(const phi::DenseTensor& x) {
phi::DenseTensor y;
y.mutable_data<T>(x.dims(), place);
const auto& runner = NpuOpRunner("Abs", {x}, {y}, {});
runner.Run(stream);
return y;
}
phi::DenseTensor Log(const phi::DenseTensor& x) {
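// log(x) is computed as log1p(x - 1) via the Ascend Log1p operator.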
phi::DenseTensor t_x_m1 = Adds(x, -1);
phi::DenseTensor y;
y.mutable_data<T>(x.dims(), place);
const auto& runner = NpuOpRunner("Log1p", {t_x_m1}, {y}, {});
runner.Run(stream);
return y;
}
phi::DenseTensor Exp(const phi::DenseTensor& x) {
phi::DenseTensor y;
y.mutable_data<T>(x.dims(), place);
const auto& runner = NpuOpRunner("Exp", {x}, {y}, {});
runner.Run(stream);
return y;
}
phi::DenseTensor Dot(const phi::DenseTensor& x, const phi::DenseTensor& y) {
auto dim_x = x.dims();
auto dim_y = y.dims();
PADDLE_ENFORCE_EQ(
dim_x.size(),
2,
platform::errors::InvalidArgument(
"x should be a 2-dim tensor, but got %d-dim.", dim_x.size()));
PADDLE_ENFORCE_EQ(
dim_y.size(),
2,
platform::errors::InvalidArgument(
"y should be a 2-dim tensor, but got %d-dim.", dim_y.size()));
PADDLE_ENFORCE_EQ(
dim_x[1],
dim_y[0],
platform::errors::InvalidArgument("Expect dim_x[1] == dim_y[0], but "
"got dim_x[1] = %d, dim_y[0] = %d.",
dim_x[1],
dim_y[0]));
phi::DenseTensor z;
z.mutable_data<T>({dim_x[0], dim_y[1]}, place);
const auto& runner =
NpuOpRunner("MatMul",
{x, y},
{z},
{{"transpose_x1", false}, {"transpose_x2", false}});
runner.Run(stream);
return z;
}
void ConcatVoid(const std::vector<phi::DenseTensor>& inputs,
const framework::DDim& shape_out,
int axis,
phi::DenseTensor* output) {
output->mutable_data<T>(shape_out, place);
std::vector<std::string> names;
for (size_t i = 0; i < inputs.size(); i++) {
names.push_back("x" + std::to_string(i));
}
NpuOpRunner runner{
"ConcatD",
{inputs},
{*output},
{{"concat_dim", axis}, {"N", static_cast<int>(inputs.size())}}};
runner.AddInputNames(names);
runner.Run(stream);
}
phi::DenseTensor Concat(const std::vector<phi::DenseTensor>& inputs,
const framework::DDim& shape_out,
int axis) {
phi::DenseTensor output;
ConcatVoid(inputs, shape_out, axis, &output);
return output;
}
phi::DenseTensor Slice(const phi::DenseTensor& x,
const std::vector<int>& offsets,
const std::vector<int>& size,
const framework::DDim& shape) {
phi::DenseTensor y;
y.mutable_data<T>(shape, place);
const auto& runner =
NpuOpRunner("SliceD", {x}, {y}, {{"offsets", offsets}, {"size", size}});
runner.Run(stream);
return y;
}
private:
platform::Place place;
aclrtStream stream;
const framework::ExecutionContext& ctx;
};
template <typename T>
void Vector2Tensor(const framework::ExecutionContext& ctx,
const std::vector<T>& vec,
const framework::DDim& ddim,
phi::DenseTensor* tsr) {
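// Copy the host vector to device memory, wait for the async copy, then reshape.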
framework::TensorFromVector<T>(vec, ctx.device_context(), tsr);
ctx.template device_context<paddle::platform::NPUDeviceContext>().Wait();
tsr->Resize(ddim);
}
template <typename T>
void BoxCoderEnc(const framework::ExecutionContext& ctx,
const phi::DenseTensor* tb,
const phi::DenseTensor* pb,
const phi::DenseTensor* pbv,
const bool norm,
const std::vector<float>& variance,
phi::DenseTensor* out) {
auto M = pb->dims()[0];
auto N = tb->dims()[0];
auto shape_0 = phi::make_ddim({4, 2});
phi::DenseTensor m_diff;
phi::DenseTensor m_aver;
std::vector<T> vec_diff = {static_cast<T>(-1),
static_cast<T>(0),
static_cast<T>(0),
static_cast<T>(-1),
static_cast<T>(1),
static_cast<T>(0),
static_cast<T>(0),
static_cast<T>(1)};
std::vector<T> vec_aver = {static_cast<T>(0.5),
static_cast<T>(0),
static_cast<T>(0),
static_cast<T>(0.5),
static_cast<T>(0.5),
static_cast<T>(0),
static_cast<T>(0),
static_cast<T>(0.5)};
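// Viewed as 4x2 matrices: multiplying (xmin, ymin, xmax, ymax) by m_diff
// yields (width, height); multiplying by m_aver yields the box center.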
Vector2Tensor<T>(ctx, vec_diff, shape_0, &m_diff);
Vector2Tensor<T>(ctx, vec_aver, shape_0, &m_aver);
BoxCoderFunction<T> F(ctx);
phi::DenseTensor pb_xy = F.Adds(F.Dot(*pb, m_aver), (norm ? 0 : 0.5));
phi::DenseTensor pb_wh = F.Adds(F.Dot(*pb, m_diff), (norm ? 0 : 1));
phi::DenseTensor tb_xy = F.Dot(*tb, m_aver);
phi::DenseTensor tb_wh = F.Adds(F.Dot(*tb, m_diff), (norm ? 0 : 1));
pb_xy.Resize({1, M, 2});
pb_wh.Resize({1, M, 2});
tb_xy.Resize({N, 1, 2});
tb_wh.Resize({N, 1, 2});
auto shape_half = phi::make_ddim({N, M, 2});
auto shape_full = phi::make_ddim({N, M, 4});
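// Center-size encoding: out_xy = (tb_xy - pb_xy) / pb_wh,
// out_wh = log(|tb_wh / pb_wh|).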
phi::DenseTensor out_xy_0 = F.DivWithBroadCast(
F.SubWithBroadCast(tb_xy, pb_xy, shape_half), pb_wh, shape_half);
phi::DenseTensor out_wh_0 =
F.Log(F.Abs(F.DivWithBroadCast(tb_wh, pb_wh, shape_half)));
phi::DenseTensor out_0 = F.Concat({out_xy_0, out_wh_0}, shape_full, 2);
if (pbv) {
F.DivWithBroadCastVoid(out_0, *pbv, shape_full, out);
} else {
phi::DenseTensor t_var;
std::vector<T> vec_var(4);
for (auto i = 0; i < 4; i++) {
vec_var[i] = static_cast<T>(variance[i]);
}
Vector2Tensor(ctx, vec_var, phi::make_ddim({1, 1, 4}), &t_var);
F.DivWithBroadCastVoid(out_0, t_var, shape_full, out);
}
}
template <typename T>
void BoxCoderDec(const framework::ExecutionContext& ctx,
const phi::DenseTensor* tb,
const phi::DenseTensor* pb,
const phi::DenseTensor* pbv,
const bool norm,
const std::vector<float>& variance,
int axis,
phi::DenseTensor* out) {
auto shape_0 = phi::make_ddim({4, 2});
phi::DenseTensor m_diff;
phi::DenseTensor m_aver;
std::vector<T> vec_diff = {static_cast<T>(-1),
static_cast<T>(0),
static_cast<T>(0),
static_cast<T>(-1),
static_cast<T>(1),
static_cast<T>(0),
static_cast<T>(0),
static_cast<T>(1)};
std::vector<T> vec_aver = {static_cast<T>(0.5),
static_cast<T>(0),
static_cast<T>(0),
static_cast<T>(0.5),
static_cast<T>(0.5),
static_cast<T>(0),
static_cast<T>(0),
static_cast<T>(0.5)};
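// As in the encoder: m_diff maps corner boxes to (width, height),
// m_aver maps them to box centers.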
Vector2Tensor<T>(ctx, vec_diff, shape_0, &m_diff);
Vector2Tensor<T>(ctx, vec_aver, shape_0, &m_aver);
BoxCoderFunction<T> F(ctx);
phi::DenseTensor pb_xy = F.Adds(F.Dot(*pb, m_aver), (norm ? 0 : 0.5));
phi::DenseTensor pb_wh = F.Adds(F.Dot(*pb, m_diff), (norm ? 0 : 1));
auto pb_resize_shape = axis == 0 ? phi::make_ddim({1, pb->dims()[0], 2})
: phi::make_ddim({pb->dims()[0], 1, 2});
pb_xy.Resize(pb_resize_shape);
pb_wh.Resize(pb_resize_shape);
auto tbox_slice_shape = phi::make_ddim({tb->dims()[0], tb->dims()[1], 2});
std::vector<int> tbox_slice_size = {
static_cast<int>(tb->dims()[0]), static_cast<int>(tb->dims()[1]), 2};
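// Split the encoded target boxes into their (tx, ty) and (tw, th) halves.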
phi::DenseTensor tbox01 =
F.Slice(*tb, {0, 0, 0}, tbox_slice_size, tbox_slice_shape);
phi::DenseTensor tbox23 =
F.Slice(*tb, {0, 0, 2}, tbox_slice_size, tbox_slice_shape);
phi::DenseTensor tb_xy;
phi::DenseTensor tb_wh;
if (pbv) {
auto pbvt_slice_shape = phi::make_ddim({pbv->dims()[0], 2});
auto pbvt_resize_shape = axis == 0 ? phi::make_ddim({1, pbv->dims()[0], 2})
: phi::make_ddim({pbv->dims()[0], 1, 2});
std::vector<int> pbvt_slice_size = {static_cast<int>(pbv->dims()[0]), 2};
phi::DenseTensor pbv_t01 =
F.Slice(*pbv, {0, 0}, pbvt_slice_size, pbvt_slice_shape);
phi::DenseTensor pbv_t23 =
F.Slice(*pbv, {0, 2}, pbvt_slice_size, pbvt_slice_shape);
pbv_t01.Resize(pbvt_resize_shape);
pbv_t23.Resize(pbvt_resize_shape);
F.AddWithBroadCastVoid(
F.MulWithBroadCast(tbox01, F.Mul(pb_wh, pbv_t01), tbox_slice_shape),
pb_xy,
tbox_slice_shape,
&tb_xy);
F.MulWithBroadCastVoid(
F.Exp(F.MulWithBroadCast(pbv_t23, tbox23, tbox_slice_shape)),
pb_wh,
tbox_slice_shape,
&tb_wh);
} else if (variance.empty()) {
F.AddWithBroadCastVoid(F.MulWithBroadCast(tbox01, pb_wh, tbox_slice_shape),
pb_xy,
tbox_slice_shape,
&tb_xy);
F.MulWithBroadCastVoid(F.Exp(tbox23), pb_wh, tbox_slice_shape, &tb_wh);
} else {
phi::DenseTensor t_var01, t_var23;
auto t_var_shape = phi::make_ddim({1, 1, 2});
std::vector<T> vec_var01 = {static_cast<T>(variance[0]),
static_cast<T>(variance[1])};
std::vector<T> vec_var23 = {static_cast<T>(variance[2]),
static_cast<T>(variance[3])};
Vector2Tensor(ctx, vec_var01, t_var_shape, &t_var01);
Vector2Tensor(ctx, vec_var23, t_var_shape, &t_var23);
F.AddWithBroadCastVoid(
F.MulWithBroadCast(tbox01,
F.MulWithBroadCast(pb_wh, t_var01, pb_resize_shape),
tbox_slice_shape),
pb_xy,
tbox_slice_shape,
&tb_xy);
F.MulWithBroadCastVoid(
F.Exp(F.MulWithBroadCast(t_var23, tbox23, tbox_slice_shape)),
pb_wh,
tbox_slice_shape,
&tb_wh);
}
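// Decode back to corner coordinates: (cx, cy) ± (w, h) / 2, with a -1 offset
// on the max corner when boxes are not normalized.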
phi::DenseTensor obox01 =
F.AddWithBroadCast(tb_xy, F.Muls(tb_wh, -0.5), tbox_slice_shape);
phi::DenseTensor obox23 =
F.Adds(F.AddWithBroadCast(tb_xy, F.Muls(tb_wh, 0.5), tbox_slice_shape),
(norm ? 0 : -1));
F.ConcatVoid({obox01, obox23}, out->dims(), 2, out);
}
template <typename T>
class BoxCoderNPUKernel : public framework::OpKernel<T> {
public:
void Compute(const framework::ExecutionContext& ctx) const override {
auto* prior_box = ctx.Input<phi::DenseTensor>("PriorBox");
auto* prior_box_var = ctx.Input<phi::DenseTensor>("PriorBoxVar");
auto* target_box = ctx.Input<phi::DenseTensor>("TargetBox");
auto* output_box = ctx.Output<phi::DenseTensor>("OutputBox");
std::vector<float> variance = ctx.Attr<std::vector<float>>("variance");
const int axis = ctx.Attr<int>("axis");
if (prior_box_var) {
PADDLE_ENFORCE_EQ(variance.empty(),
true,
platform::errors::InvalidArgument(
"Input 'PriorBoxVar' and attribute 'variance'"
" of BoxCoder operator should not be used at the "
"same time."));
}
if (!(variance.empty())) {
PADDLE_ENFORCE_EQ(static_cast<int>(variance.size()),
4,
platform::errors::InvalidArgument(
"Size of attribute 'variance' in BoxCoder operator"
" should be 4. But received size is %d",
variance.size()));
}
if (target_box->lod().size()) {
PADDLE_ENFORCE_EQ(target_box->lod().size(),
1,
platform::errors::InvalidArgument(
"Input 'TargetBox' of BoxCoder operator only"
" supports LoD with one level."));
}
auto code_type =
phi::funcs::GetBoxCodeType(ctx.Attr<std::string>("code_type"));
bool normalized = ctx.Attr<bool>("box_normalized");
if (code_type == phi::funcs::BoxCodeType::kEncodeCenterSize) {
BoxCoderEnc<T>(ctx,
target_box,
prior_box,
prior_box_var,
normalized,
variance,
output_box);
} else {
BoxCoderDec<T>(ctx,
target_box,
prior_box,
prior_box_var,
normalized,
variance,
axis,
output_box);
}
}
};
} // namespace operators
} // namespace paddle
namespace ops = paddle::operators;
namespace plat = paddle::platform;
REGISTER_OP_NPU_KERNEL(box_coder,
ops::BoxCoderNPUKernel<float>,
ops::BoxCoderNPUKernel<plat::float16>);
/* Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/fluid/operators/detection/density_prior_box_op.h"
namespace paddle {
namespace operators {
using fp16 = paddle::platform::float16;
template <typename T>
struct DensityPriorBoxFunction {
public:
explicit DensityPriorBoxFunction(const framework::ExecutionContext& ctx)
: ctx(ctx) {
place = ctx.GetPlace();
stream = ctx.template device_context<platform::NPUDeviceContext>().stream();
t0.mutable_data<float>({1}, place);
t1.mutable_data<float>({1}, place);
tn.mutable_data<float>({1}, place);
FillNpuTensorWithConstant<float>(&t0, static_cast<float>(0));
FillNpuTensorWithConstant<float>(&t1, static_cast<float>(1));
}
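// t0 and t1 hold the constant start and step for the Range op;
// tn carries the per-call limit set in Arange().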
void Arange(int n, phi::DenseTensor* x) {
// x should be init first
FillNpuTensorWithConstant<float>(&tn, static_cast<float>(n));
const auto& runner = NpuOpRunner("Range", {t0, tn, t1}, {*x}, {});
runner.Run(stream);
}
void Add(const phi::DenseTensor* x,
const phi::DenseTensor* y,
phi::DenseTensor* z) {
// z should be init first
const auto& runner = NpuOpRunner("AddV2", {*x, *y}, {*z}, {});
runner.Run(stream);
}
void Cast(const phi::DenseTensor* x, phi::DenseTensor* y) {
auto dst_dtype =
ConvertToNpuDtype(framework::TransToProtoVarType(y->type()));
const auto& runner = NpuOpRunner(
"Cast", {*x}, {*y}, {{"dst_type", static_cast<int>(dst_dtype)}});
runner.Run(stream);
}
void Sub(const phi::DenseTensor* x,
const phi::DenseTensor* y,
phi::DenseTensor* z) {
// z should be init first
const auto& runner = NpuOpRunner("Sub", {*x, *y}, {*z}, {});
runner.Run(stream);
}
void Mul(const phi::DenseTensor* x,
const phi::DenseTensor* y,
phi::DenseTensor* z) {
// z should be init first
const auto& runner = NpuOpRunner("Mul", {*x, *y}, {*z}, {});
runner.Run(stream);
}
void Adds(const phi::DenseTensor* x, float scalar, phi::DenseTensor* y) {
// y should be init first
const auto& runner = NpuOpRunner("Adds", {*x}, {*y}, {{"value", scalar}});
runner.Run(stream);
}
void Muls(const phi::DenseTensor* x, float scalar, phi::DenseTensor* y) {
// y should be init first
const auto& runner = NpuOpRunner("Muls", {*x}, {*y}, {{"value", scalar}});
runner.Run(stream);
}
void Maximum(const phi::DenseTensor* x,
const phi::DenseTensor* y,
phi::DenseTensor* z) {
// z should be init first
const auto& runner = NpuOpRunner("Maximum", {*x, *y}, {*z}, {});
runner.Run(stream);
}
void Minimum(const phi::DenseTensor* x,
const phi::DenseTensor* y,
phi::DenseTensor* z) {
// z should be init first
const auto& runner = NpuOpRunner("Minimum", {*x, *y}, {*z}, {});
runner.Run(stream);
}
void Concat(const std::vector<phi::DenseTensor>& inputs,
int axis,
phi::DenseTensor* output) {
// output should be init first
std::vector<std::string> names;
for (size_t i = 0; i < inputs.size(); i++) {
names.push_back("x" + std::to_string(i));
}
NpuOpRunner runner{
"ConcatD",
{inputs},
{*output},
{{"concat_dim", axis}, {"N", static_cast<int>(inputs.size())}}};
runner.AddInputNames(names);
runner.Run(stream);
}
void Tile(const phi::DenseTensor* x,
phi::DenseTensor* y,
const std::vector<int>& multiples) {
// y should be init first
if (x->dims() == y->dims()) {
framework::TensorCopy(
*x,
place,
ctx.template device_context<platform::NPUDeviceContext>(),
y);
return;
}
const auto& runner =
NpuOpRunner("TileD", {*x}, {*y}, {{"multiples", multiples}});
runner.Run(stream);
}
void FloatVec2Tsr(const std::vector<float>& vec, phi::DenseTensor* tsr_dst) {
// Copy host data to the device and wait for the transfer to complete.
framework::TensorFromVector<T>(vec, ctx.device_context(), tsr_dst);
ctx.template device_context<platform::NPUDeviceContext>().Wait();
}
private:
platform::Place place;
aclrtStream stream;
const framework::ExecutionContext& ctx;
phi::DenseTensor t0;
phi::DenseTensor t1;
phi::DenseTensor tn;
};
template <>
void DensityPriorBoxFunction<fp16>::Arange(int n, phi::DenseTensor* x) {
phi::DenseTensor x_fp32(phi::DataType::FLOAT32);
x_fp32.mutable_data<float>(x->dims(), place);
FillNpuTensorWithConstant<float>(&tn, static_cast<float>(n));
const auto& runner = NpuOpRunner("Range", {t0, tn, t1}, {x_fp32}, {});
runner.Run(stream);
Cast(&x_fp32, x);
}
template <>
void DensityPriorBoxFunction<fp16>::FloatVec2Tsr(const std::vector<float>& vec,
phi::DenseTensor* tsr_dst) {
phi::DenseTensor tsr_fp32(phi::DataType::FLOAT32);
tsr_fp32.mutable_data<float>(tsr_dst->dims(), place);
framework::TensorFromVector<float>(vec, ctx.device_context(), &tsr_fp32);
ctx.template device_context<paddle::platform::NPUDeviceContext>().Wait();
Cast(&tsr_fp32, tsr_dst);
}
template <typename T>
class DensityPriorBoxOpNPUKernel : public framework::OpKernel<T> {
public:
void Compute(const framework::ExecutionContext& ctx) const override {
auto* input = ctx.Input<phi::DenseTensor>("Input");
auto* image = ctx.Input<phi::DenseTensor>("Image");
auto* boxes = ctx.Output<phi::DenseTensor>("Boxes");
auto* vars = ctx.Output<phi::DenseTensor>("Variances");
auto variances = ctx.Attr<std::vector<float>>("variances");
auto clip = ctx.Attr<bool>("clip");
auto fixed_sizes = ctx.Attr<std::vector<float>>("fixed_sizes");
auto fixed_ratios = ctx.Attr<std::vector<float>>("fixed_ratios");
auto densities = ctx.Attr<std::vector<int>>("densities");
float step_w = ctx.Attr<float>("step_w");
float step_h = ctx.Attr<float>("step_h");
float offset = ctx.Attr<float>("offset");
int image_w = image->dims()[3];
int image_h = image->dims()[2];
int layer_w = input->dims()[3];
int layer_h = input->dims()[2];
auto _type = input->dtype();
auto place = ctx.GetPlace();
DensityPriorBoxFunction<T> F(ctx);
phi::DenseTensor h(_type);
h.mutable_data<T>({layer_h}, place);
phi::DenseTensor w(_type);
w.mutable_data<T>({layer_w}, place);
F.Arange(layer_h, &h);
F.Arange(layer_w, &w);
h.Resize({layer_h, 1, 1, 1});
w.Resize({1, layer_w, 1, 1});
step_w = step_w > 0 ? step_w : static_cast<float>(image_w) / layer_w;
step_h = step_h > 0 ? step_h : static_cast<float>(image_h) / layer_h;
int step_average = static_cast<int>((step_w + step_h) * 0.5);
int ratios_size = fixed_ratios.size();
int num_priors_per_ratio = 0;
for (size_t i = 0; i < densities.size(); ++i) {
num_priors_per_ratio += densities[i] * densities[i];
}
phi::DenseTensor di(_type);
phi::DenseTensor dj(_type);
phi::DenseTensor shifts(_type);
phi::DenseTensor box_w_ratio(_type);
phi::DenseTensor box_h_ratio(_type);
di.mutable_data<T>({ratios_size * num_priors_per_ratio}, place);
dj.mutable_data<T>({ratios_size * num_priors_per_ratio}, place);
shifts.mutable_data<T>({ratios_size * num_priors_per_ratio}, place);
box_w_ratio.mutable_data<T>({ratios_size * num_priors_per_ratio}, place);
box_h_ratio.mutable_data<T>({ratios_size * num_priors_per_ratio}, place);
int64_t start = 0;
std::vector<int> vec_tile = {0, 0, 0};
for (size_t i = 0; i < densities.size(); ++i) {
// Range = start:start+ratios_size*density_sqr, density = densities[i]
int density_sqr = densities[i] * densities[i];
// shifts[Range] = [step_average/density]*ratios_size*density_sqr
phi::DenseTensor shifts_part =
shifts.Slice(start, start + ratios_size * density_sqr);
FillNpuTensorWithConstant<T>(&shifts_part,
static_cast<T>(step_average / densities[i]));
// di[Range] = [ i // density for i in range(density_sqr) ] * ratios_size
// dj[Range] = [ i % density for i in range(density_sqr) ] * ratios_size
phi::DenseTensor di_part =
di.Slice(start, start + ratios_size * density_sqr);
phi::DenseTensor dj_part =
dj.Slice(start, start + ratios_size * density_sqr);
if (densities[i] > 1) {
di_part.Resize({ratios_size, densities[i], densities[i]});
dj_part.Resize({ratios_size, densities[i], densities[i]});
phi::DenseTensor range_n(_type);
range_n.mutable_data<T>({densities[i]}, place);
F.Arange(densities[i], &range_n);
range_n.Resize({1, densities[i], 1});
vec_tile[0] = ratios_size;
vec_tile[1] = 1;
vec_tile[2] = densities[i];
F.Tile(&range_n, &di_part, vec_tile);
range_n.Resize({1, 1, densities[i]});
vec_tile[1] = densities[i];
vec_tile[2] = 1;
F.Tile(&range_n, &dj_part, vec_tile);
} else {
FillNpuTensorWithConstant<T>(&di_part, static_cast<T>(0));
FillNpuTensorWithConstant<T>(&dj_part, static_cast<T>(0));
}
int start_box_ratio = start;
for (float ar : fixed_ratios) {
// Range_mini = start_box_ratio:start_box_ratio+density_sqr
// box_w_ratio[Range_mini] = [fixed_sizes[i] * sqrt(ar)] * density_sqr
// box_h_ratio[Range_mini] = [fixed_sizes[i] / sqrt(ar)] * density_sqr
phi::DenseTensor box_h_ratio_part =
box_h_ratio.Slice(start_box_ratio, start_box_ratio + density_sqr);
phi::DenseTensor box_w_ratio_part =
box_w_ratio.Slice(start_box_ratio, start_box_ratio + density_sqr);
FillNpuTensorWithConstant<T>(&box_w_ratio_part,
static_cast<T>(fixed_sizes[i] * sqrt(ar)));
FillNpuTensorWithConstant<T>(&box_h_ratio_part,
static_cast<T>(fixed_sizes[i] / sqrt(ar)));
start_box_ratio += density_sqr;
}
start = start_box_ratio;
}
di.Resize({1, 1, ratios_size * num_priors_per_ratio, 1});
dj.Resize({1, 1, ratios_size * num_priors_per_ratio, 1});
shifts.Resize({1, 1, ratios_size * num_priors_per_ratio, 1});
box_w_ratio.Resize({1, 1, ratios_size * num_priors_per_ratio, 1});
box_h_ratio.Resize({1, 1, ratios_size * num_priors_per_ratio, 1});
// c_x = (w+offset)*step_w - 0.5*step_average + 0.5*shifts + dj*shifts
// c_y = (h+offset)*step_h - 0.5*step_average + 0.5*shifts + di*shifts
phi::DenseTensor c_x(_type);
phi::DenseTensor c_y(_type);
auto dim0 =
phi::make_ddim({1, layer_w, ratios_size * num_priors_per_ratio, 1});
auto dim1 =
phi::make_ddim({layer_h, 1, ratios_size * num_priors_per_ratio, 1});
c_x.mutable_data<T>(dim0, place);
c_y.mutable_data<T>(dim1, place);
F.Adds(&w, offset, &w);
F.Muls(&w, step_w, &w);
F.Adds(&w, static_cast<float>(-step_average) * static_cast<float>(0.5), &w);
F.Adds(&h, offset, &h);
F.Muls(&h, step_h, &h);
F.Adds(&h, static_cast<float>(-step_average) * static_cast<float>(0.5), &h);
F.Mul(&di, &shifts, &di);
F.Mul(&dj, &shifts, &dj);
F.Muls(&shifts, static_cast<float>(0.5), &shifts);
F.Add(&di, &shifts, &di);
F.Add(&dj, &shifts, &dj);
F.Add(&dj, &w, &c_x);
F.Add(&di, &h, &c_y);
// box_w_ratio = box_w_ratio / 2
// box_h_ratio = box_h_ratio / 2
F.Muls(&box_w_ratio, static_cast<float>(0.5), &box_w_ratio);
F.Muls(&box_h_ratio, static_cast<float>(0.5), &box_h_ratio);
phi::DenseTensor zero_t(_type);
phi::DenseTensor one_t(_type);
zero_t.mutable_data<T>({1}, place);
one_t.mutable_data<T>({1}, place);
FillNpuTensorWithConstant<T>(&zero_t, static_cast<T>(0));
FillNpuTensorWithConstant<T>(&one_t, static_cast<T>(1));
phi::DenseTensor outbox0(_type);
phi::DenseTensor outbox1(_type);
phi::DenseTensor outbox2(_type);
phi::DenseTensor outbox3(_type);
outbox0.mutable_data<T>(dim0, place);
outbox1.mutable_data<T>(dim1, place);
outbox2.mutable_data<T>(dim0, place);
outbox3.mutable_data<T>(dim1, place);
// outbox0 = max ( (c_x - box_w_ratio)/image_w, 0 )
// outbox1 = max ( (c_y - box_h_ratio)/image_h, 0 )
// outbox2 = min ( (c_x + box_w_ratio)/image_w, 1 )
// outbox3 = min ( (c_y + box_h_ratio)/image_h, 1 )
F.Sub(&c_x, &box_w_ratio, &outbox0);
F.Sub(&c_y, &box_h_ratio, &outbox1);
F.Add(&c_x, &box_w_ratio, &outbox2);
F.Add(&c_y, &box_h_ratio, &outbox3);
F.Muls(&outbox0, static_cast<float>(1.0 / image_w), &outbox0);
F.Muls(&outbox1, static_cast<float>(1.0 / image_h), &outbox1);
F.Muls(&outbox2, static_cast<float>(1.0 / image_w), &outbox2);
F.Muls(&outbox3, static_cast<float>(1.0 / image_h), &outbox3);
F.Maximum(&outbox0, &zero_t, &outbox0);
F.Maximum(&outbox1, &zero_t, &outbox1);
F.Minimum(&outbox2, &one_t, &outbox2);
F.Minimum(&outbox3, &one_t, &outbox3);
if (clip) {
// outbox0 = min ( outbox0, 1 )
// outbox1 = min ( outbox1, 1 )
// outbox2 = max ( outbox2, 0 )
// outbox3 = max ( outbox3, 0 )
F.Minimum(&outbox0, &one_t, &outbox0);
F.Minimum(&outbox1, &one_t, &outbox1);
F.Maximum(&outbox2, &zero_t, &outbox2);
F.Maximum(&outbox3, &zero_t, &outbox3);
}
auto out_dim = phi::make_ddim(
{layer_h, layer_w, ratios_size * num_priors_per_ratio, 4});
boxes->mutable_data<T>(place);
vars->mutable_data<T>(place);
phi::DenseTensor boxes_share(_type);
phi::DenseTensor vars_share(_type);
boxes_share.ShareDataWith(*boxes);
boxes_share.Resize(out_dim);
vars_share.ShareDataWith(*vars);
vars_share.Resize(out_dim);
phi::DenseTensor box0(_type);
phi::DenseTensor box1(_type);
phi::DenseTensor box2(_type);
phi::DenseTensor box3(_type);
// out_dim = {layer_h, layer_w, ratios_size*num_priors_per_ratio, 1}
out_dim[3] = 1;
box0.mutable_data<T>(out_dim, place);
box1.mutable_data<T>(out_dim, place);
box2.mutable_data<T>(out_dim, place);
box3.mutable_data<T>(out_dim, place);
std::vector<int> vec_exp_out02 = {layer_h, 1, 1, 1};
std::vector<int> vec_exp_out13 = {1, layer_w, 1, 1};
F.Tile(&outbox0, &box0, vec_exp_out02);
F.Tile(&outbox1, &box1, vec_exp_out13);
F.Tile(&outbox2, &box2, vec_exp_out02);
F.Tile(&outbox3, &box3, vec_exp_out13);
F.Concat({box0, box1, box2, box3}, 3, &boxes_share);
std::vector<int> multiples = {
layer_h, layer_w, ratios_size * num_priors_per_ratio, 1};
phi::DenseTensor variances_t(_type);
// variances.size() == 4
variances_t.mutable_data<T>({4}, place);
F.FloatVec2Tsr(variances, &variances_t);
F.Tile(&variances_t, &vars_share, multiples);
}
};
} // namespace operators
} // namespace paddle
namespace ops = paddle::operators;
namespace plat = paddle::platform;
REGISTER_OP_NPU_KERNEL(density_prior_box,
ops::DensityPriorBoxOpNPUKernel<plat::float16>,
ops::DensityPriorBoxOpNPUKernel<float>);
/* Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/fluid/operators/detection/iou_similarity_op.h"
namespace paddle {
namespace operators {
template <typename T>
struct IouFunction {
public:
explicit IouFunction(const framework::ExecutionContext& ctx) : ctx(ctx) {
place = ctx.GetPlace();
stream = ctx.template device_context<paddle::platform::NPUDeviceContext>()
.stream();
}
void Transpose(const phi::DenseTensor* x,
phi::DenseTensor* y,
const std::vector<int>& axis) {
// y should be init first
const auto& runner =
NpuOpRunner("TransposeD", {*x}, {*y}, {{"perm", axis}});
runner.Run(stream);
}
void Add(const phi::DenseTensor* x,
const phi::DenseTensor* y,
phi::DenseTensor* z) {
// z should be init first
const auto& runner = NpuOpRunner("AddV2", {*x, *y}, {*z}, {});
runner.Run(stream);
}
void Sub(const phi::DenseTensor* x,
const phi::DenseTensor* y,
phi::DenseTensor* z) {
// z should be init first
const auto& runner = NpuOpRunner("Sub", {*x, *y}, {*z}, {});
runner.Run(stream);
}
void Mul(const phi::DenseTensor* x,
const phi::DenseTensor* y,
phi::DenseTensor* z) {
// z should be init first
const auto& runner = NpuOpRunner("Mul", {*x, *y}, {*z}, {});
runner.Run(stream);
}
void DivNoNan(const phi::DenseTensor* x,
const phi::DenseTensor* y,
phi::DenseTensor* z) {
// z should be init first
const auto& runner = NpuOpRunner("DivNoNan", {*x, *y}, {*z}, {});
runner.Run(stream);
}
void Adds(const phi::DenseTensor* x, float scalar, phi::DenseTensor* y) {
// y should be init first
const auto& runner = NpuOpRunner("Adds", {*x}, {*y}, {{"value", scalar}});
runner.Run(stream);
}
void Maximum(const phi::DenseTensor* x,
const phi::DenseTensor* y,
phi::DenseTensor* z) {
// z should be init first
const auto& runner = NpuOpRunner("Maximum", {*x, *y}, {*z}, {});
runner.Run(stream);
}
void Minimum(const phi::DenseTensor* x,
const phi::DenseTensor* y,
phi::DenseTensor* z) {
// z should be init first
const auto& runner = NpuOpRunner("Minimum", {*x, *y}, {*z}, {});
runner.Run(stream);
}
private:
platform::Place place;
aclrtStream stream;
const framework::ExecutionContext& ctx;
};
template <typename T>
class IouSimilarityNPUKernel : public framework::OpKernel<T> {
public:
void Compute(const framework::ExecutionContext& ctx) const override {
auto* x = ctx.Input<phi::DenseTensor>("X");
auto* y = ctx.Input<phi::DenseTensor>("Y");
bool normalized = ctx.Attr<bool>("box_normalized");
auto* out = ctx.Output<phi::DenseTensor>("Out");
auto _type = x->dtype();
auto place = ctx.GetPlace();
IouFunction<T> F(ctx);
auto N = x->dims()[0];
auto M = y->dims()[0];
out->mutable_data<T>({N, M}, place);
phi::DenseTensor xt(_type);
phi::DenseTensor yt(_type);
xt.mutable_data<T>({4, N}, place);
yt.mutable_data<T>({4, M}, place);
std::vector<int> vec_trans = {1, 0};
F.Transpose(x, &xt, vec_trans);
F.Transpose(y, &yt, vec_trans);
phi::DenseTensor xmin1 = xt.Slice(0, 1);
phi::DenseTensor ymin1 = xt.Slice(1, 2);
phi::DenseTensor xmax1 = xt.Slice(2, 3);
phi::DenseTensor ymax1 = xt.Slice(3, 4);
phi::DenseTensor xmin2 = yt.Slice(0, 1);
phi::DenseTensor ymin2 = yt.Slice(1, 2);
phi::DenseTensor xmax2 = yt.Slice(2, 3);
phi::DenseTensor ymax2 = yt.Slice(3, 4);
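// Rows 0..3 of the transposed tensors hold xmin, ymin, xmax and ymax.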
xmin1.Resize({N, 1});
ymin1.Resize({N, 1});
xmax1.Resize({N, 1});
ymax1.Resize({N, 1});
xmin2.Resize({1, M});
ymin2.Resize({1, M});
xmax2.Resize({1, M});
ymax2.Resize({1, M});
phi::DenseTensor w1(_type);
phi::DenseTensor h1(_type);
phi::DenseTensor w2(_type);
phi::DenseTensor h2(_type);
phi::DenseTensor area1(_type);
phi::DenseTensor area2(_type);
w1.mutable_data<T>({N, 1}, place);
h1.mutable_data<T>({N, 1}, place);
w2.mutable_data<T>({1, M}, place);
h2.mutable_data<T>({1, M}, place);
area1.mutable_data<T>({N, 1}, place);
area2.mutable_data<T>({1, M}, place);
F.Sub(&xmax1, &xmin1, &w1);
F.Sub(&ymax1, &ymin1, &h1);
F.Sub(&xmax2, &xmin2, &w2);
F.Sub(&ymax2, &ymin2, &h2);
if (!normalized) {
F.Adds(&w1, 1.0f, &w1);
F.Adds(&h1, 1.0f, &h1);
F.Adds(&w2, 1.0f, &w2);
F.Adds(&h2, 1.0f, &h2);
}
F.Mul(&w1, &h1, &area1);
F.Mul(&w2, &h2, &area2);
phi::DenseTensor inter_xmax(_type);
phi::DenseTensor inter_ymax(_type);
phi::DenseTensor inter_xmin(_type);
phi::DenseTensor inter_ymin(_type);
inter_xmax.mutable_data<T>({N, M}, place);
inter_ymax.mutable_data<T>({N, M}, place);
inter_xmin.mutable_data<T>({N, M}, place);
inter_ymin.mutable_data<T>({N, M}, place);
F.Minimum(&xmax1, &xmax2, &inter_xmax);
F.Minimum(&ymax1, &ymax2, &inter_ymax);
F.Maximum(&xmin1, &xmin2, &inter_xmin);
F.Maximum(&ymin1, &ymin2, &inter_ymin);
phi::DenseTensor inter_w(_type);
phi::DenseTensor inter_h(_type);
inter_w.mutable_data<T>({N, M}, place);
inter_h.mutable_data<T>({N, M}, place);
F.Sub(&inter_xmax, &inter_xmin, &inter_w);
F.Sub(&inter_ymax, &inter_ymin, &inter_h);
if (!normalized) {
F.Adds(&inter_w, 1.0f, &inter_w);
F.Adds(&inter_h, 1.0f, &inter_h);
}
phi::DenseTensor zeros(_type);
zeros.mutable_data<T>({1}, place);
FillNpuTensorWithConstant<T>(&zeros, static_cast<T>(0));
F.Maximum(&inter_w, &zeros, &inter_w);
F.Maximum(&inter_h, &zeros, &inter_h);
F.Mul(&inter_w, &inter_h, out);
phi::DenseTensor union_area(_type);
union_area.mutable_data<T>({N, M}, place);
F.Add(&area1, &area2, &union_area);
F.Sub(&union_area, out, &union_area);
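// DivNoNan returns 0 where union_area is 0, so degenerate boxes yield an
// IoU of 0 instead of NaN.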
F.DivNoNan(out, &union_area, out);
}
};
} // namespace operators
} // namespace paddle
namespace ops = paddle::operators;
namespace plat = paddle::platform;
REGISTER_OP_NPU_KERNEL(iou_similarity,
ops::IouSimilarityNPUKernel<float>,
ops::IouSimilarityNPUKernel<plat::float16>);
/* Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/fluid/operators/detection/prior_box_op.h"
namespace paddle {
namespace operators {
template <typename DeviceContext, typename T>
class PriorBoxNPUKernel : public framework::OpKernel<T> {
public:
void Compute(const framework::ExecutionContext& ctx) const override {
auto* input = ctx.Input<phi::DenseTensor>("Input");
auto* image = ctx.Input<phi::DenseTensor>("Image");
auto* boxes = ctx.Output<phi::DenseTensor>("Boxes");
auto* variances = ctx.Output<phi::DenseTensor>("Variances");
PADDLE_ENFORCE_EQ(boxes->dims(),
variances->dims(),
platform::errors::Unimplemented(
"the shape of boxes and variances must be same in "
"the npu kernel of prior_box, but got boxes->dims() "
"= [%s], variances->dims() = [%s]",
boxes->dims(),
variances->dims()));
auto min_sizes = ctx.Attr<std::vector<float>>("min_sizes");
auto max_sizes = ctx.Attr<std::vector<float>>("max_sizes");
auto aspect_ratios = ctx.Attr<std::vector<float>>("aspect_ratios");
auto variances_attr = ctx.Attr<std::vector<float>>("variances");
bool flip = ctx.Attr<bool>("flip");
bool clip = ctx.Attr<bool>("clip");
float step_w = ctx.Attr<float>("step_w");
float step_h = ctx.Attr<float>("step_h");
float offset = ctx.Attr<float>("offset");
auto place = ctx.GetPlace();
phi::DenseTensor out(input->type());
auto out_dims = phi::vectorize(boxes->dims());
out_dims.insert(out_dims.begin(), 2);
out.Resize(phi::make_ddim(out_dims));
out.mutable_data<T>(place);
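// The Ascend PriorBox op emits boxes and variances stacked into a single
// output, hence the extra leading dimension of 2; the two halves are
// sliced apart below.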
framework::NPUAttributeMap attr_input = {{"min_size", min_sizes},
{"max_size", max_sizes},
{"aspect_ratio", aspect_ratios},
{"step_h", step_h},
{"step_w", step_w},
{"flip", flip},
{"clip", clip},
{"offset", offset},
{"variance", variances_attr}};
auto stream =
ctx.template device_context<paddle::platform::NPUDeviceContext>()
.stream();
const auto& runner =
NpuOpRunner("PriorBox", {*input, *image}, {out}, attr_input);
runner.Run(stream);
out.Resize(phi::make_ddim({out.numel()}));
phi::DenseTensor out_boxes = out.Slice(0, boxes->numel());
phi::DenseTensor out_variances = out.Slice(boxes->numel(), out.numel());
out_boxes.Resize(boxes->dims());
out_variances.Resize(variances->dims());
boxes->mutable_data<T>(place);
variances->mutable_data<T>(place);
framework::TensorCopy(
out_boxes,
place,
ctx.template device_context<platform::NPUDeviceContext>(),
boxes);
framework::TensorCopy(
out_variances,
place,
ctx.template device_context<platform::NPUDeviceContext>(),
variances);
}
};
} // namespace operators
} // namespace paddle
namespace ops = paddle::operators;
namespace plat = paddle::platform;
REGISTER_OP_NPU_KERNEL(
prior_box,
ops::PriorBoxNPUKernel<plat::NPUDeviceContext, float>,
ops::PriorBoxNPUKernel<plat::NPUDeviceContext, plat::float16>);