Commit 266965e2 authored by zhangwen31

[arm][kernel][math] feat: add i32 support for elementwise div

Parent dfcd059b
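As the `// todo` comments in the diff note, the new int32 path does not use ARM intrinsics yet; it reuses the scalar fallback by passing `naive_div` into `naive_elementwise_op` / `naive_elementwise_op_broadcast`. Those helpers are not shown in this diff, so the block below is only a minimal sketch of the assumed fallback shape, inferred from the call sites (the `_sketch` name is hypothetical):

```cpp
// Illustrative stand-in for the naive_elementwise_op helper called in this
// commit (the real helper lives elsewhere in the file): apply a binary op
// to each pair of elements from two equally sized buffers.
template <typename T, typename Op>
static void naive_elementwise_op_sketch(
    const T* dinx, const T* diny, T* dout, int num, Op op) {
  for (int i = 0; i < num; ++i) {
    dout[i] = op(dinx[i], diny[i]);  // for naive_div this is integer division
  }
}
```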
@@ -44,6 +44,12 @@ static T naive_sub(T l, T r) {
  return l - r;
}
// todo: remove this function when all elementwise div works
template <typename T>
static T naive_div(T l, T r) {
  return l / r;
}
// todo: use arm intrinsics
template <>
void elementwise_add<int32_t>(const int32_t* dinx,
@@ -1511,6 +1517,15 @@ void elementwise_max_relu_broadcast<float>(const float* dinx,
  }
}
// todo: use arm intrinsics
template <>
void elementwise_div<int32_t>(const int32_t* dinx,
                              const int32_t* diny,
                              int32_t* dout,
                              int num) {
  naive_elementwise_op<int32_t>(dinx, diny, dout, num, naive_div<int32_t>);
}
template <>
void elementwise_div<int64_t>(const int64_t* dinx,
                              const int64_t* diny,
@@ -1576,6 +1591,18 @@ void elementwise_div<float>(const float* dinx,
  }
}
// todo: use arm intrinsics
template <>
void elementwise_div_broadcast<int32_t>(const int32_t* dinx,
                                        const int32_t* diny,
                                        int32_t* dout,
                                        int batch,
                                        int channels,
                                        int num) {
  naive_elementwise_op_broadcast<int32_t>(
      dinx, diny, dout, batch, channels, num, naive_div<int32_t>);
}
template <>
void elementwise_div_broadcast<int64_t>(const int64_t* dinx,
                                        const int64_t* diny,
...
@@ -548,6 +548,16 @@ REGISTER_LITE_KERNEL(
    .BindOutput("Out", {LiteType::GetTensorTy(TARGET(kARM))})
    .Finalize();
using elementwise_div_int32_t =
    paddle::lite::kernels::arm::ElementwiseDivCompute<int32_t,
                                                      PRECISION(kInt32)>;
REGISTER_LITE_KERNEL(
    elementwise_div, kARM, kInt32, kNCHW, elementwise_div_int32_t, def)
    .BindInput("X", {LiteType::GetTensorTy(TARGET(kARM), PRECISION(kInt32))})
    .BindInput("Y", {LiteType::GetTensorTy(TARGET(kARM), PRECISION(kInt32))})
    .BindOutput("Out", {LiteType::GetTensorTy(TARGET(kARM), PRECISION(kInt32))})
    .Finalize();
using elementwise_div_int64_t =
    paddle::lite::kernels::arm::ElementwiseDivCompute<int64_t,
                                                      PRECISION(kInt64)>;
...
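For context, the `(batch, channels, num)` arguments of the broadcast variant are assumed to follow the same layout as the existing elementwise broadcast kernels: x laid out as batch × channels × num, y holding one value per channel that is reused across the inner `num` elements. A rough sketch of that indexing (illustrative only, not the helper from this commit):

```cpp
#include <cstdint>

// Sketch of the assumed indexing behind elementwise_div_broadcast<int32_t>:
// x has shape [batch, channels, num], y has shape [channels], and y[c] is
// broadcast over every inner element of the (b, c) slice.
static void div_broadcast_sketch(const int32_t* x, const int32_t* y,
                                 int32_t* out, int batch, int channels,
                                 int num) {
  for (int b = 0; b < batch; ++b) {
    for (int c = 0; c < channels; ++c) {
      const int32_t divisor = y[c];
      const int offset = (b * channels + c) * num;
      for (int i = 0; i < num; ++i) {
        out[offset + i] = x[offset + i] / divisor;  // truncating int division
      }
    }
  }
}
```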