未验证 提交 4e9852e7 编写于 作者: Y yiicy 提交者: GitHub

[ARM] add elementwise mod operator, test=develop (#3726)

上级 07ae2599
......@@ -11,8 +11,8 @@
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "lite/backends/arm/math/elementwise.h"
#include <math.h>
#include <algorithm>
#include "lite/backends/arm/math/funcs.h"
......@@ -1541,6 +1541,87 @@ void elementwise_div_relu_broadcast<float>(const float* dinx,
}
}
/**
 * Element-wise remainder with a per-channel (broadcast) divisor.
 *
 * dinx and dout are addressed as a contiguous [batch, channels, num] array;
 * diny supplies one divisor per channel index j, reused for every batch and
 * for all `num` elements of that channel:
 *
 *   dout[(i * channels + j) * num + k] =
 *       dinx[(i * channels + j) * num + k] % diny[j]
 *
 * The built-in `%` operator is applied directly, so T must support it and no
 * zero-divisor check is performed here.
 *
 * @param dinx      dividends, batch * channels * num elements
 * @param diny      divisors, indexed by channel
 * @param dout      output, batch * channels * num elements
 * @param batch     outer extent
 * @param channels  extent indexed into diny
 * @param num       inner extent sharing one divisor
 */
template <typename T>
void elementwise_mod_broadcast(
    const T* dinx, const T* diny, T* dout, int batch, int channels, int num) {
#pragma omp parallel for collapse(2)
  for (int i = 0; i < batch; ++i) {
    for (int j = 0; j < channels; ++j) {
      const int offset = (i * channels + j) * num;
      const T* din_ptr = dinx + offset;
      const T diny_data = diny[j];
      T* dout_ptr = dout + offset;
      // The inner extent is processed four elements at a time; the tail loop
      // below handles the num % 4 leftovers.
      const int cnt = num >> 2;
      const int remain = num % 4;
      for (int k = 0; k < cnt; ++k) {
        // Plain const locals: the `register` storage-class specifier was
        // removed from the language in C++17.
        const T dinx0 = din_ptr[0];
        const T dinx1 = din_ptr[1];
        const T dinx2 = din_ptr[2];
        const T dinx3 = din_ptr[3];
        dout_ptr[0] = dinx0 % diny_data;
        dout_ptr[1] = dinx1 % diny_data;
        dout_ptr[2] = dinx2 % diny_data;
        dout_ptr[3] = dinx3 % diny_data;
        din_ptr += 4;
        dout_ptr += 4;
      }
      for (int p = 0; p < remain; ++p) {
        *dout_ptr++ = *din_ptr++ % diny_data;
      }
    }
  }
}
/**
 * Element-wise remainder of two equally sized arrays:
 *
 *   dout[i] = dinx[i] % diny[i]   for i in [0, num)
 *
 * The built-in `%` operator is applied directly, so T must support it and no
 * zero-divisor check is performed here.
 *
 * @param dinx  dividends, num elements
 * @param diny  divisors, num elements
 * @param dout  output, num elements
 * @param num   number of elements to process
 */
template <typename T>
void elementwise_mod(const T* dinx, const T* diny, T* dout, int num) {
  // Groups of four are handled in the (parallel) main loop; the remaining
  // num % 4 elements are handled serially afterwards.
  const int cnt = num >> 2;
  const int remain = num % 4;
#pragma omp parallel for
  for (int i = 0; i < cnt; i++) {
    const T* dinx_ptr = dinx + (i << 2);
    const T* diny_ptr = diny + (i << 2);
    T* dout_ptr = dout + (i << 2);
    // Plain const locals: the `register` storage-class specifier was removed
    // from the language in C++17.
    const T dinx0 = dinx_ptr[0];
    const T dinx1 = dinx_ptr[1];
    const T dinx2 = dinx_ptr[2];
    const T dinx3 = dinx_ptr[3];
    const T diny0 = diny_ptr[0];
    const T diny1 = diny_ptr[1];
    const T diny2 = diny_ptr[2];
    const T diny3 = diny_ptr[3];
    dout_ptr[0] = dinx0 % diny0;
    dout_ptr[1] = dinx1 % diny1;
    dout_ptr[2] = dinx2 % diny2;
    dout_ptr[3] = dinx3 % diny3;
  }
  if (remain > 0) {
    const T* dinx_ptr = dinx + (cnt << 2);
    const T* diny_ptr = diny + (cnt << 2);
    T* dout_ptr = dout + (cnt << 2);
    for (int i = 0; i < remain; i++) {
      *dout_ptr++ = *dinx_ptr++ % *diny_ptr++;
    }
  }
}
// Explicit instantiations of the mod helpers. int64_t is the only element
// type instantiated in this section.
template void elementwise_mod<int64_t>(const int64_t* dinx,
const int64_t* diny,
int64_t* dout,
int num);
template void elementwise_mod_broadcast<int64_t>(const int64_t* dinx,
const int64_t* diny,
int64_t* dout,
int batch,
int channels,
int num);
} // namespace math
} // namespace arm
} // namespace lite
......
......@@ -253,6 +253,13 @@ template <typename T>
void elementwise_div_relu_broadcast(
const T* dinx, const T* diny, T* dout, int batch, int channels, int num);
// Element-wise remainder: dout[i] = dinx[i] % diny[i] for i in [0, num).
template <typename T>
void elementwise_mod(const T* dinx, const T* diny, T* dout, int num);
// Element-wise remainder with a broadcast divisor: dinx/dout are addressed as
// [batch, channels, num] and diny provides one divisor per channel index.
template <typename T>
void elementwise_mod_broadcast(
const T* dinx, const T* diny, T* dout, int batch, int channels, int num);
} // namespace math
} // namespace arm
} // namespace lite
......
......@@ -351,6 +351,29 @@ void ElementwiseDivActivationCompute::Run() {
}
}
/**
 * Computes Out = X % Y for the elementwise_mod kernel.
 *
 * Three cases are dispatched on the input shapes:
 *  - X has fewer dimensions than Y and broadcasts into it: every element of Y
 *    is the divisor and the matching X element is the dividend;
 *  - Y broadcasts into X: handled by elementwise_mod_broadcast;
 *  - otherwise: plain element-wise mod over x_dims.production() elements.
 *
 * NOTE(review): is_broadcast is defined outside this block; it is assumed to
 * fill pre / n / post whenever it returns true - confirm against its
 * definition.
 */
template <typename T, PrecisionType PType>
void ElementwiseModCompute<T, PType>::Run() {
  auto& param = this->template Param<operators::ElementwiseParam>();
  auto* x_data = param.X->template data<T>();
  auto* y_data = param.Y->template data<T>();
  auto* out_data = param.Out->template mutable_data<T>();
  int axis = param.axis;
  auto x_dims = param.X->dims();
  auto y_dims = param.Y->dims();
  int pre = 0;
  int n = 0;
  int post = 0;
  if (x_dims.size() < y_dims.size() &&
      is_broadcast(y_dims, x_dims, axis, &pre, &n, &post)) {
    // X is the broadcast operand. Modulo is not commutative, so the operands
    // must not be swapped when calling elementwise_mod_broadcast (that would
    // compute Y % X). Keep X as the dividend and Y as the divisor instead.
    for (int i = 0; i < pre; ++i) {
      for (int j = 0; j < n; ++j) {
        const T dividend = x_data[j];
        const int offset = (i * n + j) * post;
        const T* divisor_ptr = y_data + offset;
        T* out_ptr = out_data + offset;
        for (int k = 0; k < post; ++k) {
          out_ptr[k] = dividend % divisor_ptr[k];
        }
      }
    }
  } else if (is_broadcast(x_dims, y_dims, axis, &pre, &n, &post)) {
    lite::arm::math::elementwise_mod_broadcast<T>(
        x_data, y_data, out_data, pre, n, post);
  } else {
    lite::arm::math::elementwise_mod<T>(
        x_data, y_data, out_data, x_dims.production());
  }
}
} // namespace arm
} // namespace kernels
} // namespace lite
......@@ -487,3 +510,13 @@ REGISTER_LITE_KERNEL(
.BindInput("Y", {LiteType::GetTensorTy(TARGET(kARM))})
.BindOutput("Out", {LiteType::GetTensorTy(TARGET(kARM))})
.Finalize();
// Registers the int64 instantiation of ElementwiseModCompute as the
// "elementwise_mod" kernel for kARM / kInt64 / kNCHW, with inputs X and Y and
// output Out all bound as kARM int64 tensors.
using elementwise_mod_int64 =
paddle::lite::kernels::arm::ElementwiseModCompute<int64_t,
PRECISION(kInt64)>;
REGISTER_LITE_KERNEL(
elementwise_mod, kARM, kInt64, kNCHW, elementwise_mod_int64, def)
.BindInput("X", {LiteType::GetTensorTy(TARGET(kARM), PRECISION(kInt64))})
.BindInput("Y", {LiteType::GetTensorTy(TARGET(kARM), PRECISION(kInt64))})
.BindOutput("Out", {LiteType::GetTensorTy(TARGET(kARM), PRECISION(kInt64))})
.Finalize();
......@@ -102,6 +102,22 @@ class ElementwiseDivActivationCompute
virtual ~ElementwiseDivActivationCompute() = default;
};
// ARM kernel class for elementwise_mod, parameterized on the element type T
// and the kernel precision PType. Run() is defined out of line.
template <typename T, PrecisionType PType>
class ElementwiseModCompute : public KernelLite<TARGET(kARM), PType> {
public:
// Executes the kernel on the currently set parameters.
void Run() override;
virtual ~ElementwiseModCompute() = default;
};
// class ElementwiseModActivationCompute
// : public KernelLite<TARGET(kARM), PRECISION(kFloat)> {
// public:
// void Run() override;
// virtual ~ElementwiseModActivationCompute() = default;
// };
} // namespace arm
} // namespace kernels
} // namespace lite
......
......@@ -14,6 +14,7 @@
#include "lite/kernels/arm/elementwise_compute.h"
#include <gtest/gtest.h>
#include <cmath>
#include <string>
#include <vector>
#include "lite/core/op_registry.h"
......@@ -140,6 +141,119 @@ void elementwise_compute_ref(const operators::ElementwiseParam& param,
}
}
// Reference implementation of a floating-point elementwise mod with a
// broadcast Y operand, used to validate kernel output.
//
// X is viewed as [batch, channels, num], where `channels` is the product of
// Y's dims and `batch` / `num` are the products of the X dims before `axis`
// and after the dims covered by Y. Each output element is
//   fmod(y + fmod(x, y), y)
// with y = Y[channel]. When act_type is "relu", negative results are clamped
// to zero afterwards; any other act_type leaves the results untouched.
template <typename dtype>
void elementwise_fmod_compute_ref(const operators::ElementwiseParam& param,
                                  const std::string act_type) {
  const dtype* lhs = param.X->data<const dtype>();
  const dtype* rhs = param.Y->data<const dtype>();
  dtype* result = param.Out->mutable_data<dtype>();
  auto x_dims = param.X->dims();
  auto y_dims = param.Y->dims();

  // A negative axis means "align Y with the trailing dims of X".
  int axis = param.axis;
  if (axis < 0) {
    axis = x_dims.size() - y_dims.size();
  }

  int batch = 1;
  for (int d = 0; d < axis; ++d) {
    batch *= x_dims[d];
  }
  int channels = 1;
  for (int d = 0; d < y_dims.size(); ++d) {
    channels *= y_dims[d];
  }
  int num = 1;
  for (int d = y_dims.size() + axis; d < x_dims.size(); ++d) {
    num *= x_dims[d];
  }

  for (int b = 0; b < batch; ++b) {
    for (int c = 0; c < channels; ++c) {
      const int base = (b * channels + c) * num;
      const dtype divisor = rhs[c];
      for (int k = 0; k < num; ++k) {
        result[base + k] =
            fmod(divisor + fmod(lhs[base + k], divisor), divisor);
      }
    }
  }

  // Optional relu activation over every output element.
  if (act_type == "relu") {
    const int total = batch * channels * num;
    for (int idx = 0; idx < total; ++idx) {
      result[idx] = result[idx] > 0.0f ? result[idx] : 0.0f;
    }
  }
}
// Reference implementation of the integer elementwise mod with a broadcast Y
// operand, used to validate kernel output.
//
// X is viewed as [batch, channels, num], where `channels` is the product of
// Y's dims and `batch` / `num` are the products of the X dims before `axis`
// and after the dims covered by Y. Each output element is x % Y[channel]
// using the built-in `%` operator. When act_type is "relu", negative results
// are clamped to zero afterwards; any other act_type leaves them untouched.
template <typename dtype>
void elementwise_imod_compute_ref(const operators::ElementwiseParam& param,
                                  const std::string act_type) {
  const dtype* x_data = param.X->data<const dtype>();
  const dtype* y_data = param.Y->data<const dtype>();
  dtype* out_data = param.Out->mutable_data<dtype>();
  auto x_dims = param.X->dims();
  auto y_dims = param.Y->dims();

  // A negative axis means "align Y with the trailing dims of X".
  int axis = param.axis;
  if (axis < 0) {
    axis = x_dims.size() - y_dims.size();
  }

  int batch = 1;
  int channels = 1;
  int num = 1;
  for (int i = 0; i < axis; ++i) {
    batch *= x_dims[i];
  }
  for (int i = 0; i < y_dims.size(); ++i) {
    channels *= y_dims[i];
  }
  for (int i = y_dims.size() + axis; i < x_dims.size(); ++i) {
    num *= x_dims[i];
  }

  for (int i = 0; i < batch; ++i) {
    for (int j = 0; j < channels; ++j) {
      const int offset = (i * channels + j) * num;
      const dtype* din_ptr = x_data + offset;
      const dtype diny_data = y_data[j];
      dtype* dout_ptr = out_data + offset;
      for (int k = 0; k < num; ++k) {
        dout_ptr[k] = din_ptr[k] % diny_data;
      }
    }
  }

  // Optional relu activation. The clamp is done in dtype: comparing against
  // and selecting a float literal would round-trip integer results through
  // float and corrupt values that float cannot represent exactly.
  if (act_type == "relu") {
    const dtype zero = dtype(0);
    for (int i = 0; i < batch; ++i) {
      for (int j = 0; j < channels; ++j) {
        dtype* dout_ptr = out_data + (i * channels + j) * num;
        for (int k = 0; k < num; ++k) {
          dout_ptr[k] = dout_ptr[k] > zero ? dout_ptr[k] : zero;
        }
      }
    }
  }
}
// Explicit instantiations of the reference helpers: float for the fmod
// variant, int32_t and int64_t for the integer variant.
template void elementwise_fmod_compute_ref<float>(
const operators::ElementwiseParam& param, const std::string act_type);
template void elementwise_imod_compute_ref<int32_t>(
const operators::ElementwiseParam& param, const std::string act_type);
template void elementwise_imod_compute_ref<int64_t>(
const operators::ElementwiseParam& param, const std::string act_type);
TEST(elementwise_add, compute) {
ElementwiseAddCompute elementwise_add;
operators::ElementwiseParam param;
......@@ -685,7 +799,7 @@ TEST(fusion_elementwise_max_activation_arm, compute) {
}
for (int i = 0; i < y_dim.production(); i++) {
float sign = i % 2 == 0 ? 0.5f : -0.5f;
y_data[i] = i * sign;
y_data[i] = (i + 1) * sign;
}
param.X = &x;
param.Y = &y;
......@@ -708,6 +822,108 @@ TEST(fusion_elementwise_max_activation_arm, compute) {
}
}
// The kernel registry must yield at least one non-null "elementwise_mod"
// kernel for the ARM target with int64 precision.
TEST(elementwise_mod_int64_arm, retrive_op) {
  auto kernels =
      KernelRegistry::Global().Create<TARGET(kARM), PRECISION(kInt64)>(
          "elementwise_mod");
  ASSERT_FALSE(kernels.empty());
  ASSERT_TRUE(kernels.front());
}
// A default-constructed int64 mod kernel reports kInt64 precision and the
// kARM target.
TEST(elementwise_mod_int64_arm, init) {
  ElementwiseModCompute<int64_t, PRECISION(kInt64)> kernel;
  ASSERT_EQ(kernel.precision(), PRECISION(kInt64));
  ASSERT_EQ(kernel.target(), TARGET(kARM));
}
// Runs the int64 elementwise_mod kernel over a sweep of 4-D X shapes
// {n, c, h, w}, axis values and Y shapes, and compares every output element
// against elementwise_imod_compute_ref.
TEST(elementwise_mod_int64_arm, compute) {
ElementwiseModCompute<int64_t, PRECISION(kInt64)> elementwise_mod;
operators::ElementwiseParam param;
lite::Tensor x, y, output, output_ref;
// The two preprocessor branches only differ in the swept values; both open
// the same six nested loops, which are closed after the #endif. The reduced
// sweep is the one currently compiled.
#if 1
for (auto n : {1, 3, 4}) {
for (auto c : {1, 3, 4}) {
for (auto h : {1, 3, 4}) {
for (auto w : {1, 3, 4}) {
for (auto axis : {-1, 0, 1, 3}) {
for (auto yd : {std::vector<int64_t>({n}),
std::vector<int64_t>({c}),
std::vector<int64_t>({h}),
std::vector<int64_t>({w}),
std::vector<int64_t>({n, c}),
std::vector<int64_t>({c, h}),
std::vector<int64_t>({c, h, w}),
std::vector<int64_t>({n, c, h, w})}) {
#else
// Larger sweep (disabled): adds size 11, axis 2 and more Y shapes.
for (auto n : {1, 3, 4, 11}) {
for (auto c : {1, 3, 4, 11}) {
for (auto h : {1, 3, 4, 11}) {
for (auto w : {1, 3, 4, 11}) {
for (auto axis : {-1, 0, 1, 2, 3}) {
for (auto yd : {std::vector<int64_t>({n}),
std::vector<int64_t>({c}),
std::vector<int64_t>({h}),
std::vector<int64_t>({w}),
std::vector<int64_t>({n, c}),
std::vector<int64_t>({c, h}),
std::vector<int64_t>({h, w}),
std::vector<int64_t>({n, c, h}),
std::vector<int64_t>({c, h, w}),
std::vector<int64_t>({n, c, h, w})}) {
#endif
auto x_dim = DDim(std::vector<int64_t>({n, c, h, w}));
auto y_dim = DDim(yd);
// Resolve a negative axis to "align Y with the trailing dims of X".
int axis_t = axis < 0 ? x_dim.size() - y_dim.size() : axis;
// Skip combinations where Y would extend past X's four dims.
if (axis_t + y_dim.size() > 4) continue;
// Skip combinations where Y's dims do not match X's dims starting at axis_t.
bool flag = false;
for (int i = 0; i < y_dim.size(); i++) {
if (x_dim[i + axis_t] != y_dim[i]) flag = true;
}
if (flag) continue;
x.Resize(x_dim);
y.Resize(y_dim);
output.Resize(x_dim);
output_ref.Resize(x_dim);
auto* x_data = x.mutable_data<int64_t>();
auto* y_data = y.mutable_data<int64_t>();
auto* output_data = output.mutable_data<int64_t>();
auto* output_ref_data = output_ref.mutable_data<int64_t>();
// X holds 1, 2, 3, ...
for (int i = 0; i < x_dim.production(); i++) {
x_data[i] = i + 1;
}
// Y counts down from production() to 1, so no divisor is zero.
for (int i = 0; i < y_dim.production(); i++) {
y_data[i] = y_dim.production() - i;
}
param.X = &x;
param.Y = &y;
param.axis = axis;
param.Out = &output;
elementwise_mod.SetParam(param);
elementwise_mod.Run();
// Re-point Out at the reference tensor and compute the expected values
// without any activation.
param.Out = &output_ref;
elementwise_imod_compute_ref<int64_t>(param, "");
for (int i = 0; i < output.dims().production(); i++) {
if (std::abs(output_data[i] - output_ref_data[i]) > 1e-5 ||
std::isnan(output_data[i]) ||
std::isnan(output_ref_data[i])) {
LOG(FATAL) << "elementwise mod cmp error, i: " << i
<< ", x_data: " << x_data[i]
<< ", y_data: " << y_data[i]
<< ", output_data: " << output_data[i]
<< ", output_ref_data: " << output_ref_data[i];
}
}
}
}
}
}
}
}
}
} // namespace arm
} // namespace kernels
} // namespace lite
......@@ -719,3 +935,4 @@ USE_LITE_KERNEL(elementwise_mul, kARM, kFloat, kNCHW, def);
// Kernels referenced by this test file; the int64 elementwise_mod kernel is
// listed last.
USE_LITE_KERNEL(fusion_elementwise_mul_activation, kARM, kFloat, kNCHW, def);
USE_LITE_KERNEL(elementwise_max, kARM, kFloat, kNCHW, def);
USE_LITE_KERNEL(fusion_elementwise_max_activation, kARM, kFloat, kNCHW, def);
USE_LITE_KERNEL(elementwise_mod, kARM, kInt64, kNCHW, def);
......@@ -144,6 +144,7 @@ REGISTER_LITE_OP(elementwise_add, paddle::lite::operators::ElementwiseOp);
// Each of these op names is registered with the same ElementwiseOp class.
REGISTER_LITE_OP(elementwise_mul, paddle::lite::operators::ElementwiseOp);
REGISTER_LITE_OP(elementwise_max, paddle::lite::operators::ElementwiseOp);
REGISTER_LITE_OP(elementwise_div, paddle::lite::operators::ElementwiseOp);
REGISTER_LITE_OP(elementwise_mod, paddle::lite::operators::ElementwiseOp);
// #ifdef LITE_WITH_TRAIN
// REGISTER_LITE_OP(elementwise_sub_grad,
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册