From 25d2ad2d3d1868a9019652e7394e48c2084ce50d Mon Sep 17 00:00:00 2001 From: liuqi Date: Fri, 27 Apr 2018 15:46:03 +0800 Subject: [PATCH] Fix eltwise sub and div swapped bug. --- mace/kernels/eltwise.h | 55 ++++++++++++++++++++------- mace/kernels/opencl/cl/eltwise.cl | 12 +++++- mace/kernels/opencl/eltwise_opencl.cc | 7 +++- mace/ops/eltwise_test.cc | 31 +++++++++++++++ 4 files changed, 88 insertions(+), 17 deletions(-) diff --git a/mace/kernels/eltwise.h b/mace/kernels/eltwise.h index 945963d6..703a515e 100644 --- a/mace/kernels/eltwise.h +++ b/mace/kernels/eltwise.h @@ -114,6 +114,7 @@ inline void TensorVector(const EltwiseType type, const index_t batch, const index_t channel, const index_t hw, + const bool swapped, float *output) { switch (type) { case SUM: @@ -129,13 +130,26 @@ inline void TensorVector(const EltwiseType type, } break; case SUB: + if (swapped) { #pragma omp parallel for collapse(3) - for (index_t b = 0; b < batch; ++b) { - for (index_t c = 0; c < channel; ++c) { - for (index_t i = 0; i < hw; ++i) { - const index_t idx0 = (b * channel + c) * hw + i; - const index_t idx1 = b * channel + c; - output[idx0] = input0[idx0] - input1[idx1]; + for (index_t b = 0; b < batch; ++b) { + for (index_t c = 0; c < channel; ++c) { + for (index_t i = 0; i < hw; ++i) { + const index_t idx0 = (b * channel + c) * hw + i; + const index_t idx1 = b * channel + c; + output[idx0] = input1[idx1] - input0[idx0]; + } + } + } + } else { +#pragma omp parallel for collapse(3) + for (index_t b = 0; b < batch; ++b) { + for (index_t c = 0; c < channel; ++c) { + for (index_t i = 0; i < hw; ++i) { + const index_t idx0 = (b * channel + c) * hw + i; + const index_t idx1 = b * channel + c; + output[idx0] = input0[idx0] - input1[idx1]; + } } } } @@ -153,13 +167,26 @@ inline void TensorVector(const EltwiseType type, } break; case DIV: + if (swapped) { #pragma omp parallel for collapse(3) - for (index_t b = 0; b < batch; ++b) { - for (index_t c = 0; c < channel; ++c) { - for (index_t i = 0; i < hw; ++i) { - const index_t idx0 = (b * channel + c) * hw + i; - const index_t idx1 = b * channel + c; - output[idx0] = input0[idx0] / input1[idx1]; + for (index_t b = 0; b < batch; ++b) { + for (index_t c = 0; c < channel; ++c) { + for (index_t i = 0; i < hw; ++i) { + const index_t idx0 = (b * channel + c) * hw + i; + const index_t idx1 = b * channel + c; + output[idx0] = input1[idx1] / input0[idx0]; + } + } + } + } else { +#pragma omp parallel for collapse(3) + for (index_t b = 0; b < batch; ++b) { + for (index_t c = 0; c < channel; ++c) { + for (index_t i = 0; i < hw; ++i) { + const index_t idx0 = (b * channel + c) * hw + i; + const index_t idx1 = b * channel + c; + output[idx0] = input0[idx0] / input1[idx1]; + } } } } @@ -283,12 +310,14 @@ struct EltwiseFunctor: EltwiseFunctorBase { const Tensor *input1, Tensor *output, StatsFuture *future) { + bool swapped = false; if (input1 != nullptr) { MACE_CHECK(input0->dim_size() == input1->dim_size()) << "Inputs of Eltwise op must be same shape"; if (input0->size() != input1->size()) { if (input0->size() < input1->size()) { std::swap(input0, input1); + swapped = true; } MACE_CHECK(input0->dim(0) == input1->dim(0) && input0->dim(1) == input1->dim(1) && @@ -316,7 +345,7 @@ struct EltwiseFunctor: EltwiseFunctorBase { const index_t channel = input0->dim(1); const index_t hw = input0->dim(2) * input0->dim(3); TensorVector(type_, input0_ptr, input1_ptr, - batch, channel, hw, output_ptr); + batch, channel, hw, swapped, output_ptr); } else { if (!coeff_.empty() && type_ == SUM) { #pragma omp parallel for diff --git a/mace/kernels/opencl/cl/eltwise.cl b/mace/kernels/opencl/cl/eltwise.cl index 717cf868..3a0ea33c 100644 --- a/mace/kernels/opencl/cl/eltwise.cl +++ b/mace/kernels/opencl/cl/eltwise.cl @@ -45,11 +45,19 @@ __kernel void eltwise(KERNEL_ERROR_PARAMS out = in0 + in1; #endif #elif ELTWISE_TYPE == 1 - out = in0 - in1; + #ifdef SWAPPED + out = in1 - in0; + #else + out = in0 - in1; + #endif #elif ELTWISE_TYPE == 2 out = in0 * in1; #elif ELTWISE_TYPE == 3 - out = in0 / in1; + #ifdef SWAPPED + out = in1 / in0; + #else + out = in0 / in1; + #endif #elif ELTWISE_TYPE == 4 out = fmin(in0, in1); #elif ELTWISE_TYPE == 5 diff --git a/mace/kernels/opencl/eltwise_opencl.cc b/mace/kernels/opencl/eltwise_opencl.cc index 56e371b6..3e3d2e47 100644 --- a/mace/kernels/opencl/eltwise_opencl.cc +++ b/mace/kernels/opencl/eltwise_opencl.cc @@ -25,12 +25,14 @@ void EltwiseFunctor::operator()(const Tensor *input0, const Tensor *input1, Tensor *output, StatsFuture *future) { + bool swapped = false; if (input1 != nullptr) { MACE_CHECK(input0->dim_size() == input1->dim_size()) << "Inputs of Eltwise op must be same shape"; if (input0->size() != input1->size()) { if (input0->size() < input1->size()) { std::swap(input0, input1); + swapped = true; } MACE_CHECK(input0->dim(0) == input1->dim(0) && input1->dim(1) == 1 && @@ -62,9 +64,10 @@ void EltwiseFunctor::operator()(const Tensor *input0, built_options.emplace("-DCMD_DATA_TYPE=" + DtToUpstreamCLCMDDt(dt)); built_options.emplace(MakeString("-DELTWISE_TYPE=", type_)); if (input1 == nullptr) { - built_options.emplace(MakeString("-DINPUT_TYPE=1")); + built_options.emplace("-DINPUT_TYPE=1"); } else if (input0->size() != input1->size()) { - built_options.emplace(MakeString("-DINPUT_TYPE=2")); + built_options.emplace("-DINPUT_TYPE=2"); + if (swapped) built_options.emplace("-DSWAPPED"); } if (!coeff_.empty()) built_options.emplace("-DCOEFF_SUM"); diff --git a/mace/ops/eltwise_test.cc b/mace/ops/eltwise_test.cc index edf457e8..7685d436 100644 --- a/mace/ops/eltwise_test.cc +++ b/mace/ops/eltwise_test.cc @@ -238,6 +238,12 @@ TEST_F(EltwiseOpTest, CPUSimpleTensorVector) { {1, 2, 3, 4, 5, 6, 7, 8, 9, 10}, {1, 1, 1, 5}, {1, 2, 3, 4, 5}, {0, 0, 0, 0, 0, 5, 5, 5, 5, 5}); + SimpleTensorEltwise(kernels::EltwiseType::SUB, + {1, 1, 1, 5}, {1, 2, 3, 4, 5}, + {1, 2, 1, 5}, + {1, 2, 3, 4, 5, 6, 7, 8, 9, 10}, + {0, 0, 0, 0, 0, + -5, -5, -5, -5, -5}); SimpleTensorEltwise(kernels::EltwiseType::PROD, {1, 1, 1, 3}, {1, 2, 3}, {1, 2, 1, 3}, {1, 2, 3, 4, 5, 6}, @@ -247,6 +253,11 @@ TEST_F(EltwiseOpTest, CPUSimpleTensorVector) { {1, 2, 3, 4, 5, 6, 7, 8, 9, 10}, {1, 1, 1, 5}, {1, 1, 1, 1, 5}, {1, 2, 3, 4, 1, 6, 7, 8, 9, 2}); + SimpleTensorEltwise(kernels::EltwiseType::DIV, + {1, 1, 1, 5}, {1, 1, 1, 2, 4}, + {1, 2, 1, 5}, + {1, 1, 1, 2, 2, 1, 1, 1, 1, 1}, + {1, 1, 1, 1, 2, 1, 1, 1, 2, 4}); SimpleTensorEltwise(kernels::EltwiseType::MIN, {1, 1, 1, 5}, {1, 2, 3, 4, 5}, {1, 2, 1, 5}, @@ -276,6 +287,12 @@ TEST_F(EltwiseOpTest, GPUSimpleTensorVector) { {1, 2, 1, 5}, {1, 2, 3, 4, 5, 6, 7, 8, 9, 10}, {1, 1, 1, 5}, {1, 2, 3, 4, 5}, {0, 0, 0, 0, 0, 5, 5, 5, 5, 5}); + SimpleTensorEltwise( + kernels::EltwiseType::SUB, + {1, 1, 1, 5}, {1, 2, 3, 4, 5}, + {1, 2, 1, 5}, + {1, 2, 3, 4, 5, 6, 7, 8, 9, 10}, + {0, 0, 0, 0, 0, -5, -5, -5, -5, -5}); SimpleTensorEltwise( kernels::EltwiseType::PROD, {1, 1, 1, 3}, {1, 2, 3}, @@ -286,6 +303,12 @@ TEST_F(EltwiseOpTest, GPUSimpleTensorVector) { {1, 2, 1, 5}, {1, 2, 3, 4, 5, 6, 7, 8, 9, 10}, {1, 1, 1, 5}, {1, 1, 1, 1, 5}, {1, 2, 3, 4, 1, 6, 7, 8, 9, 2}); + SimpleTensorEltwise( + kernels::EltwiseType::DIV, + {1, 1, 1, 5}, {1, 1, 1, 2, 4}, + {1, 2, 1, 5}, + {1, 1, 1, 2, 2, 1, 1, 1, 1, 1}, + {1, 1, 1, 1, 2, 1, 1, 1, 2, 4}); SimpleTensorEltwise( kernels::EltwiseType::MIN, {1, 1, 1, 5}, {1, 2, 3, 4, 5}, @@ -530,6 +553,10 @@ TEST_F(EltwiseOpTest, RandomTensorVecFloat) { {1, 32, 32, 16}, {1, 1, 1, 16}); RandomTensorEltwise(kernels::EltwiseType::SUB, {5, 32, 32, 16}, {5, 1, 1, 16}); + RandomTensorEltwise(kernels::EltwiseType::SUB, + {5, 1, 1, 16}, {5, 32, 32, 16}); + RandomTensorEltwise(kernels::EltwiseType::PROD, + {1, 31, 37, 17}, {1, 1, 1, 17}); RandomTensorEltwise(kernels::EltwiseType::PROD, {1, 1, 1, 17}, {1, 31, 37, 17}); RandomTensorEltwise(kernels::EltwiseType::DIV, @@ -547,8 +574,12 @@ TEST_F(EltwiseOpTest, RandomTensorVecHalf) { {1, 32, 32, 16}, {1, 1, 1, 16}); RandomTensorEltwise(kernels::EltwiseType::SUB, {3, 32, 32, 16}, {3, 1, 1, 16}); + RandomTensorEltwise(kernels::EltwiseType::SUB, + {3, 1, 1, 16}, {3, 32, 32, 16}); RandomTensorEltwise(kernels::EltwiseType::PROD, {1, 1, 1, 17}, {1, 31, 37, 17}); + RandomTensorEltwise(kernels::EltwiseType::DIV, + {5, 31, 37, 17}, {5, 1, 1, 17}); RandomTensorEltwise(kernels::EltwiseType::DIV, {5, 1, 1, 17}, {5, 31, 37, 17}); RandomTensorEltwise(kernels::EltwiseType::MIN, -- GitLab