From f078a2654f41c4ff50fbd67bd4622f97c11c85c6 Mon Sep 17 00:00:00 2001 From: liuqi Date: Thu, 26 Apr 2018 20:16:15 +0800 Subject: [PATCH] Refactor eltwise op. --- mace/kernels/eltwise.h | 468 +++++++------ mace/kernels/opencl/cl/eltwise.cl | 116 ++-- mace/kernels/opencl/eltwise_opencl.cc | 47 +- mace/ops/eltwise.h | 51 +- mace/ops/eltwise_benchmark.cc | 26 +- mace/ops/eltwise_test.cc | 825 ++++++++++++++--------- mace/python/tools/caffe_converter_lib.py | 10 +- mace/python/tools/tf_converter_lib.py | 42 +- 8 files changed, 903 insertions(+), 682 deletions(-) diff --git a/mace/kernels/eltwise.h b/mace/kernels/eltwise.h index 8ebb4364..945963d6 100644 --- a/mace/kernels/eltwise.h +++ b/mace/kernels/eltwise.h @@ -18,6 +18,7 @@ #include #include #include +#include #include "mace/core/future.h" #include "mace/core/tensor.h" @@ -30,216 +31,302 @@ namespace mace { namespace kernels { enum EltwiseType { - PROD = 0, - SUM = 1, - MAX = 2, - MIN = 3, - SUB = 4, - DIV = 5, + SUM = 0, + SUB = 1, + PROD = 2, + DIV = 3, + MIN = 4, + MAX = 5, NEG = 6, ABS = 7, SQR_DIFF = 8, + NONE = 9, }; +inline void TensorScalar(const EltwiseType type, + const float *input0, + const float value, + const index_t size, + float *output) { + switch (type) { + case SUM: +#pragma omp parallel for + for (index_t i = 0; i < size; ++i) { + output[i] = input0[i] + value; + } + break; + case SUB: +#pragma omp parallel for + for (index_t i = 0; i < size; ++i) { + output[i] = input0[i] - value; + } + break; + case PROD: +#pragma omp parallel for + for (index_t i = 0; i < size; ++i) { + output[i] = input0[i] * value; + } + break; + case DIV: +#pragma omp parallel for + for (index_t i = 0; i < size; ++i) { + output[i] = input0[i] / value; + } + break; + case MIN: +#pragma omp parallel for + for (index_t i = 0; i < size; ++i) { + output[i] = std::min(input0[i], value); + } + break; + case MAX: +#pragma omp parallel for + for (index_t i = 0; i < size; ++i) { + output[i] = std::max(input0[i], value); + } + break; + case NEG: +#pragma omp parallel for + for (index_t i = 0; i < size; ++i) { + output[i] = -input0[i]; + } + break; + case ABS: +#pragma omp parallel for + for (index_t i = 0; i < size; ++i) { + output[i] = std::abs(input0[i]); + } + break; + case SQR_DIFF: +#pragma omp parallel for + for (index_t i = 0; i < size; ++i) { + output[i] = std::pow(input0[i] - value, 2.f); + } + break; + default: + LOG(FATAL) << "Eltwise op not support type " << type; + } +} + +inline void TensorVector(const EltwiseType type, + const float *input0, + const float *input1, + const index_t batch, + const index_t channel, + const index_t hw, + float *output) { + switch (type) { + case SUM: +#pragma omp parallel for collapse(3) + for (index_t b = 0; b < batch; ++b) { + for (index_t c = 0; c < channel; ++c) { + for (index_t i = 0; i < hw; ++i) { + const index_t idx0 = (b * channel + c) * hw + i; + const index_t idx1 = b * channel + c; + output[idx0] = input0[idx0] + input1[idx1]; + } + } + } + break; + case SUB: +#pragma omp parallel for collapse(3) + for (index_t b = 0; b < batch; ++b) { + for (index_t c = 0; c < channel; ++c) { + for (index_t i = 0; i < hw; ++i) { + const index_t idx0 = (b * channel + c) * hw + i; + const index_t idx1 = b * channel + c; + output[idx0] = input0[idx0] - input1[idx1]; + } + } + } + break; + case PROD: +#pragma omp parallel for collapse(3) + for (index_t b = 0; b < batch; ++b) { + for (index_t c = 0; c < channel; ++c) { + for (index_t i = 0; i < hw; ++i) { + const index_t idx0 = (b * channel + c) * hw + i; + const index_t idx1 = b * channel + c; + output[idx0] = input0[idx0] * input1[idx1]; + } + } + } + break; + case DIV: +#pragma omp parallel for collapse(3) + for (index_t b = 0; b < batch; ++b) { + for (index_t c = 0; c < channel; ++c) { + for (index_t i = 0; i < hw; ++i) { + const index_t idx0 = (b * channel + c) * hw + i; + const index_t idx1 = b * channel + c; + output[idx0] = input0[idx0] / input1[idx1]; + } + } + } + break; + case MIN: +#pragma omp parallel for collapse(3) + for (index_t b = 0; b < batch; ++b) { + for (index_t c = 0; c < channel; ++c) { + for (index_t i = 0; i < hw; ++i) { + const index_t idx0 = (b * channel + c) * hw + i; + const index_t idx1 = b * channel + c; + output[idx0] = std::min(input0[idx0], input1[idx1]); + } + } + } + break; + case MAX: +#pragma omp parallel for collapse(3) + for (index_t b = 0; b < batch; ++b) { + for (index_t c = 0; c < channel; ++c) { + for (index_t i = 0; i < hw; ++i) { + const index_t idx0 = (b * channel + c) * hw + i; + const index_t idx1 = b * channel + c; + output[idx0] = std::max(input0[idx0], input1[idx1]); + } + } + } + break; + case SQR_DIFF: +#pragma omp parallel for collapse(3) + for (index_t b = 0; b < batch; ++b) { + for (index_t c = 0; c < channel; ++c) { + for (index_t i = 0; i < hw; ++i) { + const index_t idx0 = (b * channel + c) * hw + i; + const index_t idx1 = b * channel + c; + output[idx0] = std::pow(input0[idx0] - input1[idx1], 2.f); + } + } + } + break; + default: + LOG(FATAL) << "Eltwise op not support type " << type; + } +} +inline void TensorEltwise(const EltwiseType type, + const float *input0, + const float *input1, + const index_t size, + float *output) { + switch (type) { + case SUM: +#pragma omp parallel for + for (index_t i = 0; i < size; ++i) { + output[i] = input0[i] + input1[i]; + } + break; + case SUB: +#pragma omp parallel for + for (index_t i = 0; i < size; ++i) { + output[i] = input0[i] - input1[i]; + } + break; + case PROD: +#pragma omp parallel for + for (index_t i = 0; i < size; ++i) { + output[i] = input0[i] * input1[i]; + } + break; + case DIV: +#pragma omp parallel for + for (index_t i = 0; i < size; ++i) { + output[i] = input0[i] / input1[i]; + } + break; + case MIN: +#pragma omp parallel for + for (index_t i = 0; i < size; ++i) { + output[i] = std::min(input0[i], input1[i]); + } + break; + case MAX: +#pragma omp parallel for + for (index_t i = 0; i < size; ++i) { + output[i] = std::max(input0[i], input1[i]); + } + break; + case SQR_DIFF: +#pragma omp parallel for + for (index_t i = 0; i < size; ++i) { + output[i] = std::pow(input0[i] - input1[i], 2.f); + } + break; + default: + LOG(FATAL) << "Eltwise op not support type " << type; + } +} + + struct EltwiseFunctorBase { EltwiseFunctorBase(const EltwiseType type, - const std::vector &coeff) - : type_(type), coeff_(coeff) {} + const std::vector &coeff, + const float value) + : type_(type), coeff_(coeff), value_(value) {} EltwiseType type_; std::vector coeff_; + float value_; }; template -struct EltwiseFunctor : EltwiseFunctorBase { +struct EltwiseFunctor; + +template <> +struct EltwiseFunctor: EltwiseFunctorBase { EltwiseFunctor(const EltwiseType type, - const std::vector &coeff) - : EltwiseFunctorBase(type, coeff) {} + const std::vector &coeff, + const float value) + : EltwiseFunctorBase(type, coeff, value) {} void operator()(const Tensor *input0, const Tensor *input1, - const index_t start_axis, - const bool is_scaler, - const float value, - const bool swap, Tensor *output, StatsFuture *future) { - if (is_scaler) { - Tensor::MappingGuard input0_guard(input0); - Tensor::MappingGuard output_guard(output); - - const T *input0_ptr = input0->data(); - T *output_ptr = output->mutable_data(); - const index_t num = input0->size(); - switch (type_) { - case PROD: -#pragma omp parallel for - for (index_t i = 0; i < num; ++i) { - output_ptr[i] = input0_ptr[i] * value; - } - break; - case SUM: - if (coeff_.empty()) { -#pragma omp parallel for - for (index_t i = 0; i < num; ++i) { - output_ptr[i] = input0_ptr[i] + value; - } - } else { - const float coeff_0 = swap ? coeff_[1] : coeff_[0]; - const float coeff_1 = swap ? coeff_[0] : coeff_[1]; -#pragma omp parallel for - for (index_t i = 0; i < num; ++i) { - output_ptr[i] = coeff_0 * input0_ptr[i] + - coeff_1 * value; - } - } - break; - case MAX: -#pragma omp parallel for - for (index_t i = 0; i < num; ++i) { - output_ptr[i] = std::max(input0_ptr[i], value); - } - break; - case MIN: -#pragma omp parallel for - for (index_t i = 0; i < num; ++i) { - output_ptr[i] = std::min(input0_ptr[i], value); - } - break; - case SUB: -#pragma omp parallel for - for (index_t i = 0; i < num; ++i) { - output_ptr[i] = swap ? value - input0_ptr[i] : - input0_ptr[i] - value; - } - break; - case DIV: - if (!swap) { - MACE_CHECK(fabs(value) > 1e-6, "cannot divided by 0."); -#pragma omp parallel for - for (index_t i = 0; i < num; ++i) { - output_ptr[i] = input0_ptr[i] / value; - } - } else { -#pragma omp parallel for - for (index_t i = 0; i < num; ++i) { - MACE_CHECK(fabs(input0_ptr[i]) > 1e-6, "cannot divided by 0."); - output_ptr[i] = value / input0_ptr[i]; - } - } - break; - case SQR_DIFF: -#pragma omp parallel for - for (index_t i = 0; i < num; ++i) { - const float tmp = input0_ptr[i] - value; - output_ptr[i] = tmp * tmp; - } - break; - default: - LOG(FATAL) << "Eltwise op not support type " << type_; + if (input1 != nullptr) { + MACE_CHECK(input0->dim_size() == input1->dim_size()) + << "Inputs of Eltwise op must be same shape"; + if (input0->size() != input1->size()) { + if (input0->size() < input1->size()) { + std::swap(input0, input1); + } + MACE_CHECK(input0->dim(0) == input1->dim(0) && + input0->dim(1) == input1->dim(1) && + input1->dim(2) == 1 && + input1->dim(3) == 1) + << "Element-Wise op only support channel dimension broadcast"; } + } + output->ResizeLike(input0); + + Tensor::MappingGuard input0_guard(input0); + Tensor::MappingGuard output_guard(output); + + const float *input0_ptr = input0->data(); + float *output_ptr = output->mutable_data(); + const index_t size = input0->size(); + if (input1 == nullptr) { + TensorScalar(type_, input0_ptr, value_, size, output_ptr); } else { - MACE_CHECK_NOTNULL(input0); - MACE_CHECK_NOTNULL(input1); - Tensor::MappingGuard input0_guard(input0); Tensor::MappingGuard input1_guard(input1); - Tensor::MappingGuard output_guard(output); - - const T *input0_ptr = input0->data(); - const T *input1_ptr = input1->data(); - T *output_ptr = output->mutable_data(); - const index_t size0 = input0->size(); - const index_t size1 = input1->size(); - const index_t num = size0 / size1; - switch (type_) { - case PROD: -#pragma omp parallel for collapse(2) - for (index_t i = 0; i < num; ++i) { - for (index_t j= 0; j < size1; ++j) { - output_ptr[i * size1 + j] = - input0_ptr[i * size1 + j] * input1_ptr[j]; - } - } - break; - case SUM: - if (coeff_.empty()) { -#pragma omp parallel for collapse(2) - for (index_t i = 0; i < num; ++i) { - for (index_t j = 0; j < size1; ++j) { - output_ptr[i * size1 + j] = - input0_ptr[i * size1 + j] + input1_ptr[j]; - } - } - } else { - const float coeff_0 = swap ? coeff_[1] : coeff_[0]; - const float coeff_1 = swap ? coeff_[0] : coeff_[1]; -#pragma omp parallel for collapse(2) - for (index_t i = 0; i < num; ++i) { - for (index_t j = 0; j < size1; ++j) { - output_ptr[i * size1 + j] = - coeff_0 * input0_ptr[i * size1 + j] + - coeff_1 * input1_ptr[j]; - } - } - } - break; - case MAX: -#pragma omp parallel for collapse(2) - for (index_t i = 0; i < num; ++i) { - for (index_t j = 0; j < size1; ++j) { - output_ptr[i * size1 + j] = - std::max(input0_ptr[i * size1 + j], input1_ptr[j]); - } - } - break; - case MIN: -#pragma omp parallel for collapse(2) - for (index_t i = 0; i < num; ++i) { - for (index_t j = 0; j < size1; ++j) { - output_ptr[i * size1 + j] = - std::min(input0_ptr[i * size1 + j], input1_ptr[j]); - } - } - break; - case SUB: -#pragma omp parallel for collapse(2) - for (index_t i = 0; i < num; ++i) { - for (index_t j = 0; j < size1; ++j) { - output_ptr[i * size1 + j] = swap ? - input0_ptr[i * size1 + j] - input1_ptr[j] : - input1_ptr[j] - input0_ptr[i * size1 + j]; - } - } - break; - case DIV: -#pragma omp parallel for collapse(2) - for (index_t i = 0; i < num; ++i) { - for (index_t j = 0; j < size1; ++j) { - if (!swap) { - MACE_CHECK(fabs(input1_ptr[j]) > 1e-6, "cannot divided by 0."); - output_ptr[i * size1 + j] = - input0_ptr[i * size1 + j] / input1_ptr[j]; - } else { - MACE_CHECK(fabs(input0_ptr[i * size1 + j]) > 1e-6, - "cannot divided by 0."); - output_ptr[i * size1 + j] = - input1_ptr[j] / input0_ptr[i * size1 + j]; - } - } - } - break; - case SQR_DIFF: -#pragma omp parallel for collapse(2) - for (index_t i = 0; i < num; ++i) { - for (index_t j = 0; j < size1; ++j) { - const T tmp = input0_ptr[i * size1 + j] - input1_ptr[j]; - output_ptr[i * size1 + j] = tmp * tmp; - } + const float *input1_ptr = input1->data(); + if (input1->size() != input0->size()) { + const index_t batch = input0->dim(0); + const index_t channel = input0->dim(1); + const index_t hw = input0->dim(2) * input0->dim(3); + TensorVector(type_, input0_ptr, input1_ptr, + batch, channel, hw, output_ptr); + } else { + if (!coeff_.empty() && type_ == SUM) { +#pragma omp parallel for + for (index_t i = 0; i < size; ++i) { + output_ptr[i] = coeff_[0] * input0_ptr[i] + + coeff_[1] * input1_ptr[i]; } - break; - default: - LOG(FATAL) << "Eltwise op not support type " << type_; + } else { + TensorEltwise(type_, input0_ptr, input1_ptr, size, output_ptr); + } } } } @@ -249,15 +336,12 @@ struct EltwiseFunctor : EltwiseFunctorBase { template struct EltwiseFunctor : EltwiseFunctorBase { EltwiseFunctor(const EltwiseType type, - const std::vector &coeff) - : EltwiseFunctorBase(type, coeff) {} + const std::vector &coeff, + const float value) + : EltwiseFunctorBase(type, coeff, value) {} void operator()(const Tensor *input0, const Tensor *input1, - const index_t start_axis, - const bool is_scaler, - const float value, - const bool swap, Tensor *output, StatsFuture *future); diff --git a/mace/kernels/opencl/cl/eltwise.cl b/mace/kernels/opencl/cl/eltwise.cl index b2ebebec..717cf868 100644 --- a/mace/kernels/opencl/cl/eltwise.cl +++ b/mace/kernels/opencl/cl/eltwise.cl @@ -3,8 +3,11 @@ __kernel void eltwise(KERNEL_ERROR_PARAMS GLOBAL_WORK_GROUP_SIZE_DIM3 __read_only image2d_t input0, - __read_only image2d_t input1, +#if INPUT_TYPE == 1 __private const float value, +#else + __read_only image2d_t input1, +#endif __private const int height, __private const int width, __private const int channel, @@ -13,101 +16,68 @@ __kernel void eltwise(KERNEL_ERROR_PARAMS __private const float coeff1, #endif __write_only image2d_t output) { - const int c = get_global_id(0); - const int w = get_global_id(1); + const int chan_idx = get_global_id(0); + const int width_idx = get_global_id(1); const int hb = get_global_id(2); #ifndef NON_UNIFORM_WORK_GROUP - if (c >= global_size_dim0 || w >= global_size_dim1 || hb >= global_size_dim2) + if (chan_idx >= global_size_dim0 || + width_idx >= global_size_dim1 || hb >= global_size_dim2) return; #endif - int pos_w; - int pos_h; -#if START_AXIS == 0 - pos_w = mad24(c, width, w); - pos_h = hb; -#elif START_AXIS == 1 - pos_w = mad24(c, width, w); - pos_h = hb % height; -#elif START_AXIS == 2 - pos_w = mad24(c, width, w); - pos_h = 0; -#elif START_AXIS == 3 - pos_w = c; - pos_h = 0; -#endif - const int pos = mad24(c, width, w); - const int remain_channel = channel - 4 * c; + const int pos = mad24(chan_idx, width, width_idx); DATA_TYPE4 in0 = READ_IMAGET(input0, SAMPLER, (int2)(pos, hb)); - DATA_TYPE4 in1 ; -#if IS_SCALER == 1 - in1 = (DATA_TYPE4){value, value, value, value}; +#if INPUT_TYPE == 1 + DATA_TYPE4 in1 = (DATA_TYPE4)(value, value, value, value); +#elif INPUT_TYPE == 2 + const int batch_idx = hb / height; + DATA_TYPE4 in1 = READ_IMAGET(input1, SAMPLER, (int2)(chan_idx, batch_idx)); #else - in1 = READ_IMAGET(input1, SAMPLER, (int2)(pos_w, pos_h)); + DATA_TYPE4 in1 = READ_IMAGET(input1, SAMPLER, (int2)(pos, hb)); #endif + DATA_TYPE4 out; #if ELTWISE_TYPE == 0 - out = in0 * in1; -#elif ELTWISE_TYPE == 1 - -#ifdef COEFF_SUM - #if NEEDSWAP == 0 - out = mad(coeff0, in0, mad(coeff1, in1, 0)); - #else + #ifdef COEFF_SUM out = mad(coeff1, in0, mad(coeff0, in1, 0)); + #else + out = in0 + in1; #endif -#else - out = in0 + in1; -#endif - +#elif ELTWISE_TYPE == 1 + out = in0 - in1; #elif ELTWISE_TYPE == 2 - out = fmax(in0, in1); + out = in0 * in1; #elif ELTWISE_TYPE == 3 - out = fmin(in0, in1); + out = in0 / in1; #elif ELTWISE_TYPE == 4 - #if NEED_SWAP == 0 - out = in0 - in1; - #else - out = in1 - in0; - #endif + out = fmin(in0, in1); #elif ELTWISE_TYPE == 5 - #if NEED_SWAP == 0 - if (fabs(in1.x) > 0.000001f) - out.x = in0.x / in1.x; - if (fabs(in1.y) > 0.000001f) - out.y = in0.y / in1.y; - if (fabs(in1.z) > 0.000001f) - out.z = in0.z / in1.z; - if (fabs(in1.w) > 0.000001f) - out.w = in0.w / in1.w; - #else - if (fabs(in1.x) > 0.000001f) - out.x = in1.x / in0.x; - if (fabs(in1.y) > 0.000001f) - out.y = in1.y / in0.y; - if (fabs(in1.z) > 0.000001f) - out.z = in1.z / in0.z; - if (fabs(in1.w) > 0.000001f) - out.w = in1.w / in0.w; - #endif + out = fmax(in0, in1); +#elif ELTWISE_TYPE == 6 + in1 = (DATA_TYPE4)(0, 0, 0, 0); + out = in1 - in0; +#elif ELTWISE_TYPE == 7 + out = fabs(in0); #elif ELTWISE_TYPE == 8 DATA_TYPE4 diff = in0 - in1; out = diff * diff; #endif -#if ELTWISE_TYPE == 1 || ELTWISE_TYPE == 2 || ELTWISE_TYPE == 3 \ - || ELTWISE_TYPE == 4 || ELTWISE_TYPE == 8 - if (remain_channel < 4) { - switch (remain_channel) { - case 1: - out.y = 0; - case 2: - out.z = 0; - case 3: - out.w = 0; +#if INPUT_TYPE == 1 + #if ELTWISE_TYPE == 0 || ELTWISE_TYPE == 1 || ELTWISE_TYPE == 4 || ELTWISE_TYPE == 5 || ELTWISE_TYPE == 8 + const int remain_channel = channel - 4 * chan_idx; + if (remain_channel < 4) { + switch (remain_channel) { + case 1: + out.y = 0; + case 2: + out.z = 0; + case 3: + out.w = 0; + } } - } + #endif #endif WRITE_IMAGET(output, (int2)(pos, hb), out); diff --git a/mace/kernels/opencl/eltwise_opencl.cc b/mace/kernels/opencl/eltwise_opencl.cc index 0ec4a1e5..56e371b6 100644 --- a/mace/kernels/opencl/eltwise_opencl.cc +++ b/mace/kernels/opencl/eltwise_opencl.cc @@ -23,16 +23,27 @@ namespace kernels { template void EltwiseFunctor::operator()(const Tensor *input0, const Tensor *input1, - const index_t start_axis, - const bool is_scaler, - const float value, - const bool swap, Tensor *output, StatsFuture *future) { - const index_t batch = input0->dim(0); - const index_t height = input0->dim(1); - const index_t width = input0->dim(2); - const index_t channels = input0->dim(3); + if (input1 != nullptr) { + MACE_CHECK(input0->dim_size() == input1->dim_size()) + << "Inputs of Eltwise op must be same shape"; + if (input0->size() != input1->size()) { + if (input0->size() < input1->size()) { + std::swap(input0, input1); + } + MACE_CHECK(input0->dim(0) == input1->dim(0) && + input1->dim(1) == 1 && + input1->dim(2) == 1 && + input0->dim(3) == input1->dim(3)) + << "Element-Wise op only support channel dimension broadcast"; + } + } + output->ResizeLike(input0); + const index_t batch = output->dim(0); + const index_t height = output->dim(1); + const index_t width = output->dim(2); + const index_t channels = output->dim(3); const index_t channel_blocks = RoundUpDiv4(channels); const index_t batch_height_pixels = batch * height; @@ -41,8 +52,6 @@ void EltwiseFunctor::operator()(const Tensor *input0, static_cast(width), static_cast(batch_height_pixels)}; - const int scaler = is_scaler ? 1 : 0; - const int need_swap = swap ? 1 : 0; auto runtime = OpenCLRuntime::Global(); if (kernel_.get() == nullptr) { std::set built_options; @@ -52,9 +61,13 @@ void EltwiseFunctor::operator()(const Tensor *input0, built_options.emplace("-DDATA_TYPE=" + DtToUpstreamCLDt(dt)); built_options.emplace("-DCMD_DATA_TYPE=" + DtToUpstreamCLCMDDt(dt)); built_options.emplace(MakeString("-DELTWISE_TYPE=", type_)); - built_options.emplace(MakeString("-DSTART_AXIS=", start_axis)); - built_options.emplace(MakeString("-DIS_SCALER=", scaler)); - built_options.emplace(MakeString("-DNEEDSWAP=", need_swap)); + if (input1 == nullptr) { + built_options.emplace(MakeString("-DINPUT_TYPE=1")); + } else if (input0->size() != input1->size()) { + built_options.emplace(MakeString("-DINPUT_TYPE=2")); + } + if (!coeff_.empty()) built_options.emplace("-DCOEFF_SUM"); + if (runtime->IsOutOfRangeCheckEnabled()) { built_options.emplace("-DOUT_OF_RANGE_CHECK"); kernel_error_ = std::move(std::unique_ptr( @@ -66,7 +79,6 @@ void EltwiseFunctor::operator()(const Tensor *input0, if (runtime->IsNonUniformWorkgroupsSupported()) { built_options.emplace("-DNON_UNIFORM_WORK_GROUP"); } - if (!coeff_.empty()) built_options.emplace("-DCOEFF_SUM"); kernel_ = runtime->BuildKernel("eltwise", kernel_name, built_options); kwg_size_ = @@ -84,8 +96,11 @@ void EltwiseFunctor::operator()(const Tensor *input0, kernel_.setArg(idx++, gws[2]); } kernel_.setArg(idx++, *(input0->opencl_image())); - kernel_.setArg(idx++, *(input1->opencl_image())); - kernel_.setArg(idx++, value); + if (input1 == nullptr) { + kernel_.setArg(idx++, value_); + } else { + kernel_.setArg(idx++, *(input1->opencl_image())); + } kernel_.setArg(idx++, static_cast(height)); kernel_.setArg(idx++, static_cast(width)); kernel_.setArg(idx++, static_cast(channels)); diff --git a/mace/ops/eltwise.h b/mace/ops/eltwise.h index 2972a83a..57f73e26 100644 --- a/mace/ops/eltwise.h +++ b/mace/ops/eltwise.h @@ -28,57 +28,20 @@ class EltwiseOp : public Operator { : Operator(op_def, ws), functor_(static_cast( OperatorBase::GetSingleArgument( - "type", static_cast(kernels::EltwiseType::SUM))), - OperatorBase::GetRepeatedArgument("coeff")) {} + "type", static_cast(kernels::EltwiseType::NONE))), + OperatorBase::GetRepeatedArgument("coeff"), + OperatorBase::GetSingleArgument("x", 1.0)) {} bool Run(StatsFuture *future) override { - if (this->InputSize() == 1) { - const Tensor* input = this->Input(0); - Tensor *output = this->Output(OUTPUT); - start_axis_ = input->dim_size() - 1; - is_scaler_ = true; - output->ResizeLike(input); - const float x = OperatorBase::GetSingleArgument("x", 1.0); - functor_(input, nullptr, start_axis_, - is_scaler_, x, false, output, future); - } else { - const index_t size0 = this->Input(0)->size(); - const index_t size1 = this->Input(1)->size(); - const bool swap = (size0 < size1); - const Tensor *input0 = swap ? this->Input(1) : this->Input(0); - const Tensor *input1 = swap ? this->Input(0) : this->Input(1); - - Tensor *output = this->Output(OUTPUT); - MACE_CHECK(input0->dim_size() == input1->dim_size()) - << "Inputs of Eltwise op must be same shape"; - start_axis_ = input0->dim_size() - 1; - is_scaler_ = (input1->size() == 1); - uint32_t compared_size = 1; - if (!is_scaler_) { - while (start_axis_ >= 0) { - MACE_CHECK(input0->dim(start_axis_) == input1->dim(start_axis_), - "Invalid inputs dimension at axis: ") << start_axis_ - << "input 0: " << input0->dim(start_axis_) - << "input 1: " << input1->dim(start_axis_); - compared_size *= input1->dim(start_axis_); - if (compared_size == input1->size()) { - break; - } - start_axis_--; - } - } - output->ResizeLike(input0); - const float x = OperatorBase::GetSingleArgument("x", 1.0); - functor_(input0, input1, start_axis_, - is_scaler_, x, swap, output, future); - } + const Tensor* input0 = this->Input(0); + const Tensor* input1 = this->InputSize() == 2 ? this->Input(1) : nullptr; + Tensor *output = this->Output(OUTPUT); + functor_(input0, input1, output, future); return true; } private: kernels::EltwiseFunctor functor_; - index_t start_axis_; - bool is_scaler_; private: OP_OUTPUT_TAGS(OUTPUT); diff --git a/mace/ops/eltwise_benchmark.cc b/mace/ops/eltwise_benchmark.cc index b9577919..8c3843a6 100644 --- a/mace/ops/eltwise_benchmark.cc +++ b/mace/ops/eltwise_benchmark.cc @@ -35,10 +35,10 @@ void EltwiseBenchmark( net.AddRandomInput("Input1", {n, h, w, c}); if (D == DeviceType::OPENCL) { - BufferToImage(&net, "Input0", "InputImg0", - kernels::BufferType::IN_OUT_CHANNEL); - BufferToImage(&net, "Input1", "InputImg1", - kernels::BufferType::IN_OUT_CHANNEL); + BufferToImage(&net, "Input0", "InputImg0", + kernels::BufferType::IN_OUT_CHANNEL); + BufferToImage(&net, "Input1", "InputImg1", + kernels::BufferType::IN_OUT_CHANNEL); OpDefBuilder("Eltwise", "EltwiseTest") .Input("InputImg0") .Input("InputImg1") @@ -48,9 +48,13 @@ void EltwiseBenchmark( .Output("OutputImg") .Finalize(net.NewOperatorDef()); } else { + net.TransformDataFormat("Input0", NHWC, + "TInput0", NCHW); + net.TransformDataFormat("Input1", NHWC, + "TInput1", NCHW); OpDefBuilder("Eltwise", "EltwiseTest") - .Input("Input0") - .Input("Input1") + .Input("TInput0") + .Input("TInput1") .AddIntArg("type", static_cast(type)) .AddFloatsArg("coeff", {1.2, 2.1}) .AddIntArg("T", static_cast(DataTypeToEnum::value)) @@ -89,13 +93,13 @@ void EltwiseBenchmark( BM_ELTWISE_MACRO(ELT_TYPE, N, H, W, C, float, OPENCL); \ BM_ELTWISE_MACRO(ELT_TYPE, N, H, W, C, half, OPENCL); -BM_ELTWISE(0, 1, 256, 256, 32); -BM_ELTWISE(0, 1, 128, 128, 32); -BM_ELTWISE(1, 1, 128, 128, 32); BM_ELTWISE(2, 1, 128, 128, 32); -BM_ELTWISE(0, 1, 240, 240, 256); -BM_ELTWISE(1, 1, 240, 240, 256); BM_ELTWISE(2, 1, 240, 240, 256); +BM_ELTWISE(2, 1, 256, 256, 32); +BM_ELTWISE(0, 1, 128, 128, 32); +BM_ELTWISE(0, 1, 240, 240, 256); +BM_ELTWISE(5, 1, 128, 128, 32); +BM_ELTWISE(5, 1, 240, 240, 256); } // namespace test } // namespace ops diff --git a/mace/ops/eltwise_test.cc b/mace/ops/eltwise_test.cc index 6dd3b33d..edf457e8 100644 --- a/mace/ops/eltwise_test.cc +++ b/mace/ops/eltwise_test.cc @@ -23,44 +23,98 @@ namespace test { class EltwiseOpTest : public OpsTestBase {}; namespace { -template -void Simple(const kernels::EltwiseType type, - const std::vector &shape0, - const std::vector &shape1, - const std::vector &input0, - const std::vector &input1, - const std::vector &output, - const float x = 1.f, - const std::vector coeff = {}) { +template +void SimpleTensorScalar(const kernels::EltwiseType type, + const std::vector &shape, + const std::vector &input, + const float x, + const std::vector &output) { // Construct graph OpsTestNet net; // Add input data - net.AddInputFromArray("Input1", shape0, input0); - net.AddInputFromArray("Input2", shape1, input1); + net.AddInputFromArray("Input", shape, input); if (D == DeviceType::CPU) { + net.TransformDataFormat("Input", + NHWC, + "TInput", + NCHW); + OpDefBuilder("Eltwise", "EltwiseTest") + .Input("TInput") + .AddIntArg("type", static_cast(type)) + .AddFloatArg("x", x) + .Output("TOutput") + .Finalize(net.NewOperatorDef()); + // Run + net.RunOp(D); + net.TransformDataFormat("TOutput", + NCHW, + "Output", + NHWC); + } else { + BufferToImage(&net, "Input", "InputImg", + kernels::BufferType::IN_OUT_CHANNEL); OpDefBuilder("Eltwise", "EltwiseTest") - .Input("Input1") - .Input("Input2") + .Input("InputImg") .AddIntArg("type", static_cast(type)) .AddFloatArg("x", x) + .Output("OutputImg") + .Finalize(net.NewOperatorDef()); + + // Run + net.RunOp(D); + + ImageToBuffer(&net, "OutputImg", "Output", + kernels::BufferType::IN_OUT_CHANNEL); + } + + auto expected = CreateTensor(shape, output); + + ExpectTensorNear(*expected, *net.GetOutput("Output"), 1e-5); +} + +template +void SimpleTensorEltwise(const kernels::EltwiseType type, + const std::vector &shape0, + const std::vector &input0, + const std::vector &shape1, + const std::vector &input1, + const std::vector &output, + const std::vector &coeff = {}) { + // Construct graph + OpsTestNet net; + + // Add input data + net.AddInputFromArray("Input0", shape0, input0); + net.AddInputFromArray("Input1", shape1, input1); + + if (D == DeviceType::CPU) { + net.TransformDataFormat("Input0", NHWC, + "TInput0", NCHW); + net.TransformDataFormat("Input1", NHWC, + "TInput1", NCHW); + OpDefBuilder("Eltwise", "EltwiseTest") + .Input("TInput0") + .Input("TInput1") + .AddIntArg("type", static_cast(type)) .AddFloatsArg("coeff", coeff) - .Output("Output") + .Output("TOutput") .Finalize(net.NewOperatorDef()); // Run net.RunOp(D); + net.TransformDataFormat("TOutput", NCHW, + "Output", NHWC); } else { - BufferToImage(&net, "Input1", "InputImg1", + BufferToImage(&net, "Input0", "InputImg0", kernels::BufferType::IN_OUT_CHANNEL); - BufferToImage(&net, "Input2", "InputImg2", + BufferToImage(&net, "Input1", "InputImg1", kernels::BufferType::IN_OUT_CHANNEL); OpDefBuilder("Eltwise", "EltwiseTest") + .Input("InputImg0") .Input("InputImg1") - .Input("InputImg2") .AddIntArg("type", static_cast(type)) - .AddFloatArg("x", x) .AddFloatsArg("coeff", coeff) .Output("OutputImg") .Finalize(net.NewOperatorDef()); @@ -72,357 +126,474 @@ void Simple(const kernels::EltwiseType type, kernels::BufferType::IN_OUT_CHANNEL); } - auto expected = CreateTensor(shape0, output); + std::vector output_shape = shape0; + if (input0.size() < input1.size()) { + output_shape = shape1; + } + auto expected = CreateTensor(output_shape, output); ExpectTensorNear(*expected, *net.GetOutput("Output"), 1e-5); } } // namespace -TEST_F(EltwiseOpTest, CPUSimple) { - Simple(kernels::EltwiseType::PROD, {1, 1, 2, 3}, - {1, 1, 2, 3}, - {1, 2, 3, 4, 5, 6}, {1, 2, 3, 4, 5, 6}, - {1, 4, 9, 16, 25, 36}); - Simple(kernels::EltwiseType::SUM, {1, 1, 2, 3}, - {1, 1, 2, 3}, - {1, 2, 3, 4, 5, 6}, {1, 2, 3, 4, 5, 6}, - {2, 4, 6, 8, 10, 12}); - Simple(kernels::EltwiseType::SUM, {1, 1, 2, 3}, - {1, 1, 2, 3}, - {1, 2, 3, 4, 5, 6}, {1, 2, 3, 4, 5, 6}, - {3, 6, 9, 12, 15, 18}, 1., {2, 1}); - Simple(kernels::EltwiseType::MAX, {1, 1, 2, 3}, - {1, 1, 2, 3}, - {1, 2, 3, 4, 5, 6}, {1, 1, 3, 3, 6, 6}, - {1, 2, 3, 4, 6, 6}); - Simple(kernels::EltwiseType::MIN, {1, 1, 2, 3}, - {1, 1, 2, 3}, - {1, 2, 3, 4, 5, 6}, {1, 1, 3, 3, 6, 6}, - {1, 1, 3, 3, 5, 6}); - Simple(kernels::EltwiseType::SQR_DIFF, {1, 1, 2, 3}, - {1, 1, 2, 3}, - {1, 2, 3, 4, 5, 6}, {1, 1, 3, 3, 6, 6}, - {0, 1, 0, 1, 1, 0}); - Simple(kernels::EltwiseType::DIV, {1, 1, 2, 3}, - {1, 1, 2, 3}, - {1, 2, 3, 4, 5, 6}, {1, 1, 3, 2, 10, 24}, - {1, 2, 1, 2, 0.5, 0.25}); - - Simple(kernels::EltwiseType::PROD, {1, 1, 2, 3}, - {1, 1, 1, 3}, - {1, 2, 3, 4, 5, 6}, {1, 2, 3}, - {1, 4, 9, 4, 10, 18}); - Simple(kernels::EltwiseType::SUM, {1, 1, 2, 3}, - {1, 1, 1, 3}, - {1, 2, 3, 4, 5, 6}, {1, 2, 3}, - {2, 4, 6, 5, 7, 9}); - Simple(kernels::EltwiseType::SUM, {1, 1, 2, 3}, - {1, 1, 1, 3}, - {1, 2, 3, 4, 5, 6}, {1, 2, 3}, - {3, 6, 9, 9, 12, 15}, 1., {2, 1}); - Simple(kernels::EltwiseType::MAX, {1, 1, 2, 3}, - {1, 1, 1, 3}, - {1, 2, 3, 4, 5, 6}, {1, 1, 3}, - {1, 2, 3, 4, 5, 6}); - Simple(kernels::EltwiseType::MIN, {1, 1, 2, 3}, - {1, 1, 1, 3}, - {1, 2, 3, 4, 5, 6}, {1, 1, 3}, - {1, 1, 3, 1, 1, 3}); - Simple(kernels::EltwiseType::SQR_DIFF, {1, 1, 2, 3}, - {1, 1, 1, 3}, - {1, 2, 3, 4, 5, 6}, {1, 1, 3}, - {0, 1, 0, 9, 16, 9}); - Simple(kernels::EltwiseType::DIV, {1, 1, 2, 3}, - {1, 1, 1, 3}, - {1, 2, 3, 4, 5, 6}, {1, 1, 3}, - {1, 2, 1, 4, 5, 2}); - - Simple(kernels::EltwiseType::PROD, {1, 1, 2, 3}, - {1, 1, 1, 1}, - {1, 2, 3, 4, 5, 6}, {2}, - {2, 4, 6, 8, 10, 12}, 2); - Simple(kernels::EltwiseType::SUM, {1, 1, 2, 3}, - {1, 1, 1, 1}, - {1, 2, 3, 4, 5, 6}, {2}, - {3, 4, 5, 6, 7, 8}, 2); - Simple(kernels::EltwiseType::SUM, {1, 1, 2, 3}, - {1, 1, 1, 1}, - {1, 2, 3, 4, 5, 6}, {2}, - {4, 6, 8, 10, 12, 14}, 2, {2, 1}); - Simple(kernels::EltwiseType::MAX, {1, 1, 2, 3}, - {1, 1, 1, 1}, - {1, 2, 3, 4, 5, 6}, {3}, - {3, 3, 3, 4, 5, 6}, 3); - Simple(kernels::EltwiseType::MIN, {1, 1, 2, 3}, - {1, 1, 1, 1}, - {1, 2, 3, 4, 5, 6}, {3}, - {1, 2, 3, 3, 3, 3}, 3); - Simple(kernels::EltwiseType::DIV, {1, 1, 2, 3}, - {1, 1, 1, 1}, - {1, 2, 3, 4, 5, 6}, {0.5}, - {2, 4, 6, 8, 10, 12}, 0.5); - Simple(kernels::EltwiseType::SQR_DIFF, {1, 1, 2, 3}, - {1, 1, 1, 1}, - {1, 2, 3, 4, 5, 6}, {3}, - {4, 1, 0, 1, 4, 9}, 3); +TEST_F(EltwiseOpTest, CPUSimpleTensorScalar) { + SimpleTensorScalar(kernels::EltwiseType::SUM, + {1, 1, 1, 1}, {1}, 1, + {2}); + SimpleTensorScalar(kernels::EltwiseType::SUB, + {1, 1, 2, 3}, + {1, 2, 3, 4, 5, 6}, + 1, + {0, 1, 2, 3, 4, 5}); + SimpleTensorScalar(kernels::EltwiseType::PROD, + {1, 1, 2, 3}, + {1, 2, 3, 4, 5, 6}, + 2, + {2, 4, 6, 8, 10, 12}); + SimpleTensorScalar(kernels::EltwiseType::DIV, + {1, 1, 2, 3}, + {2, 4, 6, 8, 10, 12}, + 2, + {1, 2, 3, 4, 5, 6}); + SimpleTensorScalar(kernels::EltwiseType::MIN, + {1, 1, 2, 3}, + {1, 2, 3, 4, 5, 6}, + 1, + {1, 1, 1, 1, 1, 1}); + SimpleTensorScalar(kernels::EltwiseType::MAX, + {1, 1, 2, 3}, + {1, 2, 3, 4, 5, 6}, + 3, + {3, 3, 3, 4, 5, 6}); + SimpleTensorScalar(kernels::EltwiseType::NEG, + {1, 1, 2, 3}, + {1, 2, 3, 4, 5, 6}, + 3, + {-1, -2, -3, -4, -5, -6}); + SimpleTensorScalar(kernels::EltwiseType::ABS, + {1, 1, 2, 3}, + {-1, -2, -3, -4, -5, -6}, + 3, + {1, 2, 3, 4, 5, 6}); + SimpleTensorScalar(kernels::EltwiseType::SQR_DIFF, + {1, 1, 2, 3}, + {1, 2, 3, 4, 5, 6}, + 1, + {0, 1, 4, 9, 16, 25}); } -TEST_F(EltwiseOpTest, GPUSimple) { - Simple(kernels::EltwiseType::PROD, {1, 1, 2, 3}, - {1, 1, 2, 3}, - {1, 2, 3, 4, 5, 6}, {1, 2, 3, 4, 5, 6}, - {1, 4, 9, 16, 25, 36}); - Simple(kernels::EltwiseType::SUM, {1, 1, 2, 3}, - {1, 1, 2, 3}, - {1, 2, 3, 4, 5, 6}, {1, 2, 3, 4, 5, 6}, - {2, 4, 6, 8, 10, 12}); - Simple(kernels::EltwiseType::SUM, {1, 1, 2, 3}, - {1, 1, 2, 3}, - {1, 2, 3, 4, 5, 6}, {1, 2, 3, 4, 5, 6}, - {3, 6, 9, 12, 15, 18}, 1., {2, 1}); - Simple(kernels::EltwiseType::MAX, {1, 1, 2, 3}, - {1, 1, 2, 3}, - {1, 2, 3, 4, 5, 6}, {1, 1, 3, 3, 6, 6}, - {1, 2, 3, 4, 6, 6}); - Simple(kernels::EltwiseType::MIN, {1, 1, 2, 3}, - {1, 1, 2, 3}, - {1, 2, 3, 4, 5, 6}, {1, 1, 3, 3, 6, 6}, - {1, 1, 3, 3, 5, 6}); - Simple(kernels::EltwiseType::DIV, {1, 1, 2, 3}, - {1, 1, 2, 3}, - {1, 2, 3, 4, 5, 6}, {1, 1, 3, 2, 10, 24}, - {1, 2, 1, 2, 0.5, 0.25}); - Simple(kernels::EltwiseType::SQR_DIFF, {1, 1, 2, 3}, - {1, 1, 2, 3}, - {1, 2, 3, 4, 5, 6}, {1, 1, 3, 3, 6, 6}, - {0, 1, 0, 1, 1, 0}); - - Simple(kernels::EltwiseType::PROD, {1, 1, 2, 3}, - {1, 1, 1, 3}, - {1, 2, 3, 4, 5, 6}, {1, 2, 3}, - {1, 4, 9, 4, 10, 18}); - Simple(kernels::EltwiseType::SUM, {1, 1, 2, 3}, - {1, 1, 1, 3}, - {1, 2, 3, 4, 5, 6}, {1, 2, 3}, - {2, 4, 6, 5, 7, 9}); - Simple(kernels::EltwiseType::SUM, {1, 1, 2, 3}, - {1, 1, 1, 3}, - {1, 2, 3, 4, 5, 6}, {1, 2, 3}, - {3, 6, 9, 9, 12, 15}, 1., {2, 1}); - Simple(kernels::EltwiseType::MAX, {1, 1, 2, 3}, - {1, 1, 1, 3}, - {1, 2, 3, 4, 5, 6}, {1, 1, 3}, - {1, 2, 3, 4, 5, 6}); - Simple(kernels::EltwiseType::MIN, {1, 1, 2, 3}, - {1, 1, 1, 3}, - {1, 2, 3, 4, 5, 6}, {1, 1, 3}, - {1, 1, 3, 1, 1, 3}); - Simple(kernels::EltwiseType::SQR_DIFF, {1, 1, 2, 3}, - {1, 1, 1, 3}, - {1, 2, 3, 4, 5, 6}, {1, 1, 3}, - {0, 1, 0, 9, 16, 9}); - Simple(kernels::EltwiseType::DIV, {1, 1, 2, 3}, - {1, 1, 1, 3}, - {1, 2, 3, 4, 5, 6}, {1, 1, 3}, - {1, 2, 1, 4, 5, 2}); - - Simple(kernels::EltwiseType::PROD, {1, 1, 2, 3}, - {1, 1, 1, 1}, - {1, 2, 3, 4, 5, 6}, {2}, - {2, 4, 6, 8, 10, 12}, 2); - Simple(kernels::EltwiseType::SUM, {1, 1, 2, 3}, - {1, 1, 1, 1}, - {1, 2, 3, 4, 5, 6}, {2}, - {3, 4, 5, 6, 7, 8}, 2); - Simple(kernels::EltwiseType::SUM, {1, 1, 2, 3}, - {1, 1, 1, 1}, - {1, 2, 3, 4, 5, 6}, {2}, - {4, 6, 8, 10, 12, 14}, 2, {2, 1}); - Simple(kernels::EltwiseType::MAX, {1, 1, 2, 3}, - {1, 1, 1, 1}, - {1, 2, 3, 4, 5, 6}, {3}, - {3, 3, 3, 4, 5, 6}, 3); - Simple(kernels::EltwiseType::MIN, {1, 1, 2, 3}, - {1, 1, 1, 1}, - {1, 2, 3, 4, 5, 6}, {3}, - {1, 2, 3, 3, 3, 3}, 3); - Simple(kernels::EltwiseType::SQR_DIFF, {1, 1, 2, 3}, - {1, 1, 1, 1}, - {1, 2, 3, 4, 5, 6}, {3}, - {4, 1, 0, 1, 4, 9}, 3); - Simple(kernels::EltwiseType::DIV, {1, 1, 2, 3}, - {1, 1, 1, 1}, - {1, 2, 3, 4, 5, 6}, {0.5}, - {2, 4, 6, 8, 10, 12}, 0.5); +TEST_F(EltwiseOpTest, GPUSimpleTensorScalar) { + SimpleTensorScalar(kernels::EltwiseType::SUM, + {1, 1, 1, 1}, {1}, 1, + {2}); + SimpleTensorScalar(kernels::EltwiseType::SUB, + {1, 1, 2, 3}, + {1, 2, 3, 4, 5, 6}, + 1, + {0, 1, 2, 3, 4, 5}); + SimpleTensorScalar(kernels::EltwiseType::PROD, + {1, 1, 2, 3}, + {1, 2, 3, 4, 5, 6}, + 2, + {2, 4, 6, 8, 10, 12}); + SimpleTensorScalar(kernels::EltwiseType::DIV, + {1, 1, 2, 3}, + {2, 4, 6, 8, 10, 12}, + 2, + {1, 2, 3, 4, 5, 6}); + SimpleTensorScalar(kernels::EltwiseType::MIN, + {1, 1, 2, 3}, + {1, 2, 3, 4, 5, 6}, + 1, + {1, 1, 1, 1, 1, 1}); + SimpleTensorScalar(kernels::EltwiseType::MAX, + {1, 1, 2, 3}, + {1, 2, 3, 4, 5, 6}, + 3, + {3, 3, 3, 4, 5, 6}); + SimpleTensorScalar(kernels::EltwiseType::NEG, + {1, 1, 2, 3}, + {1, 2, 3, 4, 5, 6}, + 3, + {-1, -2, -3, -4, -5, -6}); + SimpleTensorScalar(kernels::EltwiseType::ABS, + {1, 1, 2, 3}, + {-1, -2, -3, -4, -5, -6}, + 3, + {1, 2, 3, 4, 5, 6}); + SimpleTensorScalar(kernels::EltwiseType::SQR_DIFF, + {1, 1, 2, 3}, + {1, 2, 3, 4, 5, 6}, + 1, + {0, 1, 4, 9, 16, 25}); } -namespace { -template -void RandomTest(const kernels::EltwiseType type, - const std::vector &shape1, - const std::vector &shape2) { - testing::internal::LogToStderr(); - srand(time(NULL)); +TEST_F(EltwiseOpTest, CPUSimpleTensorVector) { + SimpleTensorEltwise(kernels::EltwiseType::SUM, + {1, 1, 2, 3}, {1, 2, 3, 4, 5, 6}, + {1, 1, 1, 3}, {1, 2, 3}, + {2, 4, 6, 5, 7, 9}); + SimpleTensorEltwise(kernels::EltwiseType::SUB, + {1, 2, 1, 5}, + {1, 2, 3, 4, 5, 6, 7, 8, 9, 10}, + {1, 1, 1, 5}, {1, 2, 3, 4, 5}, + {0, 0, 0, 0, 0, 5, 5, 5, 5, 5}); + SimpleTensorEltwise(kernels::EltwiseType::PROD, + {1, 1, 1, 3}, {1, 2, 3}, + {1, 2, 1, 3}, {1, 2, 3, 4, 5, 6}, + {1, 4, 9, 4, 10, 18}); + SimpleTensorEltwise(kernels::EltwiseType::DIV, + {1, 2, 1, 5}, + {1, 2, 3, 4, 5, 6, 7, 8, 9, 10}, + {1, 1, 1, 5}, {1, 1, 1, 1, 5}, + {1, 2, 3, 4, 1, 6, 7, 8, 9, 2}); + SimpleTensorEltwise(kernels::EltwiseType::MIN, + {1, 1, 1, 5}, {1, 2, 3, 4, 5}, + {1, 2, 1, 5}, + {1, 2, 3, 4, 5, 6, 7, 8, 9, 10}, + {1, 2, 3, 4, 5, 1, 2, 3, 4, 5}); + SimpleTensorEltwise(kernels::EltwiseType::MAX, + {1, 2, 1, 5}, + {1, 2, 3, 4, 5, 6, 7, 8, 9, 10}, + {1, 1, 1, 5}, {1, 2, 3, 4, 5}, + {1, 2, 3, 4, 5, 6, 7, 8, 9, 10}); + SimpleTensorEltwise(kernels::EltwiseType::SQR_DIFF, + {1, 1, 1, 5}, {1, 2, 3, 4, 5}, + {1, 2, 1, 5}, + {1, 2, 3, 4, 5, 6, 7, 8, 9, 10}, + {0, 0, 0, 0, 0, 25, 25, 25, 25, + 25}); +} +TEST_F(EltwiseOpTest, GPUSimpleTensorVector) { + SimpleTensorEltwise( + kernels::EltwiseType::SUM, + {1, 1, 2, 3}, {1, 2, 3, 4, 5, 6}, + {1, 1, 1, 3}, {1, 2, 3}, + {2, 4, 6, 5, 7, 9}); + SimpleTensorEltwise( + kernels::EltwiseType::SUB, + {1, 2, 1, 5}, {1, 2, 3, 4, 5, 6, 7, 8, 9, 10}, + {1, 1, 1, 5}, {1, 2, 3, 4, 5}, + {0, 0, 0, 0, 0, 5, 5, 5, 5, 5}); + SimpleTensorEltwise( + kernels::EltwiseType::PROD, + {1, 1, 1, 3}, {1, 2, 3}, + {1, 2, 1, 3}, {1, 2, 3, 4, 5, 6}, + {1, 4, 9, 4, 10, 18}); + SimpleTensorEltwise( + kernels::EltwiseType::DIV, + {1, 2, 1, 5}, {1, 2, 3, 4, 5, 6, 7, 8, 9, 10}, + {1, 1, 1, 5}, {1, 1, 1, 1, 5}, + {1, 2, 3, 4, 1, 6, 7, 8, 9, 2}); + SimpleTensorEltwise( + kernels::EltwiseType::MIN, + {1, 1, 1, 5}, {1, 2, 3, 4, 5}, + {1, 2, 1, 5}, {1, 2, 3, 4, 5, 6, 7, 8, 9, 10}, + {1, 2, 3, 4, 5, 1, 2, 3, 4, 5}); + SimpleTensorEltwise( + kernels::EltwiseType::MAX, + {1, 2, 1, 5}, {1, 2, 3, 4, 5, 6, 7, 8, 9, 10}, + {1, 1, 1, 5}, {1, 2, 3, 4, 5}, + {1, 2, 3, 4, 5, 6, 7, 8, 9, 10}); + SimpleTensorEltwise( + kernels::EltwiseType::SQR_DIFF, + {1, 1, 1, 5}, {1, 2, 3, 4, 5}, + {1, 2, 1, 5}, {1, 2, 3, 4, 5, 6, 7, 8, 9, 10}, + {0, 0, 0, 0, 0, 25, 25, 25, 25, 25}); +} + +TEST_F(EltwiseOpTest, CPUSimpleTensorTensor) { + SimpleTensorEltwise(kernels::EltwiseType::SUM, + {1, 1, 2, 3}, {1, 2, 3, 4, 5, 6}, + {1, 1, 2, 3}, {1, 2, 3, 4, 5, 6}, + {2, 4, 6, 8, 10, 12}); + SimpleTensorEltwise(kernels::EltwiseType::SUM, + {1, 1, 2, 3}, {1, 2, 3, 4, 5, 6}, + {1, 1, 2, 3}, {1, 2, 3, 4, 5, 6}, + {0.2, 0.4, 0.6, 0.8, 1, 1.2}, + {0.1, 0.1}); + SimpleTensorEltwise(kernels::EltwiseType::SUB, + {1, 1, 1, 5}, {1, 2, 3, 4, 5}, + {1, 1, 1, 5}, {1, 2, 3, 4, 5}, + {0, 0, 0, 0, 0}); + SimpleTensorEltwise(kernels::EltwiseType::PROD, + {1, 2, 1, 3}, {1, 2, 3, 4, 5, 6}, + {1, 2, 1, 3}, {1, 2, 3, 4, 5, 6}, + {1, 4, 9, 16, 25, 36}); + SimpleTensorEltwise(kernels::EltwiseType::DIV, + {1, 2, 1, 3}, {1, 2, 3, 4, 5, 6}, + {1, 2, 1, 3}, {1, 2, 3, 4, 5, 6}, + {1, 1, 1, 1, 1, 1}); + SimpleTensorEltwise(kernels::EltwiseType::MIN, + {1, 2, 1, 5}, + {1, 2, 3, 4, 5, 1, 2, 3, 4, 5}, + {1, 2, 1, 5}, + {1, 2, 3, 4, 5, 6, 7, 8, 9, 10}, + {1, 2, 3, 4, 5, 1, 2, 3, 4, 5}); + SimpleTensorEltwise(kernels::EltwiseType::MAX, + {1, 2, 1, 5}, + {1, 2, 3, 4, 5, 6, 7, 8, 9, 10}, + {1, 2, 1, 5}, + {1, 2, 3, 4, 5, 1, 2, 3, 4, 5}, + {1, 2, 3, 4, 5, 6, 7, 8, 9, 10}); + SimpleTensorEltwise(kernels::EltwiseType::SQR_DIFF, + {1, 2, 1, 5}, + {1, 2, 3, 4, 5, 1, 2, 3, 4, 5}, + {1, 2, 1, 5}, + {1, 2, 3, 4, 5, 6, 7, 8, 9, 10}, + {0, 0, 0, 0, 0, 25, 25, 25, 25, + 25}); +} +TEST_F(EltwiseOpTest, GPUSimpleTensorTensor) { + SimpleTensorEltwise( + kernels::EltwiseType::SUM, + {1, 1, 2, 3}, {1, 2, 3, 4, 5, 6}, + {1, 1, 2, 3}, {1, 2, 3, 4, 5, 6}, + {2, 4, 6, 8, 10, 12}); + SimpleTensorEltwise( + kernels::EltwiseType::SUM, + {1, 1, 2, 3}, {1, 2, 3, 4, 5, 6}, + {1, 1, 2, 3}, {1, 2, 3, 4, 5, 6}, + {0.2, 0.4, 0.6, 0.8, 1, 1.2}, + {0.1, 0.1}); + SimpleTensorEltwise( + kernels::EltwiseType::SUB, + {1, 1, 1, 5}, {1, 2, 3, 4, 5}, + {1, 1, 1, 5}, {1, 2, 3, 4, 5}, + {0, 0, 0, 0, 0}); + SimpleTensorEltwise( + kernels::EltwiseType::PROD, + {1, 2, 1, 3}, {1, 2, 3, 4, 5, 6}, + {1, 2, 1, 3}, {1, 2, 3, 4, 5, 6}, + {1, 4, 9, 16, 25, 36}); + SimpleTensorEltwise( + kernels::EltwiseType::DIV, + {1, 2, 1, 3}, {1, 2, 3, 4, 5, 6}, + {1, 2, 1, 3}, {1, 2, 3, 4, 5, 6}, + {1, 1, 1, 1, 1, 1}); + SimpleTensorEltwise( + kernels::EltwiseType::MIN, + {1, 2, 1, 5}, {1, 2, 3, 4, 5, 1, 2, 3, 4, 5}, + {1, 2, 1, 5}, {1, 2, 3, 4, 5, 6, 7, 8, 9, 10}, + {1, 2, 3, 4, 5, 1, 2, 3, 4, 5}); + SimpleTensorEltwise( + kernels::EltwiseType::MAX, + {1, 2, 1, 5}, {1, 2, 3, 4, 5, 6, 7, 8, 9, 10}, + {1, 2, 1, 5}, {1, 2, 3, 4, 5, 1, 2, 3, 4, 5}, + {1, 2, 3, 4, 5, 6, 7, 8, 9, 10}); + SimpleTensorEltwise( + kernels::EltwiseType::SQR_DIFF, + {1, 2, 1, 5}, {1, 2, 3, 4, 5, 1, 2, 3, 4, 5}, + {1, 2, 1, 5}, {1, 2, 3, 4, 5, 6, 7, 8, 9, 10}, + {0, 0, 0, 0, 0, 25, 25, 25, 25, 25}); +} + +namespace { +template +void RandomTensorScalar(const kernels::EltwiseType type, + const std::vector &shape) { // Construct graph OpsTestNet net; - bool is_divide = (type == kernels::EltwiseType::DIV); - // Add input data - net.AddRandomInput("Input1", shape1, true, is_divide); - net.AddRandomInput("Input2", shape2, true, is_divide); + net.AddRandomInput("Input", shape, true, true); + net.TransformDataFormat("Input", + NHWC, + "TInput", + NCHW); + OpDefBuilder("Eltwise", "EltwiseTest") + .Input("TInput") + .AddIntArg("type", static_cast(type)) + .AddFloatArg("x", 0.1) + .Output("TOutput") + .Finalize(net.NewOperatorDef()); + // Run + net.RunOp(DeviceType::CPU); + net.TransformDataFormat("TOutput", + NCHW, + "Output", + NHWC); + Tensor expected; + expected.Copy(*net.GetOutput("Output")); + + BufferToImage(&net, "Input", "InputImg", + kernels::BufferType::IN_OUT_CHANNEL); + OpDefBuilder("Eltwise", "EltwiseTest") + .Input("InputImg") + .AddIntArg("type", static_cast(type)) + .AddFloatArg("x", 0.1) + .Output("OutputImg") + .AddIntArg("T", static_cast(DataTypeToEnum::value)) + .Finalize(net.NewOperatorDef()); + // Run + net.RunOp(DeviceType::OPENCL); + + ImageToBuffer(&net, "OutputImg", "OPENCLOutput", + kernels::BufferType::IN_OUT_CHANNEL); + if (DataTypeToEnum::value == DT_FLOAT) { + ExpectTensorNear(expected, *net.GetOutput("OPENCLOutput"), 1e-5); + } else { + ExpectTensorNear(expected, *net.GetOutput("OPENCLOutput"), 1e-2, + 1e-2); + } +} + +template +void RandomTensorEltwise(const kernels::EltwiseType type, + const std::vector &shape0, + const std::vector &shape1, + const std::vector &coeff = {}) { + // Construct graph + OpsTestNet net; + + // Add input data + net.AddRandomInput("Input0", shape0, true, true); + net.AddRandomInput("Input1", shape1, true, true); + + net.TransformDataFormat("Input0", NHWC, + "TInput0", NCHW); + net.TransformDataFormat("Input1", NHWC, + "TInput1", NCHW); OpDefBuilder("Eltwise", "EltwiseTest") - .Input("Input1") - .Input("Input2") + .Input("TInput0") + .Input("TInput1") .AddIntArg("type", static_cast(type)) - .AddFloatsArg("coeff", {1.2, 2.1}) - .Output("Output") + .AddFloatsArg("coeff", coeff) + .Output("TOutput") .Finalize(net.NewOperatorDef()); // Run - net.RunOp(); - - BufferToImage(&net, "Input1", "InputImg1", - kernels::BufferType::IN_OUT_CHANNEL); - BufferToImage(&net, "Input2", "InputImg2", - kernels::BufferType::IN_OUT_CHANNEL); + net.RunOp(DeviceType::CPU); + net.TransformDataFormat("TOutput", NCHW, + "Output", NHWC); + Tensor expected; + expected.Copy(*net.GetOutput("Output")); + + BufferToImage(&net, "Input0", "InputImg0", + kernels::BufferType::IN_OUT_CHANNEL); + BufferToImage(&net, "Input1", "InputImg1", + kernels::BufferType::IN_OUT_CHANNEL); OpDefBuilder("Eltwise", "EltwiseTest") + .Input("InputImg0") .Input("InputImg1") - .Input("InputImg2") .AddIntArg("type", static_cast(type)) - .AddFloatsArg("coeff", {1.2, 2.1}) + .AddFloatsArg("coeff", coeff) .AddIntArg("T", static_cast(DataTypeToEnum::value)) .Output("OutputImg") .Finalize(net.NewOperatorDef()); // Run - net.RunOp(D); + net.RunOp(DeviceType::OPENCL); - ImageToBuffer(&net, "OutputImg", "OPENCLOutput", - kernels::BufferType::IN_OUT_CHANNEL); + ImageToBuffer(&net, "OutputImg", "OPENCLOutput", + kernels::BufferType::IN_OUT_CHANNEL); if (DataTypeToEnum::value == DT_FLOAT) { - ExpectTensorNear(*net.GetTensor("Output"), - *net.GetOutput("OPENCLOutput"), 1e-5, 1e-4); + ExpectTensorNear(expected, *net.GetOutput("OPENCLOutput"), 1e-5); } else { - ExpectTensorNear(*net.GetTensor("Output"), - *net.GetOutput("OPENCLOutput"), 1e-2, 1e-2); + ExpectTensorNear(expected, *net.GetOutput("OPENCLOutput"), 1e-2, + 1e-2); } } } // namespace -TEST_F(EltwiseOpTest, OPENCLRandomFloat) { - RandomTest(kernels::EltwiseType::PROD, - {3, 23, 37, 19}, - {3, 23, 37, 19}); - RandomTest(kernels::EltwiseType::SUM, - {13, 32, 32, 64}, - {13, 32, 32, 64}); - RandomTest(kernels::EltwiseType::MAX, - {3, 32, 32, 64}, - {3, 32, 32, 64}); - RandomTest(kernels::EltwiseType::MIN, - {13, 32, 32, 64}, - {13, 32, 32, 64}); - RandomTest(kernels::EltwiseType::DIV, - {13, 32, 32, 64}, - {13, 32, 32, 64}); - RandomTest(kernels::EltwiseType::SQR_DIFF, - {13, 32, 32, 64}, - {13, 32, 32, 64}); - RandomTest(kernels::EltwiseType::PROD, - {3, 23, 37, 19}, - {1, 1, 37, 19}); - RandomTest(kernels::EltwiseType::SUM, - {13, 32, 32, 64}, - {1, 1, 32, 64}); - RandomTest(kernels::EltwiseType::MAX, - {3, 32, 32, 64}, - {1, 1, 32, 64}); - RandomTest(kernels::EltwiseType::MIN, - {13, 32, 32, 64}, - {1, 1, 32, 64}); - RandomTest(kernels::EltwiseType::DIV, - {13, 32, 32, 63}, - {1, 1, 32, 63}); - RandomTest(kernels::EltwiseType::SQR_DIFF, - {13, 32, 32, 64}, - {1, 1, 32, 64}); - RandomTest(kernels::EltwiseType::PROD, - {3, 23, 37, 19}, - {1, 1, 1, 19}); - RandomTest(kernels::EltwiseType::SUM, - {13, 32, 32, 64}, - {1, 1, 1, 64}); - RandomTest(kernels::EltwiseType::MAX, - {3, 32, 32, 64}, - {1, 1, 1, 64}); - RandomTest(kernels::EltwiseType::MIN, - {13, 32, 32, 64}, - {1, 1, 1, 64}); - RandomTest(kernels::EltwiseType::DIV, - {13, 32, 32, 64}, - {1, 1, 1, 64}); - RandomTest(kernels::EltwiseType::SQR_DIFF, - {13, 32, 32, 64}, - {1, 1, 1, 64}); +TEST_F(EltwiseOpTest, RandomTensorScalarFloat) { + RandomTensorScalar(kernels::EltwiseType::SUM, {1, 32, 32, 16}); + RandomTensorScalar(kernels::EltwiseType::SUB, {3, 32, 32, 16}); + RandomTensorScalar(kernels::EltwiseType::PROD, {1, 31, 37, 17}); + RandomTensorScalar(kernels::EltwiseType::DIV, {3, 31, 37, 17}); + RandomTensorScalar(kernels::EltwiseType::MIN, {1, 32, 32, 16}); + RandomTensorScalar(kernels::EltwiseType::MAX, {3, 31, 37, 17}); + RandomTensorScalar(kernels::EltwiseType::NEG, {1, 32, 32, 32}); + RandomTensorScalar(kernels::EltwiseType::ABS, {3, 31, 37, 17}); + RandomTensorScalar(kernels::EltwiseType::SQR_DIFF, {3, 31, 37, 17}); } -TEST_F(EltwiseOpTest, OPENCLRandomHalf) { - RandomTest(kernels::EltwiseType::PROD, - {3, 23, 37, 19}, - {3, 23, 37, 19}); - RandomTest(kernels::EltwiseType::PROD, - {3, 23, 37, 19}, - {1, 23, 37, 19}); - RandomTest(kernels::EltwiseType::PROD, - {3, 23, 37, 19}, - {1, 1, 37, 19}); - RandomTest(kernels::EltwiseType::PROD, - {3, 23, 37, 19}, - {1, 1, 1, 19}); - RandomTest(kernels::EltwiseType::SUM, - {13, 32, 32, 64}, - {1, 1, 1, 1}); - RandomTest(kernels::EltwiseType::SUM, - {13, 32, 32, 64}, - {1, 1, 1, 64}); - RandomTest(kernels::EltwiseType::SUM, - {13, 32, 32, 64}, - {1, 1, 32, 64}); - RandomTest(kernels::EltwiseType::MAX, - {3, 32, 32, 64}, - {3, 32, 32, 64}); - RandomTest(kernels::EltwiseType::MAX, - {3, 32, 32, 64}, - {1, 1, 32, 64}); - RandomTest(kernels::EltwiseType::MIN, - {13, 32, 32, 64}, - {13, 32, 32, 64}); - RandomTest(kernels::EltwiseType::SQR_DIFF, - {13, 32, 32, 64}, - {13, 32, 32, 64}); - RandomTest(kernels::EltwiseType::SQR_DIFF, - {13, 32, 32, 64}, - {1, 1, 1, 64}); - RandomTest(kernels::EltwiseType::SQR_DIFF, - {13, 32, 32, 64}, - {1, 1, 32, 64}); - RandomTest(kernels::EltwiseType::DIV, - {13, 32, 32, 64}, - {13, 32, 32, 64}); - RandomTest(kernels::EltwiseType::DIV, - {13, 32, 32, 64}, - {1, 1, 1, 64}); - RandomTest(kernels::EltwiseType::DIV, - {13, 32, 32, 64}, - {1, 1, 32, 64}); +TEST_F(EltwiseOpTest, RandomTensorScalarHalf) { + RandomTensorScalar(kernels::EltwiseType::SUM, {1, 32, 32, 16}); + RandomTensorScalar(kernels::EltwiseType::SUB, {3, 32, 32, 16}); + RandomTensorScalar(kernels::EltwiseType::PROD, {1, 31, 37, 17}); + RandomTensorScalar(kernels::EltwiseType::DIV, {3, 31, 37, 17}); + RandomTensorScalar(kernels::EltwiseType::MIN, {1, 32, 32, 16}); + RandomTensorScalar(kernels::EltwiseType::MAX, {3, 31, 37, 17}); + RandomTensorScalar(kernels::EltwiseType::NEG, {1, 32, 32, 32}); + RandomTensorScalar(kernels::EltwiseType::ABS, {3, 31, 37, 17}); + RandomTensorScalar(kernels::EltwiseType::SQR_DIFF, {3, 31, 37, 17}); } +TEST_F(EltwiseOpTest, RandomTensorVecFloat) { + RandomTensorEltwise(kernels::EltwiseType::SUM, + {1, 32, 32, 16}, {1, 1, 1, 16}); + RandomTensorEltwise(kernels::EltwiseType::SUB, + {5, 32, 32, 16}, {5, 1, 1, 16}); + RandomTensorEltwise(kernels::EltwiseType::PROD, + {1, 1, 1, 17}, {1, 31, 37, 17}); + RandomTensorEltwise(kernels::EltwiseType::DIV, + {3, 1, 1, 17}, {3, 31, 37, 17}); + RandomTensorEltwise(kernels::EltwiseType::MIN, + {1, 1, 1, 16}, {1, 32, 32, 16}); + RandomTensorEltwise(kernels::EltwiseType::MAX, + {5, 31, 37, 17}, {5, 1, 1, 17}); + RandomTensorEltwise(kernels::EltwiseType::SQR_DIFF, + {5, 31, 37, 17}, {5, 1, 1, 17}); +} + +TEST_F(EltwiseOpTest, RandomTensorVecHalf) { + RandomTensorEltwise(kernels::EltwiseType::SUM, + {1, 32, 32, 16}, {1, 1, 1, 16}); + RandomTensorEltwise(kernels::EltwiseType::SUB, + {3, 32, 32, 16}, {3, 1, 1, 16}); + RandomTensorEltwise(kernels::EltwiseType::PROD, + {1, 1, 1, 17}, {1, 31, 37, 17}); + RandomTensorEltwise(kernels::EltwiseType::DIV, + {5, 1, 1, 17}, {5, 31, 37, 17}); + RandomTensorEltwise(kernels::EltwiseType::MIN, + {1, 1, 1, 16}, {1, 32, 32, 16}); + RandomTensorEltwise(kernels::EltwiseType::MAX, + {3, 31, 37, 17}, {3, 1, 1, 17}); + RandomTensorEltwise(kernels::EltwiseType::SQR_DIFF, + {3, 31, 37, 17}, {3, 1, 1, 17}); +} + +TEST_F(EltwiseOpTest, RandomTensorTensorFloat) { + RandomTensorEltwise(kernels::EltwiseType::SUM, + {1, 32, 32, 16}, {1, 32, 32, 16}); + RandomTensorEltwise(kernels::EltwiseType::SUB, + {3, 32, 32, 16}, {3, 32, 32, 16}); + RandomTensorEltwise(kernels::EltwiseType::PROD, + {1, 31, 37, 17}, {1, 31, 37, 17}); + RandomTensorEltwise(kernels::EltwiseType::DIV, + {5, 31, 37, 17}, {5, 31, 37, 17}); + RandomTensorEltwise(kernels::EltwiseType::MIN, + {1, 32, 32, 16}, {1, 32, 32, 16}); + RandomTensorEltwise(kernels::EltwiseType::MAX, + {3, 31, 37, 17}, {3, 31, 37, 17}); + RandomTensorEltwise(kernels::EltwiseType::SQR_DIFF, + {3, 31, 37, 17}, {3, 31, 37, 17}); +} + +TEST_F(EltwiseOpTest, RandomTensorTensorHalf) { + RandomTensorEltwise(kernels::EltwiseType::SUM, + {1, 32, 32, 16}, {1, 32, 32, 16}); + RandomTensorEltwise(kernels::EltwiseType::SUB, + {3, 32, 32, 16}, {3, 32, 32, 16}); + RandomTensorEltwise(kernels::EltwiseType::PROD, + {1, 31, 37, 17}, {1, 31, 37, 17}); + RandomTensorEltwise(kernels::EltwiseType::DIV, + {5, 31, 37, 17}, {5, 31, 37, 17}); + RandomTensorEltwise(kernels::EltwiseType::MIN, + {1, 32, 32, 16}, {1, 32, 32, 16}); + RandomTensorEltwise(kernels::EltwiseType::MAX, + {3, 31, 37, 17}, {3, 31, 37, 17}); + RandomTensorEltwise(kernels::EltwiseType::SQR_DIFF, + {3, 31, 37, 17}, {3, 31, 37, 17}); +} + + } // namespace test } // namespace ops } // namespace mace diff --git a/mace/python/tools/caffe_converter_lib.py b/mace/python/tools/caffe_converter_lib.py index cc961c36..7cdce0ec 100644 --- a/mace/python/tools/caffe_converter_lib.py +++ b/mace/python/tools/caffe_converter_lib.py @@ -41,6 +41,12 @@ activation_name_map = { 'TanH': 'TANH', } +math_type_mode = { + 0: 2, # PROD + 1: 0, # SUM + 2: 5, # MAX +} + MACE_INPUT_NODE_NAME = "mace_input_node" MACE_OUTPUT_NODE_NAME = "mace_output_node" @@ -922,11 +928,11 @@ class CaffeConverter(object): param = op.layer.eltwise_param type_arg = op_def.arg.add() type_arg.name = 'type' - type_arg.i = param.operation + type_arg.i = math_type_mode[param.operation] if len(param.coeff) > 0: coeff_arg = op_def.arg.add() coeff_arg.name = 'coeff' - coeff_arg.ints.extend(list(param.coeff)) + coeff_arg.floats.extend(list(param.coeff)) output_shape = op.parents[0].output_shape_map[op.layer.bottom[0]] op.output_shape_map[op.layer.top[0]] = output_shape diff --git a/mace/python/tools/tf_converter_lib.py b/mace/python/tools/tf_converter_lib.py index fc057dd0..fa2ce567 100644 --- a/mace/python/tools/tf_converter_lib.py +++ b/mace/python/tools/tf_converter_lib.py @@ -30,14 +30,14 @@ pooling_type_mode = {'AvgPool': 1, 'MaxPool': 2} # and also cwise type's in mace/kernels/cwise.h # cuz these math ops should have compatible with "EltWise" and "CWise" math_type_mode = { - 'MUL': 0, - 'ADD': 1, - 'MAX': 2, - 'MIN': 3, - 'SUB': 4, - 'DIV': 5, + 'ADD': 0, + 'SUB': 1, + 'MUL': 2, + 'DIV': 3, + 'MIN': 4, + 'MAX': 5, 'NEG': 6, - 'ABS': 7 + 'ABS': 7, } buffer_type_map = { @@ -836,18 +836,26 @@ class TFConverter(object): arg.i = self.dt op_def.name = op.name op_def.type = "Eltwise" - op_def.input.extend([input.name for input in op.inputs]) - x_value = op.get_attr('x') - if len(op.inputs) >= 2: + if len(op.inputs) == 2: input_tensor0 = get_input_tensor(op, 0) input_tensor1 = get_input_tensor(op, 1) - if len(input_tensor0) == 1: - x_value = input_tensor0.eval().astype(np.float32) - elif len(input_tensor1) == 1: - x_value = input_tensor1.eval().astype(np.float32) - x_arg = op_def.arg.add() - x_arg.name = 'x' - x_arg.f = x_value + x_value = None + if np.asarray(input_tensor1.shape).size == 0: + x_value = input_tensor1.eval() + op_def.input.extend([op.inputs[0].name]) + self.unused_tensor.add(input_tensor1.name) + elif np.asarray(input_tensor0.shape).size == 0: + x_value = input_tensor0.eval() + op_def.input.extend([op.inputs[1].name]) + self.unused_tensor.add(input_tensor0.name) + else: + op_def.input.extend([input.name for input in op.inputs]) + if x_value is not None: + x_arg = op_def.arg.add() + x_arg.name = 'x' + x_arg.f = x_value + else: + op_def.input.extend([input.name for input in op.inputs]) type_arg = op_def.arg.add() type_arg.name = 'type' type_arg.i = math_type_mode[math_type] -- GitLab