From dd2cdb013d0bb6858a15500349ab07b3f589e2f3 Mon Sep 17 00:00:00 2001
From: luxuhui
Date: Wed, 21 Oct 2020 13:59:21 +0800
Subject: [PATCH] feat: add Elu for TensorFlow and Caffe, and fix bugs in the
 reverse op and net adapter

N/A

Signed-off-by: Luxuhui
---
 mace/core/net_def_adapter.cc                  |  2 +-
 mace/ops/activation.cc                        | 16 +++----
 mace/ops/activation.h                         | 22 ++++-----
 mace/ops/arm/base/activation.cc               | 33 +++++++++++--
 mace/ops/arm/base/activation.h                |  2 +
 mace/ops/arm/q8/activation.cc                 | 10 ++++
 mace/ops/batch_norm.cc                        |  8 ++--
 mace/ops/conv_2d.cc                           | 16 +++----
 mace/ops/deconv_2d.cc                         |  6 +--
 mace/ops/deconv_2d.h                          |  6 +--
 mace/ops/delegator/activation.h               | 10 ++--
 mace/ops/depthwise_conv2d.cc                  | 12 ++---
 mace/ops/depthwise_deconv2d.cc                |  6 +--
 mace/ops/fully_connected.cc                   | 13 +++--
 mace/ops/opencl/buffer/conv_2d.cc             |  6 +--
 mace/ops/opencl/buffer/conv_2d.h              |  6 +--
 mace/ops/opencl/buffer/conv_2d_1x1.cc         |  7 ++-
 mace/ops/opencl/buffer/conv_2d_general.cc     |  7 ++-
 mace/ops/opencl/buffer/depthwise_conv2d.cc    | 11 +++--
 mace/ops/opencl/buffer/depthwise_conv2d.h     |  4 +-
 mace/ops/opencl/cl/activation.cl              | 10 ++--
 mace/ops/opencl/cl/batch_norm.cl              |  6 +--
 mace/ops/opencl/cl/common.h                   |  9 ++--
 mace/ops/opencl/cl/conv_2d.cl                 | 12 ++---
 mace/ops/opencl/cl/conv_2d_1x1.cl             | 12 ++---
 mace/ops/opencl/cl/conv_2d_1x1_buffer.cl      |  8 ++--
 mace/ops/opencl/cl/conv_2d_3x3.cl             | 14 +++---
 mace/ops/opencl/cl/conv_2d_buffer.cl          | 12 ++---
 mace/ops/opencl/cl/deconv_2d.cl               | 14 +++---
 mace/ops/opencl/cl/depthwise_conv2d.cl        | 24 +++++-----
 mace/ops/opencl/cl/depthwise_conv2d_buffer.cl | 12 ++---
 mace/ops/opencl/cl/depthwise_deconv2d.cl      | 14 +++---
 mace/ops/opencl/cl/fully_connected.cl         | 12 ++---
 mace/ops/opencl/cl/winograd_transform.cl      | 48 +++++++++----------
 mace/ops/opencl/conv_2d.h                     |  2 +-
 mace/ops/opencl/deconv_2d.h                   |  2 +-
 mace/ops/opencl/depthwise_conv2d.h            |  2 +-
 mace/ops/opencl/depthwise_deconv2d.h          |  2 +-
 mace/ops/opencl/fully_connected.h             |  2 +-
 mace/ops/opencl/image/activation.cc           |  4 +-
 mace/ops/opencl/image/activation.h            |  7 +--
 mace/ops/opencl/image/batch_norm.cc           |  8 ++--
 mace/ops/opencl/image/batch_norm.h            |  4 +-
 mace/ops/opencl/image/conv_2d.cc              | 10 ++--
 mace/ops/opencl/image/conv_2d.h               | 10 ++--
 mace/ops/opencl/image/conv_2d_1x1.cc          |  8 +++-
 mace/ops/opencl/image/conv_2d_3x3.cc          |  8 +++-
 mace/ops/opencl/image/conv_2d_general.cc      |  7 ++-
 mace/ops/opencl/image/deconv_2d.cc            |  7 ++-
 mace/ops/opencl/image/deconv_2d.h             |  2 +-
 mace/ops/opencl/image/depthwise_conv2d.cc     | 11 +++--
 mace/ops/opencl/image/depthwise_conv2d.h      |  4 +-
 mace/ops/opencl/image/depthwise_deconv2d.cc   |  7 ++-
 mace/ops/opencl/image/depthwise_deconv2d.h    |  2 +-
 mace/ops/opencl/image/fully_connected.cc      |  7 ++-
 mace/ops/opencl/image/fully_connected.h       |  2 +-
 mace/ops/opencl/image/winograd_conv2d.cc      |  8 ++--
 mace/ops/ref/activation.cc                    | 15 +++++-
 mace/ops/reverse.cc                           |  2 +
 micro/ops/utils/activation.cc                 | 12 ++---
 micro/ops/utils/activation.h                  |  4 +-
 .../test/ccunit/micro/ops/activation_test.cc  |  2 +-
 .../mace/ops/activation_benchmark.cc          |  4 +-
 test/ccunit/mace/ops/activation_test.cc       | 17 ++++---
 test/ccunit/mace/ops/batch_norm_test.cc       |  4 +-
 test/ccunit/mace/ops/conv_2d_test.cc          |  4 +-
 test/ccunit/mace/ops/deconv_2d_test.cc        |  4 +-
 test/ccunit/mace/ops/depthwise_conv2d_test.cc |  4 +-
 .../mace/ops/depthwise_deconv2d_test.cc       |  4 +-
 test/ccunit/mace/ops/fully_connected_test.cc  |  4 +-
 tools/python/transform/base_converter.py      |  2 +-
 tools/python/transform/caffe_converter.py     |  9 +++-
 tools/python/transform/onnx_converter.py      | 10 ++--
 .../python/transform/tensorflow_converter.py  | 16 +++++-
 tools/python/transform/transformer.py         |  7 +--
 75 files changed, 377 insertions(+), 283 deletions(-)

diff --git a/mace/core/net_def_adapter.cc b/mace/core/net_def_adapter.cc
index 07998679..54fc7619 100644
--- a/mace/core/net_def_adapter.cc
+++ b/mace/core/net_def_adapter.cc
@@ -199,11 +199,11 @@ MaceStatus NetDefAdapter::AdaptNetDef(
                                  input_data_format,
                                  input_shape, -1));
   }
-  OpConditionContext context(ws_, &tensor_shape_map);
   DataFormat op_output_data_format;
   MemoryType op_output_mem_type;
   for (int idx = 0; idx < net_def->op_size(); ++idx) {
     OperatorDef op_def(net_def->op(idx));
+    OpConditionContext context(ws_, &tensor_shape_map);
     context.set_operator_def(&op_def);
     // Select device
     MACE_RETURN_IF_ERROR(this->AdaptDevice(&context,
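The hunk above is the "net adapter" fix from the subject line: OpConditionContext used to be constructed once and reused for every operator, so state configured for one op (such as the stored operator_def pointer) could carry over into the next iteration. Constructing the context inside the loop gives each op a fresh one. A minimal standalone sketch of the pattern, with hypothetical stand-in types (the real ones are MACE's OpConditionContext, OperatorDef, and Workspace):

#include <cstddef>
#include <vector>

struct OperatorDef { int id = 0; };

struct OpConditionContext {
  // Holds per-op state; it must not outlive the op it was configured for.
  const OperatorDef *op_def = nullptr;
  void set_operator_def(const OperatorDef *def) { op_def = def; }
};

int main() {
  std::vector<OperatorDef> ops(3);
  for (std::size_t idx = 0; idx < ops.size(); ++idx) {
    OperatorDef op_def = ops[idx];        // stack-local copy, one per iteration
    OpConditionContext context;           // the fix: a fresh context per op
    context.set_operator_def(&op_def);    // no stale state from iteration idx-1
  }
  return 0;
}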
diff --git a/mace/ops/activation.cc b/mace/ops/activation.cc
index 17a3a905..ae395996 100644
--- a/mace/ops/activation.cc
+++ b/mace/ops/activation.cc
@@ -47,14 +47,14 @@ class ActivationOp : public Operation {
             activation_type_,
             Operation::GetOptionalArg<float>("max_limit", 0.f),
             Operation::GetOptionalArg<float>(
-                "leakyrelu_coefficient", 0.f)))) {}
+                "activation_coefficient", 0.f)))) {}
 
   MaceStatus Run(OpContext *context) override {
     MACE_UNUSED(context);
     const Tensor *input = this->Input(0);
     Tensor *output = this->Output(0);
 
-    if (activation_type_ == PRELU || activation_type_ == ELU) {
+    if (activation_type_ == PRELU) {
       MACE_RETURN_IF_ERROR(output->ResizeLike(input));
       const T *input_ptr = input->data<T>();
       T *output_ptr = output->mutable_data<T>();
@@ -63,8 +63,8 @@ class ActivationOp : public Operation {
       const T *alpha_ptr = alpha->data<T>();
       const index_t outer_size = output->dim(0);
       const index_t inner_size = output->dim(2) * output->dim(3);
-      ActivationWithAlpha(context, input_ptr, outer_size, input->dim(1),
-                          inner_size, alpha_ptr, activation_type_, output_ptr);
+      PReLUActivation(context, input_ptr, outer_size, input->dim(1),
+                      inner_size, alpha_ptr, output_ptr);
     } else {
       activation_delegator_->Compute(context, input, output);
     }
@@ -86,17 +86,17 @@ class ActivationOp : public Operation {
         Operation::GetOptionalArg<std::string>("activation", "NOOP"));
     auto relux_max_limit = Operation::GetOptionalArg<float>("max_limit", 0.0f);
-    auto leakyrelu_coefficient =
-        Operation::GetOptionalArg<float>("leakyrelu_coefficient", 0.0f);
+    auto activation_coefficient =
+        Operation::GetOptionalArg<float>("activation_coefficient", 0.0f);
     MemoryType mem_type;
     if (context->GetOpMemoryType() == MemoryType::GPU_IMAGE) {
       mem_type = MemoryType::GPU_IMAGE;
       kernel_ = make_unique<opencl::image::ActivationKernel>(
-          type, relux_max_limit, leakyrelu_coefficient);
+          type, relux_max_limit, activation_coefficient);
     } else {
       MACE_NOT_IMPLEMENTED;
     }
-    if (type == ActivationType::PRELU || type == ActivationType::ELU) {
+    if (type == ActivationType::PRELU) {
       MACE_CHECK(TransformFilter(
           context, operator_def_.get(), 1,
           OpenCLBufferType::ARGUMENT, mem_type) == MaceStatus::MACE_SUCCESS);
diff --git a/mace/ops/activation.h b/mace/ops/activation.h
index 95f65777..c4e37249 100644
--- a/mace/ops/activation.h
+++ b/mace/ops/activation.h
@@ -51,14 +51,13 @@ inline ActivationType StringToActivationType(const std::string type) {
 }
 
 template <typename T>
-void ActivationWithAlpha(const OpContext *context,
-                         const T *input_ptr,
-                         const index_t outer_size,
-                         const index_t input_chan,
-                         const index_t inner_size,
-                         const T *alpha_ptr,
-                         const index_t activation_type,
-                         T *output_ptr) {
+void PReLUActivation(const OpContext *context,
+                     const T *input_ptr,
+                     const index_t outer_size,
+                     const index_t input_chan,
+                     const index_t inner_size,
+                     const T *alpha_ptr,
+                     T *output_ptr) {
   utils::ThreadPool &thread_pool =
       context->device()->cpu_runtime()->thread_pool();
@@ -69,12 +68,7 @@ void ActivationWithAlpha(const OpContext *context,
     for (index_t j = 0; j < inner_size; ++j) {
       index_t idx = i * input_chan * inner_size + chan_idx * inner_size + j;
       if (input_ptr[idx] < 0) {
-        if (activation_type == ActivationType::PRELU) {
-          output_ptr[idx] = input_ptr[idx] * alpha_ptr[chan_idx];
-        } else if (activation_type == ActivationType::ELU) {
-          output_ptr[idx] =
-              (std::exp(input_ptr[idx]) - 1) * alpha_ptr[chan_idx];
-        }
+        output_ptr[idx] = input_ptr[idx] * alpha_ptr[chan_idx];
       } else {
         output_ptr[idx] = input_ptr[idx];
       }
diff --git a/mace/ops/arm/base/activation.cc b/mace/ops/arm/base/activation.cc
index ab9a5336..28fab419 100644
--- a/mace/ops/arm/base/activation.cc
+++ b/mace/ops/arm/base/activation.cc
@@ -81,6 +81,11 @@ void Activation<T>::DoActivation(const OpContext *context,
       break;
     }
 
+    case ELU: {
+      ActivateElu(&thread_pool, input, output);
+      break;
+    }
+
     case NOOP: {
       break;
     }
@@ -164,7 +169,7 @@ void Activation<T>::ActivateLeakyRelu(utils::ThreadPool *thread_pool,
   auto output_data = output->mutable_data<T>();
   const index_t input_size = input->size();
   const float32x4_t vzero = vdupq_n_f32(0.f);
-  const float32x4_t valpha = vdupq_n_f32(leakyrelu_coefficient_);
+  const float32x4_t valpha = vdupq_n_f32(activation_coefficient_);
   const index_t block_count = input_size / 4;
 
   thread_pool->Compute1D(
@@ -188,7 +193,7 @@ void Activation<T>::ActivateLeakyRelu(utils::ThreadPool *thread_pool,
         // remain
         for (index_t i = block_count * 4; i < input_size; ++i) {
           output_data[i] = std::max(input_data[i], 0.f) +
-              std::min(input_data[i], 0.f) * leakyrelu_coefficient_;
+              std::min(input_data[i], 0.f) * activation_coefficient_;
         }
       }
@@ -226,6 +231,28 @@ void Activation<T>::ActivateSigmoid(utils::ThreadPool *thread_pool,
       0, input_size, 1);
 }
 
+template<typename T>
+void Activation<T>::ActivateElu(utils::ThreadPool *thread_pool,
+                                const Tensor *input,
+                                Tensor *output) {
+  const auto *input_data = input->data<T>();
+  auto *output_data = output->mutable_data<T>();
+  const index_t input_size = input->size();
+
+  thread_pool->Compute1D(
+      [=](index_t start, index_t end, index_t step) {
+        for (index_t i = start; i < end; i += step) {
+          const auto in_val = input_data[i];
+          if (in_val < 0) {
+            output_data[i] = (std::exp(in_val) - 1) * activation_coefficient_;
+          } else {
+            output_data[i] = in_val;
+          }
+        }
+      },
+      0, input_size, 1);
+}
+
 void RegisterActivationDelegator(OpDelegatorRegistry *registry) {
   MACE_REGISTER_DELEGATOR(
       registry, Activation, delegator::ActivationParam,
@@ -240,7 +267,7 @@ void RegisterActivationDelegator(OpDelegatorRegistry *registry) {
   MACE_REGISTER_BF16_DELEGATOR(
       registry, Activation, delegator::ActivationParam,
       MACE_DELEGATOR_KEY(Activation, DeviceType::CPU, BFloat16,
-                             ImplType::NEON));
+                         ImplType::NEON));
 }
 
 }  // namespace arm
diff --git a/mace/ops/arm/base/activation.h b/mace/ops/arm/base/activation.h
index 3e82b1d6..1b07604e 100644
--- a/mace/ops/arm/base/activation.h
+++ b/mace/ops/arm/base/activation.h
@@ -45,6 +45,8 @@ class Activation : public delegator::Activation {
                      Tensor *output);
   void ActivateSigmoid(utils::ThreadPool *thread_pool, const Tensor *input,
                        Tensor *output);
+  void ActivateElu(utils::ThreadPool *thread_pool, const Tensor *input,
+                   Tensor *output);
 };
 
 }  // namespace arm
diff --git a/mace/ops/arm/q8/activation.cc b/mace/ops/arm/q8/activation.cc
index 875f8ba4..63bb4b64 100644
--- a/mace/ops/arm/q8/activation.cc
+++ b/mace/ops/arm/q8/activation.cc
@@ -123,6 +123,16 @@ void Activation<uint8_t>::ActivateSigmoid(utils::ThreadPool *thread_pool,
   MACE_NOT_IMPLEMENTED;
 }
 
+template<>
+void Activation<uint8_t>::ActivateElu(utils::ThreadPool *thread_pool,
+                                      const Tensor *input,
+                                      Tensor *output) {
+  MACE_UNUSED(thread_pool);
+  MACE_UNUSED(input);
+  MACE_UNUSED(output);
+  MACE_NOT_IMPLEMENTED;
+}
+
 }  // namespace arm
 }  // namespace ops
 }  // namespace mace
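The CPU changes above give ELU its own ActivateElu routine instead of piggybacking on the PReLU path, and its coefficient now comes from the generic activation_coefficient_ rather than a per-channel alpha tensor. For reference, the function being computed is f(x) = x for x >= 0 and f(x) = alpha * (exp(x) - 1) otherwise. A minimal standalone check of that math (ours, not part of the patch):

#include <cmath>
#include <cstdio>

// Scalar reference for ActivateElu; alpha plays the role of
// activation_coefficient_ in the delegator above.
float EluRef(float x, float alpha) {
  return x >= 0.f ? x : alpha * (std::exp(x) - 1.f);
}

int main() {
  // exp(-1.5) - 1 is about -0.77687, so the second value prints ~-0.776870.
  std::printf("%f %f\n", EluRef(1.5f, 1.0f), EluRef(-1.5f, 1.0f));
  return 0;
}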
diff --git a/mace/ops/batch_norm.cc b/mace/ops/batch_norm.cc
index a25936b1..49fc8002 100644
--- a/mace/ops/batch_norm.cc
+++ b/mace/ops/batch_norm.cc
@@ -50,7 +50,7 @@ class BatchNormOp : public Operation {
             Operation::GetOptionalArg<std::string>("activation", "NOOP")),
         Operation::GetOptionalArg<float>("max_limit", 0.0f),
-        Operation::GetOptionalArg<float>("leakyrelu_coefficient",
+        Operation::GetOptionalArg<float>("activation_coefficient",
                                          0.0f)))) {}
 
   MaceStatus Run(OpContext *context) override {
@@ -168,13 +168,13 @@
     ActivationType activation = ops::StringToActivationType(
         Operation::GetOptionalArg<std::string>("activation", "NOOP"));
     float relux_max_limit = Operation::GetOptionalArg<float>("max_limit", 0.0f);
-    float leakyrelu_coefficient = Operation::GetOptionalArg<float>(
-        "leakyrelu_coefficient", 0.0f);
+    float activation_coefficient = Operation::GetOptionalArg<float>(
+        "activation_coefficient", 0.0f);
     MemoryType mem_type;
     if (context->GetOpMemoryType() == MemoryType::GPU_IMAGE) {
       mem_type = MemoryType::GPU_IMAGE;
       kernel_ = make_unique<opencl::image::BatchNormKernel>(
-          epsilon, activation, relux_max_limit, leakyrelu_coefficient);
+          epsilon, activation, relux_max_limit, activation_coefficient);
     } else {
       MACE_NOT_IMPLEMENTED;
     }
diff --git a/mace/ops/conv_2d.cc b/mace/ops/conv_2d.cc
index 03b098c6..26b4d02a 100644
--- a/mace/ops/conv_2d.cc
+++ b/mace/ops/conv_2d.cc
@@ -68,7 +68,7 @@ class Conv2dOp : public ConvPool2dOpBase {
                 Operation::GetOptionalArg<std::string>("activation", "NOOP")),
             Operation::GetOptionalArg<float>("max_limit", 0.0f),
-            Operation::GetOptionalArg<float>("leakyrelu_coefficient",
+            Operation::GetOptionalArg<float>("activation_coefficient",
                                              0.0f)))),
         bias_add_delegator_(delegator::BiasAdd::Create(
             context->workspace(),
@@ -190,8 +190,8 @@ class Conv2dOp : public ConvPool2dOpBase {
             Operation::GetOptionalArg<std::string>("activation", "NOOP"))),
         relux_max_limit_(Operation::GetOptionalArg<float>("max_limit", 0.0f)),
-        leakyrelu_coefficient_(Operation::GetOptionalArg<float>(
-            "leakyrelu_coefficient", 0.0f)) {}
+        activation_coefficient_(Operation::GetOptionalArg<float>(
+            "activation_coefficient", 0.0f)) {}
 
   MaceStatus Run(OpContext *context) override {
     const Tensor *input = this->Input(INPUT);
@@ -414,7 +414,7 @@
  private:
   const ActivationType activation_;
   const float relux_max_limit_;
-  const float leakyrelu_coefficient_;
+  const float activation_coefficient_;
   std::vector bias_;
 
  private:
@@ -433,8 +433,8 @@
             Operation::GetOptionalArg<std::string>("activation", "NOOP"))),
         relux_max_limit_(Operation::GetOptionalArg<float>("max_limit", 0.0f)),
-        leakyrelu_coefficient_(Operation::GetOptionalArg<float>(
-            "leakyrelu_coefficient", 0.0f)),
+        activation_coefficient_(Operation::GetOptionalArg<float>(
+            "activation_coefficient", 0.0f)),
         wino_block_size_(Operation::GetOptionalArg<int>("wino_block_size", 0)) {
     MemoryType mem_type;
     if (context->GetOpMemoryType() == MemoryType::GPU_IMAGE) {
@@ -488,13 +488,13 @@
     return kernel_->Compute(context, input, filter, bias,
                             strides_.data(), padding_type_, paddings_,
                             dilations_.data(), activation_, relux_max_limit_,
-                            leakyrelu_coefficient_, wino_block_size_, output);
+                            activation_coefficient_, wino_block_size_, output);
   }
 
  private:
   const ActivationType activation_;
   const float relux_max_limit_;
-  const float leakyrelu_coefficient_;
+  const float activation_coefficient_;
   std::unique_ptr<OpenCLConv2dKernel> kernel_;
   int wino_block_size_;
diff --git a/mace/ops/deconv_2d.cc b/mace/ops/deconv_2d.cc
index dc58141a..4df66c0d 100644
--- a/mace/ops/deconv_2d.cc
+++ b/mace/ops/deconv_2d.cc
@@ -56,8 +56,8 @@ class Deconv2dOp : public Deconv2dOpBase {
             context->workspace(),
             MACE_DELEGATOR_KEY(Activation, DeviceType::CPU, T, kCpuImplType),
-            delegator::ActivationParam(activation_, relux_max_limit_,
-                                       leakyrelu_coefficient_))),
+            delegator::ActivationParam(
+                activation_, relux_max_limit_, activation_coefficient_))),
         bias_add_delegator_(delegator::BiasAdd::Create(
             context->workspace(),
             MACE_DELEGATOR_KEY(BiasAdd, DeviceType::CPU, T, kCpuImplType),
@@ -228,7 +228,7 @@ class Deconv2dOp : public Deconv2dOpBase {
     return kernel_->Compute(context, input, filter, bias,
                             strides_.data(), in_paddings.data(), activation_,
-                            relux_max_limit_, leakyrelu_coefficient_,
+                            relux_max_limit_, activation_coefficient_,
                             out_shape, output);
   }
diff --git a/mace/ops/deconv_2d.h b/mace/ops/deconv_2d.h
index a11d5f8a..d4b85cfd 100644
--- a/mace/ops/deconv_2d.h
+++ b/mace/ops/deconv_2d.h
@@ -43,8 +43,8 @@ class Deconv2dOpBase : public Operation {
                                                      "NOOP"))),
         relux_max_limit_(
             Operation::GetOptionalArg<float>("max_limit", 0.0f)),
-        leakyrelu_coefficient_(
-            Operation::GetOptionalArg<float>("leakyrelu_coefficient", 0.0f)) {}
+        activation_coefficient_(
+            Operation::GetOptionalArg<float>("activation_coefficient", 0.0f)) {}
 
  protected:
   std::vector<int> strides_;  // [stride_h, stride_w]
@@ -54,7 +54,7 @@ class Deconv2dOpBase : public Operation {
   const FrameworkType model_type_;
   const ActivationType activation_;
   const float relux_max_limit_;
-  const float leakyrelu_coefficient_;
+  const float activation_coefficient_;
 };
 
 }  // namespace ops
diff --git a/mace/ops/delegator/activation.h b/mace/ops/delegator/activation.h
index 80a9c6b3..ec0b1a07 100644
--- a/mace/ops/delegator/activation.h
+++ b/mace/ops/delegator/activation.h
@@ -26,20 +26,20 @@ namespace delegator {
 
 struct ActivationParam : public DelegatorParam {
   explicit ActivationParam(ActivationType type, const float limit,
-                           const float leakyrelu_coefficient)
+                           const float activation_coefficient)
       : type_(type), limit_(limit),
-        leakyrelu_coefficient_(leakyrelu_coefficient) {}
+        activation_coefficient_(activation_coefficient) {}
 
   ActivationType type_;
   const float limit_;
-  const float leakyrelu_coefficient_;
+  const float activation_coefficient_;
 };
 
 class Activation : public OpDelegator {
  public:
   explicit Activation(const ActivationParam &param)
       : OpDelegator(param), type_(param.type_), limit_(param.limit_),
-        leakyrelu_coefficient_(param.leakyrelu_coefficient_) {}
+        activation_coefficient_(param.activation_coefficient_) {}
   virtual ~Activation() = default;
 
   MACE_DEFINE_DELEGATOR_CREATOR(Activation)
@@ -51,7 +51,7 @@ class Activation : public OpDelegator {
  protected:
   ActivationType type_;
   const float limit_;
-  const float leakyrelu_coefficient_;
+  const float activation_coefficient_;
 };
 
 }  // namespace delegator
diff --git a/mace/ops/depthwise_conv2d.cc b/mace/ops/depthwise_conv2d.cc
index 903a2cfd..3f0d7d64 100644
--- a/mace/ops/depthwise_conv2d.cc
+++ b/mace/ops/depthwise_conv2d.cc
@@ -52,12 +52,12 @@ class DepthwiseConv2dOpBase : public ConvPool2dOpBase {
             Operation::GetOptionalArg<std::string>("activation", "NOOP"))),
         relux_max_limit_(Operation::GetOptionalArg<float>("max_limit", 0.0f)),
-        leakyrelu_coefficient_(Operation::GetOptionalArg<float>(
-            "leakyrelu_coefficient", 0.0f)) {}
+        activation_coefficient_(Operation::GetOptionalArg<float>(
+            "activation_coefficient", 0.0f)) {}
 
  protected:
   const ActivationType activation_;
   const float relux_max_limit_;
-  const float leakyrelu_coefficient_;
+  const float activation_coefficient_;
 };
 
 template <DeviceType D, class T>
@@ -73,8 +73,8 @@ class DepthwiseConv2dOp : public DepthwiseConv2dOpBase {
             context->workspace(),
             MACE_DELEGATOR_KEY(Activation, DeviceType::CPU, T, kCpuImplType),
-            delegator::ActivationParam(activation_, relux_max_limit_,
-                                       leakyrelu_coefficient_))),
+            delegator::ActivationParam(
+                activation_, relux_max_limit_, activation_coefficient_))),
         bias_add_delegator_(delegator::BiasAdd::Create(
             context->workspace(),
             MACE_DELEGATOR_KEY(BiasAdd, DeviceType::CPU, T, kCpuImplType),
@@ -389,7 +389,7 @@
     return kernel_->Compute(context, input, filter, bias,
                             strides_.data(), padding_type_, paddings_,
                             dilations_.data(), activation_, relux_max_limit_,
-                            leakyrelu_coefficient_, output);
+                            activation_coefficient_, output);
   }
 
  private:
diff --git a/mace/ops/depthwise_deconv2d.cc b/mace/ops/depthwise_deconv2d.cc
index 615b5ecc..b1373d76 100644
--- a/mace/ops/depthwise_deconv2d.cc
+++ b/mace/ops/depthwise_deconv2d.cc
@@ -55,8 +55,8 @@ class DepthwiseDeconv2dOp
             context->workspace(),
             MACE_DELEGATOR_KEY(Activation, DeviceType::CPU, T, kCpuImplType),
-            delegator::ActivationParam(activation_, relux_max_limit_,
-                                       leakyrelu_coefficient_))),
+            delegator::ActivationParam(
+                activation_, relux_max_limit_, activation_coefficient_))),
         bias_add_delegator_(delegator::BiasAdd::Create(
             context->workspace(),
             MACE_DELEGATOR_KEY(BiasAdd, DeviceType::CPU, T, kCpuImplType),
@@ -209,7 +209,7 @@ class DepthwiseDeconv2dOp : public Deconv2dOpBase {
                             group_,
                             activation_,
                             relux_max_limit_,
-                            leakyrelu_coefficient_,
+                            activation_coefficient_,
                             out_shape,
                             output);
   }
diff --git a/mace/ops/fully_connected.cc b/mace/ops/fully_connected.cc
index f0c83a25..e81e1e90 100644
--- a/mace/ops/fully_connected.cc
+++ b/mace/ops/fully_connected.cc
@@ -42,12 +42,12 @@ class FullyConnectedOpBase : public Operation {
             Operation::GetOptionalArg<std::string>("activation", "NOOP"))),
         relux_max_limit_(Operation::GetOptionalArg<float>("max_limit", 0.0f)),
-        leakyrelu_coefficient_(Operation::GetOptionalArg<float>(
-            "leakyrelu_coefficient", 0.0f)) {}
+        activation_coefficient_(Operation::GetOptionalArg<float>(
+            "activation_coefficient", 0.0f)) {}
 
  protected:
   const ActivationType activation_;
   const float relux_max_limit_;
-  const float leakyrelu_coefficient_;
+  const float activation_coefficient_;
 
   MACE_OP_INPUT_TAGS(INPUT, WEIGHT, BIAS);
   MACE_OP_OUTPUT_TAGS(OUTPUT);
@@ -64,9 +64,8 @@ class FullyConnectedOp : public FullyConnectedOpBase {
         activation_delegator_(delegator::Activation::Create(
             context->workspace(),
             MACE_DELEGATOR_KEY(Activation, DeviceType::CPU, T, kCpuImplType),
-            delegator::ActivationParam(activation_,
-                                       relux_max_limit_,
-                                       leakyrelu_coefficient_))),
+            delegator::ActivationParam(
+                activation_, relux_max_limit_, activation_coefficient_))),
         gemv_(delegator::Gemv::Create(
             context->workspace(),
             MACE_DELEGATOR_KEY(Gemv, DeviceType::CPU, T, kCpuImplType),
@@ -215,7 +214,7 @@
                " don't match.");
     return kernel_->Compute(
         context, input, weight, bias, activation_, relux_max_limit_,
-        leakyrelu_coefficient_, output);
+        activation_coefficient_, output);
   }
 
  private:
diff --git a/mace/ops/opencl/buffer/conv_2d.cc b/mace/ops/opencl/buffer/conv_2d.cc
index 50109b6e..1c604d8e 100644
--- a/mace/ops/opencl/buffer/conv_2d.cc
+++ b/mace/ops/opencl/buffer/conv_2d.cc
@@ -46,7 +46,7 @@ MaceStatus Conv2dKernel::Compute(
     const int *dilations,
     const ActivationType activation,
     const float relux_max_limit,
-    const float leakyrelu_coefficient,
+    const float activation_coefficient,
     const int winograd_blk_size,
     Tensor *output) {
   MACE_UNUSED(winograd_blk_size);
@@ -148,14 +148,14 @@ MaceStatus Conv2dKernel::Compute(
       return conv2d::Conv2d1x1(
           context, &kernels_[1], pad_input, filter, bias, strides,
           activation, relux_max_limit,
-          leakyrelu_coefficient, input_changed, output, &conv_future);
+          activation_coefficient, input_changed, output, &conv_future);
     };
   } else {
     conv_func = [&](const Tensor *pad_input, Tensor *output) -> MaceStatus {
       return conv2d::Conv2dGeneral(
           context, &kernels_[1], pad_input, filter, bias, strides, dilations,
           activation, relux_max_limit,
-          leakyrelu_coefficient, input_changed, output, &conv_future);
+          activation_coefficient, input_changed, output, &conv_future);
     };
   }
   MACE_RETURN_IF_ERROR(conv_func(padded_input_ptr, output));
diff --git a/mace/ops/opencl/buffer/conv_2d.h b/mace/ops/opencl/buffer/conv_2d.h
index 563b8358..224f6670 100644
--- a/mace/ops/opencl/buffer/conv_2d.h
+++ b/mace/ops/opencl/buffer/conv_2d.h
@@ -38,7 +38,7 @@ extern MaceStatus Conv2d1x1(OpContext *context,
                             const int *strides,
                             const ActivationType activation,
                             const float relux_max_limit,
-                            const float leakyrelu_coefficient,
+                            const float activation_coefficient,
                             const bool input_changed,
                             Tensor *output,
                             StatsFuture *future);
@@ -52,7 +52,7 @@ extern MaceStatus Conv2dGeneral(OpContext *context,
                                 const int *dilations,
                                 const ActivationType activation,
                                 const float relux_max_limit,
-                                const float leakyrelu_coefficient,
+                                const float activation_coefficient,
                                 const bool input_changed,
                                 Tensor *output,
                                 StatsFuture *future);
@@ -81,7 +81,7 @@ class Conv2dKernel : public OpenCLConv2dKernel {
                      const int *dilations,
                      const ActivationType activation,
                      const float relux_max_limit,
-                     const float leakyrelu_coefficient,
+                     const float activation_coefficient,
                      const int winograd_blk_size,
                      Tensor *output) override;
diff --git a/mace/ops/opencl/buffer/conv_2d_1x1.cc b/mace/ops/opencl/buffer/conv_2d_1x1.cc
index 001c201d..5b80d90f 100644
--- a/mace/ops/opencl/buffer/conv_2d_1x1.cc
+++ b/mace/ops/opencl/buffer/conv_2d_1x1.cc
@@ -31,7 +31,7 @@ MaceStatus Conv2d1x1(OpContext *context,
                      const int *strides,
                      const ActivationType activation,
                      const float relux_max_limit,
-                     const float leakyrelu_coefficient,
+                     const float activation_coefficient,
                      const bool input_changed,
                      Tensor *output,
                      StatsFuture *future) {
@@ -75,6 +75,9 @@ MaceStatus Conv2d1x1(OpContext *context,
       case LEAKYRELU:
         built_options.emplace("-DUSE_LEAKYRELU");
         break;
+      case ELU:
+        built_options.emplace("-DUSE_ELU");
+        break;
       default:
         LOG(FATAL) << "Unknown activation type: " << activation;
     }
@@ -110,7 +113,7 @@
     kernel->setArg(idx++, strides[0]);
     kernel->setArg(idx++, strides[1]);
     kernel->setArg(idx++, relux_max_limit);
-    kernel->setArg(idx++, leakyrelu_coefficient);
+    kernel->setArg(idx++, activation_coefficient);
     kernel->setArg(idx++, *(output->opencl_buffer()));
   }
diff --git a/mace/ops/opencl/buffer/conv_2d_general.cc b/mace/ops/opencl/buffer/conv_2d_general.cc
index 9e7d7508..8bdb419f 100644
--- a/mace/ops/opencl/buffer/conv_2d_general.cc
+++ b/mace/ops/opencl/buffer/conv_2d_general.cc
@@ -32,7 +32,7 @@ MaceStatus Conv2dGeneral(OpContext *context,
                          const int *dilations,
                          const ActivationType activation,
                          const float relux_max_limit,
-                         const float leakyrelu_coefficient,
+                         const float activation_coefficient,
                          const bool input_changed,
                          Tensor *output,
                          StatsFuture *future) {
@@ -81,6 +81,9 @@ MaceStatus Conv2dGeneral(OpContext *context,
       case LEAKYRELU:
         built_options.emplace("-DUSE_LEAKYRELU");
         break;
+      case ELU:
+        built_options.emplace("-DUSE_ELU");
+        break;
       default:
         LOG(FATAL) << "Unknown activation type: " << activation;
     }
@@ -125,7 +128,7 @@
     kernel->setArg(idx++, static_cast<int32_t>(
         dilations[1] * in_channel));
     kernel->setArg(idx++, relux_max_limit);
-    kernel->setArg(idx++, leakyrelu_coefficient);
+    kernel->setArg(idx++, activation_coefficient);
     kernel->setArg(idx++, *(output->opencl_buffer()));
   }
diff --git a/mace/ops/opencl/buffer/depthwise_conv2d.cc b/mace/ops/opencl/buffer/depthwise_conv2d.cc
index 48c9829f..88a7c535 100644
--- a/mace/ops/opencl/buffer/depthwise_conv2d.cc
+++ b/mace/ops/opencl/buffer/depthwise_conv2d.cc
@@ -32,7 +32,7 @@ MaceStatus DepthwiseConv2d(OpContext *context,
                            const int *dilations,
                            const ActivationType activation,
                            const float relux_max_limit,
-                           const float leakyrelu_coefficient,
+                           const float activation_coefficient,
                            const bool input_changed,
                            Tensor *output,
                            StatsFuture *future) {
@@ -79,6 +79,9 @@ MaceStatus DepthwiseConv2d(OpContext *context,
       case LEAKYRELU:
         built_options.emplace("-DUSE_LEAKYRELU");
         break;
+      case ELU:
+        built_options.emplace("-DUSE_ELU");
+        break;
       default:
         LOG(FATAL) << "Unknown activation type: " << activation;
     }
@@ -119,7 +122,7 @@
     kernel->setArg(idx++, static_cast<int32_t>(
         dilations[1] * in_channel));
     kernel->setArg(idx++, relux_max_limit);
-    kernel->setArg(idx++, leakyrelu_coefficient);
+    kernel->setArg(idx++, activation_coefficient);
     kernel->setArg(idx++, *(output->opencl_buffer()));
   }
@@ -147,7 +150,7 @@ MaceStatus DepthwiseConv2dKernel::Compute(
     const int *dilations,
     const ActivationType activation,
     const float relux_max_limit,
-    const float leakyrelu_coefficient,
+    const float activation_coefficient,
     Tensor *output) {
   StatsFuture pad_future, dw_conv_future;
   index_t filter_w = filter->dim(3);
@@ -242,7 +245,7 @@ MaceStatus DepthwiseConv2dKernel::Compute(
       depthwise::DepthwiseConv2d(
           context, &kernels_[1], padded_input_ptr, filter, bias, strides,
           dilations, activation, relux_max_limit,
-          leakyrelu_coefficient, input_changed, output, &dw_conv_future));
+          activation_coefficient, input_changed, output, &dw_conv_future));
   MergeMultipleFutureWaitFn({pad_future, dw_conv_future}, context->future());
   return MaceStatus::MACE_SUCCESS;
 }
diff --git a/mace/ops/opencl/buffer/depthwise_conv2d.h b/mace/ops/opencl/buffer/depthwise_conv2d.h
index 60d68077..e8990b30 100644
--- a/mace/ops/opencl/buffer/depthwise_conv2d.h
+++ b/mace/ops/opencl/buffer/depthwise_conv2d.h
@@ -39,7 +39,7 @@ MaceStatus DepthwiseConv2d(OpContext *context,
                            const int *dilations,
                            const ActivationType activation,
                            const float relux_max_limit,
-                           const float leakyrelu_coefficient,
+                           const float activation_coefficient,
                            const bool input_changed,
                            Tensor *output,
                            StatsFuture *future);
@@ -59,7 +59,7 @@ class DepthwiseConv2dKernel : public OpenCLDepthwiseConv2dKernel {
                      const int *dilations,
                      const ActivationType activation,
                      const float relux_max_limit,
-                     const float leakyrelu_coefficient,
+                     const float activation_coefficient,
                      Tensor *output) override;
 
  private:
diff --git a/mace/ops/opencl/cl/activation.cl b/mace/ops/opencl/cl/activation.cl
index 8e825ece..46615a77 100644
--- a/mace/ops/opencl/cl/activation.cl
+++ b/mace/ops/opencl/cl/activation.cl
@@ -3,11 +3,11 @@
 __kernel void activation(OUT_OF_RANGE_PARAMS
                          GLOBAL_WORK_GROUP_SIZE_DIM3
                          __read_only image2d_t input,
-#if defined (USE_PRELU) || defined (USE_ELU)
+#ifdef USE_PRELU
                          __read_only image2d_t alpha,
 #endif
                          __private const float relux_max_limit,
-                         __private const float leakyrelu_coefficient,
+                         __private const float coefficient,
                          __write_only image2d_t output) {
   const int ch_blk = get_global_id(0);
   const int w = get_global_id(1);
@@ -23,11 +23,11 @@ __kernel void activation(OUT_OF_RANGE_PARAMS
   const int pos = mad24(ch_blk, width, w);
   DATA_TYPE4 in = READ_IMAGET(input, SAMPLER, (int2)(pos, hb));
-#if defined (USE_PRELU) || defined (USE_ELU)
+#ifdef USE_PRELU
   DATA_TYPE4 activation_alpha = READ_IMAGET(alpha, SAMPLER, (int2)(ch_blk, 0));
-  DATA_TYPE4 out = do_activation(in, activation_alpha, relux_max_limit, leakyrelu_coefficient);
+  DATA_TYPE4 out = do_activation(in, activation_alpha, relux_max_limit, coefficient);
 #else
-  DATA_TYPE4 out = do_activation(in, relux_max_limit, leakyrelu_coefficient);
+  DATA_TYPE4 out = do_activation(in, relux_max_limit, coefficient);
 #endif
 
   WRITE_IMAGET(output, (int2)(pos, hb), out);
diff --git a/mace/ops/opencl/cl/batch_norm.cl b/mace/ops/opencl/cl/batch_norm.cl
index 87da37d0..37faf876 100644
--- a/mace/ops/opencl/cl/batch_norm.cl
+++ b/mace/ops/opencl/cl/batch_norm.cl
@@ -12,7 +12,7 @@ __kernel void batch_norm(OUT_OF_RANGE_PARAMS
 #endif
                          __write_only image2d_t output,
                          __private const float relux_max_limit,
-                         __private const float leakyrelu_coefficient) {
+                         __private const float activation_coefficient) {
   const int ch_blk = get_global_id(0);
   const int w = get_global_id(1);
   const int hb = get_global_id(2);
@@ -44,8 +44,8 @@ __kernel void batch_norm(OUT_OF_RANGE_PARAMS
   DATA_TYPE4 in = READ_IMAGET(input, SAMPLER, (int2)(pos, hb));
   DATA_TYPE4 out = mad(in, bn_scale, bn_offset);
 
-#if defined(USE_RELU) || defined(USE_LEAKYRELU) || defined(USE_RELUX) || defined(USE_TANH) || defined(USE_SIGMOID)
-  out = do_activation(out, relux_max_limit, leakyrelu_coefficient);
+#if defined(USE_RELU) || defined(USE_LEAKYRELU) || defined(USE_RELUX) || defined(USE_TANH) || defined(USE_SIGMOID) || defined(USE_ELU)
+  out = do_activation(out, relux_max_limit, activation_coefficient);
 #endif
 
   WRITE_IMAGET(output, (int2)(pos, hb), out);
diff --git a/mace/ops/opencl/cl/common.h b/mace/ops/opencl/cl/common.h
index 0bd97045..3cf2cc27 100644
--- a/mace/ops/opencl/cl/common.h
+++ b/mace/ops/opencl/cl/common.h
@@ -83,11 +83,11 @@ inline float4 do_sigmoid(float4 in) {
 
 #ifdef DATA_TYPE
 inline DATA_TYPE4 do_activation(DATA_TYPE4 in,
-#if defined (USE_PRELU) || defined (USE_ELU)
+#if defined (USE_PRELU)
                                 DATA_TYPE4 alpha,
 #endif
                                 __private const float relux_max_limit,
-                                __private const float leakyrelu_coefficient) {
+                                __private const float activation_coefficient) {
   DATA_TYPE4 out;
 #ifdef USE_RELU
   out = fmax(in, (DATA_TYPE)0);
@@ -99,7 +99,8 @@ inline DATA_TYPE4 do_activation(DATA_TYPE4 in,
   out = select(alpha * in, in, in >= (DATA_TYPE)0);
 #endif
 #ifdef USE_ELU
-  out = select(alpha * (native_exp(in) - 1.0f), in, in >= (DATA_TYPE)0);
+  out = select(activation_coefficient * (native_exp(in) - 1.0f),
+               in, in >= (DATA_TYPE)0);
 #endif
 #ifdef USE_TANH
   out = tanh(in);
@@ -108,7 +109,7 @@ inline DATA_TYPE4 do_activation(DATA_TYPE4 in,
   out = do_sigmoid(in);
 #endif
 #ifdef USE_LEAKYRELU
-  out = select(leakyrelu_coefficient * in, in, in >= (DATA_TYPE)0);
+  out = select(activation_coefficient * in, in, in >= (DATA_TYPE)0);
 #endif
   return out;
 }
diff --git a/mace/ops/opencl/cl/conv_2d.cl b/mace/ops/opencl/cl/conv_2d.cl
index 2bf4572b..53b06f28 100644
--- a/mace/ops/opencl/cl/conv_2d.cl
+++ b/mace/ops/opencl/cl/conv_2d.cl
@@ -9,7 +9,7 @@ __kernel void conv_2d(OUT_OF_RANGE_PARAMS
 #endif
                       __write_only image2d_t output,
                       __private const float relux_max_limit,
-                      __private const float leakyrelu_coefficient,
+                      __private const float activation_coefficient,
                       __private const int in_height,
                       __private const int in_width,
                       __private const int in_ch_blks,
@@ -125,11 +125,11 @@ __kernel void conv_2d(OUT_OF_RANGE_PARAMS
     }
   }
 
-#if defined(USE_RELU) || defined(USE_LEAKYRELU) || defined(USE_RELUX) || defined(USE_TANH) || defined(USE_SIGMOID)
-  out0 = do_activation(out0, relux_max_limit, leakyrelu_coefficient);
-  out1 = do_activation(out1, relux_max_limit, leakyrelu_coefficient);
-  out2 = do_activation(out2, relux_max_limit, leakyrelu_coefficient);
-  out3 = do_activation(out3, relux_max_limit, leakyrelu_coefficient);
+#if defined(USE_RELU) || defined(USE_LEAKYRELU) || defined(USE_RELUX) || defined(USE_TANH) || defined(USE_SIGMOID) || defined(USE_ELU)
+  out0 = do_activation(out0, relux_max_limit, activation_coefficient);
+  out1 = do_activation(out1, relux_max_limit, activation_coefficient);
+  out2 = do_activation(out2, relux_max_limit, activation_coefficient);
+  out3 = do_activation(out3, relux_max_limit, activation_coefficient);
 #endif
 
   const int out_x_base = mul24(out_ch_blk, out_width);
diff --git a/mace/ops/opencl/cl/conv_2d_1x1.cl b/mace/ops/opencl/cl/conv_2d_1x1.cl
index 1d146028..ca62c8ac 100644
--- a/mace/ops/opencl/cl/conv_2d_1x1.cl
+++ b/mace/ops/opencl/cl/conv_2d_1x1.cl
@@ -9,7 +9,7 @@ __kernel void conv_2d_1x1(OUT_OF_RANGE_PARAMS
 #endif
                           __write_only image2d_t output,
                           __private const float relux_max_limit,
-                          __private const float leakyrelu_coefficient,
+                          __private const float activation_coefficient,
                           __private const int in_height,
                           __private const int in_width,
                           __private const int in_ch_blks,
@@ -98,11 +98,11 @@ __kernel void conv_2d_1x1(OUT_OF_RANGE_PARAMS
     filter_x_base += 4;
   }
 
-#if defined(USE_RELU) || defined(USE_LEAKYRELU) || defined(USE_RELUX) || defined(USE_TANH) || defined(USE_SIGMOID)
-  out0 = do_activation(out0, relux_max_limit, leakyrelu_coefficient);
-  out1 = do_activation(out1, relux_max_limit, leakyrelu_coefficient);
-  out2 = do_activation(out2, relux_max_limit, leakyrelu_coefficient);
-  out3 = do_activation(out3, relux_max_limit, leakyrelu_coefficient);
+#if defined(USE_RELU) || defined(USE_LEAKYRELU) || defined(USE_RELUX) || defined(USE_TANH) || defined(USE_SIGMOID) || defined(USE_ELU)
+  out0 = do_activation(out0, relux_max_limit, activation_coefficient);
+  out1 = do_activation(out1, relux_max_limit, activation_coefficient);
+  out2 = do_activation(out2, relux_max_limit, activation_coefficient);
+  out3 = do_activation(out3, relux_max_limit, activation_coefficient);
 #endif
 
   const int out_x_base = mul24(out_ch_blk, width);
diff --git a/mace/ops/opencl/cl/conv_2d_1x1_buffer.cl b/mace/ops/opencl/cl/conv_2d_1x1_buffer.cl
index be9f36e1..72669030 100644
--- a/mace/ops/opencl/cl/conv_2d_1x1_buffer.cl
+++ b/mace/ops/opencl/cl/conv_2d_1x1_buffer.cl
@@ -17,7 +17,7 @@ __kernel void conv2d(BUFFER_OUT_OF_RANGE_PARAMS
                      __private const int stride_h,
                      __private const int stride_w,
                      __private const float relux_max_limit,
-                     __private const float leakyrelu_coefficient,
+                     __private const float activation_coefficient,
                      __global OUT_DATA_TYPE *output) {
   const int out_wc_blk_idx = get_global_id(0);
   const int out_hb_idx = get_global_id(1);
@@ -80,9 +80,9 @@ __kernel void conv2d(BUFFER_OUT_OF_RANGE_PARAMS
     in_offset += 4;
   }
 
-#if defined(USE_RELU) || defined(USE_LEAKYRELU) || defined(USE_RELUX) || defined(USE_TANH) || defined(USE_SIGMOID)
-  out0 = do_activation(out0, relux_max_limit, leakyrelu_coefficient);
-  out1 = do_activation(out1, relux_max_limit, leakyrelu_coefficient);
+#if defined(USE_RELU) || defined(USE_LEAKYRELU) || defined(USE_RELUX) || defined(USE_TANH) || defined(USE_SIGMOID) || defined(USE_ELU)
+  out0 = do_activation(out0, relux_max_limit, activation_coefficient);
+  out1 = do_activation(out1, relux_max_limit, activation_coefficient);
 #endif
 
   int out_offset = mad24(mad24(mad24(batch_idx, out_height, out_height_idx),
diff --git a/mace/ops/opencl/cl/conv_2d_3x3.cl b/mace/ops/opencl/cl/conv_2d_3x3.cl
index 0c37cd80..f918504a 100644
--- a/mace/ops/opencl/cl/conv_2d_3x3.cl
+++ b/mace/ops/opencl/cl/conv_2d_3x3.cl
@@ -9,7 +9,7 @@ __kernel void conv_2d_3x3(OUT_OF_RANGE_PARAMS
 #endif
                           __write_only image2d_t output,
                           __private const float relux_max_limit,
-                          __private const float leakyrelu_coefficient,
+                          __private const float activation_coefficient,
                           __private const int in_height,
                           __private const int in_width,
                           __private const int in_ch_blks,
@@ -130,12 +130,12 @@ __kernel void conv_2d_3x3(OUT_OF_RANGE_PARAMS
     }
   }
 
-#if defined(USE_RELU) || defined(USE_LEAKYRELU) || defined(USE_RELUX) || defined(USE_TANH) || defined(USE_SIGMOID)
-  out0 = do_activation(out0, relux_max_limit, leakyrelu_coefficient);
-  out1 = do_activation(out1, relux_max_limit, leakyrelu_coefficient);
-  out2 = do_activation(out2, relux_max_limit, leakyrelu_coefficient);
-  out3 = do_activation(out3, relux_max_limit, leakyrelu_coefficient);
-  out4 = do_activation(out4, relux_max_limit, leakyrelu_coefficient);
+#if defined(USE_RELU) || defined(USE_LEAKYRELU) || defined(USE_RELUX) || defined(USE_TANH) || defined(USE_SIGMOID) || defined(USE_ELU)
+  out0 = do_activation(out0, relux_max_limit, activation_coefficient);
+  out1 = do_activation(out1, relux_max_limit, activation_coefficient);
+  out2 = do_activation(out2, relux_max_limit, activation_coefficient);
+  out3 = do_activation(out3, relux_max_limit, activation_coefficient);
+  out4 = do_activation(out4, relux_max_limit, activation_coefficient);
 #endif
 
   const int out_x_base = mul24(out_ch_blk, out_width);
diff --git a/mace/ops/opencl/cl/conv_2d_buffer.cl b/mace/ops/opencl/cl/conv_2d_buffer.cl
index 41efc13a..d90e5951 100644
--- a/mace/ops/opencl/cl/conv_2d_buffer.cl
+++ b/mace/ops/opencl/cl/conv_2d_buffer.cl
@@ -22,7 +22,7 @@ __kernel void conv2d(BUFFER_OUT_OF_RANGE_PARAMS
                      __private const int dilated_h_offset,
                      __private const int dilated_w_offset,
                      __private const float relux_max_limit,
-                     __private const float leakyrelu_coefficient,
+                     __private const float activation_coefficient,
                      __global OUT_DATA_TYPE *output) {
   const int out_wc_blk_idx = get_global_id(0);
   const int out_hb_idx = get_global_id(1);
@@ -108,11 +108,11 @@ __kernel void conv2d(BUFFER_OUT_OF_RANGE_PARAMS
     }
   }
 
-#if defined(USE_RELU) || defined(USE_LEAKYRELU) || defined(USE_RELUX) || defined(USE_TANH) || defined(USE_SIGMOID)
-  out0 = do_activation(out0, relux_max_limit, leakyrelu_coefficient);
-  out1 = do_activation(out1, relux_max_limit, leakyrelu_coefficient);
-  out2 = do_activation(out2, relux_max_limit, leakyrelu_coefficient);
-  out3 = do_activation(out3, relux_max_limit, leakyrelu_coefficient);
+#if defined(USE_RELU) || defined(USE_LEAKYRELU) || defined(USE_RELUX) || defined(USE_TANH) || defined(USE_SIGMOID) || defined(USE_ELU)
+  out0 = do_activation(out0, relux_max_limit, activation_coefficient);
+  out1 = do_activation(out1, relux_max_limit, activation_coefficient);
+  out2 = do_activation(out2, relux_max_limit, activation_coefficient);
+  out3 = do_activation(out3, relux_max_limit, activation_coefficient);
 #endif
 
   int out_offset = mad24(mad24(mad24(batch_idx, out_height, out_height_idx),
diff --git a/mace/ops/opencl/cl/deconv_2d.cl b/mace/ops/opencl/cl/deconv_2d.cl
index ef7c7025..0a787771 100644
--- a/mace/ops/opencl/cl/deconv_2d.cl
+++ b/mace/ops/opencl/cl/deconv_2d.cl
@@ -9,7 +9,7 @@ __kernel void deconv_2d(OUT_OF_RANGE_PARAMS
 #endif
                         __write_only image2d_t output,
                         __private const float relux_max_limit,
-                        __private const float leakyrelu_coefficient,
+                        __private const float activation_coefficient,
                         __private const int in_height,
                         __private const int in_width,
                         __private const int in_channels,
@@ -129,12 +129,12 @@ __kernel void deconv_2d(OUT_OF_RANGE_PARAMS
     }
   }
 
-#if defined(USE_RELU) || defined(USE_LEAKYRELU) || defined(USE_RELUX) || defined(USE_TANH) || defined(USE_SIGMOID)
-  out0 = do_activation(out0, relux_max_limit, leakyrelu_coefficient);
-  out1 = do_activation(out1, relux_max_limit, leakyrelu_coefficient);
-  out2 = do_activation(out2, relux_max_limit, leakyrelu_coefficient);
-  out3 = do_activation(out3, relux_max_limit, leakyrelu_coefficient);
-  out4 = do_activation(out4, relux_max_limit, leakyrelu_coefficient);
+#if defined(USE_RELU) || defined(USE_LEAKYRELU) || defined(USE_RELUX) || defined(USE_TANH) || defined(USE_SIGMOID) || defined(USE_ELU)
+  out0 = do_activation(out0, relux_max_limit, activation_coefficient);
+  out1 = do_activation(out1, relux_max_limit, activation_coefficient);
+  out2 = do_activation(out2, relux_max_limit, activation_coefficient);
+  out3 = do_activation(out3, relux_max_limit, activation_coefficient);
+  out4 = do_activation(out4, relux_max_limit, activation_coefficient);
 #endif
 
   int2 out_pos;
diff --git a/mace/ops/opencl/cl/depthwise_conv2d.cl b/mace/ops/opencl/cl/depthwise_conv2d.cl
index 5a611968..e5bcdcbc 100644
--- a/mace/ops/opencl/cl/depthwise_conv2d.cl
+++ b/mace/ops/opencl/cl/depthwise_conv2d.cl
@@ -10,7 +10,7 @@ __kernel void depthwise_conv2d(OUT_OF_RANGE_PARAMS
 #endif
                                __write_only image2d_t output,
                                __private const float relux_max_limit,
-                               __private const float leakyrelu_coefficient,
+                               __private const float activation_coefficient,
                                __private const short in_height,
                                __private const short in_width,
                                __private const short in_ch_blks,
@@ -113,11 +113,11 @@ __kernel void depthwise_conv2d(OUT_OF_RANGE_PARAMS
     in_hb_idx += dilation_h;
   }
 
-#if defined(USE_RELU) || defined(USE_LEAKYRELU) || defined(USE_RELUX) || defined(USE_TANH) || defined(USE_SIGMOID)
-  out0 = do_activation(out0, relux_max_limit, leakyrelu_coefficient);
-  out1 = do_activation(out1, relux_max_limit, leakyrelu_coefficient);
-  out2 = do_activation(out2, relux_max_limit, leakyrelu_coefficient);
-  out3 = do_activation(out3, relux_max_limit, leakyrelu_coefficient);
+#if defined(USE_RELU) || defined(USE_LEAKYRELU) || defined(USE_RELUX) || defined(USE_TANH) || defined(USE_SIGMOID) || defined(USE_ELU)
+  out0 = do_activation(out0, relux_max_limit, activation_coefficient);
+  out1 = do_activation(out1, relux_max_limit, activation_coefficient);
+  out2 = do_activation(out2, relux_max_limit, activation_coefficient);
+  out3 = do_activation(out3, relux_max_limit, activation_coefficient);
 #endif
 
   const short out_x_base = mul24(out_ch_blk, out_width);
@@ -146,7 +146,7 @@ __kernel void depthwise_conv2d_s1(OUT_OF_RANGE_PARAMS
 #endif
                                   __write_only image2d_t output,
                                   __private const DATA_TYPE relux_max_limit,
-                                  __private const DATA_TYPE leakyrelu_coefficient,
+                                  __private const DATA_TYPE activation_coefficient,
                                   __private const short in_height,
                                   __private const short in_width,
                                   __private const short in_ch_blks,
@@ -240,11 +240,11 @@ __kernel void depthwise_conv2d_s1(OUT_OF_RANGE_PARAMS
     in_hb_idx += 1;
   }
 
-#if defined(USE_RELU) || defined(USE_LEAKYRELU) || defined(USE_RELUX) || defined(USE_TANH) || defined(USE_SIGMOID)
-  out0 = do_activation(out0, relux_max_limit, leakyrelu_coefficient);
-  out1 = do_activation(out1, relux_max_limit, leakyrelu_coefficient);
-  out2 = do_activation(out2, relux_max_limit, leakyrelu_coefficient);
-  out3 = do_activation(out3, relux_max_limit, leakyrelu_coefficient);
+#if defined(USE_RELU) || defined(USE_LEAKYRELU) || defined(USE_RELUX) || defined(USE_TANH) || defined(USE_SIGMOID) || defined(USE_ELU)
+  out0 = do_activation(out0, relux_max_limit, activation_coefficient);
+  out1 = do_activation(out1, relux_max_limit, activation_coefficient);
+  out2 = do_activation(out2, relux_max_limit, activation_coefficient);
+  out3 = do_activation(out3, relux_max_limit, activation_coefficient);
 #endif
 
   const short out_x_base = mul24(out_ch_blk, out_width);
diff --git a/mace/ops/opencl/cl/depthwise_conv2d_buffer.cl b/mace/ops/opencl/cl/depthwise_conv2d_buffer.cl
index 6c42c11f..b6661e6b 100644
--- a/mace/ops/opencl/cl/depthwise_conv2d_buffer.cl
+++ b/mace/ops/opencl/cl/depthwise_conv2d_buffer.cl
@@ -22,7 +22,7 @@ __kernel void depthwise_conv2d(BUFFER_OUT_OF_RANGE_PARAMS
                                __private const int dilated_h_offset,
                                __private const int dilated_w_offset,
                                __private const float relux_max_limit,
-                               __private const float leakyrelu_coefficient,
+                               __private const float activation_coefficient,
                                __global OUT_DATA_TYPE *output) {
   const int out_wc_blk_idx = get_global_id(0);
   const int out_hb_idx = get_global_id(1);
@@ -86,11 +86,11 @@ __kernel void depthwise_conv2d(BUFFER_OUT_OF_RANGE_PARAMS
     }
   }
 
-#if defined(USE_RELU) || defined(USE_LEAKYRELU) || defined(USE_RELUX) || defined(USE_TANH) || defined(USE_SIGMOID)
-  out0 = do_activation(out0, relux_max_limit, leakyrelu_coefficient);
-  out1 = do_activation(out1, relux_max_limit, leakyrelu_coefficient);
-  out2 = do_activation(out2, relux_max_limit, leakyrelu_coefficient);
-  out3 = do_activation(out3, relux_max_limit, leakyrelu_coefficient);
+#if defined(USE_RELU) || defined(USE_LEAKYRELU) || defined(USE_RELUX) || defined(USE_TANH) || defined(USE_SIGMOID) || defined(USE_ELU)
+  out0 = do_activation(out0, relux_max_limit, activation_coefficient);
+  out1 = do_activation(out1, relux_max_limit, activation_coefficient);
+  out2 = do_activation(out2, relux_max_limit, activation_coefficient);
+  out3 = do_activation(out3, relux_max_limit, activation_coefficient);
 #endif
 
   int out_offset = mad24(mad24(mad24(batch_idx, out_height, out_height_idx),
diff --git a/mace/ops/opencl/cl/depthwise_deconv2d.cl b/mace/ops/opencl/cl/depthwise_deconv2d.cl
index b86bc96d..72bdbf9c 100644
--- a/mace/ops/opencl/cl/depthwise_deconv2d.cl
+++ b/mace/ops/opencl/cl/depthwise_deconv2d.cl
@@ -9,7 +9,7 @@ __kernel void depthwise_deconv2d(OUT_OF_RANGE_PARAMS
 #endif
                                  __write_only image2d_t output,
                                  __private const float relux_max_limit,
-                                 __private const float leakyrelu_coefficient,
+                                 __private const float activation_coefficient,
                                  __private const int in_height,
                                  __private const int in_width,
                                  __private const int out_height,
@@ -109,12 +109,12 @@ __kernel void depthwise_deconv2d(OUT_OF_RANGE_PARAMS
     }
   }
 
-#if defined(USE_RELU) || defined(USE_LEAKYRELU) || defined(USE_RELUX) || defined(USE_TANH) || defined(USE_SIGMOID)
-  out0 = do_activation(out0, relux_max_limit, leakyrelu_coefficient);
-  out1 = do_activation(out1, relux_max_limit, leakyrelu_coefficient);
-  out2 = do_activation(out2, relux_max_limit, leakyrelu_coefficient);
-  out3 = do_activation(out3, relux_max_limit, leakyrelu_coefficient);
-  out4 = do_activation(out4, relux_max_limit, leakyrelu_coefficient);
+#if defined(USE_RELU) || defined(USE_LEAKYRELU) || defined(USE_RELUX) || defined(USE_TANH) || defined(USE_SIGMOID) || defined(USE_ELU)
+  out0 = do_activation(out0, relux_max_limit, activation_coefficient);
+  out1 = do_activation(out1, relux_max_limit, activation_coefficient);
+  out2 = do_activation(out2, relux_max_limit, activation_coefficient);
+  out3 = do_activation(out3, relux_max_limit, activation_coefficient);
+  out4 = do_activation(out4, relux_max_limit, activation_coefficient);
 #endif
 
diff --git a/mace/ops/opencl/cl/fully_connected.cl b/mace/ops/opencl/cl/fully_connected.cl
index f7f4bc48..b9b47173 100644
--- a/mace/ops/opencl/cl/fully_connected.cl
+++ b/mace/ops/opencl/cl/fully_connected.cl
@@ -13,7 +13,7 @@ __kernel void fully_connected(OUT_OF_RANGE_PARAMS
                               __private const int input_width,
                               __private const int input_channel,
                               __private const float relux_max_limit,
-                              __private const float leakyrelu_coefficient) {
+                              __private const float activation_coefficient) {
   const int batch_idx = get_global_id(0);
   const int out_blk_idx = get_global_id(1);
   const int input_chan_blk = (input_channel + 3) >> 2;
@@ -57,8 +57,8 @@ __kernel void fully_connected(OUT_OF_RANGE_PARAMS
     input_coord.y++;
   }
 
-#if defined(USE_RELU) || defined(USE_LEAKYRELU) || defined(USE_RELUX) || defined(USE_TANH) || defined(USE_SIGMOID)
-  result = do_activation(result, relux_max_limit, leakyrelu_coefficient);
+#if defined(USE_RELU) || defined(USE_LEAKYRELU) || defined(USE_RELUX) || defined(USE_TANH) || defined(USE_SIGMOID) || defined(USE_ELU)
+  result = do_activation(result, relux_max_limit, activation_coefficient);
 #endif
 
   WRITE_IMAGET(output, (int2)(out_blk_idx, batch_idx), result);
@@ -79,7 +79,7 @@ __kernel void fully_connected_width(OUT_OF_RANGE_PARAMS
                                     __private const int in_chan_blks,
                                     __private const int out_blks,
                                     __private const float relux_max_limit,
-                                    __private const float leakyrelu_coefficient) {
+                                    __private const float activation_coefficient) {
   const int inter_out_idx = get_global_id(0);
   const int width_blk_idx = get_global_id(1);
   const int width_blk_count = global_size_dim1;
@@ -149,8 +149,8 @@ __kernel void fully_connected_width(OUT_OF_RANGE_PARAMS
     inter_idx += 4;
   }
 
-#if defined(USE_RELU) || defined(USE_LEAKYRELU) || defined(USE_RELUX) || defined(USE_TANH) || defined(USE_SIGMOID)
-  result = do_activation(result, relux_max_limit, leakyrelu_coefficient);
+#if defined(USE_RELU) || defined(USE_LEAKYRELU) || defined(USE_RELUX) || defined(USE_TANH) || defined(USE_SIGMOID) || defined(USE_ELU)
+  result = do_activation(result, relux_max_limit, activation_coefficient);
 #endif
 
   WRITE_IMAGET(output, (int2)(out_blk_idx, batch_idx), result);
diff --git a/mace/ops/opencl/cl/winograd_transform.cl b/mace/ops/opencl/cl/winograd_transform.cl
index d30427b5..169d6148 100644
--- a/mace/ops/opencl/cl/winograd_transform.cl
+++ b/mace/ops/opencl/cl/winograd_transform.cl
@@ -128,7 +128,7 @@ __kernel void winograd_inverse_transform_2x2(OUT_OF_RANGE_PARAMS
                                              __private const int round_hw,
                                              __private const int round_w,
                                              __private const float relux_max_limit,
-                                             __private const float leakyrelu_coefficient) {
+                                             __private const float activation_coefficient) {
   const int width_idx = get_global_id(0);
   const int height_idx = get_global_id(1);
 
@@ -204,11 +204,11 @@ __kernel void winograd_inverse_transform_2x2(OUT_OF_RANGE_PARAMS
 #endif
 
-#if defined(USE_RELU) || defined(USE_LEAKYRELU) || defined(USE_RELUX) || defined(USE_TANH) || defined(USE_SIGMOID)
-  in0[0] = do_activation(in0[0], relux_max_limit, leakyrelu_coefficient);
-  in0[1] = do_activation(in0[1], relux_max_limit, leakyrelu_coefficient);
-  in1[0] = do_activation(in1[0], relux_max_limit, leakyrelu_coefficient);
-  in1[1] = do_activation(in1[1], relux_max_limit, leakyrelu_coefficient);
+#if defined(USE_RELU) || defined(USE_LEAKYRELU) || defined(USE_RELUX) || defined(USE_TANH) || defined(USE_SIGMOID) || defined(USE_ELU)
+  in0[0] = do_activation(in0[0], relux_max_limit, activation_coefficient);
+  in0[1] = do_activation(in0[1], relux_max_limit, activation_coefficient);
+  in1[0] = do_activation(in1[0], relux_max_limit, activation_coefficient);
+  in1[1] = do_activation(in1[1], relux_max_limit, activation_coefficient);
 #endif
 
   WRITE_IMAGET(output, (int2)(coord_x, coord_y), in0[0]);
@@ -397,7 +397,7 @@ __kernel void winograd_inverse_transform_4x4(OUT_OF_RANGE_PARAMS
                                              __private const int round_hw,
                                              __private const int round_w,
                                              __private const float relux_max_limit,
-                                             __private const float leakyrelu_coefficient) {
+                                             __private const float activation_coefficient) {
   const int width_idx = get_global_id(0);
   const int height_idx = get_global_id(1);
 
@@ -517,23 +517,23 @@ __kernel void winograd_inverse_transform_4x4(OUT_OF_RANGE_PARAMS
   out3[3] += bias_value;
 #endif
 
-#if defined(USE_RELU) || defined(USE_LEAKYRELU) || defined(USE_RELUX) || defined(USE_TANH) || defined(USE_SIGMOID)
-  out0[0] = do_activation(out0[0], relux_max_limit, leakyrelu_coefficient);
-  out0[1] = do_activation(out0[1], relux_max_limit, leakyrelu_coefficient);
-  out0[2] = do_activation(out0[2], relux_max_limit, leakyrelu_coefficient);
-  out0[3] = do_activation(out0[3], relux_max_limit, leakyrelu_coefficient);
-  out1[0] = do_activation(out1[0], relux_max_limit, leakyrelu_coefficient);
-  out1[1] = do_activation(out1[1], relux_max_limit, leakyrelu_coefficient);
-  out1[2] = do_activation(out1[2], relux_max_limit, leakyrelu_coefficient);
-  out1[3] = do_activation(out1[3], relux_max_limit, leakyrelu_coefficient);
-  out2[0] = do_activation(out2[0], relux_max_limit, leakyrelu_coefficient);
-  out2[1] = do_activation(out2[1], relux_max_limit, leakyrelu_coefficient);
-  out2[2] = do_activation(out2[2], relux_max_limit, leakyrelu_coefficient);
-  out2[3] = do_activation(out2[3], relux_max_limit, leakyrelu_coefficient);
-  out3[0] = do_activation(out3[0], relux_max_limit, leakyrelu_coefficient);
-  out3[1] = do_activation(out3[1], relux_max_limit, leakyrelu_coefficient);
-  out3[2] = do_activation(out3[2], relux_max_limit, leakyrelu_coefficient);
-  out3[3] = do_activation(out3[3], relux_max_limit, leakyrelu_coefficient);
+#if defined(USE_RELU) || defined(USE_LEAKYRELU) || defined(USE_RELUX) || defined(USE_TANH) || defined(USE_SIGMOID) || defined(USE_ELU)
+  out0[0] = do_activation(out0[0], relux_max_limit, activation_coefficient);
+  out0[1] = do_activation(out0[1], relux_max_limit, activation_coefficient);
+  out0[2] = do_activation(out0[2], relux_max_limit, activation_coefficient);
+  out0[3] = do_activation(out0[3], relux_max_limit, activation_coefficient);
+  out1[0] = do_activation(out1[0], relux_max_limit, activation_coefficient);
+  out1[1] = do_activation(out1[1], relux_max_limit, activation_coefficient);
+  out1[2] = do_activation(out1[2], relux_max_limit, activation_coefficient);
+  out1[3] = do_activation(out1[3], relux_max_limit, activation_coefficient);
+  out2[0] = do_activation(out2[0], relux_max_limit, activation_coefficient);
+  out2[1] = do_activation(out2[1], relux_max_limit, activation_coefficient);
+  out2[2] = do_activation(out2[2], relux_max_limit, activation_coefficient);
+  out2[3] = do_activation(out2[3], relux_max_limit, activation_coefficient);
+  out3[0] = do_activation(out3[0], relux_max_limit, activation_coefficient);
+  out3[1] = do_activation(out3[1], relux_max_limit, activation_coefficient);
+  out3[2] = do_activation(out3[2], relux_max_limit, activation_coefficient);
+  out3[3] = do_activation(out3[3], relux_max_limit, activation_coefficient);
 #endif
 
   const int num = min(4, out_width - out_width_idx);
diff --git a/mace/ops/opencl/conv_2d.h b/mace/ops/opencl/conv_2d.h
index d6dd40bd..d59273e7 100644
--- a/mace/ops/opencl/conv_2d.h
+++ b/mace/ops/opencl/conv_2d.h
@@ -46,7 +46,7 @@ class OpenCLConv2dKernel {
       const int *dilations,
       const ActivationType activation,
       const float relux_max_limit,
-      const float leakyrelu_coefficient,
+      const float activation_coefficient,
       const int winograd_blk_size,
       Tensor *output) = 0;
   MACE_EMPTY_VIRTUAL_DESTRUCTOR(OpenCLConv2dKernel);
diff --git a/mace/ops/opencl/deconv_2d.h b/mace/ops/opencl/deconv_2d.h
index 3335bebf..fc486c59 100644
--- a/mace/ops/opencl/deconv_2d.h
+++ b/mace/ops/opencl/deconv_2d.h
@@ -39,7 +39,7 @@ class OpenCLDeconv2dKernel {
       const int *padding_data,
       const ActivationType activation,
       const float relux_max_limit,
-      const float leakyrelu_coefficient,
+      const float activation_coefficient,
      const std::vector<index_t> &output_shape,
      Tensor *output) = 0;
   MACE_EMPTY_VIRTUAL_DESTRUCTOR(OpenCLDeconv2dKernel);
diff --git a/mace/ops/opencl/depthwise_conv2d.h b/mace/ops/opencl/depthwise_conv2d.h
index 98f97a20..ddf3b035 100644
--- a/mace/ops/opencl/depthwise_conv2d.h
+++ b/mace/ops/opencl/depthwise_conv2d.h
@@ -38,7 +38,7 @@ class OpenCLDepthwiseConv2dKernel {
       const int *dilations,
       const ActivationType activation,
       const float relux_max_limit,
-      const float leakyrelu_coefficient,
+      const float activation_coefficient,
       Tensor *output) = 0;
   MACE_EMPTY_VIRTUAL_DESTRUCTOR(OpenCLDepthwiseConv2dKernel);
 };
diff --git a/mace/ops/opencl/depthwise_deconv2d.h b/mace/ops/opencl/depthwise_deconv2d.h
index 46201072..f0426a7b 100644
--- a/mace/ops/opencl/depthwise_deconv2d.h
+++ b/mace/ops/opencl/depthwise_deconv2d.h
@@ -42,7 +42,7 @@ class OpenCLDepthwiseDeconv2dKernel {
       const int group,
       const ActivationType activation,
       const float relux_max_limit,
-      const float leakyrelu_coefficient,
+      const float activation_coefficient,
      const std::vector<index_t> &output_shape,
      Tensor *output) = 0;
   MACE_EMPTY_VIRTUAL_DESTRUCTOR(OpenCLDepthwiseDeconv2dKernel);
diff --git a/mace/ops/opencl/fully_connected.h b/mace/ops/opencl/fully_connected.h
index 88c1cbab..7a5ca790 100644
--- a/mace/ops/opencl/fully_connected.h
+++ b/mace/ops/opencl/fully_connected.h
@@ -34,7 +34,7 @@ class OpenCLFullyConnectedKernel {
       const Tensor *bias,
       const ActivationType activation,
       const float relux_max_limit,
-      const float leakyrelu_coefficient,
+      const float activation_coefficient,
       Tensor *output) = 0;
   MACE_EMPTY_VIRTUAL_DESTRUCTOR(OpenCLFullyConnectedKernel);
 };
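Every OpenCL wrapper above repeats the same dispatch: the ActivationType enum is mapped to a -DUSE_* option baked into the .cl program at build time, and ELU now joins the coefficient-carrying activations, reusing the kernel argument slot that previously only LeakyRelu consumed. A condensed standalone sketch of that mapping, with hypothetical names (the real switch statements live in the *.cc kernels above):

#include <set>
#include <string>

enum class Act { NOOP, RELU, RELUX, PRELU, TANH, SIGMOID, LEAKYRELU, ELU };

// Returns the compile-time options to pass when building the .cl program.
std::set<std::string> ActivationBuildOptions(Act act) {
  switch (act) {
    case Act::NOOP:      return {};
    case Act::RELU:      return {"-DUSE_RELU"};
    case Act::RELUX:     return {"-DUSE_RELUX"};
    case Act::PRELU:     return {"-DUSE_PRELU"};
    case Act::TANH:      return {"-DUSE_TANH"};
    case Act::SIGMOID:   return {"-DUSE_SIGMOID"};
    case Act::LEAKYRELU: return {"-DUSE_LEAKYRELU"};
    case Act::ELU:       return {"-DUSE_ELU"};  // the case this patch adds
  }
  return {};
}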
kernel_.setArg(idx++, *(alpha->opencl_image())); } kernel_.setArg(idx++, relux_max_limit_); - kernel_.setArg(idx++, leakyrelu_coefficient_); + kernel_.setArg(idx++, activation_coefficient_); kernel_.setArg(idx++, *(output->opencl_image())); input_shape_ = input->shape(); diff --git a/mace/ops/opencl/image/activation.h b/mace/ops/opencl/image/activation.h index bfbdc47c..2107964d 100644 --- a/mace/ops/opencl/image/activation.h +++ b/mace/ops/opencl/image/activation.h @@ -35,9 +35,9 @@ class ActivationKernel : public OpenCLActivationKernel { public: ActivationKernel(ActivationType type, float relux_max_limit, - float leakyrelu_coefficient) + float activation_coefficient) : activation_(type), relux_max_limit_(relux_max_limit), - leakyrelu_coefficient_(leakyrelu_coefficient) {} + activation_coefficient_(activation_coefficient) {} MaceStatus Compute( OpContext *context, @@ -48,7 +48,8 @@ class ActivationKernel : public OpenCLActivationKernel { private: ActivationType activation_; float relux_max_limit_; - float leakyrelu_coefficient_; + float activation_coefficient_; + cl::Kernel kernel_; uint32_t kwg_size_; std::vector input_shape_; diff --git a/mace/ops/opencl/image/batch_norm.cc b/mace/ops/opencl/image/batch_norm.cc index bfb496e7..82523326 100644 --- a/mace/ops/opencl/image/batch_norm.cc +++ b/mace/ops/opencl/image/batch_norm.cc @@ -22,11 +22,11 @@ namespace image { BatchNormKernel::BatchNormKernel(const float epsilon, const ActivationType activation, const float relux_max_limit, - const float leakyrelu_coefficient) + const float activation_coefficient) : epsilon_(epsilon), activation_(activation), relux_max_limit_(relux_max_limit), - leakyrelu_coefficient_(leakyrelu_coefficient) {} + activation_coefficient_(activation_coefficient) {} MaceStatus BatchNormKernel::Compute( OpContext *context, @@ -75,6 +75,8 @@ MaceStatus BatchNormKernel::Compute( break; case LEAKYRELU:built_options.emplace("-DUSE_LEAKYRELU"); break; + case ELU: built_options.emplace("-DUSE_ELU"); + break; default:LOG(FATAL) << "Unknown activation type: " << activation_; } @@ -99,7 +101,7 @@ MaceStatus BatchNormKernel::Compute( } kernel_.setArg(idx++, *(output->opencl_image())); kernel_.setArg(idx++, relux_max_limit_); - kernel_.setArg(idx++, leakyrelu_coefficient_); + kernel_.setArg(idx++, activation_coefficient_); input_shape_ = input->shape(); } diff --git a/mace/ops/opencl/image/batch_norm.h b/mace/ops/opencl/image/batch_norm.h index 9a93b534..b6508f00 100644 --- a/mace/ops/opencl/image/batch_norm.h +++ b/mace/ops/opencl/image/batch_norm.h @@ -37,7 +37,7 @@ class BatchNormKernel : public OpenCLBatchNormKernel { const float epsilon, const ActivationType activation, const float relux_max_limit, - const float leakyrelu_coefficient); + const float activation_coefficient); MaceStatus Compute(OpContext *context, const Tensor *input, const Tensor *scale, @@ -50,7 +50,7 @@ class BatchNormKernel : public OpenCLBatchNormKernel { const float epsilon_; const ActivationType activation_; const float relux_max_limit_; - const float leakyrelu_coefficient_; + const float activation_coefficient_; cl::Kernel kernel_; uint32_t kwg_size_; std::vector input_shape_; diff --git a/mace/ops/opencl/image/conv_2d.cc b/mace/ops/opencl/image/conv_2d.cc index 60a9b155..4f57c1aa 100644 --- a/mace/ops/opencl/image/conv_2d.cc +++ b/mace/ops/opencl/image/conv_2d.cc @@ -68,7 +68,7 @@ MaceStatus Conv2dKernel::Compute( const int *dilations, const ActivationType activation, const float relux_max_limit, - const float leakyrelu_coefficient, + const float 
activation_coefficient, const int wino_blk_size, Tensor *output) { index_t kernel_h = filter->dim(2); @@ -116,7 +116,7 @@ MaceStatus Conv2dKernel::Compute( paddings.data(), activation, relux_max_limit, - leakyrelu_coefficient, + activation_coefficient, wino_blk_size, &input_shape_, output, @@ -135,7 +135,7 @@ MaceStatus Conv2dKernel::Compute( dilations, activation, relux_max_limit, - leakyrelu_coefficient, + activation_coefficient, &input_shape_, output, &kwg_size_[0]); @@ -153,7 +153,7 @@ MaceStatus Conv2dKernel::Compute( dilations, activation, relux_max_limit, - leakyrelu_coefficient, + activation_coefficient, &input_shape_, output, &kwg_size_[0]); @@ -171,7 +171,7 @@ MaceStatus Conv2dKernel::Compute( dilations, activation, relux_max_limit, - leakyrelu_coefficient, + activation_coefficient, &input_shape_, output, &kwg_size_[0]); diff --git a/mace/ops/opencl/image/conv_2d.h b/mace/ops/opencl/image/conv_2d.h index 1ecd9131..2d54ddc9 100644 --- a/mace/ops/opencl/image/conv_2d.h +++ b/mace/ops/opencl/image/conv_2d.h @@ -39,7 +39,7 @@ extern MaceStatus Conv2dK1x1(OpContext *context, const int *dilations, const ActivationType activation, const float relux_max_limit, - const float leakyrelu_coefficient, + const float activation_coefficient, std::vector<index_t> *prev_input_shape, Tensor *output, uint32_t *kwg_size); @@ -55,7 +55,7 @@ extern MaceStatus Conv2dK3x3(OpContext *context, const int *dilations, const ActivationType activation, const float relux_max_limit, - const float leakyrelu_coefficient, + const float activation_coefficient, std::vector<index_t> *prev_input_shape, Tensor *output, uint32_t *kwg_size); @@ -71,7 +71,7 @@ extern MaceStatus Conv2d(OpContext *context, const int *dilations, const ActivationType activation, const float relux_max_limit, - const float leakyrelu_coefficient, + const float activation_coefficient, std::vector<index_t> *prev_input_shape, Tensor *output, uint32_t *kwg_size); @@ -84,7 +84,7 @@ extern MaceStatus WinogradConv2dK3x3S1(OpContext *context, const int *padding, const ActivationType activation, const float relux_max_limit, - const float leakyrelu_coefficient, + const float activation_coefficient, const int wino_blk_size, std::vector<index_t> *prev_input_shape, Tensor *output, @@ -111,7 +111,7 @@ class Conv2dKernel : public OpenCLConv2dKernel { const int *dilations, const ActivationType activation, const float relux_max_limit, - const float leakyrelu_coefficient, + const float activation_coefficient, const int wino_blk_size, Tensor *output) override; diff --git a/mace/ops/opencl/image/conv_2d_1x1.cc b/mace/ops/opencl/image/conv_2d_1x1.cc index 2d4baa5b..8827d99e 100644 --- a/mace/ops/opencl/image/conv_2d_1x1.cc +++ b/mace/ops/opencl/image/conv_2d_1x1.cc @@ -77,7 +77,7 @@ MaceStatus Conv2dK1x1(OpContext *context, const int *dilations, const ActivationType activation, const float relux_max_limit, - const float leakyrelu_coefficient, + const float activation_coefficient, std::vector<index_t> *prev_input_shape, Tensor *output, uint32_t *kwg_size) { @@ -135,6 +135,10 @@ MaceStatus Conv2dK1x1(OpContext *context, built_options.emplace("-DUSE_LEAKYRELU"); break; } + case ELU: { + built_options.emplace("-DUSE_ELU"); + break; + } default: { LOG(FATAL) << "Unknown activation type: " << activation; } @@ -165,7 +169,7 @@ MaceStatus Conv2dK1x1(OpContext *context, kernel->setArg(idx++, *(output->opencl_image())); // FIXME handle flexable data type: half not supported kernel->setArg(idx++, relux_max_limit); - kernel->setArg(idx++, leakyrelu_coefficient); + kernel->setArg(idx++, activation_coefficient);
kernel->setArg(idx++, static_cast<int>(input_height)); kernel->setArg(idx++, static_cast<int>(input_width)); kernel->setArg(idx++, static_cast<int>(input_channel_blocks)); diff --git a/mace/ops/opencl/image/conv_2d_3x3.cc b/mace/ops/opencl/image/conv_2d_3x3.cc index c5ea2890..8f3b2939 100644 --- a/mace/ops/opencl/image/conv_2d_3x3.cc +++ b/mace/ops/opencl/image/conv_2d_3x3.cc @@ -70,7 +70,7 @@ MaceStatus Conv2dK3x3(OpContext *context, const int *dilations, const ActivationType activation, const float relux_max_limit, - const float leakyrelu_coefficient, + const float activation_coefficient, std::vector<index_t> *prev_input_shape, Tensor *output, uint32_t *kwg_size) { @@ -120,6 +120,10 @@ MaceStatus Conv2dK3x3(OpContext *context, built_options.emplace("-DUSE_LEAKYRELU"); break; } + case ELU: { + built_options.emplace("-DUSE_ELU"); + break; + } default: { LOG(FATAL) << "Unknown activation type: " << activation; } @@ -149,7 +153,7 @@ MaceStatus Conv2dK3x3(OpContext *context, } kernel->setArg(idx++, *(output->opencl_image())); kernel->setArg(idx++, relux_max_limit); - kernel->setArg(idx++, leakyrelu_coefficient); + kernel->setArg(idx++, activation_coefficient); kernel->setArg(idx++, static_cast<int>(input->dim(1))); kernel->setArg(idx++, static_cast<int>(input->dim(2))); kernel->setArg(idx++, static_cast<int>(input_channel_blocks)); diff --git a/mace/ops/opencl/image/conv_2d_general.cc b/mace/ops/opencl/image/conv_2d_general.cc index b84d8394..22be8709 100644 --- a/mace/ops/opencl/image/conv_2d_general.cc +++ b/mace/ops/opencl/image/conv_2d_general.cc @@ -78,7 +78,7 @@ MaceStatus Conv2d(OpContext *context, const int *dilations, const ActivationType activation, const float relux_max_limit, - const float leakyrelu_coefficient, + const float activation_coefficient, std::vector<index_t> *prev_input_shape, Tensor *output, uint32_t *kwg_size) { @@ -128,6 +128,9 @@ MaceStatus Conv2d(OpContext *context, built_options.emplace("-DUSE_LEAKYRELU"); break; } + case ELU: + built_options.emplace("-DUSE_ELU"); + break; default: { LOG(FATAL) << "Unknown activation type: " << activation; } @@ -157,7 +160,7 @@ MaceStatus Conv2d(OpContext *context, } kernel->setArg(idx++, *(output->opencl_image())); kernel->setArg(idx++, relux_max_limit); - kernel->setArg(idx++, leakyrelu_coefficient); + kernel->setArg(idx++, activation_coefficient); kernel->setArg(idx++, static_cast<int>(input->dim(1))); kernel->setArg(idx++, static_cast<int>(input->dim(2))); kernel->setArg(idx++, static_cast<int>(input_channel_blocks)); diff --git a/mace/ops/opencl/image/deconv_2d.cc b/mace/ops/opencl/image/deconv_2d.cc index 0509fcf0..8284b1dd 100644 --- a/mace/ops/opencl/image/deconv_2d.cc +++ b/mace/ops/opencl/image/deconv_2d.cc @@ -29,7 +29,7 @@ MaceStatus Deconv2dKernel::Compute( const int *padding_data, const ActivationType activation, const float relux_max_limit, - const float leakyrelu_coefficient, + const float activation_coefficient, const std::vector<index_t> &output_shape, Tensor *output) { std::vector<size_t> output_image_shape; @@ -90,6 +90,9 @@ MaceStatus Deconv2dKernel::Compute( case LEAKYRELU: built_options.emplace("-DUSE_LEAKYRELU"); break; + case ELU: + built_options.emplace("-DUSE_ELU"); + break; default: LOG(FATAL) << "Unknown activation type: " << activation; } @@ -117,7 +120,7 @@ MaceStatus Deconv2dKernel::Compute( } kernel_.setArg(idx++, *(output->opencl_image())); kernel_.setArg(idx++, relux_max_limit); - kernel_.setArg(idx++, leakyrelu_coefficient); + kernel_.setArg(idx++, activation_coefficient); kernel_.setArg(idx++, static_cast<int32_t>(input->dim(1))); kernel_.setArg(idx++, static_cast<int32_t>(input->dim(2)));
kernel_.setArg(idx++, static_cast<int32_t>(input->dim(3))); diff --git a/mace/ops/opencl/image/deconv_2d.h b/mace/ops/opencl/image/deconv_2d.h index 2ab38504..928630b7 100644 --- a/mace/ops/opencl/image/deconv_2d.h +++ b/mace/ops/opencl/image/deconv_2d.h @@ -41,7 +41,7 @@ class Deconv2dKernel : public OpenCLDeconv2dKernel { const int *padding_data, const ActivationType activation, const float relux_max_limit, - const float leakyrelu_coefficient, + const float activation_coefficient, const std::vector<index_t> &output_shape, Tensor *output) override; diff --git a/mace/ops/opencl/image/depthwise_conv2d.cc b/mace/ops/opencl/image/depthwise_conv2d.cc index 0101ea13..2a1776ce 100644 --- a/mace/ops/opencl/image/depthwise_conv2d.cc +++ b/mace/ops/opencl/image/depthwise_conv2d.cc @@ -73,7 +73,7 @@ MaceStatus DepthwiseConv2d(OpContext *context, const int *dilations, const ActivationType activation, const float relux_max_limit, - const float leakyrelu_coefficient, + const float activation_coefficient, std::vector<index_t> *prev_input_shape, Tensor *output, uint32_t *kwg_size) { @@ -129,6 +129,9 @@ MaceStatus DepthwiseConv2d(OpContext *context, case LEAKYRELU: built_options.emplace("-DUSE_LEAKYRELU"); break; + case ELU: + built_options.emplace("-DUSE_ELU"); + break; default: LOG(FATAL) << "Unknown activation type: " << activation; } @@ -162,7 +165,7 @@ MaceStatus DepthwiseConv2d(OpContext *context, } kernel->setArg(idx++, *(output->opencl_image())); kernel->setArg(idx++, relux_max_limit); - kernel->setArg(idx++, leakyrelu_coefficient); + kernel->setArg(idx++, activation_coefficient); kernel->setArg(idx++, static_cast<int16_t>(input_height)); kernel->setArg(idx++, static_cast<int16_t>(input_width)); kernel->setArg(idx++, static_cast<int16_t>(input_channel_blocks)); @@ -204,7 +207,7 @@ MaceStatus DepthwiseConv2dKernel::Compute( const int *dilations, const ActivationType activation, const float relux_max_limit, - const float leakyrelu_coefficient, + const float activation_coefficient, Tensor *output) { index_t kernel_h = filter->dim(2); index_t kernel_w = filter->dim(3); @@ -243,7 +246,7 @@ MaceStatus DepthwiseConv2dKernel::Compute( return depthwise::DepthwiseConv2d( context, &kernel_, input, filter, bias, strides[0], paddings.data(), - dilations, activation, relux_max_limit, leakyrelu_coefficient, + dilations, activation, relux_max_limit, activation_coefficient, &input_shape_, output, &kwg_size_); } diff --git a/mace/ops/opencl/image/depthwise_conv2d.h b/mace/ops/opencl/image/depthwise_conv2d.h index fc8833dd..52b036de 100644 --- a/mace/ops/opencl/image/depthwise_conv2d.h +++ b/mace/ops/opencl/image/depthwise_conv2d.h @@ -39,7 +39,7 @@ MaceStatus DepthwiseConv2d(OpContext *context, const int *dilations, const ActivationType activation, const float relux_max_limit, - const float leakyrelu_coefficient, + const float activation_coefficient, std::vector<index_t> *prev_input_shape, Tensor *output, uint32_t *kwg_size); @@ -58,7 +58,7 @@ class DepthwiseConv2dKernel : public OpenCLDepthwiseConv2dKernel { const int *dilations, const ActivationType activation, const float relux_max_limit, - const float leakyrelu_coefficient, + const float activation_coefficient, Tensor *output) override; private: diff --git a/mace/ops/opencl/image/depthwise_deconv2d.cc b/mace/ops/opencl/image/depthwise_deconv2d.cc index 6a8d7eb9..59331290 100644 --- a/mace/ops/opencl/image/depthwise_deconv2d.cc +++ b/mace/ops/opencl/image/depthwise_deconv2d.cc @@ -30,7 +30,7 @@ MaceStatus DepthwiseDeconv2dKernel::Compute( const int group, const ActivationType activation, const float relux_max_limit, - const
float leakyrelu_coefficient, + const float activation_coefficient, const std::vector<index_t> &output_shape, Tensor *output) { const index_t batch = output_shape[0]; @@ -95,6 +95,9 @@ MaceStatus DepthwiseDeconv2dKernel::Compute( case LEAKYRELU: built_options.emplace("-DUSE_LEAKYRELU"); break; + case ELU: + built_options.emplace("-DUSE_ELU"); + break; default: LOG(FATAL) << "Unknown activation type: " << activation; } @@ -122,7 +125,7 @@ MaceStatus DepthwiseDeconv2dKernel::Compute( } kernel_.setArg(idx++, *(output->opencl_image())); kernel_.setArg(idx++, relux_max_limit); - kernel_.setArg(idx++, leakyrelu_coefficient); + kernel_.setArg(idx++, activation_coefficient); kernel_.setArg(idx++, static_cast<int32_t>(input->dim(1))); kernel_.setArg(idx++, static_cast<int32_t>(input->dim(2))); kernel_.setArg(idx++, static_cast<int32_t>(height)); diff --git a/mace/ops/opencl/image/depthwise_deconv2d.h b/mace/ops/opencl/image/depthwise_deconv2d.h index 4643a9c1..e58e154b 100644 --- a/mace/ops/opencl/image/depthwise_deconv2d.h +++ b/mace/ops/opencl/image/depthwise_deconv2d.h @@ -42,7 +42,7 @@ class DepthwiseDeconv2dKernel : public OpenCLDepthwiseDeconv2dKernel { const int group, const ActivationType activation, const float relux_max_limit, - const float leakyrelu_coefficient, + const float activation_coefficient, const std::vector<index_t> &output_shape, Tensor *output) override; diff --git a/mace/ops/opencl/image/fully_connected.cc b/mace/ops/opencl/image/fully_connected.cc index 9ec83e91..4fc7d719 100644 --- a/mace/ops/opencl/image/fully_connected.cc +++ b/mace/ops/opencl/image/fully_connected.cc @@ -27,7 +27,7 @@ MaceStatus FullyConnectedKernel::Compute( const Tensor *bias, const ActivationType activation, const float relux_max_limit, - const float leakyrelu_coefficient, + const float activation_coefficient, Tensor *output) { std::vector<index_t> output_shape = {input->dim(0), 1, 1, weight->dim(0)}; std::vector<size_t> output_image_shape; @@ -71,6 +71,9 @@ MaceStatus FullyConnectedKernel::Compute( case LEAKYRELU: built_options.emplace("-DUSE_LEAKYRELU"); break; + case ELU: + built_options.emplace("-DUSE_ELU"); + break; default: LOG(FATAL) << "Unknown activation type: " << activation; } @@ -121,7 +124,7 @@ MaceStatus FullyConnectedKernel::Compute( kernel_.setArg(idx++, static_cast<int>(RoundUpDiv4(input->dim(3)))); kernel_.setArg(idx++, static_cast<int>(output_blocks)); kernel_.setArg(idx++, relux_max_limit); - kernel_.setArg(idx++, leakyrelu_coefficient); + kernel_.setArg(idx++, activation_coefficient); input_shape_ = input->shape(); } diff --git a/mace/ops/opencl/image/fully_connected.h b/mace/ops/opencl/image/fully_connected.h index 46a93a61..cd063cb5 100644 --- a/mace/ops/opencl/image/fully_connected.h +++ b/mace/ops/opencl/image/fully_connected.h @@ -40,7 +40,7 @@ class FullyConnectedKernel : public OpenCLFullyConnectedKernel { const Tensor *bias, const ActivationType activation, const float relux_max_limit, - const float leakyrelu_coefficient, + const float activation_coefficient, Tensor *output) override; private: diff --git a/mace/ops/opencl/image/winograd_conv2d.cc b/mace/ops/opencl/image/winograd_conv2d.cc index e5c2be97..0ae87013 100644 --- a/mace/ops/opencl/image/winograd_conv2d.cc +++ b/mace/ops/opencl/image/winograd_conv2d.cc @@ -113,7 +113,7 @@ MaceStatus WinogradOutputTransform(OpContext *context, const int wino_blk_size, const ActivationType activation, const float relux_max_limit, - const float leakyrelu_coefficient, + const float activation_coefficient, const bool input_changed, Tensor *output_tensor, uint32_t *kwg_size, @@ -213,7 +213,7 @@ MaceStatus
WinogradOutputTransform(OpContext *context, kernel->setArg(idx++, static_cast<int>(round_h * round_w)); kernel->setArg(idx++, static_cast<int>(round_w)); kernel->setArg(idx++, relux_max_limit); - kernel->setArg(idx++, leakyrelu_coefficient); + kernel->setArg(idx++, activation_coefficient); } const std::vector<uint32_t> lws = {*kwg_size / 8, 8, 0}; std::string tuning_key = @@ -237,7 +237,7 @@ extern MaceStatus WinogradConv2dK3x3S1(OpContext *context, const int *paddings, const ActivationType activation, const float relux_max_limit, - const float leakyrelu_coefficient, + const float activation_coefficient, const int wino_blk_size, std::vector<index_t> *prev_input_shape, Tensor *output, @@ -355,7 +355,7 @@ extern MaceStatus WinogradConv2dK3x3S1(OpContext *context, MACE_RETURN_IF_ERROR(WinogradOutputTransform( context, kernels[2], mm_output.get(), bias, round_h, round_w, wino_blk_size, activation, relux_max_limit, - leakyrelu_coefficient, input_changed, output, kwg_size[2], + activation_coefficient, input_changed, output, kwg_size[2], &t_output_future)) MergeMultipleFutureWaitFn({t_input_future, mm_future, t_output_future}, diff --git a/mace/ops/ref/activation.cc b/mace/ops/ref/activation.cc index bb798531..c9402709 100644 --- a/mace/ops/ref/activation.cc +++ b/mace/ops/ref/activation.cc @@ -81,7 +81,7 @@ void Activation::DoActivation(const OpContext *context, for (index_t i = 0; i < size; ++i) { *output_ptr = std::max(*input_ptr, 0.f) - + std::min(*input_ptr, 0.f) * leakyrelu_coefficient_; + + std::min(*input_ptr, 0.f) * activation_coefficient_; ++input_ptr; ++output_ptr; } @@ -104,6 +104,19 @@ void Activation::DoActivation(const OpContext *context, break; } + case ELU: { + for (index_t i = 0; i < input->size(); ++i) { + const auto in_val = *input_ptr++; + if (in_val < 0) { + *output_ptr = (std::exp(in_val) - 1) * activation_coefficient_; + } else { + *output_ptr = in_val; + } + output_ptr++; + } + break; + } + case NOOP:break; default:MACE_NOT_IMPLEMENTED; diff --git a/mace/ops/reverse.cc b/mace/ops/reverse.cc index d49f14a1..6f6e0247 100644 --- a/mace/ops/reverse.cc +++ b/mace/ops/reverse.cc @@ -32,6 +32,8 @@ class ReverseOp : public Operation { const Tensor *input = this->Input(INPUT); const Tensor *axis = this->Input(AXIS); Tensor *output = this->Output(OUTPUT); + Tensor::MappingGuard input_guard(input); + Tensor::MappingGuard axis_guard(axis); MACE_CHECK(axis->dim_size() == 1, "Only support reverse in one axis now"); diff --git a/micro/ops/utils/activation.cc b/micro/ops/utils/activation.cc index 29f34d2f..4df68328 100644 --- a/micro/ops/utils/activation.cc +++ b/micro/ops/utils/activation.cc @@ -31,17 +31,17 @@ MaceStatus Activation::Init(const framework::Operator *op) { atcivation_type = "NOOP"; } const float max_limit = op->GetArgByName("max_limit", 0.0f); - const float leakyrelu_coefficient = - op->GetArgByName("leakyrelu_coefficient", 0.0f); + const float activation_coefficient = + op->GetArgByName("activation_coefficient", 0.0f); - return Init(atcivation_type, max_limit, leakyrelu_coefficient); + return Init(atcivation_type, max_limit, activation_coefficient); } MaceStatus Activation::Init(const char *type, const float limit, - const float leakyrelu_coefficient) { + const float activation_coefficient) { type_ = StringToActivationType(type); limit_ = limit; - leakyrelu_coefficient_ = leakyrelu_coefficient; + activation_coefficient_ = activation_coefficient; return MACE_SUCCESS; } @@ -71,7 +71,7 @@ MaceStatus Activation::Compute(const mifloat *input_ptr, for (int32_t i = 0; i < size; ++i) { float input =
*input_ptr; *output_ptr = base::max(input, 0.f) + - base::min(input, 0.f) * leakyrelu_coefficient_; // NOLINT + base::min(input, 0.f) * activation_coefficient_; // NOLINT ++input_ptr; ++output_ptr; } diff --git a/micro/ops/utils/activation.h b/micro/ops/utils/activation.h index f2786157..2a4d5c67 100644 --- a/micro/ops/utils/activation.h +++ b/micro/ops/utils/activation.h @@ -44,7 +44,7 @@ class Activation { MaceStatus Init(const framework::Operator *op); MaceStatus Init(const char *type, const float limit, - const float leakyrelu_coefficient); + const float activation_coefficient); MaceStatus Compute(const mifloat *input_ptr, const int32_t size, mifloat *output_ptr); ActivationType GetActivationType(); @@ -55,7 +55,7 @@ class Activation { private: ActivationType type_; float limit_; - float leakyrelu_coefficient_; + float activation_coefficient_; }; } // namespace ops diff --git a/micro/test/ccunit/micro/ops/activation_test.cc b/micro/test/ccunit/micro/ops/activation_test.cc index 693f13cd..b38f5053 100644 --- a/micro/test/ccunit/micro/ops/activation_test.cc +++ b/micro/test/ccunit/micro/ops/activation_test.cc @@ -67,7 +67,7 @@ void TestSimpleLeakyRelu() { framework::SubstituteOp substitude_op; substitude_op.AddInput(input, input_dims, 4) .AddRepeatArg("activation", activation_type, arg_type_len) - .AddArg("leakyrelu_coefficient", 0.1f) + .AddArg("activation_coefficient", 0.1f) .AddOutput(output, output_dims, 4); activation_op.Init(NULL, reinterpret_cast( diff --git a/test/ccbenchmark/mace/ops/activation_benchmark.cc b/test/ccbenchmark/mace/ops/activation_benchmark.cc index a991e6d0..18fe6ffe 100644 --- a/test/ccbenchmark/mace/ops/activation_benchmark.cc +++ b/test/ccbenchmark/mace/ops/activation_benchmark.cc @@ -250,14 +250,14 @@ void EluBenchmark(int iters, int batch, int channels, int height, int width) { } else { MACE_NOT_IMPLEMENTED; } - net.AddRandomInput<D, float>("Alpha", {channels}, true); OpDefBuilder("Activation", "EluBM") .Input("Input") - .Input("Alpha") .Output("Output") .AddStringArg("activation", "ELU") + .AddFloatArg("activation_coefficient", 1.0) .AddIntArg("T", static_cast<int>(DataTypeToEnum<T>::value)) + .Finalize(net.NewOperatorDef()); // Warm-up diff --git a/test/ccunit/mace/ops/activation_test.cc b/test/ccunit/mace/ops/activation_test.cc index f9b8bdb0..3988faf6 100644 --- a/test/ccunit/mace/ops/activation_test.cc +++ b/test/ccunit/mace/ops/activation_test.cc @@ -66,7 +66,7 @@ void TestSimpleLeakyRelu() { .Input("Input") .Output("Output") .AddStringArg("activation", "LEAKYRELU") - .AddFloatArg("leakyrelu_coefficient", 0.1) + .AddFloatArg("activation_coefficient", 0.1) .Finalize(net.NewOperatorDef()); // Run @@ -243,15 +243,14 @@ void TestSimpleElu() { // Add input data net.AddInputFromArray<D, float>( "Input", {2, 2, 2, 2}, - {-7, 7, -6, 6, -5, -5, -4, -4, -3, 3, -2, 2, -1, -1, 0, 0}); - net.AddInputFromArray<D, float>("Alpha", {2}, {2.0, 3.0}, true); + {-7, 7, -6, 6, -5, 5, -4, 4, -3, 3, -2, 2, -1, 1, 0, 0}); if (D == DeviceType::GPU) { OpDefBuilder("Activation", "EluTest") .Input("Input") - .Input("Alpha") .Output("Output") .AddStringArg("activation", "ELU") + .AddFloatArg("activation_coefficient", 2.0) .Finalize(net.NewOperatorDef()); // Run @@ -261,9 +260,9 @@ void TestSimpleElu() { "Input", DataFormat::NHWC, "InputNCHW", DataFormat::NCHW); OpDefBuilder("Activation", "EluTest") .Input("InputNCHW") - .Input("Alpha") .Output("OutputNCHW") .AddStringArg("activation", "ELU") + .AddFloatArg("activation_coefficient", 2.0) .Finalize(net.NewOperatorDef()); // Run @@ -275,9 +274,9 @@
auto expected = net.CreateTensor<float>( {2, 2, 2, 2}, {-1.998176236068891, 7, -1.9950424956466672, 6, -1.986524106001829, - -2.9797861590027437, -1.9633687222225316, -2.9450530833337973, + 5, -1.9633687222225316, 4, -1.900425863264272, 3, -1.7293294335267746, 2, -1.2642411176571153, - -1.896361676485673, 0, 0}); + 1, 0, 0}); ExpectTensorNear<float>(*expected, *net.GetOutput("Output"), 1e-5); } } // namespace @@ -439,7 +438,7 @@ void TestBFloat16(const char *activation) { .Input("Alpha") .Output("Output") .AddStringArg("activation", activation) - .AddFloatArg("leakyrelu_coefficient", 0.1) + .AddFloatArg("activation_coefficient", 0.1) .AddFloatArg("max_limit", 6) .AddIntArg("T", static_cast<int>(DT_FLOAT)) .Finalize(net.NewOperatorDef()); @@ -450,7 +449,7 @@ void TestBFloat16(const char *activation) { .Input("BF16Alpha") .Output("BF16Output") .AddStringArg("activation", activation) - .AddFloatArg("leakyrelu_coefficient", 0.1) + .AddFloatArg("activation_coefficient", 0.1) .AddFloatArg("max_limit", 6) .AddIntArg("T", static_cast<int>(DT_BFLOAT16)) .Finalize(net.NewOperatorDef()); diff --git a/test/ccunit/mace/ops/batch_norm_test.cc b/test/ccunit/mace/ops/batch_norm_test.cc index 0a07fc64..25edcaf5 100644 --- a/test/ccunit/mace/ops/batch_norm_test.cc +++ b/test/ccunit/mace/ops/batch_norm_test.cc @@ -108,7 +108,7 @@ TEST_F(BatchNormOpTest, SimpleRandomOPENCL) { .AddFloatArg("epsilon", 1e-3) .Output("OutputNCHW") .AddStringArg("activation", "LEAKYRELU") - .AddFloatArg("leakyrelu_coefficient", 0.1) + .AddFloatArg("activation_coefficient", 0.1) .Finalize(net.NewOperatorDef()); // run cpu @@ -131,7 +131,7 @@ TEST_F(BatchNormOpTest, SimpleRandomOPENCL) { .AddFloatArg("epsilon", 1e-3) .Output("Output") .AddStringArg("activation", "LEAKYRELU") - .AddFloatArg("leakyrelu_coefficient", 0.1) + .AddFloatArg("activation_coefficient", 0.1) .Finalize(net.NewOperatorDef()); net.Setup(DeviceType::GPU); diff --git a/test/ccunit/mace/ops/conv_2d_test.cc b/test/ccunit/mace/ops/conv_2d_test.cc index 8205c590..6d04d05e 100644 --- a/test/ccunit/mace/ops/conv_2d_test.cc +++ b/test/ccunit/mace/ops/conv_2d_test.cc @@ -684,7 +684,7 @@ void TestComplexConvNxN(const std::vector<index_t> &shape, .AddIntArg("padding", type) .AddIntsArg("dilations", {1, 1}) .AddStringArg("activation", "LEAKYRELU") - .AddFloatArg("leakyrelu_coefficient", 0.1) + .AddFloatArg("activation_coefficient", 0.1) .AddIntArg("T", static_cast<int>(DataTypeToEnum<T>::value)) .Finalize(net.NewOperatorDef()); @@ -709,7 +709,7 @@ void TestComplexConvNxN(const std::vector<index_t> &shape, .AddIntArg("padding", type) .AddIntsArg("dilations", {1, 1}) .AddStringArg("activation", "LEAKYRELU") - .AddFloatArg("leakyrelu_coefficient", 0.1) + .AddFloatArg("activation_coefficient", 0.1) .AddIntArg("T", static_cast<int>(DataTypeToEnum<T>::value)) .AddIntArg("wino_block_size", wino_blk_size) .Finalize(net.NewOperatorDef()); diff --git a/test/ccunit/mace/ops/deconv_2d_test.cc b/test/ccunit/mace/ops/deconv_2d_test.cc index 9ea8161e..83c92efa 100644 --- a/test/ccunit/mace/ops/deconv_2d_test.cc +++ b/test/ccunit/mace/ops/deconv_2d_test.cc @@ -421,7 +421,7 @@ void TestComplexDeconvNxN(const int batch, .AddIntsArg("padding_values", paddings) .AddIntArg("framework_type", model_type) .AddStringArg("activation", "LEAKYRELU") - .AddFloatArg("leakyrelu_coefficient", 0.1) + .AddFloatArg("activation_coefficient", 0.1) .AddIntArg("T", static_cast<int>(DataTypeToEnum<T>::value)) .Finalize(net.NewOperatorDef()); } else { @@ -459,7 +459,7 @@ void TestComplexDeconvNxN(const int batch, .AddIntsArg("padding_values", paddings) .AddIntArg("framework_type",
model_type) .AddStringArg("activation", "LEAKYRELU") - .AddFloatArg("leakyrelu_coefficient", 0.1) + .AddFloatArg("activation_coefficient", 0.1) .AddIntArg("T", static_cast(DataTypeToEnum::value)) .Finalize(net.NewOperatorDef()); } else { diff --git a/test/ccunit/mace/ops/depthwise_conv2d_test.cc b/test/ccunit/mace/ops/depthwise_conv2d_test.cc index b0db1b3d..f3585fcd 100644 --- a/test/ccunit/mace/ops/depthwise_conv2d_test.cc +++ b/test/ccunit/mace/ops/depthwise_conv2d_test.cc @@ -261,7 +261,7 @@ void TestNxNS12(const index_t height, const index_t width) { .AddIntsArg("dilations", {1, 1}) .AddIntArg("T", static_cast(DataTypeToEnum::value)) .AddStringArg("activation", "LEAKYRELU") - .AddFloatArg("leakyrelu_coefficient", 0.1) + .AddFloatArg("activation_coefficient", 0.1) .Finalize(net.NewOperatorDef()); // Run on cpu @@ -284,7 +284,7 @@ void TestNxNS12(const index_t height, const index_t width) { .AddIntsArg("dilations", {1, 1}) .AddIntArg("T", static_cast(DataTypeToEnum::value)) .AddStringArg("activation", "LEAKYRELU") - .AddFloatArg("leakyrelu_coefficient", 0.1) + .AddFloatArg("activation_coefficient", 0.1) .Finalize(net.NewOperatorDef()); net.RunOp(DeviceType::GPU); diff --git a/test/ccunit/mace/ops/depthwise_deconv2d_test.cc b/test/ccunit/mace/ops/depthwise_deconv2d_test.cc index fda0cf59..0189939b 100644 --- a/test/ccunit/mace/ops/depthwise_deconv2d_test.cc +++ b/test/ccunit/mace/ops/depthwise_deconv2d_test.cc @@ -206,7 +206,7 @@ void RandomTest(index_t batch, .AddIntArg("group", channel) .AddIntsArg("dilations", {1, 1}) .AddStringArg("activation", "LEAKYRELU") - .AddFloatArg("leakyrelu_coefficient", 0.1f) + .AddFloatArg("activation_coefficient", 0.1f) .AddIntArg("T", static_cast(DataTypeToEnum::value)) .Finalize(net.NewOperatorDef()); // Run @@ -229,7 +229,7 @@ void RandomTest(index_t batch, .AddIntsArg("padding_values", {padding, padding}) .AddIntArg("group", channel) .AddStringArg("activation", "LEAKYRELU") - .AddFloatArg("leakyrelu_coefficient", 0.1f) + .AddFloatArg("activation_coefficient", 0.1f) .AddIntArg("T", static_cast(DataTypeToEnum::value)) .Finalize(net.NewOperatorDef()); diff --git a/test/ccunit/mace/ops/fully_connected_test.cc b/test/ccunit/mace/ops/fully_connected_test.cc index 25dcbebb..7de4ab9f 100644 --- a/test/ccunit/mace/ops/fully_connected_test.cc +++ b/test/ccunit/mace/ops/fully_connected_test.cc @@ -138,7 +138,7 @@ void Random(const index_t batch, .Input("Bias") .Output("OutputNCHW") .AddStringArg("activation", "LEAKYRELU") - .AddFloatArg("leakyrelu_coefficient", 0.1f) + .AddFloatArg("activation_coefficient", 0.1f) .Finalize(net.NewOperatorDef()); // run cpu @@ -158,7 +158,7 @@ void Random(const index_t batch, .Input("Bias") .Output("Output") .AddStringArg("activation", "LEAKYRELU") - .AddFloatArg("leakyrelu_coefficient", 0.1f) + .AddFloatArg("activation_coefficient", 0.1f) .AddIntArg("T", static_cast(DataTypeToEnum::value)) .Finalize(net.NewOperatorDef()); diff --git a/tools/python/transform/base_converter.py b/tools/python/transform/base_converter.py index 3b6279ff..2862b785 100644 --- a/tools/python/transform/base_converter.py +++ b/tools/python/transform/base_converter.py @@ -223,7 +223,7 @@ class MaceKeyword(object): mace_element_type_str = 'type' mace_activation_type_str = 'activation' mace_activation_max_limit_str = 'max_limit' - mace_activation_leakyrelu_coefficient_str = 'leakyrelu_coefficient' + mace_activation_coefficient_str = 'activation_coefficient' mace_resize_size_str = 'size' mace_batch_to_space_crops_str = 'crops' mace_paddings_str = 'paddings' 
diff --git a/tools/python/transform/caffe_converter.py b/tools/python/transform/caffe_converter.py index 09fd9c4b..0eddb5f9 100644 --- a/tools/python/transform/caffe_converter.py +++ b/tools/python/transform/caffe_converter.py @@ -166,6 +166,7 @@ class CaffeConverter(base_converter.ConverterInterface): 'TanH': ActivationType.TANH, 'Sigmoid': ActivationType.SIGMOID, 'Clip': ActivationType.RELUX, + 'ELU': ActivationType.ELU, } def __init__(self, option, src_model_file, src_weight_file): @@ -181,6 +182,7 @@ class CaffeConverter(base_converter.ConverterInterface): 'Sigmoid': self.convert_activation, 'PReLU': self.convert_activation, 'Clip': self.convert_activation, + 'ELU': self.convert_activation, 'Pooling': self.convert_pooling, 'Concat': self.convert_concat, 'Slice': self.convert_slice, @@ -509,7 +511,7 @@ class CaffeConverter(base_converter.ConverterInterface): negative_slope = caffe_op.layer.relu_param.negative_slope if negative_slope != 0: param_arg = op.arg.add() - param_arg.name = MaceKeyword.mace_activation_leakyrelu_coefficient_str # noqa + param_arg.name = MaceKeyword.mace_activation_coefficient_str param_arg.f = caffe_op.layer.relu_param.negative_slope type_arg.s = six.b(ActivationType.LEAKYRELU.name) elif caffe_op.type == 'ReLU6': @@ -522,6 +524,11 @@ class CaffeConverter(base_converter.ConverterInterface): limit_arg = op.arg.add() limit_arg.name = MaceKeyword.mace_activation_max_limit_str limit_arg.f = caffe_op.layer.clip_param.max + elif caffe_op.type == 'ELU': + # TODO(luxuhui): we have not verified ELU for Caffe yet + param_arg = op.arg.add() + param_arg.name = MaceKeyword.mace_activation_coefficient_str + param_arg.f = caffe_op.layer.elu_param.alpha def convert_folded_batchnorm(self, caffe_op): op = self.convert_general_op(caffe_op) diff --git a/tools/python/transform/onnx_converter.py b/tools/python/transform/onnx_converter.py index 8dd0dbd1..041b625c 100644 --- a/tools/python/transform/onnx_converter.py +++ b/tools/python/transform/onnx_converter.py @@ -629,18 +629,16 @@ class OnnxConverter(base_converter.ConverterInterface): type_arg.s = six.b(self.activation_type[node.op_type].name) if "alpha" in node.attrs: - alpha_tensor_name = node.name + '_alpha' - alpha_value = np.array([node.attrs["alpha"]]) - self.add_tensor(alpha_tensor_name, alpha_value.reshape(-1).shape, - mace_pb2.DT_FLOAT, alpha_value) - op.input.extend([alpha_tensor_name]) + alpha_value = node.attrs["alpha"] else: if node.op_type == OnnxOpType.LeakyRelu.name: alpha_value = 0.01 + elif node.op_type == OnnxOpType.Elu.name: + alpha_value = 1.0 else: alpha_value = 0 alpha_arg = op.arg.add() - alpha_arg.name = MaceKeyword.mace_activation_leakyrelu_coefficient_str + alpha_arg.name = MaceKeyword.mace_activation_coefficient_str alpha_arg.f = alpha_value def convert_affine(self, node): diff --git a/tools/python/transform/tensorflow_converter.py b/tools/python/transform/tensorflow_converter.py index 90ab048f..1ee69b0d 100644 --- a/tools/python/transform/tensorflow_converter.py +++ b/tools/python/transform/tensorflow_converter.py @@ -70,6 +70,7 @@ TFSupportedOps = [ 'DepthwiseConv2dNative', 'DepthToSpace', 'Div', + 'Elu', 'Equal', 'ExpandDims', 'ExtractImagePatches', @@ -190,6 +191,7 @@ class TensorflowConverter(base_converter.ConverterInterface): } activation_type = { + TFOpType.Elu.name: ActivationType.ELU, TFOpType.Relu.name: ActivationType.RELU, TFOpType.Relu6.name: ActivationType.RELUX, TFOpType.Tanh.name: ActivationType.TANH, @@ -232,6 +234,7 @@ class TensorflowConverter(base_converter.ConverterInterface):
TFOpType.DepthwiseConv2dNative.name: self.convert_conv2d, TFOpType.DepthToSpace.name: self.convert_space_depth, TFOpType.Div.name: self.convert_elementwise, + TFOpType.Elu.name: self.convert_activation, TFOpType.Equal.name: self.convert_elementwise, TFOpType.ExpandDims.name: self.convert_expand_dims, TFOpType.ExtractImagePatches.name: @@ -668,11 +671,18 @@ class TensorflowConverter(base_converter.ConverterInterface): limit_arg = op.arg.add() limit_arg.name = MaceKeyword.mace_activation_max_limit_str limit_arg.f = 6.0 - elif tf_op.type == TFOpType.LeakyRelu.name: + elif tf_op.type == TFOpType.LeakyRelu.name or \ + tf_op.type == TFOpType.Elu.name: alpha_arg = op.arg.add() alpha_arg.name = \ - MaceKeyword.mace_activation_leakyrelu_coefficient_str - alpha_arg.f = tf_op.get_attr(tf_alpha_str) + MaceKeyword.mace_activation_coefficient_str + try: + alpha_arg.f = tf_op.get_attr(tf_alpha_str) + except ValueError: + if tf_op.type == TFOpType.LeakyRelu.name: + alpha_arg.f = 0.0 + else: + alpha_arg.f = 1.0 def convert_fill(self, tf_op): op = self.convert_general_op(tf_op) diff --git a/tools/python/transform/transformer.py b/tools/python/transform/transformer.py index c26f2753..fbaff406 100644 --- a/tools/python/transform/transformer.py +++ b/tools/python/transform/transformer.py @@ -980,10 +980,7 @@ class Transformer(base_converter.ConverterInterface): [ActivationType.RELU.name, ActivationType.RELUX.name]) else: - fold_consumer = ( - act_type != ActivationType.PRELU.name - and act_type != ActivationType.ELU.name - ) + fold_consumer = (act_type != ActivationType.PRELU.name) # during quantization, only fold relu/relux if (self._option.quantize_stat or self._option.quantize) \ and act_type not in [ActivationType.RELU.name, @@ -997,7 +994,7 @@ class Transformer(base_converter.ConverterInterface): if arg.name == MaceKeyword.mace_activation_type_str \ or arg.name == \ MaceKeyword.mace_activation_max_limit_str \ - or arg.name == MaceKeyword.mace_activation_leakyrelu_coefficient_str: # noqa + or arg.name == MaceKeyword.mace_activation_coefficient_str: # noqa op.arg.extend([arg]) self.replace_quantize_info(op, consumer_op) -- GitLab
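Note on the new ELU path: the reference kernel added in mace/ops/ref/activation.cc maps a negative input x to activation_coefficient * (exp(x) - 1) and passes non-negative inputs through unchanged. A minimal self-contained Python check (not part of the patch) that reproduces the expected values hard-coded in TestSimpleElu, where the coefficient is 2.0:

import math

def elu(x, coefficient):
    # Same formula as the ref kernel: coefficient * (exp(x) - 1) for x < 0,
    # identity for x >= 0.
    return x if x >= 0 else (math.exp(x) - 1.0) * coefficient

# Spot-check against the expected tensor in TestSimpleElu (coefficient = 2.0).
assert abs(elu(-7.0, 2.0) + 1.998176236068891) < 1e-12
assert abs(elu(-1.0, 2.0) + 1.2642411176571153) < 1e-12
assert elu(7.0, 2.0) == 7.0 and elu(0.0, 2.0) == 0.0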