Commit 10ba3061, authored by Y yejianwu

support leaky relu

Parent: 8b9021f7
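For reference: the LEAKYRELU activation added by this commit computes f(x) = x for x >= 0 and f(x) = leakyrelu_coefficient * x for x < 0; the CPU path in the diff expresses this as max(x, 0) + coefficient * min(x, 0). Below is a minimal standalone sketch of that formula in plain C++; it is not MACE code, and the helper name LeakyRelu and the coefficient value 0.1 are illustrative only (0.1 happens to match the tests in this commit).

#include <algorithm>
#include <cstddef>
#include <cstdio>
#include <vector>

// Standalone leaky ReLU using the same formula as the CPU DoActivation path
// in this diff: max(x, 0) + coefficient * min(x, 0).
void LeakyRelu(const float *input, float *output, std::size_t size,
               float leakyrelu_coefficient) {
  for (std::size_t i = 0; i < size; ++i) {
    output[i] = std::max(input[i], 0.0f) +
                leakyrelu_coefficient * std::min(input[i], 0.0f);
  }
}

int main() {
  std::vector<float> in = {-7, 7, -6, 6, -1, 1, 0, 0};
  std::vector<float> out(in.size());
  LeakyRelu(in.data(), out.data(), in.size(), 0.1f);  // coefficient 0.1, as in the tests
  for (float v : out) std::printf("%g ", v);          // prints: -0.7 7 -0.6 6 -0.1 1 0 0
  std::printf("\n");
  return 0;
}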
@@ -36,9 +36,11 @@ class ActivationOp<DeviceType::CPU, float> : public Operation {
  : Operation(context),
  activation_(ops::StringToActivationType(
  Operation::GetOptionalArg<std::string>("activation",
  "NOOP"))),
  relux_max_limit_(Operation::GetOptionalArg<float>("max_limit",
- 0.0f)) {}
+ 0.0f)),
+ leakyrelu_coefficient_(Operation::GetOptionalArg<float>(
+ "leakyrelu_coefficient", 0.0f)) {}
  MaceStatus Run(OpContext *context) override {
  MACE_UNUSED(context);
@@ -58,7 +60,7 @@ class ActivationOp<DeviceType::CPU, float> : public Operation {
  alpha_ptr, output_ptr);
  } else {
  DoActivation(input_ptr, output_ptr, output->size(), activation_,
- relux_max_limit_);
+ relux_max_limit_, leakyrelu_coefficient_);
  }
  return MaceStatus::MACE_SUCCESS;
  }
@@ -66,6 +68,7 @@ class ActivationOp<DeviceType::CPU, float> : public Operation {
  private:
  ActivationType activation_;
  float relux_max_limit_;
+ float leakyrelu_coefficient_;
  };
@@ -80,11 +83,14 @@ class ActivationOp<DeviceType::GPU, T> : public Operation {
  "NOOP"));
  auto relux_max_limit = static_cast<T>(
  Operation::GetOptionalArg<float>("max_limit", 0.0f));
+ auto leakyrelu_coefficient = static_cast<T>(
+ Operation::GetOptionalArg<float>("leakyrelu_coefficient", 0.0f));
  MemoryType mem_type;
  if (context->device()->gpu_runtime()->UseImageMemory()) {
  mem_type = MemoryType::GPU_IMAGE;
  kernel_.reset(
- new opencl::image::ActivationKernel<T>(type, relux_max_limit));
+ new opencl::image::ActivationKernel<T>(type, relux_max_limit,
+ leakyrelu_coefficient));
  } else {
  MACE_NOT_IMPLEMENTED;
  }
...
@@ -62,7 +62,8 @@ void DoActivation(const T *input_ptr,
  T *output_ptr,
  const index_t size,
  const ActivationType type,
- const float relux_max_limit) {
+ const float relux_max_limit,
+ const float leakyrelu_coefficient) {
  MACE_CHECK(DataTypeToEnum<T>::value != DataType::DT_HALF);
  switch (type) {
@@ -97,7 +98,7 @@ void DoActivation(const T *input_ptr,
  #pragma omp parallel for schedule(runtime)
  for (index_t i = 0; i < size; ++i) {
  output_ptr[i] = std::max(input_ptr[i], static_cast<T>(0))
- + std::min(input_ptr[i], static_cast<T>(0)) * relux_max_limit;
+ + leakyrelu_coefficient * std::min(input_ptr[i], static_cast<T>(0));
  }
  break;
  default:
@@ -110,7 +111,8 @@ inline void DoActivation(const float *input_ptr,
  float *output_ptr,
  const index_t size,
  const ActivationType type,
- const float relux_max_limit) {
+ const float relux_max_limit,
+ const float leakyrelu_coefficient) {
  switch (type) {
  case NOOP:
  break;
@@ -133,7 +135,7 @@ inline void DoActivation(const float *input_ptr,
  }
  break;
  case LEAKYRELU:
- LeakyReluNeon(input_ptr, relux_max_limit, size, output_ptr);
+ LeakyReluNeon(input_ptr, leakyrelu_coefficient, size, output_ptr);
  break;
  default:
  LOG(FATAL) << "Unknown activation type: " << type;
...
@@ -52,6 +52,42 @@ TEST_F(ActivationOpTest, OPENCLSimpleRelu) {
  TestSimpleRelu<DeviceType::GPU>();
  }
+
+ namespace {
+ template <DeviceType D>
+ void TestSimpleLeakyRelu() {
+ OpsTestNet net;
+
+ // Add input data
+ net.AddInputFromArray<D, float>(
+ "Input", {2, 2, 2, 2},
+ {-7, 7, -6, 6, -5, 5, -4, 4, -3, 3, -2, 2, -1, 1, 0, 0});
+
+ OpDefBuilder("Activation", "ReluTest")
+ .Input("Input")
+ .Output("Output")
+ .AddStringArg("activation", "LEAKYRELU")
+ .AddFloatArg("leakyrelu_coefficient", 0.1)
+ .Finalize(net.NewOperatorDef());
+
+ // Run
+ net.RunOp(D);
+
+ auto expected = net.CreateTensor<float>(
+ {2, 2, 2, 2},
+ {-0.7, 7, -0.6, 6, -0.5, 5, -0.4, 4, -0.3, 3, -0.2, 2, -0.1, 1, 0, 0});
+
+ ExpectTensorNear<float>(*expected, *net.GetOutput("Output"), 1e-5);
+ }
+ }  // namespace
+
+ TEST_F(ActivationOpTest, CPUSimpleLeakyRelu) {
+ TestSimpleLeakyRelu<DeviceType::CPU>();
+ }
+
+ TEST_F(ActivationOpTest, OPENCLSimpleLeakyRelu) {
+ TestSimpleLeakyRelu<DeviceType::GPU>();
+ }

  namespace {
  template <DeviceType D>
  void TestUnalignedSimpleRelu() {
...
@@ -35,10 +35,12 @@ class BatchNormOp<DeviceType::CPU, float> : public Operation {
  explicit BatchNormOp(OpConstructContext *context)
  : Operation(context),
  epsilon_(Operation::GetOptionalArg<float>("epsilon",
  static_cast<float>(1e-4))),
  activation_(ops::StringToActivationType(
  Operation::GetOptionalArg<std::string>("activation", "NOOP"))),
- relux_max_limit_(Operation::GetOptionalArg<float>("max_limit", 0.0f)) {}
+ relux_max_limit_(Operation::GetOptionalArg<float>("max_limit", 0.0f)),
+ leakyrelu_coefficient_(Operation::GetOptionalArg<float>(
+ "leakyrelu_coefficient", 0.0f)) {}
  MaceStatus Run(OpContext *context) override {
  MACE_UNUSED(context);
@@ -121,7 +123,7 @@ class BatchNormOp<DeviceType::CPU, float> : public Operation {
  }
  }
  DoActivation(output_ptr, output_ptr, output->size(), activation_,
- relux_max_limit_);
+ relux_max_limit_, leakyrelu_coefficient_);
  return MaceStatus::MACE_SUCCESS;
  }
@@ -130,6 +132,7 @@ class BatchNormOp<DeviceType::CPU, float> : public Operation {
  float epsilon_;
  const ActivationType activation_;
  const float relux_max_limit_;
+ const float leakyrelu_coefficient_;
  protected:
  MACE_OP_INPUT_TAGS(INPUT, SCALE, OFFSET, MEAN, VAR);
@@ -148,11 +151,13 @@ class BatchNormOp<DeviceType::GPU, T> : public Operation {
  ActivationType activation = ops::StringToActivationType(
  Operation::GetOptionalArg<std::string>("activation", "NOOP"));
  float relux_max_limit = Operation::GetOptionalArg<float>("max_limit", 0.0f);
+ float leakyrelu_coefficient = Operation::GetOptionalArg<float>(
+ "leakyrelu_coefficient", 0.0f);
  MemoryType mem_type;
  if (context->device()->gpu_runtime()->UseImageMemory()) {
  mem_type = MemoryType::GPU_IMAGE;
  kernel_.reset(new opencl::image::BatchNormKernel<T>(
- epsilon, activation, relux_max_limit));
+ epsilon, activation, relux_max_limit, leakyrelu_coefficient));
  } else {
  MACE_NOT_IMPLEMENTED;
  }
...
@@ -88,8 +88,8 @@ TEST_F(BatchNormOpTest, SimpleRandomOPENCL) {
  // Add input data
  net.AddRandomInput<DeviceType::GPU, float>("Input",
  {batch, height, width, channels});
- net.AddRandomInput<DeviceType::GPU, float>("Scale", {channels}, true);
- net.AddRandomInput<DeviceType::GPU, float>("Offset", {channels}, true);
+ net.AddRandomInput<DeviceType::GPU, float>("Scale", {channels}, true, false);
+ net.AddRandomInput<DeviceType::GPU, float>("Offset", {channels}, true, false);
  net.AddRandomInput<DeviceType::GPU, float>("Mean", {channels}, true);
  net.AddRandomInput<DeviceType::GPU, float>("Var", {channels}, true);
@@ -105,6 +105,8 @@ TEST_F(BatchNormOpTest, SimpleRandomOPENCL) {
  .Input("Var")
  .AddFloatArg("epsilon", 1e-3)
  .Output("OutputNCHW")
+ .AddStringArg("activation", "LEAKYRELU")
+ .AddFloatArg("leakyrelu_coefficient", 0.1)
  .Finalize(net.NewOperatorDef());
  // run cpu
@@ -126,6 +128,8 @@ TEST_F(BatchNormOpTest, SimpleRandomOPENCL) {
  .Input("Var")
  .AddFloatArg("epsilon", 1e-3)
  .Output("Output")
+ .AddStringArg("activation", "LEAKYRELU")
+ .AddFloatArg("leakyrelu_coefficient", 0.1)
  .Finalize(net.NewOperatorDef());
  // Tuning
...
@@ -58,6 +58,8 @@ class Conv2dOp<DeviceType::CPU, float> : public ConvPool2dOpBase {
  Operation::GetOptionalArg<std::string>("activation",
  "NOOP"))),
  relux_max_limit_(Operation::GetOptionalArg<float>("max_limit", 0.0f)),
+ leakyrelu_coefficient_(Operation::GetOptionalArg<float>(
+ "leakyrelu_coefficient", 0.0f)),
  is_filter_transformed_(false) {}
  MaceStatus Run(OpContext *context) override {
@@ -520,7 +522,7 @@ class Conv2dOp<DeviceType::CPU, float> : public ConvPool2dOpBase {
  }
  DoActivation(output_data, output_data, output->size(), activation_,
- relux_max_limit_);
+ relux_max_limit_, leakyrelu_coefficient_);
  return MaceStatus::MACE_SUCCESS;
  }
@@ -703,6 +705,7 @@ class Conv2dOp<DeviceType::CPU, float> : public ConvPool2dOpBase {
  private:
  const ActivationType activation_;
  const float relux_max_limit_;
+ const float leakyrelu_coefficient_;
  bool is_filter_transformed_;
  SGemm sgemm_;
@@ -721,7 +724,9 @@ class Conv2dOp<DeviceType::CPU, uint8_t> : public ConvPool2dOpBase {
  activation_(ops::StringToActivationType(
  Operation::GetOptionalArg<std::string>("activation",
  "NOOP"))),
- relux_max_limit_(Operation::GetOptionalArg<float>("max_limit", 0.0f)) {}
+ relux_max_limit_(Operation::GetOptionalArg<float>("max_limit", 0.0f)),
+ leakyrelu_coefficient_(Operation::GetOptionalArg<float>(
+ "leakyrelu_coefficient", 0.0f)) {}
  MaceStatus Run(OpContext *context) override {
  const Tensor *input = this->Input(INPUT);
@@ -944,6 +949,7 @@ class Conv2dOp<DeviceType::CPU, uint8_t> : public ConvPool2dOpBase {
  private:
  const ActivationType activation_;
  const float relux_max_limit_;
+ const float leakyrelu_coefficient_;
  private:
  MACE_OP_INPUT_TAGS(INPUT, FILTER, BIAS);
@@ -961,6 +967,8 @@ class Conv2dOp<DeviceType::GPU, T> : public ConvPool2dOpBase {
  Operation::GetOptionalArg<std::string>("activation",
  "NOOP"))),
  relux_max_limit_(Operation::GetOptionalArg<float>("max_limit", 0.0f)),
+ leakyrelu_coefficient_(Operation::GetOptionalArg<float>(
+ "leakyrelu_coefficient", 0.0f)),
  wino_block_size_(Operation::GetOptionalArg<int>("wino_block_size", 0)) {
  MemoryType mem_type;
  if (context->device()->gpu_runtime()->UseImageMemory()) {
@@ -1007,12 +1015,13 @@ class Conv2dOp<DeviceType::GPU, T> : public ConvPool2dOpBase {
  return kernel_->Compute(context, input, filter, bias,
  strides_.data(), padding_type_, paddings_,
  dilations_.data(), activation_, relux_max_limit_,
- wino_block_size_, output);
+ leakyrelu_coefficient_, wino_block_size_, output);
  }
  private:
  const ActivationType activation_;
  const float relux_max_limit_;
+ const float leakyrelu_coefficient_;
  std::unique_ptr<OpenCLConv2dKernel> kernel_;
  int wino_block_size_;
...
@@ -527,8 +527,9 @@ void TestComplexConvNxNS12(const std::vector<index_t> &shape,
  // Add input data
  net.AddRandomInput<D, T>("Input", {batch, height, width, input_channels});
  net.AddRandomInput<D, T>(
- "Filter", {output_channels, input_channels, kernel_h, kernel_w}, true);
- net.AddRandomInput<D, T>("Bias", {output_channels}, true);
+ "Filter", {output_channels, input_channels, kernel_h, kernel_w}, true,
+ false);
+ net.AddRandomInput<D, T>("Bias", {output_channels}, true, false);
  net.TransformDataFormat<DeviceType::CPU, float>("Input", NHWC, "InputNCHW",
  NCHW);
@@ -541,6 +542,8 @@ void TestComplexConvNxNS12(const std::vector<index_t> &shape,
  .AddIntsArg("strides", {stride_h, stride_w})
  .AddIntArg("padding", type)
  .AddIntsArg("dilations", {1, 1})
+ .AddStringArg("activation", "LEAKYRELU")
+ .AddFloatArg("leakyrelu_coefficient", 0.1)
  .AddIntArg("T", static_cast<int>(DataTypeToEnum<T>::value))
  .Finalize(net.NewOperatorDef());
@@ -564,6 +567,8 @@ void TestComplexConvNxNS12(const std::vector<index_t> &shape,
  .AddIntsArg("strides", {stride_h, stride_w})
  .AddIntArg("padding", type)
  .AddIntsArg("dilations", {1, 1})
+ .AddStringArg("activation", "LEAKYRELU")
+ .AddFloatArg("leakyrelu_coefficient", 0.1)
  .AddIntArg("T", static_cast<int>(DataTypeToEnum<T>::value))
  .AddIntArg("wino_block_size", wino_blk_size)
  .Finalize(net.NewOperatorDef());
...
@@ -292,7 +292,8 @@ class Deconv2dOp<DeviceType::CPU, float> : public Deconv2dOpBase {
  output_data,
  output->size(),
  activation_,
- relux_max_limit_);
+ relux_max_limit_,
+ leakyrelu_coefficient_);
  return MaceStatus::MACE_SUCCESS;
  }
@@ -443,7 +444,8 @@ class Deconv2dOp<DeviceType::GPU, T> : public Deconv2dOpBase {
  return kernel_->Compute(context, input, filter, bias,
  strides_.data(), in_paddings.data(), activation_,
- relux_max_limit_, out_shape, output);
+ relux_max_limit_, leakyrelu_coefficient_,
+ out_shape, output);
  }
  private:
...
@@ -47,7 +47,9 @@ class Deconv2dOpBase : public Operation {
  Operation::GetOptionalArg<std::string>("activation",
  "NOOP"))),
  relux_max_limit_(
- Operation::GetOptionalArg<float>("max_limit", 0.0f)) {}
+ Operation::GetOptionalArg<float>("max_limit", 0.0f)),
+ leakyrelu_coefficient_(
+ Operation::GetOptionalArg<float>("leakyrelu_coefficient", 0.0f)) {}
  static void CalcDeconvShape_Caffe(
  const index_t *input_shape,  // NHWC
@@ -191,6 +193,7 @@ class Deconv2dOpBase : public Operation {
  const FrameworkType model_type_;
  const ActivationType activation_;
  const float relux_max_limit_;
+ const float leakyrelu_coefficient_;
  };
  template <typename T>
...
@@ -377,8 +377,9 @@ void TestComplexDeconvNxN(const int batch,
  // Add input data
  net.AddRandomInput<D, T>("Input", {batch, height, width, input_channels});
  net.AddRandomInput<D, T>(
- "Filter", {output_channels, input_channels, kernel_h, kernel_w}, true);
- net.AddRandomInput<D, T>("Bias", {output_channels}, true);
+ "Filter", {output_channels, input_channels, kernel_h, kernel_w}, true,
+ false);
+ net.AddRandomInput<D, T>("Bias", {output_channels}, true, false);
  net.TransformDataFormat<DeviceType::CPU, float>("Input", NHWC, "InputNCHW",
  NCHW);
  int out_h = 0;
@@ -418,6 +419,8 @@ void TestComplexDeconvNxN(const int batch,
  .AddIntsArg("strides", {stride_h, stride_w})
  .AddIntsArg("padding_values", paddings)
  .AddIntArg("framework_type", model_type)
+ .AddStringArg("activation", "LEAKYRELU")
+ .AddFloatArg("leakyrelu_coefficient", 0.1)
  .AddIntArg("T", static_cast<int>(DataTypeToEnum<T>::value))
  .Finalize(net.NewOperatorDef());
  } else {
@@ -454,6 +457,8 @@ void TestComplexDeconvNxN(const int batch,
  .AddIntsArg("strides", {stride_h, stride_w})
  .AddIntsArg("padding_values", paddings)
  .AddIntArg("framework_type", model_type)
+ .AddStringArg("activation", "LEAKYRELU")
+ .AddFloatArg("leakyrelu_coefficient", 0.1)
  .AddIntArg("T", static_cast<int>(DataTypeToEnum<T>::value))
  .Finalize(net.NewOperatorDef());
  } else {
...
@@ -49,10 +49,13 @@ class DepthwiseConv2dOpBase : public ConvPool2dOpBase {
  activation_(ops::StringToActivationType(
  Operation::GetOptionalArg<std::string>("activation",
  "NOOP"))),
- relux_max_limit_(Operation::GetOptionalArg<float>("max_limit", 0.0f)) {}
+ relux_max_limit_(Operation::GetOptionalArg<float>("max_limit", 0.0f)),
+ leakyrelu_coefficient_(Operation::GetOptionalArg<float>(
+ "leakyrelu_coefficient", 0.0f)) {}
  protected:
  const ActivationType activation_;
  const float relux_max_limit_;
+ const float leakyrelu_coefficient_;
  };
  template <DeviceType D, class T>
@@ -218,7 +221,7 @@ class DepthwiseConv2dOp<DeviceType::CPU, float> : public DepthwiseConv2dOpBase {
  }
  DoActivation(output_data, output_data, output->size(), activation_,
- relux_max_limit_);
+ relux_max_limit_, leakyrelu_coefficient_);
  return MaceStatus::MACE_SUCCESS;
  }
@@ -524,7 +527,7 @@ class DepthwiseConv2dOp<DeviceType::GPU, T> : public DepthwiseConv2dOpBase {
  return kernel_->Compute(context, input, filter, bias,
  strides_.data(), padding_type_, paddings_,
  dilations_.data(), activation_, relux_max_limit_,
- output);
+ leakyrelu_coefficient_, output);
  }
  private:
...
@@ -244,10 +244,10 @@ void TestNxNS12(const index_t height, const index_t width) {
  net.AddRandomInput<DeviceType::GPU, float>(
  "Input", {batch, height, width, channel});
  net.AddRandomInput<DeviceType::GPU, float>(
- "Filter", {multiplier, channel, kernel_h, kernel_w}, true);
+ "Filter", {multiplier, channel, kernel_h, kernel_w}, true, false);
  net.AddRandomInput<DeviceType::GPU, float>("Bias",
  {multiplier * channel},
- true);
+ true, false);
  net.TransformDataFormat<DeviceType::CPU, float>("Input", NHWC, "InputNCHW",
  NCHW);
@@ -260,8 +260,8 @@ void TestNxNS12(const index_t height, const index_t width) {
  .AddIntArg("padding", type)
  .AddIntsArg("dilations", {1, 1})
  .AddIntArg("T", static_cast<int>(DataTypeToEnum<float>::value))
- .AddStringArg("activation", "RELUX")
- .AddFloatArg("max_limit", 6.0)
+ .AddStringArg("activation", "LEAKYRELU")
+ .AddFloatArg("leakyrelu_coefficient", 0.1)
  .Finalize(net.NewOperatorDef());
  // Run on cpu
@@ -283,8 +283,8 @@ void TestNxNS12(const index_t height, const index_t width) {
  .AddIntArg("padding", type)
  .AddIntsArg("dilations", {1, 1})
  .AddIntArg("T", static_cast<int>(DataTypeToEnum<T>::value))
- .AddStringArg("activation", "RELUX")
- .AddFloatArg("max_limit", 6.0)
+ .AddStringArg("activation", "LEAKYRELU")
+ .AddFloatArg("leakyrelu_coefficient", 0.1)
  .Finalize(net.NewOperatorDef());
  net.RunOp(DeviceType::GPU);
...
@@ -281,7 +281,8 @@ class DepthwiseDeconv2dOp<DeviceType::CPU, float>
  output_data,
  output->size(),
  activation_,
- relux_max_limit_);
+ relux_max_limit_,
+ leakyrelu_coefficient_);
  return MaceStatus::MACE_SUCCESS;
  }
@@ -458,6 +459,7 @@ class DepthwiseDeconv2dOp<DeviceType::GPU, T> : public Deconv2dOpBase {
  group_,
  activation_,
  relux_max_limit_,
+ leakyrelu_coefficient_,
  out_shape,
  output);
  }
...
@@ -185,12 +185,13 @@ void RandomTest(index_t batch,
  GenerateRandomRealTypeData({multiplier, channel, kernel, kernel},
  &filter_data);
  net.AddInputFromArray<DeviceType::GPU, float>(
- "Filter", {multiplier, channel, kernel, kernel}, filter_data, true);
+ "Filter", {multiplier, channel, kernel, kernel}, filter_data, true,
+ false);
  std::vector<float> bias_data(channel * multiplier);
  GenerateRandomRealTypeData({channel * multiplier}, &bias_data);
  net.AddInputFromArray<DeviceType::GPU, float>("Bias",
  {channel * multiplier},
- bias_data, true);
+ bias_data, true, false);
  net.TransformDataFormat<DeviceType::CPU, float>("Input", NHWC, "InputNCHW",
  NCHW);
@@ -203,6 +204,8 @@ void RandomTest(index_t batch,
  .AddIntsArg("padding_values", {padding, padding})
  .AddIntArg("group", channel)
  .AddIntsArg("dilations", {1, 1})
+ .AddStringArg("activation", "LEAKYRELU")
+ .AddFloatArg("leakyrelu_coefficient", 0.1f)
  .AddIntArg("T", static_cast<int>(DataTypeToEnum<float>::value))
  .Finalize(net.NewOperatorDef());
  // Run
@@ -224,6 +227,8 @@ void RandomTest(index_t batch,
  .AddIntsArg("strides", {stride, stride})
  .AddIntsArg("padding_values", {padding, padding})
  .AddIntArg("group", channel)
+ .AddStringArg("activation", "LEAKYRELU")
+ .AddFloatArg("leakyrelu_coefficient", 0.1f)
  .AddIntArg("T", static_cast<int>(DataTypeToEnum<T>::value))
  .Finalize(net.NewOperatorDef());
...
@@ -41,10 +41,13 @@ class FullyConnectedOpBase : public Operation {
  activation_(ops::StringToActivationType(
  Operation::GetOptionalArg<std::string>("activation",
  "NOOP"))),
- relux_max_limit_(Operation::GetOptionalArg<float>("max_limit", 0.0f)) {}
+ relux_max_limit_(Operation::GetOptionalArg<float>("max_limit", 0.0f)),
+ leakyrelu_coefficient_(Operation::GetOptionalArg<float>(
+ "leakyrelu_coefficient", 0.0f)) {}
  protected:
  const ActivationType activation_;
  const float relux_max_limit_;
+ const float leakyrelu_coefficient_;
  MACE_OP_INPUT_TAGS(INPUT, WEIGHT, BIAS);
  MACE_OP_OUTPUT_TAGS(OUTPUT);
@@ -104,7 +107,7 @@ class FullyConnectedOp<DeviceType::CPU, float> : public FullyConnectedOpBase {
  }
  DoActivation(output_ptr, output_ptr, output->size(), activation_,
- relux_max_limit_);
+ relux_max_limit_, leakyrelu_coefficient_);
  return MaceStatus::MACE_SUCCESS;
  }
@@ -226,7 +229,8 @@ class FullyConnectedOp<DeviceType::GPU, T> : public FullyConnectedOpBase {
  "The shape of Weight: ", MakeString(weight->shape()),
  " don't match.");
  return kernel_->Compute(
- context, input, weight, bias, activation_, relux_max_limit_, output);
+ context, input, weight, bias, activation_, relux_max_limit_,
+ leakyrelu_coefficient_, output);
  }
  private:
...
@@ -123,10 +123,11 @@ void Random(const index_t batch,
  // Add input data
  net.AddRandomInput<DeviceType::GPU, float>("Input",
- {batch, height, width, channels});
+ {batch, height, width, channels}, false, false);
  net.AddRandomInput<DeviceType::GPU, float>(
- "Weight", {out_channel, channels, height, width}, true);
- net.AddRandomInput<DeviceType::GPU, float>("Bias", {out_channel}, true);
+ "Weight", {out_channel, channels, height, width}, true, false);
+ net.AddRandomInput<DeviceType::GPU, float>("Bias", {out_channel}, true,
+ false);
  net.TransformDataFormat<DeviceType::CPU, float>("Input", NHWC, "InputNCHW",
  NCHW);
@@ -135,6 +136,8 @@ void Random(const index_t batch,
  .Input("Weight")
  .Input("Bias")
  .Output("OutputNCHW")
+ .AddStringArg("activation", "LEAKYRELU")
+ .AddFloatArg("leakyrelu_coefficient", 0.1f)
  .Finalize(net.NewOperatorDef());
  // run cpu
@@ -152,6 +155,8 @@ void Random(const index_t batch,
  .Input("Weight")
  .Input("Bias")
  .Output("Output")
+ .AddStringArg("activation", "LEAKYRELU")
+ .AddFloatArg("leakyrelu_coefficient", 0.1f)
  .AddIntArg("T", static_cast<int>(DataTypeToEnum<T>::value))
  .Finalize(net.NewOperatorDef());
...
@@ -38,6 +38,7 @@ extern MaceStatus Conv2d1x1(OpContext *context,
  const DataType dt,
  const ActivationType activation,
  const float relux_max_limit,
+ const float leakyrelu_coefficient,
  const bool input_changed,
  Tensor *output,
  StatsFuture *future);
@@ -52,6 +53,7 @@ extern MaceStatus Conv2dGeneral(OpContext *context,
  const DataType dt,
  const ActivationType activation,
  const float relux_max_limit,
+ const float leakyrelu_coefficient,
  const bool input_changed,
  Tensor *output,
  StatsFuture *future);
@@ -81,6 +83,7 @@ class Conv2dKernel : public OpenCLConv2dKernel {
  const int *dilations,
  const ActivationType activation,
  const float relux_max_limit,
+ const float leakyrelu_coefficient,
  const int winograd_blk_size,
  Tensor *output) override;
@@ -120,6 +123,7 @@ MaceStatus Conv2dKernel<T>::Compute(
  const int *dilations,
  const ActivationType activation,
  const float relux_max_limit,
+ const float leakyrelu_coefficient,
  const int winograd_blk_size,
  Tensor *output) {
  MACE_UNUSED(winograd_blk_size);
@@ -221,14 +225,14 @@ MaceStatus Conv2dKernel<T>::Compute(
  return conv2d::Conv2d1x1(
  context, &kernels_[1], pad_input, filter, bias, strides,
  DataTypeToEnum<T>::v(), activation, relux_max_limit,
- input_changed, output, &conv_future);
+ leakyrelu_coefficient, input_changed, output, &conv_future);
  };
  } else {
  conv_func = [&](const Tensor *pad_input, Tensor *output) -> MaceStatus {
  return conv2d::Conv2dGeneral(
  context, &kernels_[1], pad_input, filter, bias, strides, dilations,
  DataTypeToEnum<T>::v(), activation, relux_max_limit,
- input_changed, output, &conv_future);
+ leakyrelu_coefficient, input_changed, output, &conv_future);
  };
  }
  MACE_RETURN_IF_ERROR(conv_func(padded_input_ptr, output));
...
@@ -32,6 +32,7 @@ MaceStatus Conv2d1x1(OpContext *context,
  const DataType dt,
  const ActivationType activation,
  const float relux_max_limit,
+ const float leakyrelu_coefficient,
  const bool input_changed,
  Tensor *output,
  StatsFuture *future) {
@@ -71,6 +72,9 @@ MaceStatus Conv2d1x1(OpContext *context,
  case SIGMOID:
  built_options.emplace("-DUSE_SIGMOID");
  break;
+ case LEAKYRELU:
+ built_options.emplace("-DUSE_LEAKYRELU");
+ break;
  default:
  LOG(FATAL) << "Unknown activation type: " << activation;
  }
@@ -106,6 +110,7 @@ MaceStatus Conv2d1x1(OpContext *context,
  kernel->setArg(idx++, strides[0]);
  kernel->setArg(idx++, strides[1]);
  kernel->setArg(idx++, relux_max_limit);
+ kernel->setArg(idx++, leakyrelu_coefficient);
  kernel->setArg(idx++, *(output->opencl_buffer()));
  }
...
@@ -33,6 +33,7 @@ MaceStatus Conv2dGeneral(OpContext *context,
  const DataType dt,
  const ActivationType activation,
  const float relux_max_limit,
+ const float leakyrelu_coefficient,
  const bool input_changed,
  Tensor *output,
  StatsFuture *future) {
@@ -76,6 +77,9 @@ MaceStatus Conv2dGeneral(OpContext *context,
  case SIGMOID:
  built_options.emplace("-DUSE_SIGMOID");
  break;
+ case LEAKYRELU:
+ built_options.emplace("-DUSE_LEAKYRELU");
+ break;
  default:
  LOG(FATAL) << "Unknown activation type: " << activation;
  }
@@ -120,6 +124,7 @@ MaceStatus Conv2dGeneral(OpContext *context,
  kernel->setArg(idx++, static_cast<int32_t>(
  dilations[1] * in_channel));
  kernel->setArg(idx++, relux_max_limit);
+ kernel->setArg(idx++, leakyrelu_coefficient);
  kernel->setArg(idx++, *(output->opencl_buffer()));
  }
...
@@ -33,6 +33,7 @@ MaceStatus DepthwiseConv2d(OpContext *context,
  const DataType dt,
  const ActivationType activation,
  const float relux_max_limit,
+ const float leakyrelu_coefficient,
  const bool input_changed,
  Tensor *output,
  StatsFuture *future) {
@@ -76,6 +77,9 @@ MaceStatus DepthwiseConv2d(OpContext *context,
  case SIGMOID:
  built_options.emplace("-DUSE_SIGMOID");
  break;
+ case LEAKYRELU:
+ built_options.emplace("-DUSE_LEAKYRELU");
+ break;
  default:
  LOG(FATAL) << "Unknown activation type: " << activation;
  }
@@ -116,6 +120,7 @@ MaceStatus DepthwiseConv2d(OpContext *context,
  kernel->setArg(idx++, static_cast<int32_t>(
  dilations[1] * in_channel));
  kernel->setArg(idx++, relux_max_limit);
+ kernel->setArg(idx++, leakyrelu_coefficient);
  kernel->setArg(idx++, *(output->opencl_buffer()));
  }
...
@@ -39,6 +39,7 @@ MaceStatus DepthwiseConv2d(OpContext *context,
  const DataType dt,
  const ActivationType activation,
  const float relux_max_limit,
+ const float leakyrelu_coefficient,
  const bool input_changed,
  Tensor *output,
  StatsFuture *future);
@@ -60,6 +61,7 @@ class DepthwiseConv2dKernel : public OpenCLDepthwiseConv2dKernel {
  const int *dilations,
  const ActivationType activation,
  const float relux_max_limit,
+ const float leakyrelu_coefficient,
  Tensor *output) override;
  private:
@@ -81,6 +83,7 @@ MaceStatus DepthwiseConv2dKernel<T>::Compute(
  const int *dilations,
  const ActivationType activation,
  const float relux_max_limit,
+ const float leakyrelu_coefficient,
  Tensor *output) {
  StatsFuture pad_future, dw_conv_future;
  index_t filter_w = filter->dim(3);
@@ -175,7 +178,7 @@ MaceStatus DepthwiseConv2dKernel<T>::Compute(
  depthwise::DepthwiseConv2d(
  context, &kernels_[1], padded_input_ptr, filter, bias, strides,
  dilations, DataTypeToEnum<T>::v(), activation, relux_max_limit,
- input_changed, output, &dw_conv_future));
+ leakyrelu_coefficient, input_changed, output, &dw_conv_future));
  MergeMultipleFutureWaitFn({pad_future, dw_conv_future}, context->future());
  return MaceStatus::MACE_SUCCESS;
  }
...
@@ -7,6 +7,7 @@ __kernel void activation(OUT_OF_RANGE_PARAMS
  __read_only image2d_t alpha,
  #endif
  __private const float relux_max_limit,
+ __private const float leakyrelu_coefficient,
  __write_only image2d_t output) {
  const int ch_blk = get_global_id(0);
  const int w = get_global_id(1);
@@ -24,9 +25,9 @@ __kernel void activation(OUT_OF_RANGE_PARAMS
  DATA_TYPE4 in = READ_IMAGET(input, SAMPLER, (int2)(pos, hb));
  #ifdef USE_PRELU
  DATA_TYPE4 prelu_alpha = READ_IMAGET(alpha, SAMPLER, (int2)(ch_blk, 0));
- DATA_TYPE4 out = do_activation(in, prelu_alpha, relux_max_limit);
+ DATA_TYPE4 out = do_activation(in, prelu_alpha, relux_max_limit, leakyrelu_coefficient);
  #else
- DATA_TYPE4 out = do_activation(in, relux_max_limit);
+ DATA_TYPE4 out = do_activation(in, relux_max_limit, leakyrelu_coefficient);
  #endif
  WRITE_IMAGET(output, (int2)(pos, hb), out);
...
@@ -11,7 +11,8 @@ __kernel void batch_norm(OUT_OF_RANGE_PARAMS
  __private const float epsilon,
  #endif
  __write_only image2d_t output,
- __private const float relux_max_limit) {
+ __private const float relux_max_limit,
+ __private const float leakyrelu_coefficient) {
  const int ch_blk = get_global_id(0);
  const int w = get_global_id(1);
  const int hb = get_global_id(2);
@@ -43,8 +44,8 @@ __kernel void batch_norm(OUT_OF_RANGE_PARAMS
  DATA_TYPE4 in = READ_IMAGET(input, SAMPLER, (int2)(pos, hb));
  DATA_TYPE4 out = mad(in, bn_scale, bn_offset);
- #if defined(USE_RELU) || defined(USE_RELUX) || defined(USE_TANH) || defined(USE_SIGMOID)
- out = do_activation(out, relux_max_limit);
+ #if defined(USE_RELU) || defined(USE_LEAKYRELU) || defined(USE_RELUX) || defined(USE_TANH) || defined(USE_SIGMOID)
+ out = do_activation(out, relux_max_limit, leakyrelu_coefficient);
  #endif
  WRITE_IMAGET(output, (int2)(pos, hb), out);
...
@@ -86,7 +86,8 @@ inline DATA_TYPE4 do_activation(DATA_TYPE4 in,
  #ifdef USE_PRELU
  DATA_TYPE4 prelu_alpha,
  #endif
- __private const float relux_max_limit) {
+ __private const float relux_max_limit,
+ __private const float leakyrelu_coefficient) {
  DATA_TYPE4 out;
  #ifdef USE_RELU
  out = fmax(in, (DATA_TYPE)0);
@@ -104,7 +105,7 @@ inline DATA_TYPE4 do_activation(DATA_TYPE4 in,
  out = do_sigmoid(in);
  #endif
  #ifdef USE_LEAKYRELU
- out = fmax(in, (DATA_TYPE)0) * relux_max_limit;
+ out = select(leakyrelu_coefficient * in, in, in >= (DATA_TYPE)0);
  #endif
  return out;
  }
...
@@ -9,6 +9,7 @@ __kernel void conv_2d(OUT_OF_RANGE_PARAMS
  #endif
  __write_only image2d_t output,
  __private const float relux_max_limit,
+ __private const float leakyrelu_coefficient,
  __private const int in_height,
  __private const int in_width,
  __private const int in_ch_blks,
@@ -123,11 +124,11 @@ __kernel void conv_2d(OUT_OF_RANGE_PARAMS
  }
  }
- #if defined(USE_RELU) || defined(USE_RELUX) || defined(USE_TANH) || defined(USE_SIGMOID)
- out0 = do_activation(out0, relux_max_limit);
- out1 = do_activation(out1, relux_max_limit);
- out2 = do_activation(out2, relux_max_limit);
- out3 = do_activation(out3, relux_max_limit);
+ #if defined(USE_RELU) || defined(USE_LEAKYRELU) || defined(USE_RELUX) || defined(USE_TANH) || defined(USE_SIGMOID)
+ out0 = do_activation(out0, relux_max_limit, leakyrelu_coefficient);
+ out1 = do_activation(out1, relux_max_limit, leakyrelu_coefficient);
+ out2 = do_activation(out2, relux_max_limit, leakyrelu_coefficient);
+ out3 = do_activation(out3, relux_max_limit, leakyrelu_coefficient);
  #endif
  const int out_x_base = mul24(out_ch_blk, out_width);
...
@@ -9,6 +9,7 @@ __kernel void conv_2d_1x1(OUT_OF_RANGE_PARAMS
  #endif
  __write_only image2d_t output,
  __private const float relux_max_limit,
+ __private const float leakyrelu_coefficient,
  __private const int in_height,
  __private const int in_width,
  __private const int in_ch_blks,
@@ -96,11 +97,11 @@ __kernel void conv_2d_1x1(OUT_OF_RANGE_PARAMS
  filter_x_base += 4;
  }
- #if defined(USE_RELU) || defined(USE_RELUX) || defined(USE_TANH) || defined(USE_SIGMOID)
- out0 = do_activation(out0, relux_max_limit);
- out1 = do_activation(out1, relux_max_limit);
- out2 = do_activation(out2, relux_max_limit);
- out3 = do_activation(out3, relux_max_limit);
+ #if defined(USE_RELU) || defined(USE_LEAKYRELU) || defined(USE_RELUX) || defined(USE_TANH) || defined(USE_SIGMOID)
+ out0 = do_activation(out0, relux_max_limit, leakyrelu_coefficient);
+ out1 = do_activation(out1, relux_max_limit, leakyrelu_coefficient);
+ out2 = do_activation(out2, relux_max_limit, leakyrelu_coefficient);
+ out3 = do_activation(out3, relux_max_limit, leakyrelu_coefficient);
  #endif
  const int out_x_base = mul24(out_ch_blk, width);
...
@@ -17,6 +17,7 @@ __kernel void conv2d(BUFFER_OUT_OF_RANGE_PARAMS
  __private const int stride_h,
  __private const int stride_w,
  __private const float relux_max_limit,
+ __private const float leakyrelu_coefficient,
  __global OUT_DATA_TYPE *output) {
  const int out_wc_blk_idx = get_global_id(0);
  const int out_hb_idx = get_global_id(1);
@@ -79,9 +80,9 @@ __kernel void conv2d(BUFFER_OUT_OF_RANGE_PARAMS
  in_offset += 4;
  }
- #if defined(USE_RELU) || defined(USE_RELUX) || defined(USE_TANH) || defined(USE_SIGMOID)
- out0 = do_activation(out0, relux_max_limit);
- out1 = do_activation(out1, relux_max_limit);
+ #if defined(USE_RELU) || defined(USE_LEAKYRELU) || defined(USE_RELUX) || defined(USE_TANH) || defined(USE_SIGMOID)
+ out0 = do_activation(out0, relux_max_limit, leakyrelu_coefficient);
+ out1 = do_activation(out1, relux_max_limit, leakyrelu_coefficient);
  #endif
  int out_offset = mad24(mad24(mad24(batch_idx, out_height, out_height_idx),
...
@@ -9,6 +9,7 @@ __kernel void conv_2d_3x3(OUT_OF_RANGE_PARAMS
  #endif
  __write_only image2d_t output,
  __private const float relux_max_limit,
+ __private const float leakyrelu_coefficient,
  __private const int in_height,
  __private const int in_width,
  __private const int in_ch_blks,
@@ -128,12 +129,12 @@ __kernel void conv_2d_3x3(OUT_OF_RANGE_PARAMS
  }
  }
- #if defined(USE_RELU) || defined(USE_RELUX) || defined(USE_TANH) || defined(USE_SIGMOID)
- out0 = do_activation(out0, relux_max_limit);
- out1 = do_activation(out1, relux_max_limit);
- out2 = do_activation(out2, relux_max_limit);
- out3 = do_activation(out3, relux_max_limit);
- out4 = do_activation(out4, relux_max_limit);
+ #if defined(USE_RELU) || defined(USE_LEAKYRELU) || defined(USE_RELUX) || defined(USE_TANH) || defined(USE_SIGMOID)
+ out0 = do_activation(out0, relux_max_limit, leakyrelu_coefficient);
+ out1 = do_activation(out1, relux_max_limit, leakyrelu_coefficient);
+ out2 = do_activation(out2, relux_max_limit, leakyrelu_coefficient);
+ out3 = do_activation(out3, relux_max_limit, leakyrelu_coefficient);
+ out4 = do_activation(out4, relux_max_limit, leakyrelu_coefficient);
  #endif
  const int out_x_base = mul24(out_ch_blk, out_width);
...
@@ -22,6 +22,7 @@ __kernel void conv2d(BUFFER_OUT_OF_RANGE_PARAMS
  __private const int dilated_h_offset,
  __private const int dilated_w_offset,
  __private const float relux_max_limit,
+ __private const float leakyrelu_coefficient,
  __global OUT_DATA_TYPE *output) {
  const int out_wc_blk_idx = get_global_id(0);
  const int out_hb_idx = get_global_id(1);
@@ -107,11 +108,11 @@ __kernel void conv2d(BUFFER_OUT_OF_RANGE_PARAMS
  }
  }
- #if defined(USE_RELU) || defined(USE_RELUX) || defined(USE_TANH) || defined(USE_SIGMOID)
- out0 = do_activation(out0, relux_max_limit);
- out1 = do_activation(out1, relux_max_limit);
- out2 = do_activation(out2, relux_max_limit);
- out3 = do_activation(out3, relux_max_limit);
+ #if defined(USE_RELU) || defined(USE_LEAKYRELU) || defined(USE_RELUX) || defined(USE_TANH) || defined(USE_SIGMOID)
+ out0 = do_activation(out0, relux_max_limit, leakyrelu_coefficient);
+ out1 = do_activation(out1, relux_max_limit, leakyrelu_coefficient);
+ out2 = do_activation(out2, relux_max_limit, leakyrelu_coefficient);
+ out3 = do_activation(out3, relux_max_limit, leakyrelu_coefficient);
  #endif
  int out_offset = mad24(mad24(mad24(batch_idx, out_height, out_height_idx),
...
@@ -9,6 +9,7 @@ __kernel void deconv_2d(OUT_OF_RANGE_PARAMS
  #endif
  __write_only image2d_t output,
  __private const float relux_max_limit,
+ __private const float leakyrelu_coefficient,
  __private const int in_height,
  __private const int in_width,
  __private const int in_channels,
@@ -127,12 +128,12 @@ __kernel void deconv_2d(OUT_OF_RANGE_PARAMS
  }
  }
- #if defined(USE_RELU) || defined(USE_RELUX) || defined(USE_TANH) || defined(USE_SIGMOID)
- out0 = do_activation(out0, relux_max_limit);
- out1 = do_activation(out1, relux_max_limit);
- out2 = do_activation(out2, relux_max_limit);
- out3 = do_activation(out3, relux_max_limit);
- out4 = do_activation(out4, relux_max_limit);
+ #if defined(USE_RELU) || defined(USE_LEAKYRELU) || defined(USE_RELUX) || defined(USE_TANH) || defined(USE_SIGMOID)
+ out0 = do_activation(out0, relux_max_limit, leakyrelu_coefficient);
+ out1 = do_activation(out1, relux_max_limit, leakyrelu_coefficient);
+ out2 = do_activation(out2, relux_max_limit, leakyrelu_coefficient);
+ out3 = do_activation(out3, relux_max_limit, leakyrelu_coefficient);
+ out4 = do_activation(out4, relux_max_limit, leakyrelu_coefficient);
  #endif
  int2 out_pos;
...
@@ -10,6 +10,7 @@ __kernel void depthwise_conv2d(OUT_OF_RANGE_PARAMS
  #endif
  __write_only image2d_t output,
  __private const float relux_max_limit,
+ __private const float leakyrelu_coefficient,
  __private const short in_height,
  __private const short in_width,
  __private const short in_ch_blks,
@@ -112,11 +113,11 @@ __kernel void depthwise_conv2d(OUT_OF_RANGE_PARAMS
  in_hb_idx += dilation_h;
  }
- #if defined(USE_RELU) || defined(USE_RELUX) || defined(USE_TANH) || defined(USE_SIGMOID)
- out0 = do_activation(out0, relux_max_limit);
- out1 = do_activation(out1, relux_max_limit);
- out2 = do_activation(out2, relux_max_limit);
- out3 = do_activation(out3, relux_max_limit);
+ #if defined(USE_RELU) || defined(USE_LEAKYRELU) || defined(USE_RELUX) || defined(USE_TANH) || defined(USE_SIGMOID)
+ out0 = do_activation(out0, relux_max_limit, leakyrelu_coefficient);
+ out1 = do_activation(out1, relux_max_limit, leakyrelu_coefficient);
+ out2 = do_activation(out2, relux_max_limit, leakyrelu_coefficient);
+ out3 = do_activation(out3, relux_max_limit, leakyrelu_coefficient);
  #endif
  const short out_x_base = mul24(out_ch_blk, out_width);
@@ -145,6 +146,7 @@ __kernel void depthwise_conv2d_s1(OUT_OF_RANGE_PARAMS
  #endif
  __write_only image2d_t output,
  __private const DATA_TYPE relux_max_limit,
+ __private const DATA_TYPE leakyrelu_coefficient,
  __private const short in_height,
  __private const short in_width,
  __private const short in_ch_blks,
@@ -238,11 +240,11 @@ __kernel void depthwise_conv2d_s1(OUT_OF_RANGE_PARAMS
  in_hb_idx += 1;
  }
- #if defined(USE_RELU) || defined(USE_RELUX) || defined(USE_TANH) || defined(USE_SIGMOID)
- out0 = do_activation(out0, relux_max_limit);
- out1 = do_activation(out1, relux_max_limit);
- out2 = do_activation(out2, relux_max_limit);
- out3 = do_activation(out3, relux_max_limit);
+ #if defined(USE_RELU) || defined(USE_LEAKYRELU) || defined(USE_RELUX) || defined(USE_TANH) || defined(USE_SIGMOID)
+ out0 = do_activation(out0, relux_max_limit, leakyrelu_coefficient);
+ out1 = do_activation(out1, relux_max_limit, leakyrelu_coefficient);
+ out2 = do_activation(out2, relux_max_limit, leakyrelu_coefficient);
+ out3 = do_activation(out3, relux_max_limit, leakyrelu_coefficient);
  #endif
  const short out_x_base = mul24(out_ch_blk, out_width);
...
...@@ -22,6 +22,7 @@ __kernel void depthwise_conv2d(BUFFER_OUT_OF_RANGE_PARAMS ...@@ -22,6 +22,7 @@ __kernel void depthwise_conv2d(BUFFER_OUT_OF_RANGE_PARAMS
__private const int dilated_h_offset, __private const int dilated_h_offset,
__private const int dilated_w_offset, __private const int dilated_w_offset,
__private const float relux_max_limit, __private const float relux_max_limit,
__private const float leakyrelu_coefficient,
__global OUT_DATA_TYPE *output) { __global OUT_DATA_TYPE *output) {
const int out_wc_blk_idx = get_global_id(0); const int out_wc_blk_idx = get_global_id(0);
const int out_hb_idx = get_global_id(1); const int out_hb_idx = get_global_id(1);
...@@ -85,11 +86,11 @@ __kernel void depthwise_conv2d(BUFFER_OUT_OF_RANGE_PARAMS ...@@ -85,11 +86,11 @@ __kernel void depthwise_conv2d(BUFFER_OUT_OF_RANGE_PARAMS
} }
} }
#if defined(USE_RELU) || defined(USE_RELUX) || defined(USE_TANH) || defined(USE_SIGMOID) #if defined(USE_RELU) || defined(USE_LEAKYRELU) || defined(USE_RELUX) || defined(USE_TANH) || defined(USE_SIGMOID)
out0 = do_activation(out0, relux_max_limit); out0 = do_activation(out0, relux_max_limit, leakyrelu_coefficient);
out1 = do_activation(out1, relux_max_limit); out1 = do_activation(out1, relux_max_limit, leakyrelu_coefficient);
out2 = do_activation(out2, relux_max_limit); out2 = do_activation(out2, relux_max_limit, leakyrelu_coefficient);
out3 = do_activation(out3, relux_max_limit); out3 = do_activation(out3, relux_max_limit, leakyrelu_coefficient);
#endif #endif
int out_offset = mad24(mad24(mad24(batch_idx, out_height, out_height_idx), int out_offset = mad24(mad24(mad24(batch_idx, out_height, out_height_idx),
......
...@@ -9,6 +9,7 @@ __kernel void depthwise_deconv2d(OUT_OF_RANGE_PARAMS ...@@ -9,6 +9,7 @@ __kernel void depthwise_deconv2d(OUT_OF_RANGE_PARAMS
#endif #endif
__write_only image2d_t output, __write_only image2d_t output,
__private const float relux_max_limit, __private const float relux_max_limit,
__private const float leakyrelu_coefficient,
__private const int in_height, __private const int in_height,
__private const int in_width, __private const int in_width,
__private const int out_height, __private const int out_height,
...@@ -108,12 +109,12 @@ __kernel void depthwise_deconv2d(OUT_OF_RANGE_PARAMS ...@@ -108,12 +109,12 @@ __kernel void depthwise_deconv2d(OUT_OF_RANGE_PARAMS
} }
} }
#if defined(USE_RELU) || defined(USE_RELUX) || defined(USE_TANH) || defined(USE_SIGMOID) #if defined(USE_RELU) || defined(USE_LEAKYRELU) || defined(USE_RELUX) || defined(USE_TANH) || defined(USE_SIGMOID)
out0 = do_activation(out0, relux_max_limit); out0 = do_activation(out0, relux_max_limit, leakyrelu_coefficient);
out1 = do_activation(out1, relux_max_limit); out1 = do_activation(out1, relux_max_limit, leakyrelu_coefficient);
out2 = do_activation(out2, relux_max_limit); out2 = do_activation(out2, relux_max_limit, leakyrelu_coefficient);
out3 = do_activation(out3, relux_max_limit); out3 = do_activation(out3, relux_max_limit, leakyrelu_coefficient);
out4 = do_activation(out4, relux_max_limit); out4 = do_activation(out4, relux_max_limit, leakyrelu_coefficient);
#endif #endif
...@@ -146,4 +147,4 @@ __kernel void depthwise_deconv2d(OUT_OF_RANGE_PARAMS ...@@ -146,4 +147,4 @@ __kernel void depthwise_deconv2d(OUT_OF_RANGE_PARAMS
out_pos.x += stride_w; out_pos.x += stride_w;
WRITE_IMAGET(output, out_pos, out4); WRITE_IMAGET(output, out_pos, out4);
} }
} }
\ No newline at end of file
...@@ -12,7 +12,8 @@ __kernel void fully_connected(OUT_OF_RANGE_PARAMS ...@@ -12,7 +12,8 @@ __kernel void fully_connected(OUT_OF_RANGE_PARAMS
__private const int input_height, __private const int input_height,
__private const int input_width, __private const int input_width,
__private const int input_channel, __private const int input_channel,
__private const float relux_max_limit) { __private const float relux_max_limit,
__private const float leakyrelu_coefficient) {
const int batch_idx = get_global_id(0); const int batch_idx = get_global_id(0);
const int out_blk_idx = get_global_id(1); const int out_blk_idx = get_global_id(1);
const int input_chan_blk = (input_channel + 3) >> 2; const int input_chan_blk = (input_channel + 3) >> 2;
...@@ -56,8 +57,8 @@ __kernel void fully_connected(OUT_OF_RANGE_PARAMS ...@@ -56,8 +57,8 @@ __kernel void fully_connected(OUT_OF_RANGE_PARAMS
input_coord.y++; input_coord.y++;
} }
#if defined(USE_RELU) || defined(USE_RELUX) || defined(USE_TANH) || defined(USE_SIGMOID) #if defined(USE_RELU) || defined(USE_LEAKYRELU) || defined(USE_RELUX) || defined(USE_TANH) || defined(USE_SIGMOID)
result = do_activation(result, relux_max_limit); result = do_activation(result, relux_max_limit, leakyrelu_coefficient);
#endif #endif
WRITE_IMAGET(output, (int2)(out_blk_idx, batch_idx), result); WRITE_IMAGET(output, (int2)(out_blk_idx, batch_idx), result);
...@@ -77,7 +78,8 @@ __kernel void fully_connected_width(OUT_OF_RANGE_PARAMS ...@@ -77,7 +78,8 @@ __kernel void fully_connected_width(OUT_OF_RANGE_PARAMS
__private const int input_width, __private const int input_width,
__private const int in_chan_blks, __private const int in_chan_blks,
__private const int out_blks, __private const int out_blks,
__private const float relux_max_limit) { __private const float relux_max_limit,
__private const float leakyrelu_coefficient) {
const int inter_out_idx = get_global_id(0); const int inter_out_idx = get_global_id(0);
const int width_blk_idx = get_global_id(1); const int width_blk_idx = get_global_id(1);
const int width_blk_count = global_size_dim1; const int width_blk_count = global_size_dim1;
...@@ -147,8 +149,8 @@ __kernel void fully_connected_width(OUT_OF_RANGE_PARAMS ...@@ -147,8 +149,8 @@ __kernel void fully_connected_width(OUT_OF_RANGE_PARAMS
inter_idx += 4; inter_idx += 4;
} }
#if defined(USE_RELU) || defined(USE_RELUX) || defined(USE_TANH) || defined(USE_SIGMOID) #if defined(USE_RELU) || defined(USE_LEAKYRELU) || defined(USE_RELUX) || defined(USE_TANH) || defined(USE_SIGMOID)
result = do_activation(result, relux_max_limit); result = do_activation(result, relux_max_limit, leakyrelu_coefficient);
#endif #endif
WRITE_IMAGET(output, (int2)(out_blk_idx, batch_idx), result); WRITE_IMAGET(output, (int2)(out_blk_idx, batch_idx), result);
......
...@@ -127,7 +127,8 @@ __kernel void winograd_inverse_transform_2x2(OUT_OF_RANGE_PARAMS ...@@ -127,7 +127,8 @@ __kernel void winograd_inverse_transform_2x2(OUT_OF_RANGE_PARAMS
__private const int out_width, __private const int out_width,
__private const int round_hw, __private const int round_hw,
__private const int round_w, __private const int round_w,
__private const float relux_max_limit) { __private const float relux_max_limit,
__private const float leakyrelu_coefficient) {
const int width_idx = get_global_id(0); const int width_idx = get_global_id(0);
const int height_idx = get_global_id(1); const int height_idx = get_global_id(1);
...@@ -203,11 +204,11 @@ __kernel void winograd_inverse_transform_2x2(OUT_OF_RANGE_PARAMS ...@@ -203,11 +204,11 @@ __kernel void winograd_inverse_transform_2x2(OUT_OF_RANGE_PARAMS
#endif #endif
#if defined(USE_RELU) || defined(USE_RELUX) || defined(USE_TANH) || defined(USE_SIGMOID) #if defined(USE_RELU) || defined(USE_LEAKYRELU) || defined(USE_RELUX) || defined(USE_TANH) || defined(USE_SIGMOID)
in0[0] = do_activation(in0[0], relux_max_limit); in0[0] = do_activation(in0[0], relux_max_limit, leakyrelu_coefficient);
in0[1] = do_activation(in0[1], relux_max_limit); in0[1] = do_activation(in0[1], relux_max_limit, leakyrelu_coefficient);
in1[0] = do_activation(in1[0], relux_max_limit); in1[0] = do_activation(in1[0], relux_max_limit, leakyrelu_coefficient);
in1[1] = do_activation(in1[1], relux_max_limit); in1[1] = do_activation(in1[1], relux_max_limit, leakyrelu_coefficient);
#endif #endif
WRITE_IMAGET(output, (int2)(coord_x, coord_y), in0[0]); WRITE_IMAGET(output, (int2)(coord_x, coord_y), in0[0]);
...@@ -395,7 +396,8 @@ __kernel void winograd_inverse_transform_4x4(OUT_OF_RANGE_PARAMS ...@@ -395,7 +396,8 @@ __kernel void winograd_inverse_transform_4x4(OUT_OF_RANGE_PARAMS
__private const int out_width, __private const int out_width,
__private const int round_hw, __private const int round_hw,
__private const int round_w, __private const int round_w,
__private const float relux_max_limit) { __private const float relux_max_limit,
__private const float leakyrelu_coefficient) {
const int width_idx = get_global_id(0); const int width_idx = get_global_id(0);
const int height_idx = get_global_id(1); const int height_idx = get_global_id(1);
...@@ -515,23 +517,23 @@ __kernel void winograd_inverse_transform_4x4(OUT_OF_RANGE_PARAMS ...@@ -515,23 +517,23 @@ __kernel void winograd_inverse_transform_4x4(OUT_OF_RANGE_PARAMS
out3[3] += bias_value; out3[3] += bias_value;
#endif #endif
#if defined(USE_RELU) || defined(USE_RELUX) || defined(USE_TANH) || defined(USE_SIGMOID) #if defined(USE_RELU) || defined(USE_LEAKYRELU) || defined(USE_RELUX) || defined(USE_TANH) || defined(USE_SIGMOID)
out0[0] = do_activation(out0[0], relux_max_limit); out0[0] = do_activation(out0[0], relux_max_limit, leakyrelu_coefficient);
out0[1] = do_activation(out0[1], relux_max_limit); out0[1] = do_activation(out0[1], relux_max_limit, leakyrelu_coefficient);
out0[2] = do_activation(out0[2], relux_max_limit); out0[2] = do_activation(out0[2], relux_max_limit, leakyrelu_coefficient);
out0[3] = do_activation(out0[3], relux_max_limit); out0[3] = do_activation(out0[3], relux_max_limit, leakyrelu_coefficient);
out1[0] = do_activation(out1[0], relux_max_limit); out1[0] = do_activation(out1[0], relux_max_limit, leakyrelu_coefficient);
out1[1] = do_activation(out1[1], relux_max_limit); out1[1] = do_activation(out1[1], relux_max_limit, leakyrelu_coefficient);
out1[2] = do_activation(out1[2], relux_max_limit); out1[2] = do_activation(out1[2], relux_max_limit, leakyrelu_coefficient);
out1[3] = do_activation(out1[3], relux_max_limit); out1[3] = do_activation(out1[3], relux_max_limit, leakyrelu_coefficient);
out2[0] = do_activation(out2[0], relux_max_limit); out2[0] = do_activation(out2[0], relux_max_limit, leakyrelu_coefficient);
out2[1] = do_activation(out2[1], relux_max_limit); out2[1] = do_activation(out2[1], relux_max_limit, leakyrelu_coefficient);
out2[2] = do_activation(out2[2], relux_max_limit); out2[2] = do_activation(out2[2], relux_max_limit, leakyrelu_coefficient);
out2[3] = do_activation(out2[3], relux_max_limit); out2[3] = do_activation(out2[3], relux_max_limit, leakyrelu_coefficient);
out3[0] = do_activation(out3[0], relux_max_limit); out3[0] = do_activation(out3[0], relux_max_limit, leakyrelu_coefficient);
out3[1] = do_activation(out3[1], relux_max_limit); out3[1] = do_activation(out3[1], relux_max_limit, leakyrelu_coefficient);
out3[2] = do_activation(out3[2], relux_max_limit); out3[2] = do_activation(out3[2], relux_max_limit, leakyrelu_coefficient);
out3[3] = do_activation(out3[3], relux_max_limit); out3[3] = do_activation(out3[3], relux_max_limit, leakyrelu_coefficient);
#endif #endif
const int num = min(4, out_width - out_width_idx); const int num = min(4, out_width - out_width_idx);
...@@ -556,4 +558,4 @@ __kernel void winograd_inverse_transform_4x4(OUT_OF_RANGE_PARAMS ...@@ -556,4 +558,4 @@ __kernel void winograd_inverse_transform_4x4(OUT_OF_RANGE_PARAMS
for (int i = 0; i < num; ++i) { for (int i = 0; i < num; ++i) {
WRITE_IMAGET(output, (int2)(coord_x + i, coord_y + 3), out3[i]); WRITE_IMAGET(output, (int2)(coord_x + i, coord_y + 3), out3[i]);
} }
} }
\ No newline at end of file
...@@ -45,6 +45,7 @@ class OpenCLConv2dKernel { ...@@ -45,6 +45,7 @@ class OpenCLConv2dKernel {
const int *dilations, const int *dilations,
const ActivationType activation, const ActivationType activation,
const float relux_max_limit, const float relux_max_limit,
const float leakyrelu_coefficient,
const int winograd_blk_size, const int winograd_blk_size,
Tensor *output) = 0; Tensor *output) = 0;
MACE_EMPTY_VIRTUAL_DESTRUCTOR(OpenCLConv2dKernel); MACE_EMPTY_VIRTUAL_DESTRUCTOR(OpenCLConv2dKernel);
......
...@@ -36,6 +36,7 @@ class OpenCLDeconv2dKernel { ...@@ -36,6 +36,7 @@ class OpenCLDeconv2dKernel {
const int *padding_data, const int *padding_data,
const ActivationType activation, const ActivationType activation,
const float relux_max_limit, const float relux_max_limit,
const float leakyrelu_coefficient,
const std::vector<index_t> &output_shape, const std::vector<index_t> &output_shape,
Tensor *output) = 0; Tensor *output) = 0;
MACE_EMPTY_VIRTUAL_DESTRUCTOR(OpenCLDeconv2dKernel); MACE_EMPTY_VIRTUAL_DESTRUCTOR(OpenCLDeconv2dKernel);
......
...@@ -38,6 +38,7 @@ class OpenCLDepthwiseConv2dKernel { ...@@ -38,6 +38,7 @@ class OpenCLDepthwiseConv2dKernel {
const int *dilations, const int *dilations,
const ActivationType activation, const ActivationType activation,
const float relux_max_limit, const float relux_max_limit,
const float leakyrelu_coefficient,
Tensor *output) = 0; Tensor *output) = 0;
MACE_EMPTY_VIRTUAL_DESTRUCTOR(OpenCLDepthwiseConv2dKernel); MACE_EMPTY_VIRTUAL_DESTRUCTOR(OpenCLDepthwiseConv2dKernel);
}; };
......
...@@ -39,6 +39,7 @@ class OpenCLDepthwiseDeconv2dKernel { ...@@ -39,6 +39,7 @@ class OpenCLDepthwiseDeconv2dKernel {
const int group, const int group,
const ActivationType activation, const ActivationType activation,
const float relux_max_limit, const float relux_max_limit,
const float leakyrelu_coefficient,
const std::vector <index_t> &output_shape, const std::vector <index_t> &output_shape,
Tensor *output) = 0; Tensor *output) = 0;
MACE_EMPTY_VIRTUAL_DESTRUCTOR(OpenCLDepthwiseDeconv2dKernel); MACE_EMPTY_VIRTUAL_DESTRUCTOR(OpenCLDepthwiseDeconv2dKernel);
......
...@@ -35,6 +35,7 @@ class OpenCLFullyConnectedKernel { ...@@ -35,6 +35,7 @@ class OpenCLFullyConnectedKernel {
const Tensor *bias, const Tensor *bias,
const ActivationType activation, const ActivationType activation,
const float relux_max_limit, const float relux_max_limit,
const float leakyrelu_coefficient,
Tensor *output) = 0; Tensor *output) = 0;
MACE_EMPTY_VIRTUAL_DESTRUCTOR(OpenCLFullyConnectedKernel); MACE_EMPTY_VIRTUAL_DESTRUCTOR(OpenCLFullyConnectedKernel);
}; };
......
...@@ -35,8 +35,10 @@ template <typename T> ...@@ -35,8 +35,10 @@ template <typename T>
class ActivationKernel : public OpenCLActivationKernel { class ActivationKernel : public OpenCLActivationKernel {
public: public:
ActivationKernel(ActivationType type, ActivationKernel(ActivationType type,
T relux_max_limit) T relux_max_limit,
: activation_(type), relux_max_limit_(relux_max_limit) {} T leakyrelu_coefficient)
: activation_(type), relux_max_limit_(relux_max_limit),
leakyrelu_coefficient_(leakyrelu_coefficient) {}
MaceStatus Compute( MaceStatus Compute(
OpContext *context, OpContext *context,
...@@ -47,6 +49,7 @@ class ActivationKernel : public OpenCLActivationKernel { ...@@ -47,6 +49,7 @@ class ActivationKernel : public OpenCLActivationKernel {
private: private:
ActivationType activation_; ActivationType activation_;
T relux_max_limit_; T relux_max_limit_;
T leakyrelu_coefficient_;
cl::Kernel kernel_; cl::Kernel kernel_;
uint32_t kwg_size_; uint32_t kwg_size_;
std::vector<index_t> input_shape_; std::vector<index_t> input_shape_;
...@@ -128,6 +131,7 @@ MaceStatus ActivationKernel<T>::Compute( ...@@ -128,6 +131,7 @@ MaceStatus ActivationKernel<T>::Compute(
kernel_.setArg(idx++, *(alpha->opencl_image())); kernel_.setArg(idx++, *(alpha->opencl_image()));
} }
kernel_.setArg(idx++, static_cast<float>(relux_max_limit_)); kernel_.setArg(idx++, static_cast<float>(relux_max_limit_));
kernel_.setArg(idx++, static_cast<float>(leakyrelu_coefficient_));
kernel_.setArg(idx++, *(output->opencl_image())); kernel_.setArg(idx++, *(output->opencl_image()));
input_shape_ = input->shape(); input_shape_ = input->shape();
......
...@@ -37,7 +37,8 @@ class BatchNormKernel : public OpenCLBatchNormKernel { ...@@ -37,7 +37,8 @@ class BatchNormKernel : public OpenCLBatchNormKernel {
BatchNormKernel( BatchNormKernel(
const float epsilon, const float epsilon,
const ActivationType activation, const ActivationType activation,
const float relux_max_limit); const float relux_max_limit,
const float leakyrelu_coefficient);
MaceStatus Compute(OpContext *context, MaceStatus Compute(OpContext *context,
const Tensor *input, const Tensor *input,
const Tensor *scale, const Tensor *scale,
...@@ -50,6 +51,7 @@ class BatchNormKernel : public OpenCLBatchNormKernel { ...@@ -50,6 +51,7 @@ class BatchNormKernel : public OpenCLBatchNormKernel {
const float epsilon_; const float epsilon_;
const ActivationType activation_; const ActivationType activation_;
const float relux_max_limit_; const float relux_max_limit_;
const float leakyrelu_coefficient_;
cl::Kernel kernel_; cl::Kernel kernel_;
uint32_t kwg_size_; uint32_t kwg_size_;
std::vector<index_t> input_shape_; std::vector<index_t> input_shape_;
...@@ -58,10 +60,12 @@ class BatchNormKernel : public OpenCLBatchNormKernel { ...@@ -58,10 +60,12 @@ class BatchNormKernel : public OpenCLBatchNormKernel {
template <typename T> template <typename T>
BatchNormKernel<T>::BatchNormKernel(const float epsilon, BatchNormKernel<T>::BatchNormKernel(const float epsilon,
const ActivationType activation, const ActivationType activation,
const float relux_max_limit) const float relux_max_limit,
const float leakyrelu_coefficient)
: epsilon_(epsilon), : epsilon_(epsilon),
activation_(activation), activation_(activation),
relux_max_limit_(relux_max_limit) {} relux_max_limit_(relux_max_limit),
leakyrelu_coefficient_(leakyrelu_coefficient) {}
template <typename T> template <typename T>
MaceStatus BatchNormKernel<T>::Compute( MaceStatus BatchNormKernel<T>::Compute(
...@@ -115,6 +119,9 @@ MaceStatus BatchNormKernel<T>::Compute( ...@@ -115,6 +119,9 @@ MaceStatus BatchNormKernel<T>::Compute(
case SIGMOID: case SIGMOID:
built_options.emplace("-DUSE_SIGMOID"); built_options.emplace("-DUSE_SIGMOID");
break; break;
case LEAKYRELU:
built_options.emplace("-DUSE_LEAKYRELU");
break;
default: default:
LOG(FATAL) << "Unknown activation type: " << activation_; LOG(FATAL) << "Unknown activation type: " << activation_;
} }
...@@ -140,6 +147,7 @@ MaceStatus BatchNormKernel<T>::Compute( ...@@ -140,6 +147,7 @@ MaceStatus BatchNormKernel<T>::Compute(
} }
kernel_.setArg(idx++, *(output->opencl_image())); kernel_.setArg(idx++, *(output->opencl_image()));
kernel_.setArg(idx++, relux_max_limit_); kernel_.setArg(idx++, relux_max_limit_);
kernel_.setArg(idx++, leakyrelu_coefficient_);
input_shape_ = input->shape(); input_shape_ = input->shape();
} }
......
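
Because the batch norm kernel applies the activation after the scale/offset step, enabling -DUSE_LEAKYRELU here effectively folds BatchNorm and LeakyRelu into one pass. A small, hypothetical C++ reference of that composition (helper names are assumptions, not MACE code):

#include <algorithm>
#include <cstdio>

// Hypothetical reference of a folded BatchNorm + LeakyRelu: normalize with a
// precomputed scale/offset, then apply the leaky relu with the coefficient.
float LeakyRelu(float x, float coefficient) {
  return std::max(x, 0.0f) + coefficient * std::min(x, 0.0f);
}

float BatchNormLeakyRelu(float x, float scale, float offset,
                         float leakyrelu_coefficient) {
  return LeakyRelu(scale * x + offset, leakyrelu_coefficient);
}

int main() {
  // e.g. scale = 0.5, offset = -1: an input of 1 normalizes to -0.5,
  // and a leaky relu with coefficient 0.1 maps it to -0.05
  std::printf("%g\n", BatchNormLeakyRelu(1.0f, 0.5f, -1.0f, 0.1f));
}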
...@@ -38,6 +38,7 @@ extern MaceStatus Conv2dK1x1(OpContext *context, ...@@ -38,6 +38,7 @@ extern MaceStatus Conv2dK1x1(OpContext *context,
const int *dilations, const int *dilations,
const ActivationType activation, const ActivationType activation,
const float relux_max_limit, const float relux_max_limit,
const float leakyrelu_coefficient,
const DataType dt, const DataType dt,
std::vector<index_t> *prev_input_shape, std::vector<index_t> *prev_input_shape,
Tensor *output, Tensor *output,
...@@ -53,6 +54,7 @@ extern MaceStatus Conv2dK3x3(OpContext *context, ...@@ -53,6 +54,7 @@ extern MaceStatus Conv2dK3x3(OpContext *context,
const int *dilations, const int *dilations,
const ActivationType activation, const ActivationType activation,
const float relux_max_limit, const float relux_max_limit,
const float leakyrelu_coefficient,
const DataType dt, const DataType dt,
std::vector<index_t> *prev_input_shape, std::vector<index_t> *prev_input_shape,
Tensor *output, Tensor *output,
...@@ -68,6 +70,7 @@ extern MaceStatus Conv2d(OpContext *context, ...@@ -68,6 +70,7 @@ extern MaceStatus Conv2d(OpContext *context,
const int *dilations, const int *dilations,
const ActivationType activation, const ActivationType activation,
const float relux_max_limit, const float relux_max_limit,
const float leakyrelu_coefficient,
const DataType dt, const DataType dt,
std::vector<index_t> *prev_input_shape, std::vector<index_t> *prev_input_shape,
Tensor *output, Tensor *output,
...@@ -81,6 +84,7 @@ extern MaceStatus WinogradConv2dK3x3S1(OpContext *context, ...@@ -81,6 +84,7 @@ extern MaceStatus WinogradConv2dK3x3S1(OpContext *context,
const int *padding, const int *padding,
const ActivationType activation, const ActivationType activation,
const float relux_max_limit, const float relux_max_limit,
const float leakyrelu_coefficient,
const DataType dt, const DataType dt,
const int wino_blk_size, const int wino_blk_size,
std::vector<index_t> *prev_input_shape, std::vector<index_t> *prev_input_shape,
...@@ -109,6 +113,7 @@ class Conv2dKernel : public OpenCLConv2dKernel { ...@@ -109,6 +113,7 @@ class Conv2dKernel : public OpenCLConv2dKernel {
const int *dilations, const int *dilations,
const ActivationType activation, const ActivationType activation,
const float relux_max_limit, const float relux_max_limit,
const float leakyrelu_coefficient,
const int wino_blk_size, const int wino_blk_size,
Tensor *output) override; Tensor *output) override;
...@@ -169,6 +174,7 @@ MaceStatus Conv2dKernel<T>::Compute( ...@@ -169,6 +174,7 @@ MaceStatus Conv2dKernel<T>::Compute(
const int *dilations, const int *dilations,
const ActivationType activation, const ActivationType activation,
const float relux_max_limit, const float relux_max_limit,
const float leakyrelu_coefficient,
const int wino_blk_size, const int wino_blk_size,
Tensor *output) { Tensor *output) {
index_t kernel_h = filter->dim(2); index_t kernel_h = filter->dim(2);
...@@ -217,6 +223,7 @@ MaceStatus Conv2dKernel<T>::Compute( ...@@ -217,6 +223,7 @@ MaceStatus Conv2dKernel<T>::Compute(
paddings.data(), paddings.data(),
activation, activation,
relux_max_limit, relux_max_limit,
leakyrelu_coefficient,
DataTypeToEnum<T>::value, DataTypeToEnum<T>::value,
wino_blk_size, wino_blk_size,
&input_shape_, &input_shape_,
...@@ -235,6 +242,7 @@ MaceStatus Conv2dKernel<T>::Compute( ...@@ -235,6 +242,7 @@ MaceStatus Conv2dKernel<T>::Compute(
dilations, dilations,
activation, activation,
relux_max_limit, relux_max_limit,
leakyrelu_coefficient,
DataTypeToEnum<T>::value, DataTypeToEnum<T>::value,
&input_shape_, &input_shape_,
output, output,
...@@ -252,6 +260,7 @@ MaceStatus Conv2dKernel<T>::Compute( ...@@ -252,6 +260,7 @@ MaceStatus Conv2dKernel<T>::Compute(
dilations, dilations,
activation, activation,
relux_max_limit, relux_max_limit,
leakyrelu_coefficient,
DataTypeToEnum<T>::value, DataTypeToEnum<T>::value,
&input_shape_, &input_shape_,
output, output,
...@@ -269,6 +278,7 @@ MaceStatus Conv2dKernel<T>::Compute( ...@@ -269,6 +278,7 @@ MaceStatus Conv2dKernel<T>::Compute(
dilations, dilations,
activation, activation,
relux_max_limit, relux_max_limit,
leakyrelu_coefficient,
DataTypeToEnum<T>::value, DataTypeToEnum<T>::value,
&input_shape_, &input_shape_,
output, output,
......
...@@ -76,6 +76,7 @@ extern MaceStatus Conv2dK1x1(OpContext *context, ...@@ -76,6 +76,7 @@ extern MaceStatus Conv2dK1x1(OpContext *context,
const int *dilations, const int *dilations,
const ActivationType activation, const ActivationType activation,
const float relux_max_limit, const float relux_max_limit,
const float leakyrelu_coefficient,
const DataType dt, const DataType dt,
std::vector<index_t> *prev_input_shape, std::vector<index_t> *prev_input_shape,
Tensor *output, Tensor *output,
...@@ -125,6 +126,9 @@ extern MaceStatus Conv2dK1x1(OpContext *context, ...@@ -125,6 +126,9 @@ extern MaceStatus Conv2dK1x1(OpContext *context,
case SIGMOID: case SIGMOID:
built_options.emplace("-DUSE_SIGMOID"); built_options.emplace("-DUSE_SIGMOID");
break; break;
case LEAKYRELU:
built_options.emplace("-DUSE_LEAKYRELU");
break;
default: default:
LOG(FATAL) << "Unknown activation type: " << activation; LOG(FATAL) << "Unknown activation type: " << activation;
} }
...@@ -154,6 +158,7 @@ extern MaceStatus Conv2dK1x1(OpContext *context, ...@@ -154,6 +158,7 @@ extern MaceStatus Conv2dK1x1(OpContext *context,
kernel->setArg(idx++, *(output->opencl_image())); kernel->setArg(idx++, *(output->opencl_image()));
// FIXME handle flexable data type: half not supported // FIXME handle flexable data type: half not supported
kernel->setArg(idx++, relux_max_limit); kernel->setArg(idx++, relux_max_limit);
kernel->setArg(idx++, leakyrelu_coefficient);
kernel->setArg(idx++, static_cast<int>(input_height)); kernel->setArg(idx++, static_cast<int>(input_height));
kernel->setArg(idx++, static_cast<int>(input_width)); kernel->setArg(idx++, static_cast<int>(input_width));
kernel->setArg(idx++, static_cast<int>(input_channel_blocks)); kernel->setArg(idx++, static_cast<int>(input_channel_blocks));
......
...@@ -69,6 +69,7 @@ extern MaceStatus Conv2dK3x3(OpContext *context, ...@@ -69,6 +69,7 @@ extern MaceStatus Conv2dK3x3(OpContext *context,
const int *dilations, const int *dilations,
const ActivationType activation, const ActivationType activation,
const float relux_max_limit, const float relux_max_limit,
const float leakyrelu_coefficient,
const DataType dt, const DataType dt,
std::vector<index_t> *prev_input_shape, std::vector<index_t> *prev_input_shape,
Tensor *output, Tensor *output,
...@@ -110,6 +111,9 @@ extern MaceStatus Conv2dK3x3(OpContext *context, ...@@ -110,6 +111,9 @@ extern MaceStatus Conv2dK3x3(OpContext *context,
case SIGMOID: case SIGMOID:
built_options.emplace("-DUSE_SIGMOID"); built_options.emplace("-DUSE_SIGMOID");
break; break;
case LEAKYRELU:
built_options.emplace("-DUSE_LEAKYRELU");
break;
default: default:
LOG(FATAL) << "Unknown activation type: " << activation; LOG(FATAL) << "Unknown activation type: " << activation;
} }
...@@ -138,6 +142,7 @@ extern MaceStatus Conv2dK3x3(OpContext *context, ...@@ -138,6 +142,7 @@ extern MaceStatus Conv2dK3x3(OpContext *context,
} }
kernel->setArg(idx++, *(output->opencl_image())); kernel->setArg(idx++, *(output->opencl_image()));
kernel->setArg(idx++, relux_max_limit); kernel->setArg(idx++, relux_max_limit);
kernel->setArg(idx++, leakyrelu_coefficient);
kernel->setArg(idx++, static_cast<int>(input->dim(1))); kernel->setArg(idx++, static_cast<int>(input->dim(1)));
kernel->setArg(idx++, static_cast<int>(input->dim(2))); kernel->setArg(idx++, static_cast<int>(input->dim(2)));
kernel->setArg(idx++, static_cast<int>(input_channel_blocks)); kernel->setArg(idx++, static_cast<int>(input_channel_blocks));
......
...@@ -77,6 +77,7 @@ extern MaceStatus Conv2d(OpContext *context, ...@@ -77,6 +77,7 @@ extern MaceStatus Conv2d(OpContext *context,
const int *dilations, const int *dilations,
const ActivationType activation, const ActivationType activation,
const float relux_max_limit, const float relux_max_limit,
const float leakyrelu_coefficient,
const DataType dt, const DataType dt,
std::vector<index_t> *prev_input_shape, std::vector<index_t> *prev_input_shape,
Tensor *output, Tensor *output,
...@@ -118,6 +119,9 @@ extern MaceStatus Conv2d(OpContext *context, ...@@ -118,6 +119,9 @@ extern MaceStatus Conv2d(OpContext *context,
case SIGMOID: case SIGMOID:
built_options.emplace("-DUSE_SIGMOID"); built_options.emplace("-DUSE_SIGMOID");
break; break;
case LEAKYRELU:
built_options.emplace("-DUSE_LEAKYRELU");
break;
default: default:
LOG(FATAL) << "Unknown activation type: " << activation; LOG(FATAL) << "Unknown activation type: " << activation;
} }
...@@ -146,6 +150,7 @@ extern MaceStatus Conv2d(OpContext *context, ...@@ -146,6 +150,7 @@ extern MaceStatus Conv2d(OpContext *context,
} }
kernel->setArg(idx++, *(output->opencl_image())); kernel->setArg(idx++, *(output->opencl_image()));
kernel->setArg(idx++, relux_max_limit); kernel->setArg(idx++, relux_max_limit);
kernel->setArg(idx++, leakyrelu_coefficient);
kernel->setArg(idx++, static_cast<uint32_t>(input->dim(1))); kernel->setArg(idx++, static_cast<uint32_t>(input->dim(1)));
kernel->setArg(idx++, static_cast<uint32_t>(input->dim(2))); kernel->setArg(idx++, static_cast<uint32_t>(input->dim(2)));
kernel->setArg(idx++, static_cast<uint32_t>(input_channel_blocks)); kernel->setArg(idx++, static_cast<uint32_t>(input_channel_blocks));
......
...@@ -42,6 +42,7 @@ class Deconv2dKernel : public OpenCLDeconv2dKernel { ...@@ -42,6 +42,7 @@ class Deconv2dKernel : public OpenCLDeconv2dKernel {
const int *padding_data, const int *padding_data,
const ActivationType activation, const ActivationType activation,
const float relux_max_limit, const float relux_max_limit,
const float leakyrelu_coefficient,
const std::vector<index_t> &output_shape, const std::vector<index_t> &output_shape,
Tensor *output) override; Tensor *output) override;
...@@ -61,6 +62,7 @@ MaceStatus Deconv2dKernel<T>::Compute( ...@@ -61,6 +62,7 @@ MaceStatus Deconv2dKernel<T>::Compute(
const int *padding_data, const int *padding_data,
const ActivationType activation, const ActivationType activation,
const float relux_max_limit, const float relux_max_limit,
const float leakyrelu_coefficient,
const std::vector<index_t> &output_shape, const std::vector<index_t> &output_shape,
Tensor *output) { Tensor *output) {
std::vector<size_t> output_image_shape; std::vector<size_t> output_image_shape;
...@@ -119,6 +121,9 @@ MaceStatus Deconv2dKernel<T>::Compute( ...@@ -119,6 +121,9 @@ MaceStatus Deconv2dKernel<T>::Compute(
case SIGMOID: case SIGMOID:
built_options.emplace("-DUSE_SIGMOID"); built_options.emplace("-DUSE_SIGMOID");
break; break;
case LEAKYRELU:
built_options.emplace("-DUSE_LEAKYRELU");
break;
default: default:
LOG(FATAL) << "Unknown activation type: " << activation; LOG(FATAL) << "Unknown activation type: " << activation;
} }
...@@ -146,6 +151,7 @@ MaceStatus Deconv2dKernel<T>::Compute( ...@@ -146,6 +151,7 @@ MaceStatus Deconv2dKernel<T>::Compute(
} }
kernel_.setArg(idx++, *(output->opencl_image())); kernel_.setArg(idx++, *(output->opencl_image()));
kernel_.setArg(idx++, relux_max_limit); kernel_.setArg(idx++, relux_max_limit);
kernel_.setArg(idx++, leakyrelu_coefficient);
kernel_.setArg(idx++, static_cast<int32_t>(input->dim(1))); kernel_.setArg(idx++, static_cast<int32_t>(input->dim(1)));
kernel_.setArg(idx++, static_cast<int32_t>(input->dim(2))); kernel_.setArg(idx++, static_cast<int32_t>(input->dim(2)));
kernel_.setArg(idx++, static_cast<int32_t>(input->dim(3))); kernel_.setArg(idx++, static_cast<int32_t>(input->dim(3)));
......
...@@ -73,6 +73,7 @@ MaceStatus DepthwiseConv2d(OpContext *context, ...@@ -73,6 +73,7 @@ MaceStatus DepthwiseConv2d(OpContext *context,
const int *dilations, const int *dilations,
const ActivationType activation, const ActivationType activation,
const float relux_max_limit, const float relux_max_limit,
const float leakyrelu_coefficient,
const DataType dt, const DataType dt,
std::vector<index_t> *prev_input_shape, std::vector<index_t> *prev_input_shape,
Tensor *output, Tensor *output,
...@@ -126,6 +127,9 @@ MaceStatus DepthwiseConv2d(OpContext *context, ...@@ -126,6 +127,9 @@ MaceStatus DepthwiseConv2d(OpContext *context,
case SIGMOID: case SIGMOID:
built_options.emplace("-DUSE_SIGMOID"); built_options.emplace("-DUSE_SIGMOID");
break; break;
case LEAKYRELU:
built_options.emplace("-DUSE_LEAKYRELU");
break;
default: default:
LOG(FATAL) << "Unknown activation type: " << activation; LOG(FATAL) << "Unknown activation type: " << activation;
} }
...@@ -159,6 +163,7 @@ MaceStatus DepthwiseConv2d(OpContext *context, ...@@ -159,6 +163,7 @@ MaceStatus DepthwiseConv2d(OpContext *context,
} }
kernel->setArg(idx++, *(output->opencl_image())); kernel->setArg(idx++, *(output->opencl_image()));
kernel->setArg(idx++, relux_max_limit); kernel->setArg(idx++, relux_max_limit);
kernel->setArg(idx++, leakyrelu_coefficient);
kernel->setArg(idx++, static_cast<int16_t>(input_height)); kernel->setArg(idx++, static_cast<int16_t>(input_height));
kernel->setArg(idx++, static_cast<int16_t>(input_width)); kernel->setArg(idx++, static_cast<int16_t>(input_width));
kernel->setArg(idx++, static_cast<int16_t>(input_channel_blocks)); kernel->setArg(idx++, static_cast<int16_t>(input_channel_blocks));
......
...@@ -39,6 +39,7 @@ MaceStatus DepthwiseConv2d(OpContext *context, ...@@ -39,6 +39,7 @@ MaceStatus DepthwiseConv2d(OpContext *context,
const int *dilations, const int *dilations,
const ActivationType activation, const ActivationType activation,
const float relux_max_limit, const float relux_max_limit,
const float leakyrelu_coefficient,
const DataType dt, const DataType dt,
std::vector<index_t> *prev_input_shape, std::vector<index_t> *prev_input_shape,
Tensor *output, Tensor *output,
...@@ -60,6 +61,7 @@ class DepthwiseConv2dKernel : public OpenCLDepthwiseConv2dKernel { ...@@ -60,6 +61,7 @@ class DepthwiseConv2dKernel : public OpenCLDepthwiseConv2dKernel {
const int *dilations, const int *dilations,
const ActivationType activation, const ActivationType activation,
const float relux_max_limit, const float relux_max_limit,
const float leakyrelu_coefficient,
Tensor *output) override; Tensor *output) override;
private: private:
...@@ -80,6 +82,7 @@ MaceStatus DepthwiseConv2dKernel<T>::Compute( ...@@ -80,6 +82,7 @@ MaceStatus DepthwiseConv2dKernel<T>::Compute(
const int *dilations, const int *dilations,
const ActivationType activation, const ActivationType activation,
const float relux_max_limit, const float relux_max_limit,
const float leakyrelu_coefficient,
Tensor *output) { Tensor *output) {
index_t kernel_h = filter->dim(2); index_t kernel_h = filter->dim(2);
index_t kernel_w = filter->dim(3); index_t kernel_w = filter->dim(3);
...@@ -118,8 +121,8 @@ MaceStatus DepthwiseConv2dKernel<T>::Compute( ...@@ -118,8 +121,8 @@ MaceStatus DepthwiseConv2dKernel<T>::Compute(
return depthwise::DepthwiseConv2d( return depthwise::DepthwiseConv2d(
context, &kernel_, input, filter, bias, strides[0], paddings.data(), context, &kernel_, input, filter, bias, strides[0], paddings.data(),
dilations, activation, relux_max_limit, DataTypeToEnum<T>::value, dilations, activation, relux_max_limit, leakyrelu_coefficient,
&input_shape_, output, &kwg_size_); DataTypeToEnum<T>::value, &input_shape_, output, &kwg_size_);
} }
} // namespace image } // namespace image
......
...@@ -43,6 +43,7 @@ class DepthwiseDeconv2dKernel : public OpenCLDepthwiseDeconv2dKernel { ...@@ -43,6 +43,7 @@ class DepthwiseDeconv2dKernel : public OpenCLDepthwiseDeconv2dKernel {
const int group, const int group,
const ActivationType activation, const ActivationType activation,
const float relux_max_limit, const float relux_max_limit,
const float leakyrelu_coefficient,
const std::vector<index_t> &output_shape, const std::vector<index_t> &output_shape,
Tensor *output) override; Tensor *output) override;
...@@ -63,6 +64,7 @@ MaceStatus DepthwiseDeconv2dKernel<T>::Compute( ...@@ -63,6 +64,7 @@ MaceStatus DepthwiseDeconv2dKernel<T>::Compute(
const int group, const int group,
const ActivationType activation, const ActivationType activation,
const float relux_max_limit, const float relux_max_limit,
const float leakyrelu_coefficient,
const std::vector<index_t> &output_shape, const std::vector<index_t> &output_shape,
Tensor *output) { Tensor *output) {
const index_t batch = output_shape[0]; const index_t batch = output_shape[0];
...@@ -125,6 +127,9 @@ MaceStatus DepthwiseDeconv2dKernel<T>::Compute( ...@@ -125,6 +127,9 @@ MaceStatus DepthwiseDeconv2dKernel<T>::Compute(
case SIGMOID: case SIGMOID:
built_options.emplace("-DUSE_SIGMOID"); built_options.emplace("-DUSE_SIGMOID");
break; break;
case LEAKYRELU:
built_options.emplace("-DUSE_LEAKYRELU");
break;
default: default:
LOG(FATAL) << "Unknown activation type: " << activation; LOG(FATAL) << "Unknown activation type: " << activation;
} }
...@@ -152,6 +157,7 @@ MaceStatus DepthwiseDeconv2dKernel<T>::Compute( ...@@ -152,6 +157,7 @@ MaceStatus DepthwiseDeconv2dKernel<T>::Compute(
} }
kernel_.setArg(idx++, *(output->opencl_image())); kernel_.setArg(idx++, *(output->opencl_image()));
kernel_.setArg(idx++, relux_max_limit); kernel_.setArg(idx++, relux_max_limit);
kernel_.setArg(idx++, leakyrelu_coefficient);
kernel_.setArg(idx++, static_cast<int32_t>(input->dim(1))); kernel_.setArg(idx++, static_cast<int32_t>(input->dim(1)));
kernel_.setArg(idx++, static_cast<int32_t>(input->dim(2))); kernel_.setArg(idx++, static_cast<int32_t>(input->dim(2)));
kernel_.setArg(idx++, static_cast<int32_t>(height)); kernel_.setArg(idx++, static_cast<int32_t>(height));
......
...@@ -40,6 +40,7 @@ class FullyConnectedKernel : public OpenCLFullyConnectedKernel { ...@@ -40,6 +40,7 @@ class FullyConnectedKernel : public OpenCLFullyConnectedKernel {
const Tensor *bias, const Tensor *bias,
const ActivationType activation, const ActivationType activation,
const float relux_max_limit, const float relux_max_limit,
const float leakyrelu_coefficient,
Tensor *output) override; Tensor *output) override;
private: private:
...@@ -57,6 +58,7 @@ MaceStatus FullyConnectedKernel<T>::Compute( ...@@ -57,6 +58,7 @@ MaceStatus FullyConnectedKernel<T>::Compute(
const Tensor *bias, const Tensor *bias,
const ActivationType activation, const ActivationType activation,
const float relux_max_limit, const float relux_max_limit,
const float leakyrelu_coefficient,
Tensor *output) { Tensor *output) {
std::vector<index_t> output_shape = {input->dim(0), 1, 1, weight->dim(0)}; std::vector<index_t> output_shape = {input->dim(0), 1, 1, weight->dim(0)};
std::vector<size_t> output_image_shape; std::vector<size_t> output_image_shape;
...@@ -98,6 +100,9 @@ MaceStatus FullyConnectedKernel<T>::Compute( ...@@ -98,6 +100,9 @@ MaceStatus FullyConnectedKernel<T>::Compute(
case SIGMOID: case SIGMOID:
built_options.emplace("-DUSE_SIGMOID"); built_options.emplace("-DUSE_SIGMOID");
break; break;
case LEAKYRELU:
built_options.emplace("-DUSE_LEAKYRELU");
break;
default: default:
LOG(FATAL) << "Unknown activation type: " << activation; LOG(FATAL) << "Unknown activation type: " << activation;
} }
...@@ -148,6 +153,7 @@ MaceStatus FullyConnectedKernel<T>::Compute( ...@@ -148,6 +153,7 @@ MaceStatus FullyConnectedKernel<T>::Compute(
kernel_.setArg(idx++, static_cast<int>(RoundUpDiv4(input->dim(3)))); kernel_.setArg(idx++, static_cast<int>(RoundUpDiv4(input->dim(3))));
kernel_.setArg(idx++, static_cast<int>(output_blocks)); kernel_.setArg(idx++, static_cast<int>(output_blocks));
kernel_.setArg(idx++, relux_max_limit); kernel_.setArg(idx++, relux_max_limit);
kernel_.setArg(idx++, leakyrelu_coefficient);
input_shape_ = input->shape(); input_shape_ = input->shape();
} }
......
...@@ -115,6 +115,7 @@ MaceStatus WinogradOutputTransform(OpContext *context, ...@@ -115,6 +115,7 @@ MaceStatus WinogradOutputTransform(OpContext *context,
const int wino_blk_size, const int wino_blk_size,
const ActivationType activation, const ActivationType activation,
const float relux_max_limit, const float relux_max_limit,
const float leakyrelu_coefficient,
const bool input_changed, const bool input_changed,
Tensor *output_tensor, Tensor *output_tensor,
uint32_t *kwg_size, uint32_t *kwg_size,
...@@ -164,6 +165,9 @@ MaceStatus WinogradOutputTransform(OpContext *context, ...@@ -164,6 +165,9 @@ MaceStatus WinogradOutputTransform(OpContext *context,
case SIGMOID: case SIGMOID:
built_options.emplace("-DUSE_SIGMOID"); built_options.emplace("-DUSE_SIGMOID");
break; break;
case LEAKYRELU:
built_options.emplace("-DUSE_LEAKYRELU");
break;
default: default:
LOG(FATAL) << "Unknown activation type: " << activation; LOG(FATAL) << "Unknown activation type: " << activation;
} }
...@@ -199,6 +203,7 @@ MaceStatus WinogradOutputTransform(OpContext *context, ...@@ -199,6 +203,7 @@ MaceStatus WinogradOutputTransform(OpContext *context,
kernel->setArg(idx++, static_cast<uint32_t>(round_h * round_w)); kernel->setArg(idx++, static_cast<uint32_t>(round_h * round_w));
kernel->setArg(idx++, static_cast<uint32_t>(round_w)); kernel->setArg(idx++, static_cast<uint32_t>(round_w));
kernel->setArg(idx++, relux_max_limit); kernel->setArg(idx++, relux_max_limit);
kernel->setArg(idx++, leakyrelu_coefficient);
} }
const std::vector<uint32_t> lws = {*kwg_size / 8, 8, 0}; const std::vector<uint32_t> lws = {*kwg_size / 8, 8, 0};
std::string tuning_key = std::string tuning_key =
...@@ -222,6 +227,7 @@ extern MaceStatus WinogradConv2dK3x3S1(OpContext *context, ...@@ -222,6 +227,7 @@ extern MaceStatus WinogradConv2dK3x3S1(OpContext *context,
const int *paddings, const int *paddings,
const ActivationType activation, const ActivationType activation,
const float relux_max_limit, const float relux_max_limit,
const float leakyrelu_coefficient,
const DataType dt, const DataType dt,
const int wino_blk_size, const int wino_blk_size,
std::vector<index_t> *prev_input_shape, std::vector<index_t> *prev_input_shape,
...@@ -338,7 +344,8 @@ extern MaceStatus WinogradConv2dK3x3S1(OpContext *context, ...@@ -338,7 +344,8 @@ extern MaceStatus WinogradConv2dK3x3S1(OpContext *context,
MACE_RETURN_IF_ERROR(WinogradOutputTransform( MACE_RETURN_IF_ERROR(WinogradOutputTransform(
context, kernels[2], mm_output.get(), bias, context, kernels[2], mm_output.get(), bias,
dt, round_h, round_w, wino_blk_size, activation, relux_max_limit, dt, round_h, round_w, wino_blk_size, activation, relux_max_limit,
input_changed, output, kwg_size[2], &t_output_future)) leakyrelu_coefficient, input_changed, output, kwg_size[2],
&t_output_future))
MergeMultipleFutureWaitFn({t_input_future, mm_future, t_output_future}, MergeMultipleFutureWaitFn({t_input_future, mm_future, t_output_future},
context->future()); context->future());
......
...@@ -168,6 +168,7 @@ class MaceKeyword(object): ...@@ -168,6 +168,7 @@ class MaceKeyword(object):
mace_element_type_str = 'type' mace_element_type_str = 'type'
mace_activation_type_str = 'activation' mace_activation_type_str = 'activation'
mace_activation_max_limit_str = 'max_limit' mace_activation_max_limit_str = 'max_limit'
mace_activation_leakyrelu_coefficient_str = 'leakyrelu_coefficient'
mace_resize_size_str = 'size' mace_resize_size_str = 'size'
mace_batch_to_space_crops_str = 'crops' mace_batch_to_space_crops_str = 'crops'
mace_paddings_str = 'paddings' mace_paddings_str = 'paddings'
......
...@@ -493,6 +493,14 @@ class CaffeConverter(base_converter.ConverterInterface): ...@@ -493,6 +493,14 @@ class CaffeConverter(base_converter.ConverterInterface):
mace_pb2.DT_FLOAT, alpha_data) mace_pb2.DT_FLOAT, alpha_data)
op.input.extend([alpha_tensor_name]) op.input.extend([alpha_tensor_name])
negative_slope = caffe_op.layer.relu_param.negative_slope
if caffe_op.type == 'ReLU' and negative_slope != 0:
param_arg = op.arg.add()
param_arg.name = MaceKeyword.mace_activation_leakyrelu_coefficient_str # noqa
param_arg.f = caffe_op.layer.relu_param.negative_slope
type_arg.s = six.b(ActivationType.LEAKYRELU.name)
def convert_folded_batchnorm(self, caffe_op): def convert_folded_batchnorm(self, caffe_op):
op = self.convert_general_op(caffe_op) op = self.convert_general_op(caffe_op)
op.type = MaceOp.BatchNorm.name op.type = MaceOp.BatchNorm.name
......
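
This mapping relies on Caffe's ReLU with a nonzero negative_slope computing the same function as LEAKYRELU with leakyrelu_coefficient set to that slope, which is why the converter rewrites the activation type. A hedged C++ equivalence check (helper names are illustrative, not part of either framework):

#include <algorithm>
#include <cassert>

// Caffe-style ReLU with a negative slope versus the LEAKYRELU formula: both
// scale the negative part by the same factor, so the converter can copy
// negative_slope straight into leakyrelu_coefficient.
float CaffeReluNegativeSlope(float x, float negative_slope) {
  return x > 0.0f ? x : negative_slope * x;
}

float MaceLeakyRelu(float x, float coefficient) {
  return std::max(x, 0.0f) + coefficient * std::min(x, 0.0f);
}

int main() {
  const float xs[] = {-3.0f, -0.5f, 0.0f, 1.25f};
  for (float x : xs) {
    assert(CaffeReluNegativeSlope(x, 0.2f) == MaceLeakyRelu(x, 0.2f));
  }
  return 0;
}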
...@@ -286,10 +286,10 @@ class OnnxConverter(base_converter.ConverterInterface): ...@@ -286,10 +286,10 @@ class OnnxConverter(base_converter.ConverterInterface):
activation_type = { activation_type = {
OnnxOpType.Relu.name: ActivationType.RELU, OnnxOpType.Relu.name: ActivationType.RELU,
OnnxOpType.LeakyRelu.name: ActivationType.LEAKYRELU,
OnnxOpType.PRelu.name: ActivationType.PRELU, OnnxOpType.PRelu.name: ActivationType.PRELU,
OnnxOpType.Tanh.name: ActivationType.TANH, OnnxOpType.Tanh.name: ActivationType.TANH,
OnnxOpType.Sigmoid.name: ActivationType.SIGMOID, OnnxOpType.Sigmoid.name: ActivationType.SIGMOID,
OnnxOpType.LeakyRelu.name: ActivationType.LEAKYRELU,
} }
def __init__(self, option, src_model_file): def __init__(self, option, src_model_file):
......
...@@ -893,7 +893,9 @@ class Transformer(base_converter.ConverterInterface): ...@@ -893,7 +893,9 @@ class Transformer(base_converter.ConverterInterface):
op.output[0] = consumer_op.output[0] op.output[0] = consumer_op.output[0]
for arg in consumer_op.arg: for arg in consumer_op.arg:
if arg.name == MaceKeyword.mace_activation_type_str \ if arg.name == MaceKeyword.mace_activation_type_str \
or arg.name == MaceKeyword.mace_activation_max_limit_str: # noqa or arg.name == \
MaceKeyword.mace_activation_max_limit_str \
or arg.name == MaceKeyword.mace_activation_leakyrelu_coefficient_str: # noqa
op.arg.extend([arg]) op.arg.extend([arg])
self.replace_quantize_info(op, consumer_op) self.replace_quantize_info(op, consumer_op)
......