Commit 27314662 authored by 刘托

Merge branch 'support_caffe_relu_slope' into 'master'

Support Caffe ReLU with negative_slope

See merge request !910
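
For reference, the `leakyrelu_coefficient` argument threaded through the kernels below carries Caffe's `negative_slope`, and the CPU implementation in this change computes the standard leaky ReLU:

$$f(x) = \max(x, 0) + \alpha \cdot \min(x, 0) = \begin{cases} x, & x \ge 0 \\ \alpha x, & x < 0 \end{cases}$$

where $\alpha$ is `leakyrelu_coefficient` (default `0.0f`, which reduces to plain ReLU).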
......@@ -36,9 +36,11 @@ class ActivationOp<DeviceType::CPU, float> : public Operation {
: Operation(context),
activation_(ops::StringToActivationType(
Operation::GetOptionalArg<std::string>("activation",
"NOOP"))),
"NOOP"))),
relux_max_limit_(Operation::GetOptionalArg<float>("max_limit",
0.0f)) {}
0.0f)),
leakyrelu_coefficient_(Operation::GetOptionalArg<float>(
"leakyrelu_coefficient", 0.0f)) {}
MaceStatus Run(OpContext *context) override {
MACE_UNUSED(context);
......@@ -58,7 +60,7 @@ class ActivationOp<DeviceType::CPU, float> : public Operation {
alpha_ptr, output_ptr);
} else {
DoActivation(input_ptr, output_ptr, output->size(), activation_,
relux_max_limit_);
relux_max_limit_, leakyrelu_coefficient_);
}
return MaceStatus::MACE_SUCCESS;
}
......@@ -66,6 +68,7 @@ class ActivationOp<DeviceType::CPU, float> : public Operation {
private:
ActivationType activation_;
float relux_max_limit_;
float leakyrelu_coefficient_;
};
......@@ -80,11 +83,14 @@ class ActivationOp<DeviceType::GPU, T> : public Operation {
"NOOP"));
auto relux_max_limit = static_cast<T>(
Operation::GetOptionalArg<float>("max_limit", 0.0f));
auto leakyrelu_coefficient = static_cast<T>(
Operation::GetOptionalArg<float>("leakyrelu_coefficient", 0.0f));
MemoryType mem_type;
if (context->device()->gpu_runtime()->UseImageMemory()) {
mem_type = MemoryType::GPU_IMAGE;
kernel_.reset(
new opencl::image::ActivationKernel<T>(type, relux_max_limit));
new opencl::image::ActivationKernel<T>(type, relux_max_limit,
leakyrelu_coefficient));
} else {
MACE_NOT_IMPLEMENTED;
}
......
......@@ -62,7 +62,8 @@ void DoActivation(const T *input_ptr,
T *output_ptr,
const index_t size,
const ActivationType type,
const float relux_max_limit) {
const float relux_max_limit,
const float leakyrelu_coefficient) {
MACE_CHECK(DataTypeToEnum<T>::value != DataType::DT_HALF);
switch (type) {
......@@ -97,7 +98,7 @@ void DoActivation(const T *input_ptr,
#pragma omp parallel for schedule(runtime)
for (index_t i = 0; i < size; ++i) {
output_ptr[i] = std::max(input_ptr[i], static_cast<T>(0))
+ std::min(input_ptr[i], static_cast<T>(0)) * relux_max_limit;
+ leakyrelu_coefficient * std::min(input_ptr[i], static_cast<T>(0));
}
break;
default:
......@@ -110,7 +111,8 @@ inline void DoActivation(const float *input_ptr,
float *output_ptr,
const index_t size,
const ActivationType type,
const float relux_max_limit) {
const float relux_max_limit,
const float leakyrelu_coefficient) {
switch (type) {
case NOOP:
break;
......@@ -133,7 +135,7 @@ inline void DoActivation(const float *input_ptr,
}
break;
case LEAKYRELU:
LeakyReluNeon(input_ptr, relux_max_limit, size, output_ptr);
LeakyReluNeon(input_ptr, leakyrelu_coefficient, size, output_ptr);
break;
default:
LOG(FATAL) << "Unknown activation type: " << type;
......
......@@ -52,6 +52,42 @@ TEST_F(ActivationOpTest, OPENCLSimpleRelu) {
TestSimpleRelu<DeviceType::GPU>();
}
namespace {
template <DeviceType D>
void TestSimpleLeakyRelu() {
OpsTestNet net;
// Add input data
net.AddInputFromArray<D, float>(
"Input", {2, 2, 2, 2},
{-7, 7, -6, 6, -5, 5, -4, 4, -3, 3, -2, 2, -1, 1, 0, 0});
OpDefBuilder("Activation", "ReluTest")
.Input("Input")
.Output("Output")
.AddStringArg("activation", "LEAKYRELU")
.AddFloatArg("leakyrelu_coefficient", 0.1)
.Finalize(net.NewOperatorDef());
// Run
net.RunOp(D);
auto expected = net.CreateTensor<float>(
{2, 2, 2, 2},
{-0.7, 7, -0.6, 6, -0.5, 5, -0.4, 4, -0.3, 3, -0.2, 2, -0.1, 1, 0, 0});
ExpectTensorNear<float>(*expected, *net.GetOutput("Output"), 1e-5);
}
} // namespace
TEST_F(ActivationOpTest, CPUSimpleLeakyRelu) {
TestSimpleLeakyRelu<DeviceType::CPU>();
}
TEST_F(ActivationOpTest, OPENCLSimpleLeakyRelu) {
TestSimpleLeakyRelu<DeviceType::GPU>();
}
namespace {
template <DeviceType D>
void TestUnalignedSimpleRelu() {
......
......@@ -35,10 +35,12 @@ class BatchNormOp<DeviceType::CPU, float> : public Operation {
explicit BatchNormOp(OpConstructContext *context)
: Operation(context),
epsilon_(Operation::GetOptionalArg<float>("epsilon",
static_cast<float>(1e-4))),
static_cast<float>(1e-4))),
activation_(ops::StringToActivationType(
Operation::GetOptionalArg<std::string>("activation", "NOOP"))),
relux_max_limit_(Operation::GetOptionalArg<float>("max_limit", 0.0f)) {}
relux_max_limit_(Operation::GetOptionalArg<float>("max_limit", 0.0f)),
leakyrelu_coefficient_(Operation::GetOptionalArg<float>(
"leakyrelu_coefficient", 0.0f)) {}
MaceStatus Run(OpContext *context) override {
MACE_UNUSED(context);
......@@ -121,7 +123,7 @@ class BatchNormOp<DeviceType::CPU, float> : public Operation {
}
}
DoActivation(output_ptr, output_ptr, output->size(), activation_,
relux_max_limit_);
relux_max_limit_, leakyrelu_coefficient_);
return MaceStatus::MACE_SUCCESS;
}
......@@ -130,6 +132,7 @@ class BatchNormOp<DeviceType::CPU, float> : public Operation {
float epsilon_;
const ActivationType activation_;
const float relux_max_limit_;
const float leakyrelu_coefficient_;
protected:
MACE_OP_INPUT_TAGS(INPUT, SCALE, OFFSET, MEAN, VAR);
......@@ -148,11 +151,13 @@ class BatchNormOp<DeviceType::GPU, T> : public Operation {
ActivationType activation = ops::StringToActivationType(
Operation::GetOptionalArg<std::string>("activation", "NOOP"));
float relux_max_limit = Operation::GetOptionalArg<float>("max_limit", 0.0f);
float leakyrelu_coefficient = Operation::GetOptionalArg<float>(
"leakyrelu_coefficient", 0.0f);
MemoryType mem_type;
if (context->device()->gpu_runtime()->UseImageMemory()) {
mem_type = MemoryType::GPU_IMAGE;
kernel_.reset(new opencl::image::BatchNormKernel<T>(
epsilon, activation, relux_max_limit));
epsilon, activation, relux_max_limit, leakyrelu_coefficient));
} else {
MACE_NOT_IMPLEMENTED;
}
......
......@@ -88,8 +88,8 @@ TEST_F(BatchNormOpTest, SimpleRandomOPENCL) {
// Add input data
net.AddRandomInput<DeviceType::GPU, float>("Input",
{batch, height, width, channels});
net.AddRandomInput<DeviceType::GPU, float>("Scale", {channels}, true);
net.AddRandomInput<DeviceType::GPU, float>("Offset", {channels}, true);
net.AddRandomInput<DeviceType::GPU, float>("Scale", {channels}, true, false);
net.AddRandomInput<DeviceType::GPU, float>("Offset", {channels}, true, false);
net.AddRandomInput<DeviceType::GPU, float>("Mean", {channels}, true);
net.AddRandomInput<DeviceType::GPU, float>("Var", {channels}, true);
......@@ -105,6 +105,8 @@ TEST_F(BatchNormOpTest, SimpleRandomOPENCL) {
.Input("Var")
.AddFloatArg("epsilon", 1e-3)
.Output("OutputNCHW")
.AddStringArg("activation", "LEAKYRELU")
.AddFloatArg("leakyrelu_coefficient", 0.1)
.Finalize(net.NewOperatorDef());
// run cpu
......@@ -126,6 +128,8 @@ TEST_F(BatchNormOpTest, SimpleRandomOPENCL) {
.Input("Var")
.AddFloatArg("epsilon", 1e-3)
.Output("Output")
.AddStringArg("activation", "LEAKYRELU")
.AddFloatArg("leakyrelu_coefficient", 0.1)
.Finalize(net.NewOperatorDef());
// Tuning
......
......@@ -58,6 +58,8 @@ class Conv2dOp<DeviceType::CPU, float> : public ConvPool2dOpBase {
Operation::GetOptionalArg<std::string>("activation",
"NOOP"))),
relux_max_limit_(Operation::GetOptionalArg<float>("max_limit", 0.0f)),
leakyrelu_coefficient_(Operation::GetOptionalArg<float>(
"leakyrelu_coefficient", 0.0f)),
is_filter_transformed_(false) {}
MaceStatus Run(OpContext *context) override {
......@@ -520,7 +522,7 @@ class Conv2dOp<DeviceType::CPU, float> : public ConvPool2dOpBase {
}
DoActivation(output_data, output_data, output->size(), activation_,
relux_max_limit_);
relux_max_limit_, leakyrelu_coefficient_);
return MaceStatus::MACE_SUCCESS;
}
......@@ -703,6 +705,7 @@ class Conv2dOp<DeviceType::CPU, float> : public ConvPool2dOpBase {
private:
const ActivationType activation_;
const float relux_max_limit_;
const float leakyrelu_coefficient_;
bool is_filter_transformed_;
SGemm sgemm_;
......@@ -721,7 +724,9 @@ class Conv2dOp<DeviceType::CPU, uint8_t> : public ConvPool2dOpBase {
activation_(ops::StringToActivationType(
Operation::GetOptionalArg<std::string>("activation",
"NOOP"))),
relux_max_limit_(Operation::GetOptionalArg<float>("max_limit", 0.0f)) {}
relux_max_limit_(Operation::GetOptionalArg<float>("max_limit", 0.0f)),
leakyrelu_coefficient_(Operation::GetOptionalArg<float>(
"leakyrelu_coefficient", 0.0f)) {}
MaceStatus Run(OpContext *context) override {
const Tensor *input = this->Input(INPUT);
......@@ -944,6 +949,7 @@ class Conv2dOp<DeviceType::CPU, uint8_t> : public ConvPool2dOpBase {
private:
const ActivationType activation_;
const float relux_max_limit_;
const float leakyrelu_coefficient_;
private:
MACE_OP_INPUT_TAGS(INPUT, FILTER, BIAS);
......@@ -961,6 +967,8 @@ class Conv2dOp<DeviceType::GPU, T> : public ConvPool2dOpBase {
Operation::GetOptionalArg<std::string>("activation",
"NOOP"))),
relux_max_limit_(Operation::GetOptionalArg<float>("max_limit", 0.0f)),
leakyrelu_coefficient_(Operation::GetOptionalArg<float>(
"leakyrelu_coefficient", 0.0f)),
wino_block_size_(Operation::GetOptionalArg<int>("wino_block_size", 0)) {
MemoryType mem_type;
if (context->device()->gpu_runtime()->UseImageMemory()) {
......@@ -1007,12 +1015,13 @@ class Conv2dOp<DeviceType::GPU, T> : public ConvPool2dOpBase {
return kernel_->Compute(context, input, filter, bias,
strides_.data(), padding_type_, paddings_,
dilations_.data(), activation_, relux_max_limit_,
wino_block_size_, output);
leakyrelu_coefficient_, wino_block_size_, output);
}
private:
const ActivationType activation_;
const float relux_max_limit_;
const float leakyrelu_coefficient_;
std::unique_ptr<OpenCLConv2dKernel> kernel_;
int wino_block_size_;
......
......@@ -527,8 +527,9 @@ void TestComplexConvNxNS12(const std::vector<index_t> &shape,
// Add input data
net.AddRandomInput<D, T>("Input", {batch, height, width, input_channels});
net.AddRandomInput<D, T>(
"Filter", {output_channels, input_channels, kernel_h, kernel_w}, true);
net.AddRandomInput<D, T>("Bias", {output_channels}, true);
"Filter", {output_channels, input_channels, kernel_h, kernel_w}, true,
false);
net.AddRandomInput<D, T>("Bias", {output_channels}, true, false);
net.TransformDataFormat<DeviceType::CPU, float>("Input", NHWC, "InputNCHW",
NCHW);
......@@ -541,6 +542,8 @@ void TestComplexConvNxNS12(const std::vector<index_t> &shape,
.AddIntsArg("strides", {stride_h, stride_w})
.AddIntArg("padding", type)
.AddIntsArg("dilations", {1, 1})
.AddStringArg("activation", "LEAKYRELU")
.AddFloatArg("leakyrelu_coefficient", 0.1)
.AddIntArg("T", static_cast<int>(DataTypeToEnum<T>::value))
.Finalize(net.NewOperatorDef());
......@@ -564,6 +567,8 @@ void TestComplexConvNxNS12(const std::vector<index_t> &shape,
.AddIntsArg("strides", {stride_h, stride_w})
.AddIntArg("padding", type)
.AddIntsArg("dilations", {1, 1})
.AddStringArg("activation", "LEAKYRELU")
.AddFloatArg("leakyrelu_coefficient", 0.1)
.AddIntArg("T", static_cast<int>(DataTypeToEnum<T>::value))
.AddIntArg("wino_block_size", wino_blk_size)
.Finalize(net.NewOperatorDef());
......
......@@ -292,7 +292,8 @@ class Deconv2dOp<DeviceType::CPU, float> : public Deconv2dOpBase {
output_data,
output->size(),
activation_,
relux_max_limit_);
relux_max_limit_,
leakyrelu_coefficient_);
return MaceStatus::MACE_SUCCESS;
}
......@@ -443,7 +444,8 @@ class Deconv2dOp<DeviceType::GPU, T> : public Deconv2dOpBase {
return kernel_->Compute(context, input, filter, bias,
strides_.data(), in_paddings.data(), activation_,
relux_max_limit_, out_shape, output);
relux_max_limit_, leakyrelu_coefficient_,
out_shape, output);
}
private:
......
......@@ -47,7 +47,9 @@ class Deconv2dOpBase : public Operation {
Operation::GetOptionalArg<std::string>("activation",
"NOOP"))),
relux_max_limit_(
Operation::GetOptionalArg<float>("max_limit", 0.0f)) {}
Operation::GetOptionalArg<float>("max_limit", 0.0f)),
leakyrelu_coefficient_(
Operation::GetOptionalArg<float>("leakyrelu_coefficient", 0.0f)) {}
static void CalcDeconvShape_Caffe(
const index_t *input_shape, // NHWC
......@@ -191,6 +193,7 @@ class Deconv2dOpBase : public Operation {
const FrameworkType model_type_;
const ActivationType activation_;
const float relux_max_limit_;
const float leakyrelu_coefficient_;
};
template <typename T>
......
......@@ -377,8 +377,9 @@ void TestComplexDeconvNxN(const int batch,
// Add input data
net.AddRandomInput<D, T>("Input", {batch, height, width, input_channels});
net.AddRandomInput<D, T>(
"Filter", {output_channels, input_channels, kernel_h, kernel_w}, true);
net.AddRandomInput<D, T>("Bias", {output_channels}, true);
"Filter", {output_channels, input_channels, kernel_h, kernel_w}, true,
false);
net.AddRandomInput<D, T>("Bias", {output_channels}, true, false);
net.TransformDataFormat<DeviceType::CPU, float>("Input", NHWC, "InputNCHW",
NCHW);
int out_h = 0;
......@@ -418,6 +419,8 @@ void TestComplexDeconvNxN(const int batch,
.AddIntsArg("strides", {stride_h, stride_w})
.AddIntsArg("padding_values", paddings)
.AddIntArg("framework_type", model_type)
.AddStringArg("activation", "LEAKYRELU")
.AddFloatArg("leakyrelu_coefficient", 0.1)
.AddIntArg("T", static_cast<int>(DataTypeToEnum<T>::value))
.Finalize(net.NewOperatorDef());
} else {
......@@ -454,6 +457,8 @@ void TestComplexDeconvNxN(const int batch,
.AddIntsArg("strides", {stride_h, stride_w})
.AddIntsArg("padding_values", paddings)
.AddIntArg("framework_type", model_type)
.AddStringArg("activation", "LEAKYRELU")
.AddFloatArg("leakyrelu_coefficient", 0.1)
.AddIntArg("T", static_cast<int>(DataTypeToEnum<T>::value))
.Finalize(net.NewOperatorDef());
} else {
......
......@@ -49,10 +49,13 @@ class DepthwiseConv2dOpBase : public ConvPool2dOpBase {
activation_(ops::StringToActivationType(
Operation::GetOptionalArg<std::string>("activation",
"NOOP"))),
relux_max_limit_(Operation::GetOptionalArg<float>("max_limit", 0.0f)) {}
relux_max_limit_(Operation::GetOptionalArg<float>("max_limit", 0.0f)),
leakyrelu_coefficient_(Operation::GetOptionalArg<float>(
"leakyrelu_coefficient", 0.0f)) {}
protected:
const ActivationType activation_;
const float relux_max_limit_;
const float leakyrelu_coefficient_;
};
template <DeviceType D, class T>
......@@ -218,7 +221,7 @@ class DepthwiseConv2dOp<DeviceType::CPU, float> : public DepthwiseConv2dOpBase {
}
DoActivation(output_data, output_data, output->size(), activation_,
relux_max_limit_);
relux_max_limit_, leakyrelu_coefficient_);
return MaceStatus::MACE_SUCCESS;
}
......@@ -524,7 +527,7 @@ class DepthwiseConv2dOp<DeviceType::GPU, T> : public DepthwiseConv2dOpBase {
return kernel_->Compute(context, input, filter, bias,
strides_.data(), padding_type_, paddings_,
dilations_.data(), activation_, relux_max_limit_,
output);
leakyrelu_coefficient_, output);
}
private:
......
......@@ -244,10 +244,10 @@ void TestNxNS12(const index_t height, const index_t width) {
net.AddRandomInput<DeviceType::GPU, float>(
"Input", {batch, height, width, channel});
net.AddRandomInput<DeviceType::GPU, float>(
"Filter", {multiplier, channel, kernel_h, kernel_w}, true);
"Filter", {multiplier, channel, kernel_h, kernel_w}, true, false);
net.AddRandomInput<DeviceType::GPU, float>("Bias",
{multiplier * channel},
true);
true, false);
net.TransformDataFormat<DeviceType::CPU, float>("Input", NHWC, "InputNCHW",
NCHW);
......@@ -260,8 +260,8 @@ void TestNxNS12(const index_t height, const index_t width) {
.AddIntArg("padding", type)
.AddIntsArg("dilations", {1, 1})
.AddIntArg("T", static_cast<int>(DataTypeToEnum<float>::value))
.AddStringArg("activation", "RELUX")
.AddFloatArg("max_limit", 6.0)
.AddStringArg("activation", "LEAKYRELU")
.AddFloatArg("leakyrelu_coefficient", 0.1)
.Finalize(net.NewOperatorDef());
// Run on cpu
......@@ -283,8 +283,8 @@ void TestNxNS12(const index_t height, const index_t width) {
.AddIntArg("padding", type)
.AddIntsArg("dilations", {1, 1})
.AddIntArg("T", static_cast<int>(DataTypeToEnum<T>::value))
.AddStringArg("activation", "RELUX")
.AddFloatArg("max_limit", 6.0)
.AddStringArg("activation", "LEAKYRELU")
.AddFloatArg("leakyrelu_coefficient", 0.1)
.Finalize(net.NewOperatorDef());
net.RunOp(DeviceType::GPU);
......
......@@ -281,7 +281,8 @@ class DepthwiseDeconv2dOp<DeviceType::CPU, float>
output_data,
output->size(),
activation_,
relux_max_limit_);
relux_max_limit_,
leakyrelu_coefficient_);
return MaceStatus::MACE_SUCCESS;
}
......@@ -458,6 +459,7 @@ class DepthwiseDeconv2dOp<DeviceType::GPU, T> : public Deconv2dOpBase {
group_,
activation_,
relux_max_limit_,
leakyrelu_coefficient_,
out_shape,
output);
}
......
......@@ -185,12 +185,13 @@ void RandomTest(index_t batch,
GenerateRandomRealTypeData({multiplier, channel, kernel, kernel},
&filter_data);
net.AddInputFromArray<DeviceType::GPU, float>(
"Filter", {multiplier, channel, kernel, kernel}, filter_data, true);
"Filter", {multiplier, channel, kernel, kernel}, filter_data, true,
false);
std::vector<float> bias_data(channel * multiplier);
GenerateRandomRealTypeData({channel * multiplier}, &bias_data);
net.AddInputFromArray<DeviceType::GPU, float>("Bias",
{channel * multiplier},
bias_data, true);
bias_data, true, false);
net.TransformDataFormat<DeviceType::CPU, float>("Input", NHWC, "InputNCHW",
NCHW);
......@@ -203,6 +204,8 @@ void RandomTest(index_t batch,
.AddIntsArg("padding_values", {padding, padding})
.AddIntArg("group", channel)
.AddIntsArg("dilations", {1, 1})
.AddStringArg("activation", "LEAKYRELU")
.AddFloatArg("leakyrelu_coefficient", 0.1f)
.AddIntArg("T", static_cast<int>(DataTypeToEnum<float>::value))
.Finalize(net.NewOperatorDef());
// Run
......@@ -224,6 +227,8 @@ void RandomTest(index_t batch,
.AddIntsArg("strides", {stride, stride})
.AddIntsArg("padding_values", {padding, padding})
.AddIntArg("group", channel)
.AddStringArg("activation", "LEAKYRELU")
.AddFloatArg("leakyrelu_coefficient", 0.1f)
.AddIntArg("T", static_cast<int>(DataTypeToEnum<T>::value))
.Finalize(net.NewOperatorDef());
......
......@@ -41,10 +41,13 @@ class FullyConnectedOpBase : public Operation {
activation_(ops::StringToActivationType(
Operation::GetOptionalArg<std::string>("activation",
"NOOP"))),
relux_max_limit_(Operation::GetOptionalArg<float>("max_limit", 0.0f)) {}
relux_max_limit_(Operation::GetOptionalArg<float>("max_limit", 0.0f)),
leakyrelu_coefficient_(Operation::GetOptionalArg<float>(
"leakyrelu_coefficient", 0.0f)) {}
protected:
const ActivationType activation_;
const float relux_max_limit_;
const float leakyrelu_coefficient_;
MACE_OP_INPUT_TAGS(INPUT, WEIGHT, BIAS);
MACE_OP_OUTPUT_TAGS(OUTPUT);
......@@ -104,7 +107,7 @@ class FullyConnectedOp<DeviceType::CPU, float> : public FullyConnectedOpBase {
}
DoActivation(output_ptr, output_ptr, output->size(), activation_,
relux_max_limit_);
relux_max_limit_, leakyrelu_coefficient_);
return MaceStatus::MACE_SUCCESS;
}
......@@ -226,7 +229,8 @@ class FullyConnectedOp<DeviceType::GPU, T> : public FullyConnectedOpBase {
"The shape of Weight: ", MakeString(weight->shape()),
" don't match.");
return kernel_->Compute(
context, input, weight, bias, activation_, relux_max_limit_, output);
context, input, weight, bias, activation_, relux_max_limit_,
leakyrelu_coefficient_, output);
}
private:
......
......@@ -123,10 +123,11 @@ void Random(const index_t batch,
// Add input data
net.AddRandomInput<DeviceType::GPU, float>("Input",
{batch, height, width, channels});
{batch, height, width, channels}, false, false);
net.AddRandomInput<DeviceType::GPU, float>(
"Weight", {out_channel, channels, height, width}, true);
net.AddRandomInput<DeviceType::GPU, float>("Bias", {out_channel}, true);
"Weight", {out_channel, channels, height, width}, true, false);
net.AddRandomInput<DeviceType::GPU, float>("Bias", {out_channel}, true,
false);
net.TransformDataFormat<DeviceType::CPU, float>("Input", NHWC, "InputNCHW",
NCHW);
......@@ -135,6 +136,8 @@ void Random(const index_t batch,
.Input("Weight")
.Input("Bias")
.Output("OutputNCHW")
.AddStringArg("activation", "LEAKYRELU")
.AddFloatArg("leakyrelu_coefficient", 0.1f)
.Finalize(net.NewOperatorDef());
// run cpu
......@@ -152,6 +155,8 @@ void Random(const index_t batch,
.Input("Weight")
.Input("Bias")
.Output("Output")
.AddStringArg("activation", "LEAKYRELU")
.AddFloatArg("leakyrelu_coefficient", 0.1f)
.AddIntArg("T", static_cast<int>(DataTypeToEnum<T>::value))
.Finalize(net.NewOperatorDef());
......
......@@ -38,6 +38,7 @@ extern MaceStatus Conv2d1x1(OpContext *context,
const DataType dt,
const ActivationType activation,
const float relux_max_limit,
const float leakyrelu_coefficient,
const bool input_changed,
Tensor *output,
StatsFuture *future);
......@@ -52,6 +53,7 @@ extern MaceStatus Conv2dGeneral(OpContext *context,
const DataType dt,
const ActivationType activation,
const float relux_max_limit,
const float leakyrelu_coefficient,
const bool input_changed,
Tensor *output,
StatsFuture *future);
......@@ -81,6 +83,7 @@ class Conv2dKernel : public OpenCLConv2dKernel {
const int *dilations,
const ActivationType activation,
const float relux_max_limit,
const float leakyrelu_coefficient,
const int winograd_blk_size,
Tensor *output) override;
......@@ -120,6 +123,7 @@ MaceStatus Conv2dKernel<T>::Compute(
const int *dilations,
const ActivationType activation,
const float relux_max_limit,
const float leakyrelu_coefficient,
const int winograd_blk_size,
Tensor *output) {
MACE_UNUSED(winograd_blk_size);
......@@ -221,14 +225,14 @@ MaceStatus Conv2dKernel<T>::Compute(
return conv2d::Conv2d1x1(
context, &kernels_[1], pad_input, filter, bias, strides,
DataTypeToEnum<T>::v(), activation, relux_max_limit,
input_changed, output, &conv_future);
leakyrelu_coefficient, input_changed, output, &conv_future);
};
} else {
conv_func = [&](const Tensor *pad_input, Tensor *output) -> MaceStatus {
return conv2d::Conv2dGeneral(
context, &kernels_[1], pad_input, filter, bias, strides, dilations,
DataTypeToEnum<T>::v(), activation, relux_max_limit,
input_changed, output, &conv_future);
leakyrelu_coefficient, input_changed, output, &conv_future);
};
}
MACE_RETURN_IF_ERROR(conv_func(padded_input_ptr, output));
......
......@@ -32,6 +32,7 @@ MaceStatus Conv2d1x1(OpContext *context,
const DataType dt,
const ActivationType activation,
const float relux_max_limit,
const float leakyrelu_coefficient,
const bool input_changed,
Tensor *output,
StatsFuture *future) {
......@@ -71,6 +72,9 @@ MaceStatus Conv2d1x1(OpContext *context,
case SIGMOID:
built_options.emplace("-DUSE_SIGMOID");
break;
case LEAKYRELU:
built_options.emplace("-DUSE_LEAKYRELU");
break;
default:
LOG(FATAL) << "Unknown activation type: " << activation;
}
......@@ -106,6 +110,7 @@ MaceStatus Conv2d1x1(OpContext *context,
kernel->setArg(idx++, strides[0]);
kernel->setArg(idx++, strides[1]);
kernel->setArg(idx++, relux_max_limit);
kernel->setArg(idx++, leakyrelu_coefficient);
kernel->setArg(idx++, *(output->opencl_buffer()));
}
......
......@@ -33,6 +33,7 @@ MaceStatus Conv2dGeneral(OpContext *context,
const DataType dt,
const ActivationType activation,
const float relux_max_limit,
const float leakyrelu_coefficient,
const bool input_changed,
Tensor *output,
StatsFuture *future) {
......@@ -76,6 +77,9 @@ MaceStatus Conv2dGeneral(OpContext *context,
case SIGMOID:
built_options.emplace("-DUSE_SIGMOID");
break;
case LEAKYRELU:
built_options.emplace("-DUSE_LEAKYRELU");
break;
default:
LOG(FATAL) << "Unknown activation type: " << activation;
}
......@@ -120,6 +124,7 @@ MaceStatus Conv2dGeneral(OpContext *context,
kernel->setArg(idx++, static_cast<int32_t>(
dilations[1] * in_channel));
kernel->setArg(idx++, relux_max_limit);
kernel->setArg(idx++, leakyrelu_coefficient);
kernel->setArg(idx++, *(output->opencl_buffer()));
}
......
......@@ -33,6 +33,7 @@ MaceStatus DepthwiseConv2d(OpContext *context,
const DataType dt,
const ActivationType activation,
const float relux_max_limit,
const float leakyrelu_coefficient,
const bool input_changed,
Tensor *output,
StatsFuture *future) {
......@@ -76,6 +77,9 @@ MaceStatus DepthwiseConv2d(OpContext *context,
case SIGMOID:
built_options.emplace("-DUSE_SIGMOID");
break;
case LEAKYRELU:
built_options.emplace("-DUSE_LEAKYRELU");
break;
default:
LOG(FATAL) << "Unknown activation type: " << activation;
}
......@@ -116,6 +120,7 @@ MaceStatus DepthwiseConv2d(OpContext *context,
kernel->setArg(idx++, static_cast<int32_t>(
dilations[1] * in_channel));
kernel->setArg(idx++, relux_max_limit);
kernel->setArg(idx++, leakyrelu_coefficient);
kernel->setArg(idx++, *(output->opencl_buffer()));
}
......
......@@ -39,6 +39,7 @@ MaceStatus DepthwiseConv2d(OpContext *context,
const DataType dt,
const ActivationType activation,
const float relux_max_limit,
const float leakyrelu_coefficient,
const bool input_changed,
Tensor *output,
StatsFuture *future);
......@@ -60,6 +61,7 @@ class DepthwiseConv2dKernel : public OpenCLDepthwiseConv2dKernel {
const int *dilations,
const ActivationType activation,
const float relux_max_limit,
const float leakyrelu_coefficient,
Tensor *output) override;
private:
......@@ -81,6 +83,7 @@ MaceStatus DepthwiseConv2dKernel<T>::Compute(
const int *dilations,
const ActivationType activation,
const float relux_max_limit,
const float leakyrelu_coefficient,
Tensor *output) {
StatsFuture pad_future, dw_conv_future;
index_t filter_w = filter->dim(3);
......@@ -175,7 +178,7 @@ MaceStatus DepthwiseConv2dKernel<T>::Compute(
depthwise::DepthwiseConv2d(
context, &kernels_[1], padded_input_ptr, filter, bias, strides,
dilations, DataTypeToEnum<T>::v(), activation, relux_max_limit,
input_changed, output, &dw_conv_future));
leakyrelu_coefficient, input_changed, output, &dw_conv_future));
MergeMultipleFutureWaitFn({pad_future, dw_conv_future}, context->future());
return MaceStatus::MACE_SUCCESS;
}
......
......@@ -7,6 +7,7 @@ __kernel void activation(OUT_OF_RANGE_PARAMS
__read_only image2d_t alpha,
#endif
__private const float relux_max_limit,
__private const float leakyrelu_coefficient,
__write_only image2d_t output) {
const int ch_blk = get_global_id(0);
const int w = get_global_id(1);
......@@ -24,9 +25,9 @@ __kernel void activation(OUT_OF_RANGE_PARAMS
DATA_TYPE4 in = READ_IMAGET(input, SAMPLER, (int2)(pos, hb));
#ifdef USE_PRELU
DATA_TYPE4 prelu_alpha = READ_IMAGET(alpha, SAMPLER, (int2)(ch_blk, 0));
DATA_TYPE4 out = do_activation(in, prelu_alpha, relux_max_limit);
DATA_TYPE4 out = do_activation(in, prelu_alpha, relux_max_limit, leakyrelu_coefficient);
#else
DATA_TYPE4 out = do_activation(in, relux_max_limit);
DATA_TYPE4 out = do_activation(in, relux_max_limit, leakyrelu_coefficient);
#endif
WRITE_IMAGET(output, (int2)(pos, hb), out);
......
......@@ -11,7 +11,8 @@ __kernel void batch_norm(OUT_OF_RANGE_PARAMS
__private const float epsilon,
#endif
__write_only image2d_t output,
__private const float relux_max_limit) {
__private const float relux_max_limit,
__private const float leakyrelu_coefficient) {
const int ch_blk = get_global_id(0);
const int w = get_global_id(1);
const int hb = get_global_id(2);
......@@ -43,8 +44,8 @@ __kernel void batch_norm(OUT_OF_RANGE_PARAMS
DATA_TYPE4 in = READ_IMAGET(input, SAMPLER, (int2)(pos, hb));
DATA_TYPE4 out = mad(in, bn_scale, bn_offset);
#if defined(USE_RELU) || defined(USE_RELUX) || defined(USE_TANH) || defined(USE_SIGMOID)
out = do_activation(out, relux_max_limit);
#if defined(USE_RELU) || defined(USE_LEAKYRELU) || defined(USE_RELUX) || defined(USE_TANH) || defined(USE_SIGMOID)
out = do_activation(out, relux_max_limit, leakyrelu_coefficient);
#endif
WRITE_IMAGET(output, (int2)(pos, hb), out);
......
......@@ -86,7 +86,8 @@ inline DATA_TYPE4 do_activation(DATA_TYPE4 in,
#ifdef USE_PRELU
DATA_TYPE4 prelu_alpha,
#endif
__private const float relux_max_limit) {
__private const float relux_max_limit,
__private const float leakyrelu_coefficient) {
DATA_TYPE4 out;
#ifdef USE_RELU
out = fmax(in, (DATA_TYPE)0);
......@@ -104,7 +105,7 @@ inline DATA_TYPE4 do_activation(DATA_TYPE4 in,
out = do_sigmoid(in);
#endif
#ifdef USE_LEAKYRELU
out = fmax(in, (DATA_TYPE)0) * relux_max_limit;
out = select(leakyrelu_coefficient * in, in, in >= (DATA_TYPE)0);
#endif
return out;
}
......
......@@ -9,6 +9,7 @@ __kernel void conv_2d(OUT_OF_RANGE_PARAMS
#endif
__write_only image2d_t output,
__private const float relux_max_limit,
__private const float leakyrelu_coefficient,
__private const int in_height,
__private const int in_width,
__private const int in_ch_blks,
......@@ -123,11 +124,11 @@ __kernel void conv_2d(OUT_OF_RANGE_PARAMS
}
}
#if defined(USE_RELU) || defined(USE_RELUX) || defined(USE_TANH) || defined(USE_SIGMOID)
out0 = do_activation(out0, relux_max_limit);
out1 = do_activation(out1, relux_max_limit);
out2 = do_activation(out2, relux_max_limit);
out3 = do_activation(out3, relux_max_limit);
#if defined(USE_RELU) || defined(USE_LEAKYRELU) || defined(USE_RELUX) || defined(USE_TANH) || defined(USE_SIGMOID)
out0 = do_activation(out0, relux_max_limit, leakyrelu_coefficient);
out1 = do_activation(out1, relux_max_limit, leakyrelu_coefficient);
out2 = do_activation(out2, relux_max_limit, leakyrelu_coefficient);
out3 = do_activation(out3, relux_max_limit, leakyrelu_coefficient);
#endif
const int out_x_base = mul24(out_ch_blk, out_width);
......
......@@ -9,6 +9,7 @@ __kernel void conv_2d_1x1(OUT_OF_RANGE_PARAMS
#endif
__write_only image2d_t output,
__private const float relux_max_limit,
__private const float leakyrelu_coefficient,
__private const int in_height,
__private const int in_width,
__private const int in_ch_blks,
......@@ -96,11 +97,11 @@ __kernel void conv_2d_1x1(OUT_OF_RANGE_PARAMS
filter_x_base += 4;
}
#if defined(USE_RELU) || defined(USE_RELUX) || defined(USE_TANH) || defined(USE_SIGMOID)
out0 = do_activation(out0, relux_max_limit);
out1 = do_activation(out1, relux_max_limit);
out2 = do_activation(out2, relux_max_limit);
out3 = do_activation(out3, relux_max_limit);
#if defined(USE_RELU) || defined(USE_LEAKYRELU) || defined(USE_RELUX) || defined(USE_TANH) || defined(USE_SIGMOID)
out0 = do_activation(out0, relux_max_limit, leakyrelu_coefficient);
out1 = do_activation(out1, relux_max_limit, leakyrelu_coefficient);
out2 = do_activation(out2, relux_max_limit, leakyrelu_coefficient);
out3 = do_activation(out3, relux_max_limit, leakyrelu_coefficient);
#endif
const int out_x_base = mul24(out_ch_blk, width);
......
......@@ -17,6 +17,7 @@ __kernel void conv2d(BUFFER_OUT_OF_RANGE_PARAMS
__private const int stride_h,
__private const int stride_w,
__private const float relux_max_limit,
__private const float leakyrelu_coefficient,
__global OUT_DATA_TYPE *output) {
const int out_wc_blk_idx = get_global_id(0);
const int out_hb_idx = get_global_id(1);
......@@ -79,9 +80,9 @@ __kernel void conv2d(BUFFER_OUT_OF_RANGE_PARAMS
in_offset += 4;
}
#if defined(USE_RELU) || defined(USE_RELUX) || defined(USE_TANH) || defined(USE_SIGMOID)
out0 = do_activation(out0, relux_max_limit);
out1 = do_activation(out1, relux_max_limit);
#if defined(USE_RELU) || defined(USE_LEAKYRELU) || defined(USE_RELUX) || defined(USE_TANH) || defined(USE_SIGMOID)
out0 = do_activation(out0, relux_max_limit, leakyrelu_coefficient);
out1 = do_activation(out1, relux_max_limit, leakyrelu_coefficient);
#endif
int out_offset = mad24(mad24(mad24(batch_idx, out_height, out_height_idx),
......
......@@ -9,6 +9,7 @@ __kernel void conv_2d_3x3(OUT_OF_RANGE_PARAMS
#endif
__write_only image2d_t output,
__private const float relux_max_limit,
__private const float leakyrelu_coefficient,
__private const int in_height,
__private const int in_width,
__private const int in_ch_blks,
......@@ -128,12 +129,12 @@ __kernel void conv_2d_3x3(OUT_OF_RANGE_PARAMS
}
}
#if defined(USE_RELU) || defined(USE_RELUX) || defined(USE_TANH) || defined(USE_SIGMOID)
out0 = do_activation(out0, relux_max_limit);
out1 = do_activation(out1, relux_max_limit);
out2 = do_activation(out2, relux_max_limit);
out3 = do_activation(out3, relux_max_limit);
out4 = do_activation(out4, relux_max_limit);
#if defined(USE_RELU) || defined(USE_LEAKYRELU) || defined(USE_RELUX) || defined(USE_TANH) || defined(USE_SIGMOID)
out0 = do_activation(out0, relux_max_limit, leakyrelu_coefficient);
out1 = do_activation(out1, relux_max_limit, leakyrelu_coefficient);
out2 = do_activation(out2, relux_max_limit, leakyrelu_coefficient);
out3 = do_activation(out3, relux_max_limit, leakyrelu_coefficient);
out4 = do_activation(out4, relux_max_limit, leakyrelu_coefficient);
#endif
const int out_x_base = mul24(out_ch_blk, out_width);
......
......@@ -22,6 +22,7 @@ __kernel void conv2d(BUFFER_OUT_OF_RANGE_PARAMS
__private const int dilated_h_offset,
__private const int dilated_w_offset,
__private const float relux_max_limit,
__private const float leakyrelu_coefficient,
__global OUT_DATA_TYPE *output) {
const int out_wc_blk_idx = get_global_id(0);
const int out_hb_idx = get_global_id(1);
......@@ -107,11 +108,11 @@ __kernel void conv2d(BUFFER_OUT_OF_RANGE_PARAMS
}
}
#if defined(USE_RELU) || defined(USE_RELUX) || defined(USE_TANH) || defined(USE_SIGMOID)
out0 = do_activation(out0, relux_max_limit);
out1 = do_activation(out1, relux_max_limit);
out2 = do_activation(out2, relux_max_limit);
out3 = do_activation(out3, relux_max_limit);
#if defined(USE_RELU) || defined(USE_LEAKYRELU) || defined(USE_RELUX) || defined(USE_TANH) || defined(USE_SIGMOID)
out0 = do_activation(out0, relux_max_limit, leakyrelu_coefficient);
out1 = do_activation(out1, relux_max_limit, leakyrelu_coefficient);
out2 = do_activation(out2, relux_max_limit, leakyrelu_coefficient);
out3 = do_activation(out3, relux_max_limit, leakyrelu_coefficient);
#endif
int out_offset = mad24(mad24(mad24(batch_idx, out_height, out_height_idx),
......
......@@ -9,6 +9,7 @@ __kernel void deconv_2d(OUT_OF_RANGE_PARAMS
#endif
__write_only image2d_t output,
__private const float relux_max_limit,
__private const float leakyrelu_coefficient,
__private const int in_height,
__private const int in_width,
__private const int in_channels,
......@@ -127,12 +128,12 @@ __kernel void deconv_2d(OUT_OF_RANGE_PARAMS
}
}
#if defined(USE_RELU) || defined(USE_RELUX) || defined(USE_TANH) || defined(USE_SIGMOID)
out0 = do_activation(out0, relux_max_limit);
out1 = do_activation(out1, relux_max_limit);
out2 = do_activation(out2, relux_max_limit);
out3 = do_activation(out3, relux_max_limit);
out4 = do_activation(out4, relux_max_limit);
#if defined(USE_RELU) || defined(USE_LEAKYRELU) || defined(USE_RELUX) || defined(USE_TANH) || defined(USE_SIGMOID)
out0 = do_activation(out0, relux_max_limit, leakyrelu_coefficient);
out1 = do_activation(out1, relux_max_limit, leakyrelu_coefficient);
out2 = do_activation(out2, relux_max_limit, leakyrelu_coefficient);
out3 = do_activation(out3, relux_max_limit, leakyrelu_coefficient);
out4 = do_activation(out4, relux_max_limit, leakyrelu_coefficient);
#endif
int2 out_pos;
......
......@@ -10,6 +10,7 @@ __kernel void depthwise_conv2d(OUT_OF_RANGE_PARAMS
#endif
__write_only image2d_t output,
__private const float relux_max_limit,
__private const float leakyrelu_coefficient,
__private const short in_height,
__private const short in_width,
__private const short in_ch_blks,
......@@ -112,11 +113,11 @@ __kernel void depthwise_conv2d(OUT_OF_RANGE_PARAMS
in_hb_idx += dilation_h;
}
#if defined(USE_RELU) || defined(USE_RELUX) || defined(USE_TANH) || defined(USE_SIGMOID)
out0 = do_activation(out0, relux_max_limit);
out1 = do_activation(out1, relux_max_limit);
out2 = do_activation(out2, relux_max_limit);
out3 = do_activation(out3, relux_max_limit);
#if defined(USE_RELU) || defined(USE_LEAKYRELU) || defined(USE_RELUX) || defined(USE_TANH) || defined(USE_SIGMOID)
out0 = do_activation(out0, relux_max_limit, leakyrelu_coefficient);
out1 = do_activation(out1, relux_max_limit, leakyrelu_coefficient);
out2 = do_activation(out2, relux_max_limit, leakyrelu_coefficient);
out3 = do_activation(out3, relux_max_limit, leakyrelu_coefficient);
#endif
const short out_x_base = mul24(out_ch_blk, out_width);
......@@ -145,6 +146,7 @@ __kernel void depthwise_conv2d_s1(OUT_OF_RANGE_PARAMS
#endif
__write_only image2d_t output,
__private const DATA_TYPE relux_max_limit,
__private const DATA_TYPE leakyrelu_coefficient,
__private const short in_height,
__private const short in_width,
__private const short in_ch_blks,
......@@ -238,11 +240,11 @@ __kernel void depthwise_conv2d_s1(OUT_OF_RANGE_PARAMS
in_hb_idx += 1;
}
#if defined(USE_RELU) || defined(USE_RELUX) || defined(USE_TANH) || defined(USE_SIGMOID)
out0 = do_activation(out0, relux_max_limit);
out1 = do_activation(out1, relux_max_limit);
out2 = do_activation(out2, relux_max_limit);
out3 = do_activation(out3, relux_max_limit);
#if defined(USE_RELU) || defined(USE_LEAKYRELU) || defined(USE_RELUX) || defined(USE_TANH) || defined(USE_SIGMOID)
out0 = do_activation(out0, relux_max_limit, leakyrelu_coefficient);
out1 = do_activation(out1, relux_max_limit, leakyrelu_coefficient);
out2 = do_activation(out2, relux_max_limit, leakyrelu_coefficient);
out3 = do_activation(out3, relux_max_limit, leakyrelu_coefficient);
#endif
const short out_x_base = mul24(out_ch_blk, out_width);
......
......@@ -22,6 +22,7 @@ __kernel void depthwise_conv2d(BUFFER_OUT_OF_RANGE_PARAMS
__private const int dilated_h_offset,
__private const int dilated_w_offset,
__private const float relux_max_limit,
__private const float leakyrelu_coefficient,
__global OUT_DATA_TYPE *output) {
const int out_wc_blk_idx = get_global_id(0);
const int out_hb_idx = get_global_id(1);
......@@ -85,11 +86,11 @@ __kernel void depthwise_conv2d(BUFFER_OUT_OF_RANGE_PARAMS
}
}
#if defined(USE_RELU) || defined(USE_RELUX) || defined(USE_TANH) || defined(USE_SIGMOID)
out0 = do_activation(out0, relux_max_limit);
out1 = do_activation(out1, relux_max_limit);
out2 = do_activation(out2, relux_max_limit);
out3 = do_activation(out3, relux_max_limit);
#if defined(USE_RELU) || defined(USE_LEAKYRELU) || defined(USE_RELUX) || defined(USE_TANH) || defined(USE_SIGMOID)
out0 = do_activation(out0, relux_max_limit, leakyrelu_coefficient);
out1 = do_activation(out1, relux_max_limit, leakyrelu_coefficient);
out2 = do_activation(out2, relux_max_limit, leakyrelu_coefficient);
out3 = do_activation(out3, relux_max_limit, leakyrelu_coefficient);
#endif
int out_offset = mad24(mad24(mad24(batch_idx, out_height, out_height_idx),
......
......@@ -9,6 +9,7 @@ __kernel void depthwise_deconv2d(OUT_OF_RANGE_PARAMS
#endif
__write_only image2d_t output,
__private const float relux_max_limit,
__private const float leakyrelu_coefficient,
__private const int in_height,
__private const int in_width,
__private const int out_height,
......@@ -108,12 +109,12 @@ __kernel void depthwise_deconv2d(OUT_OF_RANGE_PARAMS
}
}
#if defined(USE_RELU) || defined(USE_RELUX) || defined(USE_TANH) || defined(USE_SIGMOID)
out0 = do_activation(out0, relux_max_limit);
out1 = do_activation(out1, relux_max_limit);
out2 = do_activation(out2, relux_max_limit);
out3 = do_activation(out3, relux_max_limit);
out4 = do_activation(out4, relux_max_limit);
#if defined(USE_RELU) || defined(USE_LEAKYRELU) || defined(USE_RELUX) || defined(USE_TANH) || defined(USE_SIGMOID)
out0 = do_activation(out0, relux_max_limit, leakyrelu_coefficient);
out1 = do_activation(out1, relux_max_limit, leakyrelu_coefficient);
out2 = do_activation(out2, relux_max_limit, leakyrelu_coefficient);
out3 = do_activation(out3, relux_max_limit, leakyrelu_coefficient);
out4 = do_activation(out4, relux_max_limit, leakyrelu_coefficient);
#endif
......@@ -146,4 +147,4 @@ __kernel void depthwise_deconv2d(OUT_OF_RANGE_PARAMS
out_pos.x += stride_w;
WRITE_IMAGET(output, out_pos, out4);
}
}
\ No newline at end of file
}
......@@ -12,7 +12,8 @@ __kernel void fully_connected(OUT_OF_RANGE_PARAMS
__private const int input_height,
__private const int input_width,
__private const int input_channel,
__private const float relux_max_limit) {
__private const float relux_max_limit,
__private const float leakyrelu_coefficient) {
const int batch_idx = get_global_id(0);
const int out_blk_idx = get_global_id(1);
const int input_chan_blk = (input_channel + 3) >> 2;
......@@ -56,8 +57,8 @@ __kernel void fully_connected(OUT_OF_RANGE_PARAMS
input_coord.y++;
}
#if defined(USE_RELU) || defined(USE_RELUX) || defined(USE_TANH) || defined(USE_SIGMOID)
result = do_activation(result, relux_max_limit);
#if defined(USE_RELU) || defined(USE_LEAKYRELU) || defined(USE_RELUX) || defined(USE_TANH) || defined(USE_SIGMOID)
result = do_activation(result, relux_max_limit, leakyrelu_coefficient);
#endif
WRITE_IMAGET(output, (int2)(out_blk_idx, batch_idx), result);
......@@ -77,7 +78,8 @@ __kernel void fully_connected_width(OUT_OF_RANGE_PARAMS
__private const int input_width,
__private const int in_chan_blks,
__private const int out_blks,
__private const float relux_max_limit) {
__private const float relux_max_limit,
__private const float leakyrelu_coefficient) {
const int inter_out_idx = get_global_id(0);
const int width_blk_idx = get_global_id(1);
const int width_blk_count = global_size_dim1;
......@@ -147,8 +149,8 @@ __kernel void fully_connected_width(OUT_OF_RANGE_PARAMS
inter_idx += 4;
}
#if defined(USE_RELU) || defined(USE_RELUX) || defined(USE_TANH) || defined(USE_SIGMOID)
result = do_activation(result, relux_max_limit);
#if defined(USE_RELU) || defined(USE_LEAKYRELU) || defined(USE_RELUX) || defined(USE_TANH) || defined(USE_SIGMOID)
result = do_activation(result, relux_max_limit, leakyrelu_coefficient);
#endif
WRITE_IMAGET(output, (int2)(out_blk_idx, batch_idx), result);
......
......@@ -127,7 +127,8 @@ __kernel void winograd_inverse_transform_2x2(OUT_OF_RANGE_PARAMS
__private const int out_width,
__private const int round_hw,
__private const int round_w,
__private const float relux_max_limit) {
__private const float relux_max_limit,
__private const float leakyrelu_coefficient) {
const int width_idx = get_global_id(0);
const int height_idx = get_global_id(1);
......@@ -203,11 +204,11 @@ __kernel void winograd_inverse_transform_2x2(OUT_OF_RANGE_PARAMS
#endif
#if defined(USE_RELU) || defined(USE_RELUX) || defined(USE_TANH) || defined(USE_SIGMOID)
in0[0] = do_activation(in0[0], relux_max_limit);
in0[1] = do_activation(in0[1], relux_max_limit);
in1[0] = do_activation(in1[0], relux_max_limit);
in1[1] = do_activation(in1[1], relux_max_limit);
#if defined(USE_RELU) || defined(USE_LEAKYRELU) || defined(USE_RELUX) || defined(USE_TANH) || defined(USE_SIGMOID)
in0[0] = do_activation(in0[0], relux_max_limit, leakyrelu_coefficient);
in0[1] = do_activation(in0[1], relux_max_limit, leakyrelu_coefficient);
in1[0] = do_activation(in1[0], relux_max_limit, leakyrelu_coefficient);
in1[1] = do_activation(in1[1], relux_max_limit, leakyrelu_coefficient);
#endif
WRITE_IMAGET(output, (int2)(coord_x, coord_y), in0[0]);
......@@ -395,7 +396,8 @@ __kernel void winograd_inverse_transform_4x4(OUT_OF_RANGE_PARAMS
__private const int out_width,
__private const int round_hw,
__private const int round_w,
__private const float relux_max_limit) {
__private const float relux_max_limit,
__private const float leakyrelu_coefficient) {
const int width_idx = get_global_id(0);
const int height_idx = get_global_id(1);
......@@ -515,23 +517,23 @@ __kernel void winograd_inverse_transform_4x4(OUT_OF_RANGE_PARAMS
out3[3] += bias_value;
#endif
#if defined(USE_RELU) || defined(USE_RELUX) || defined(USE_TANH) || defined(USE_SIGMOID)
out0[0] = do_activation(out0[0], relux_max_limit);
out0[1] = do_activation(out0[1], relux_max_limit);
out0[2] = do_activation(out0[2], relux_max_limit);
out0[3] = do_activation(out0[3], relux_max_limit);
out1[0] = do_activation(out1[0], relux_max_limit);
out1[1] = do_activation(out1[1], relux_max_limit);
out1[2] = do_activation(out1[2], relux_max_limit);
out1[3] = do_activation(out1[3], relux_max_limit);
out2[0] = do_activation(out2[0], relux_max_limit);
out2[1] = do_activation(out2[1], relux_max_limit);
out2[2] = do_activation(out2[2], relux_max_limit);
out2[3] = do_activation(out2[3], relux_max_limit);
out3[0] = do_activation(out3[0], relux_max_limit);
out3[1] = do_activation(out3[1], relux_max_limit);
out3[2] = do_activation(out3[2], relux_max_limit);
out3[3] = do_activation(out3[3], relux_max_limit);
#if defined(USE_RELU) || defined(USE_LEAKYRELU) || defined(USE_RELUX) || defined(USE_TANH) || defined(USE_SIGMOID)
out0[0] = do_activation(out0[0], relux_max_limit, leakyrelu_coefficient);
out0[1] = do_activation(out0[1], relux_max_limit, leakyrelu_coefficient);
out0[2] = do_activation(out0[2], relux_max_limit, leakyrelu_coefficient);
out0[3] = do_activation(out0[3], relux_max_limit, leakyrelu_coefficient);
out1[0] = do_activation(out1[0], relux_max_limit, leakyrelu_coefficient);
out1[1] = do_activation(out1[1], relux_max_limit, leakyrelu_coefficient);
out1[2] = do_activation(out1[2], relux_max_limit, leakyrelu_coefficient);
out1[3] = do_activation(out1[3], relux_max_limit, leakyrelu_coefficient);
out2[0] = do_activation(out2[0], relux_max_limit, leakyrelu_coefficient);
out2[1] = do_activation(out2[1], relux_max_limit, leakyrelu_coefficient);
out2[2] = do_activation(out2[2], relux_max_limit, leakyrelu_coefficient);
out2[3] = do_activation(out2[3], relux_max_limit, leakyrelu_coefficient);
out3[0] = do_activation(out3[0], relux_max_limit, leakyrelu_coefficient);
out3[1] = do_activation(out3[1], relux_max_limit, leakyrelu_coefficient);
out3[2] = do_activation(out3[2], relux_max_limit, leakyrelu_coefficient);
out3[3] = do_activation(out3[3], relux_max_limit, leakyrelu_coefficient);
#endif
const int num = min(4, out_width - out_width_idx);
......@@ -556,4 +558,4 @@ __kernel void winograd_inverse_transform_4x4(OUT_OF_RANGE_PARAMS
for (int i = 0; i < num; ++i) {
WRITE_IMAGET(output, (int2)(coord_x + i, coord_y + 3), out3[i]);
}
}
\ No newline at end of file
}
......@@ -45,6 +45,7 @@ class OpenCLConv2dKernel {
const int *dilations,
const ActivationType activation,
const float relux_max_limit,
const float leakyrelu_coefficient,
const int winograd_blk_size,
Tensor *output) = 0;
MACE_EMPTY_VIRTUAL_DESTRUCTOR(OpenCLConv2dKernel);
......
......@@ -36,6 +36,7 @@ class OpenCLDeconv2dKernel {
const int *padding_data,
const ActivationType activation,
const float relux_max_limit,
const float leakyrelu_coefficient,
const std::vector<index_t> &output_shape,
Tensor *output) = 0;
MACE_EMPTY_VIRTUAL_DESTRUCTOR(OpenCLDeconv2dKernel);
......
......@@ -38,6 +38,7 @@ class OpenCLDepthwiseConv2dKernel {
const int *dilations,
const ActivationType activation,
const float relux_max_limit,
const float leakyrelu_coefficient,
Tensor *output) = 0;
MACE_EMPTY_VIRTUAL_DESTRUCTOR(OpenCLDepthwiseConv2dKernel);
};
......
......@@ -39,6 +39,7 @@ class OpenCLDepthwiseDeconv2dKernel {
const int group,
const ActivationType activation,
const float relux_max_limit,
const float leakyrelu_coefficient,
const std::vector <index_t> &output_shape,
Tensor *output) = 0;
MACE_EMPTY_VIRTUAL_DESTRUCTOR(OpenCLDepthwiseDeconv2dKernel);
......
......@@ -35,6 +35,7 @@ class OpenCLFullyConnectedKernel {
const Tensor *bias,
const ActivationType activation,
const float relux_max_limit,
const float leakyrelu_coefficient,
Tensor *output) = 0;
MACE_EMPTY_VIRTUAL_DESTRUCTOR(OpenCLFullyConnectedKernel);
};
......
......@@ -35,8 +35,10 @@ template <typename T>
class ActivationKernel : public OpenCLActivationKernel {
public:
ActivationKernel(ActivationType type,
T relux_max_limit)
: activation_(type), relux_max_limit_(relux_max_limit) {}
T relux_max_limit,
T leakyrelu_coefficient)
: activation_(type), relux_max_limit_(relux_max_limit),
leakyrelu_coefficient_(leakyrelu_coefficient) {}
MaceStatus Compute(
OpContext *context,
......@@ -47,6 +49,7 @@ class ActivationKernel : public OpenCLActivationKernel {
private:
ActivationType activation_;
T relux_max_limit_;
T leakyrelu_coefficient_;
cl::Kernel kernel_;
uint32_t kwg_size_;
std::vector<index_t> input_shape_;
......@@ -128,6 +131,7 @@ MaceStatus ActivationKernel<T>::Compute(
kernel_.setArg(idx++, *(alpha->opencl_image()));
}
kernel_.setArg(idx++, static_cast<float>(relux_max_limit_));
kernel_.setArg(idx++, static_cast<float>(leakyrelu_coefficient_));
kernel_.setArg(idx++, *(output->opencl_image()));
input_shape_ = input->shape();
......
......@@ -37,7 +37,8 @@ class BatchNormKernel : public OpenCLBatchNormKernel {
BatchNormKernel(
const float epsilon,
const ActivationType activation,
const float relux_max_limit);
const float relux_max_limit,
const float leakyrelu_coefficient);
MaceStatus Compute(OpContext *context,
const Tensor *input,
const Tensor *scale,
......@@ -50,6 +51,7 @@ class BatchNormKernel : public OpenCLBatchNormKernel {
const float epsilon_;
const ActivationType activation_;
const float relux_max_limit_;
const float leakyrelu_coefficient_;
cl::Kernel kernel_;
uint32_t kwg_size_;
std::vector<index_t> input_shape_;
......@@ -58,10 +60,12 @@ class BatchNormKernel : public OpenCLBatchNormKernel {
template <typename T>
BatchNormKernel<T>::BatchNormKernel(const float epsilon,
const ActivationType activation,
const float relux_max_limit)
const float relux_max_limit,
const float leakyrelu_coefficient)
: epsilon_(epsilon),
activation_(activation),
relux_max_limit_(relux_max_limit) {}
relux_max_limit_(relux_max_limit),
leakyrelu_coefficient_(leakyrelu_coefficient) {}
template <typename T>
MaceStatus BatchNormKernel<T>::Compute(
......@@ -115,6 +119,9 @@ MaceStatus BatchNormKernel<T>::Compute(
case SIGMOID:
built_options.emplace("-DUSE_SIGMOID");
break;
case LEAKYRELU:
built_options.emplace("-DUSE_LEAKYRELU");
break;
default:
LOG(FATAL) << "Unknown activation type: " << activation_;
}
......@@ -140,6 +147,7 @@ MaceStatus BatchNormKernel<T>::Compute(
}
kernel_.setArg(idx++, *(output->opencl_image()));
kernel_.setArg(idx++, relux_max_limit_);
kernel_.setArg(idx++, leakyrelu_coefficient_);
input_shape_ = input->shape();
}
......
......@@ -38,6 +38,7 @@ extern MaceStatus Conv2dK1x1(OpContext *context,
const int *dilations,
const ActivationType activation,
const float relux_max_limit,
const float leakyrelu_coefficient,
const DataType dt,
std::vector<index_t> *prev_input_shape,
Tensor *output,
......@@ -53,6 +54,7 @@ extern MaceStatus Conv2dK3x3(OpContext *context,
const int *dilations,
const ActivationType activation,
const float relux_max_limit,
const float leakyrelu_coefficient,
const DataType dt,
std::vector<index_t> *prev_input_shape,
Tensor *output,
......@@ -68,6 +70,7 @@ extern MaceStatus Conv2d(OpContext *context,
const int *dilations,
const ActivationType activation,
const float relux_max_limit,
const float leakyrelu_coefficient,
const DataType dt,
std::vector<index_t> *prev_input_shape,
Tensor *output,
......@@ -81,6 +84,7 @@ extern MaceStatus WinogradConv2dK3x3S1(OpContext *context,
const int *padding,
const ActivationType activation,
const float relux_max_limit,
const float leakyrelu_coefficient,
const DataType dt,
const int wino_blk_size,
std::vector<index_t> *prev_input_shape,
......@@ -109,6 +113,7 @@ class Conv2dKernel : public OpenCLConv2dKernel {
const int *dilations,
const ActivationType activation,
const float relux_max_limit,
const float leakyrelu_coefficient,
const int wino_blk_size,
Tensor *output) override;
......@@ -169,6 +174,7 @@ MaceStatus Conv2dKernel<T>::Compute(
const int *dilations,
const ActivationType activation,
const float relux_max_limit,
const float leakyrelu_coefficient,
const int wino_blk_size,
Tensor *output) {
index_t kernel_h = filter->dim(2);
......@@ -217,6 +223,7 @@ MaceStatus Conv2dKernel<T>::Compute(
paddings.data(),
activation,
relux_max_limit,
leakyrelu_coefficient,
DataTypeToEnum<T>::value,
wino_blk_size,
&input_shape_,
......@@ -235,6 +242,7 @@ MaceStatus Conv2dKernel<T>::Compute(
dilations,
activation,
relux_max_limit,
leakyrelu_coefficient,
DataTypeToEnum<T>::value,
&input_shape_,
output,
......@@ -252,6 +260,7 @@ MaceStatus Conv2dKernel<T>::Compute(
dilations,
activation,
relux_max_limit,
leakyrelu_coefficient,
DataTypeToEnum<T>::value,
&input_shape_,
output,
......@@ -269,6 +278,7 @@ MaceStatus Conv2dKernel<T>::Compute(
dilations,
activation,
relux_max_limit,
leakyrelu_coefficient,
DataTypeToEnum<T>::value,
&input_shape_,
output,
......
......@@ -76,6 +76,7 @@ extern MaceStatus Conv2dK1x1(OpContext *context,
const int *dilations,
const ActivationType activation,
const float relux_max_limit,
const float leakyrelu_coefficient,
const DataType dt,
std::vector<index_t> *prev_input_shape,
Tensor *output,
......@@ -125,6 +126,9 @@ extern MaceStatus Conv2dK1x1(OpContext *context,
case SIGMOID:
built_options.emplace("-DUSE_SIGMOID");
break;
case LEAKYRELU:
built_options.emplace("-DUSE_LEAKYRELU");
break;
default:
LOG(FATAL) << "Unknown activation type: " << activation;
}
......@@ -154,6 +158,7 @@ extern MaceStatus Conv2dK1x1(OpContext *context,
kernel->setArg(idx++, *(output->opencl_image()));
// FIXME handle flexible data type: half not supported
kernel->setArg(idx++, relux_max_limit);
kernel->setArg(idx++, leakyrelu_coefficient);
kernel->setArg(idx++, static_cast<int>(input_height));
kernel->setArg(idx++, static_cast<int>(input_width));
kernel->setArg(idx++, static_cast<int>(input_channel_blocks));
......
......@@ -69,6 +69,7 @@ extern MaceStatus Conv2dK3x3(OpContext *context,
const int *dilations,
const ActivationType activation,
const float relux_max_limit,
const float leakyrelu_coefficient,
const DataType dt,
std::vector<index_t> *prev_input_shape,
Tensor *output,
......@@ -110,6 +111,9 @@ extern MaceStatus Conv2dK3x3(OpContext *context,
case SIGMOID:
built_options.emplace("-DUSE_SIGMOID");
break;
case LEAKYRELU:
built_options.emplace("-DUSE_LEAKYRELU");
break;
default:
LOG(FATAL) << "Unknown activation type: " << activation;
}
......@@ -138,6 +142,7 @@ extern MaceStatus Conv2dK3x3(OpContext *context,
}
kernel->setArg(idx++, *(output->opencl_image()));
kernel->setArg(idx++, relux_max_limit);
kernel->setArg(idx++, leakyrelu_coefficient);
kernel->setArg(idx++, static_cast<int>(input->dim(1)));
kernel->setArg(idx++, static_cast<int>(input->dim(2)));
kernel->setArg(idx++, static_cast<int>(input_channel_blocks));
......
......@@ -77,6 +77,7 @@ extern MaceStatus Conv2d(OpContext *context,
const int *dilations,
const ActivationType activation,
const float relux_max_limit,
const float leakyrelu_coefficient,
const DataType dt,
std::vector<index_t> *prev_input_shape,
Tensor *output,
......@@ -118,6 +119,9 @@ extern MaceStatus Conv2d(OpContext *context,
case SIGMOID:
built_options.emplace("-DUSE_SIGMOID");
break;
case LEAKYRELU:
built_options.emplace("-DUSE_LEAKYRELU");
break;
default:
LOG(FATAL) << "Unknown activation type: " << activation;
}
......@@ -146,6 +150,7 @@ extern MaceStatus Conv2d(OpContext *context,
}
kernel->setArg(idx++, *(output->opencl_image()));
kernel->setArg(idx++, relux_max_limit);
kernel->setArg(idx++, leakyrelu_coefficient);
kernel->setArg(idx++, static_cast<uint32_t>(input->dim(1)));
kernel->setArg(idx++, static_cast<uint32_t>(input->dim(2)));
kernel->setArg(idx++, static_cast<uint32_t>(input_channel_blocks));
......
......@@ -42,6 +42,7 @@ class Deconv2dKernel : public OpenCLDeconv2dKernel {
const int *padding_data,
const ActivationType activation,
const float relux_max_limit,
const float leakyrelu_coefficient,
const std::vector<index_t> &output_shape,
Tensor *output) override;
......@@ -61,6 +62,7 @@ MaceStatus Deconv2dKernel<T>::Compute(
const int *padding_data,
const ActivationType activation,
const float relux_max_limit,
const float leakyrelu_coefficient,
const std::vector<index_t> &output_shape,
Tensor *output) {
std::vector<size_t> output_image_shape;
......@@ -119,6 +121,9 @@ MaceStatus Deconv2dKernel<T>::Compute(
case SIGMOID:
built_options.emplace("-DUSE_SIGMOID");
break;
case LEAKYRELU:
built_options.emplace("-DUSE_LEAKYRELU");
break;
default:
LOG(FATAL) << "Unknown activation type: " << activation;
}
......@@ -146,6 +151,7 @@ MaceStatus Deconv2dKernel<T>::Compute(
}
kernel_.setArg(idx++, *(output->opencl_image()));
kernel_.setArg(idx++, relux_max_limit);
kernel_.setArg(idx++, leakyrelu_coefficient);
kernel_.setArg(idx++, static_cast<int32_t>(input->dim(1)));
kernel_.setArg(idx++, static_cast<int32_t>(input->dim(2)));
kernel_.setArg(idx++, static_cast<int32_t>(input->dim(3)));
......
......@@ -73,6 +73,7 @@ MaceStatus DepthwiseConv2d(OpContext *context,
const int *dilations,
const ActivationType activation,
const float relux_max_limit,
const float leakyrelu_coefficient,
const DataType dt,
std::vector<index_t> *prev_input_shape,
Tensor *output,
......@@ -126,6 +127,9 @@ MaceStatus DepthwiseConv2d(OpContext *context,
case SIGMOID:
built_options.emplace("-DUSE_SIGMOID");
break;
case LEAKYRELU:
built_options.emplace("-DUSE_LEAKYRELU");
break;
default:
LOG(FATAL) << "Unknown activation type: " << activation;
}
......@@ -159,6 +163,7 @@ MaceStatus DepthwiseConv2d(OpContext *context,
}
kernel->setArg(idx++, *(output->opencl_image()));
kernel->setArg(idx++, relux_max_limit);
kernel->setArg(idx++, leakyrelu_coefficient);
kernel->setArg(idx++, static_cast<int16_t>(input_height));
kernel->setArg(idx++, static_cast<int16_t>(input_width));
kernel->setArg(idx++, static_cast<int16_t>(input_channel_blocks));
......
......@@ -39,6 +39,7 @@ MaceStatus DepthwiseConv2d(OpContext *context,
const int *dilations,
const ActivationType activation,
const float relux_max_limit,
const float leakyrelu_coefficient,
const DataType dt,
std::vector<index_t> *prev_input_shape,
Tensor *output,
......@@ -60,6 +61,7 @@ class DepthwiseConv2dKernel : public OpenCLDepthwiseConv2dKernel {
const int *dilations,
const ActivationType activation,
const float relux_max_limit,
const float leakyrelu_coefficient,
Tensor *output) override;
private:
......@@ -80,6 +82,7 @@ MaceStatus DepthwiseConv2dKernel<T>::Compute(
const int *dilations,
const ActivationType activation,
const float relux_max_limit,
const float leakyrelu_coefficient,
Tensor *output) {
index_t kernel_h = filter->dim(2);
index_t kernel_w = filter->dim(3);
......@@ -118,8 +121,8 @@ MaceStatus DepthwiseConv2dKernel<T>::Compute(
return depthwise::DepthwiseConv2d(
context, &kernel_, input, filter, bias, strides[0], paddings.data(),
dilations, activation, relux_max_limit, DataTypeToEnum<T>::value,
&input_shape_, output, &kwg_size_);
dilations, activation, relux_max_limit, leakyrelu_coefficient,
DataTypeToEnum<T>::value, &input_shape_, output, &kwg_size_);
}
} // namespace image
......
......@@ -43,6 +43,7 @@ class DepthwiseDeconv2dKernel : public OpenCLDepthwiseDeconv2dKernel {
const int group,
const ActivationType activation,
const float relux_max_limit,
const float leakyrelu_coefficient,
const std::vector<index_t> &output_shape,
Tensor *output) override;
......@@ -63,6 +64,7 @@ MaceStatus DepthwiseDeconv2dKernel<T>::Compute(
const int group,
const ActivationType activation,
const float relux_max_limit,
const float leakyrelu_coefficient,
const std::vector<index_t> &output_shape,
Tensor *output) {
const index_t batch = output_shape[0];
......@@ -125,6 +127,9 @@ MaceStatus DepthwiseDeconv2dKernel<T>::Compute(
case SIGMOID:
built_options.emplace("-DUSE_SIGMOID");
break;
case LEAKYRELU:
built_options.emplace("-DUSE_LEAKYRELU");
break;
default:
LOG(FATAL) << "Unknown activation type: " << activation;
}
......@@ -152,6 +157,7 @@ MaceStatus DepthwiseDeconv2dKernel<T>::Compute(
}
kernel_.setArg(idx++, *(output->opencl_image()));
kernel_.setArg(idx++, relux_max_limit);
kernel_.setArg(idx++, leakyrelu_coefficient);
kernel_.setArg(idx++, static_cast<int32_t>(input->dim(1)));
kernel_.setArg(idx++, static_cast<int32_t>(input->dim(2)));
kernel_.setArg(idx++, static_cast<int32_t>(height));
......
......@@ -40,6 +40,7 @@ class FullyConnectedKernel : public OpenCLFullyConnectedKernel {
const Tensor *bias,
const ActivationType activation,
const float relux_max_limit,
const float leakyrelu_coefficient,
Tensor *output) override;
private:
......@@ -57,6 +58,7 @@ MaceStatus FullyConnectedKernel<T>::Compute(
const Tensor *bias,
const ActivationType activation,
const float relux_max_limit,
const float leakyrelu_coefficient,
Tensor *output) {
std::vector<index_t> output_shape = {input->dim(0), 1, 1, weight->dim(0)};
std::vector<size_t> output_image_shape;
......@@ -98,6 +100,9 @@ MaceStatus FullyConnectedKernel<T>::Compute(
case SIGMOID:
built_options.emplace("-DUSE_SIGMOID");
break;
case LEAKYRELU:
built_options.emplace("-DUSE_LEAKYRELU");
break;
default:
LOG(FATAL) << "Unknown activation type: " << activation;
}
......@@ -148,6 +153,7 @@ MaceStatus FullyConnectedKernel<T>::Compute(
kernel_.setArg(idx++, static_cast<int>(RoundUpDiv4(input->dim(3))));
kernel_.setArg(idx++, static_cast<int>(output_blocks));
kernel_.setArg(idx++, relux_max_limit);
kernel_.setArg(idx++, leakyrelu_coefficient);
input_shape_ = input->shape();
}
......
......@@ -115,6 +115,7 @@ MaceStatus WinogradOutputTransform(OpContext *context,
const int wino_blk_size,
const ActivationType activation,
const float relux_max_limit,
const float leakyrelu_coefficient,
const bool input_changed,
Tensor *output_tensor,
uint32_t *kwg_size,
......@@ -164,6 +165,9 @@ MaceStatus WinogradOutputTransform(OpContext *context,
case SIGMOID:
built_options.emplace("-DUSE_SIGMOID");
break;
case LEAKYRELU:
built_options.emplace("-DUSE_LEAKYRELU");
break;
default:
LOG(FATAL) << "Unknown activation type: " << activation;
}
......@@ -199,6 +203,7 @@ MaceStatus WinogradOutputTransform(OpContext *context,
kernel->setArg(idx++, static_cast<uint32_t>(round_h * round_w));
kernel->setArg(idx++, static_cast<uint32_t>(round_w));
kernel->setArg(idx++, relux_max_limit);
kernel->setArg(idx++, leakyrelu_coefficient);
}
const std::vector<uint32_t> lws = {*kwg_size / 8, 8, 0};
std::string tuning_key =
......@@ -222,6 +227,7 @@ extern MaceStatus WinogradConv2dK3x3S1(OpContext *context,
const int *paddings,
const ActivationType activation,
const float relux_max_limit,
const float leakyrelu_coefficient,
const DataType dt,
const int wino_blk_size,
std::vector<index_t> *prev_input_shape,
......@@ -338,7 +344,8 @@ extern MaceStatus WinogradConv2dK3x3S1(OpContext *context,
MACE_RETURN_IF_ERROR(WinogradOutputTransform(
context, kernels[2], mm_output.get(), bias,
dt, round_h, round_w, wino_blk_size, activation, relux_max_limit,
input_changed, output, kwg_size[2], &t_output_future))
leakyrelu_coefficient, input_changed, output, kwg_size[2],
&t_output_future))
MergeMultipleFutureWaitFn({t_input_future, mm_future, t_output_future},
context->future());
......
......@@ -168,6 +168,7 @@ class MaceKeyword(object):
mace_element_type_str = 'type'
mace_activation_type_str = 'activation'
mace_activation_max_limit_str = 'max_limit'
mace_activation_leakyrelu_coefficient_str = 'leakyrelu_coefficient'
mace_resize_size_str = 'size'
mace_batch_to_space_crops_str = 'crops'
mace_paddings_str = 'paddings'
......
......@@ -493,6 +493,14 @@ class CaffeConverter(base_converter.ConverterInterface):
mace_pb2.DT_FLOAT, alpha_data)
op.input.extend([alpha_tensor_name])
negative_slope = caffe_op.layer.relu_param.negative_slope
if caffe_op.type == 'ReLU' and negative_slope != 0:
param_arg = op.arg.add()
param_arg.name = MaceKeyword.mace_activation_leakyrelu_coefficient_str # noqa
param_arg.f = caffe_op.layer.relu_param.negative_slope
type_arg.s = six.b(ActivationType.LEAKYRELU.name)
def convert_folded_batchnorm(self, caffe_op):
op = self.convert_general_op(caffe_op)
op.type = MaceOp.BatchNorm.name
......
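
As an illustration only (the layer and blob names here are hypothetical, not part of this change), a Caffe ReLU layer with a non-zero `negative_slope` that the converter now maps to MACE's `LEAKYRELU` activation might look like:

```
layer {
  name: "relu1"            # hypothetical layer name
  type: "ReLU"
  bottom: "conv1"
  top: "conv1"
  relu_param {
    negative_slope: 0.1    # copied into the op's leakyrelu_coefficient arg
  }
}
```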
......@@ -286,10 +286,10 @@ class OnnxConverter(base_converter.ConverterInterface):
activation_type = {
OnnxOpType.Relu.name: ActivationType.RELU,
OnnxOpType.LeakyRelu.name: ActivationType.LEAKYRELU,
OnnxOpType.PRelu.name: ActivationType.PRELU,
OnnxOpType.Tanh.name: ActivationType.TANH,
OnnxOpType.Sigmoid.name: ActivationType.SIGMOID,
OnnxOpType.LeakyRelu.name: ActivationType.LEAKYRELU,
}
def __init__(self, option, src_model_file):
......
......@@ -898,7 +898,9 @@ class Transformer(base_converter.ConverterInterface):
op.output[0] = consumer_op.output[0]
for arg in consumer_op.arg:
if arg.name == MaceKeyword.mace_activation_type_str \
or arg.name == MaceKeyword.mace_activation_max_limit_str: # noqa
or arg.name == \
MaceKeyword.mace_activation_max_limit_str \
or arg.name == MaceKeyword.mace_activation_leakyrelu_coefficient_str: # noqa
op.arg.extend([arg])
self.replace_quantize_info(op, consumer_op)
......