diff --git a/mace/core/operator.cc b/mace/core/operator.cc
index 94e4f22fc1fac73400b7b0fec0f4d466d2731113..7c4d922ba8065b31ca84812d07de7ad0a771a792 100644
--- a/mace/core/operator.cc
+++ b/mace/core/operator.cc
@@ -77,7 +77,6 @@ extern void Register_Pooling(OperatorRegistry *op_registry);
 extern void Register_ResizeBilinear(OperatorRegistry *op_registry);
 extern void Register_Softmax(OperatorRegistry *op_registry);
 extern void Register_SpaceToBatchND(OperatorRegistry *op_registry);
-extern void Register_FoldedBatchNorm(OperatorRegistry *op_registry);
 extern void Register_GEMM(OperatorRegistry *op_registry);
 extern void Register_WinogradTransform(OperatorRegistry *op_registry);
 extern void Register_WinogradInverseTransform(OperatorRegistry *op_registry);
@@ -101,7 +100,6 @@ OperatorRegistry::OperatorRegistry() {
   Register_ResizeBilinear(this);
   Register_Softmax(this);
   Register_SpaceToBatchND(this);
-  Register_FoldedBatchNorm(this);
   Register_GEMM(this);
   Register_WinogradTransform(this);
   Register_WinogradInverseTransform(this);
diff --git a/mace/core/registry.h b/mace/core/registry.h
index 5c82ef2e4b60ce3645e6c5708a2b7442f9e8a85e..5a233bcd88815a12e533049dae552b4b93434d9c 100644
--- a/mace/core/registry.h
+++ b/mace/core/registry.h
@@ -19,7 +19,7 @@ class Registry {
   void Register(const SrcType &key, Creator creator) {
     VLOG(2) << "Registering: " << key;
     std::lock_guard<std::mutex> lock(register_mutex_);
-    MACE_CHECK(registry_.count(key) == 0, "Key already registered.");
+    MACE_CHECK(registry_.count(key) == 0, "Key already registered: ", key);
     registry_[key] = creator;
   }
diff --git a/mace/kernels/opencl/cl/winograd_transform.cl b/mace/kernels/opencl/cl/winograd_transform.cl
index e5f43411434b05c0c67717dac209ab3b429f3cac..daecd39f9d5e4e45f835166e86b1daba8428574b 100644
--- a/mace/kernels/opencl/cl/winograd_transform.cl
+++ b/mace/kernels/opencl/cl/winograd_transform.cl
@@ -107,17 +107,35 @@ __kernel void winograd_transform_2x2(__read_only image2d_t input,
 }
 
 __kernel void winograd_inverse_transform_2x2(__read_only image2d_t input,
+#ifdef BIAS
+                                             __read_only image2d_t bias, /* cout%4 * cout/4 */
+#endif
                                              __write_only image2d_t output,
                                              __private const int out_height,
                                              __private const int out_width,
                                              __private const int round_hw,
-                                             __private const int round_w) {
+                                             __private const int round_w,
+                                             __private const DATA_TYPE relux_max_limit,
+                                             __private const DATA_TYPE prelu_alpha) {
   const int width_idx = get_global_id(0);
   const int height_idx = get_global_id(1);
   const int out_channel = get_global_size(1);
 
   int width = width_idx;
   int height = height_idx;
 
+  const int batch = width_idx / round_hw;
+  int t = width_idx % round_hw;
+  const int out_height_idx = (t / round_w) << 1;
+  const int out_width_idx = (t % round_w) << 1;
+  const int out_chan_idx = height_idx;
+  const int coord_x = mad24(out_chan_idx, out_width, out_width_idx);
+  const int coord_y = mad24(batch, out_height, out_height_idx);
+
+#ifdef BIAS
+  DATA_TYPE4 bias_value =
+      READ_IMAGET(bias, SAMPLER, (int2)(out_chan_idx, 0));
+#endif
+
   DATA_TYPE4 in0[4], in1[4], in2[4], in3[4];
 #pragma unroll
@@ -157,13 +175,20 @@ __kernel void winograd_inverse_transform_2x2(__read_only image2d_t input,
   in1[0] = in1[0] + in1[1] + in1[2];
   in1[1] = in1[1] - in1[2] - in1[3];
 
-  const int batch = width_idx / round_hw;
-  int t = width_idx % round_hw;
-  const int out_height_idx = (t / round_w) << 1;
-  const int out_width_idx = (t % round_w) << 1;
-  const int out_chan_idx = height_idx;
-  const int coord_x = mad24(out_chan_idx, out_width, out_width_idx);
-  const int coord_y = mad24(batch, out_height, out_height_idx);
+#ifdef BIAS
+  in0[0] += bias_value;
+  in0[1] += bias_value;
+  in1[0] += bias_value;
+  in1[1] += bias_value;
+#endif
+
+
+#if defined(USE_RELU) || defined(USE_RELUX) || defined(USE_PRELU) || defined(USE_TANH) || defined(USE_SIGMOID)
+  in0[0] = do_activation(in0[0], relux_max_limit, prelu_alpha);
+  in0[1] = do_activation(in0[1], relux_max_limit, prelu_alpha);
+  in1[0] = do_activation(in1[0], relux_max_limit, prelu_alpha);
+  in1[1] = do_activation(in1[1], relux_max_limit, prelu_alpha);
+#endif
 
   WRITE_IMAGET(output, (int2)(coord_x, coord_y), in0[0]);
diff --git a/mace/kernels/opencl/winograd_transform.cc b/mace/kernels/opencl/winograd_transform.cc
index 78ce863a92124ead787fe9bf57c27b551823248f..10c31c7fe7615c7b43e07e07f138fbf30106c214 100644
--- a/mace/kernels/opencl/winograd_transform.cc
+++ b/mace/kernels/opencl/winograd_transform.cc
@@ -109,6 +109,7 @@ void WinogradTransformFunctor<DeviceType::OPENCL, T>::operator()(const Tensor *i
 template <typename T>
 void WinogradInverseTransformFunctor<DeviceType::OPENCL, T>::operator()(const Tensor *input_tensor,
+                                                                        const Tensor *bias,
                                                                         Tensor *output_tensor,
                                                                         StatsFuture *future) {
   std::vector<index_t> output_shape = {batch_, height_, width_, input_tensor->dim(1)};
@@ -121,10 +122,29 @@ void WinogradInverseTransformFunctor<DeviceType::OPENCL, T>::operator()(const Te
   built_options.emplace("-Dwinograd_inverse_transform_2x2=" + obfuscated_kernel_name);
   built_options.emplace("-DDATA_TYPE=" + DtToUpstreamCLDt(DataTypeToEnum<T>::value));
   built_options.emplace("-DCMD_DATA_TYPE=" + DtToUpstreamCLCMDDt(DataTypeToEnum<T>::value));
-  if ((input_tensor->dim(1) % 4 == 0 || input_tensor->dim(0) == 1) &&
-      input_tensor->dim(2) % 4 == 0) {
-    built_options.emplace("-DDIVISIBLE_FOUR");
+  built_options.emplace(bias != nullptr ? "-DBIAS" : "");
+  switch (activation_) {
+    case NOOP:
+      break;
+    case RELU:
+      built_options.emplace("-DUSE_RELU");
+      break;
+    case RELUX:
+      built_options.emplace("-DUSE_RELUX");
+      break;
+    case PRELU:
+      built_options.emplace("-DUSE_PRELU");
+      break;
+    case TANH:
+      built_options.emplace("-DUSE_TANH");
+      break;
+    case SIGMOID:
+      built_options.emplace("-DUSE_SIGMOID");
+      break;
+    default:
+      LOG(FATAL) << "Unknown activation type: " << activation_;
   }
+
   auto runtime = OpenCLRuntime::Global();
   auto wino_kernel = runtime->BuildKernel("winograd_transform",
                                           obfuscated_kernel_name,
@@ -134,11 +154,16 @@ void WinogradInverseTransformFunctor<DeviceType::OPENCL, T>::operator()(const Te
   const uint32_t round_w = (width_ + 1) / 2;
   uint32_t idx = 0;
   wino_kernel.setArg(idx++, *(static_cast<const cl::Image2D *>(input_tensor->buffer())));
+  if (bias != nullptr) {
+    wino_kernel.setArg(idx++, *(static_cast<const cl::Image2D *>(bias->buffer())));
+  }
   wino_kernel.setArg(idx++, *(static_cast<cl::Image2D *>(output_tensor->buffer())));
   wino_kernel.setArg(idx++, static_cast<uint32_t>(output_shape[1]));
   wino_kernel.setArg(idx++, static_cast<uint32_t>(output_shape[2]));
   wino_kernel.setArg(idx++, static_cast<uint32_t>(round_h * round_w));
   wino_kernel.setArg(idx++, static_cast<uint32_t>(round_w));
+  wino_kernel.setArg(idx++, relux_max_limit_);
+  wino_kernel.setArg(idx++, prelu_alpha_);
 
   const size_t gws[2] = {static_cast<size_t>(input_tensor->dim(2)),
                          static_cast<size_t>(RoundUpDiv4(input_tensor->dim(1)))};
diff --git a/mace/kernels/winograd_transform.h b/mace/kernels/winograd_transform.h
index 62284a074048e4ffa406e0d31b9fc8972bc4fe70..a71bda24b120f3eab77171dd2836c606151b6486 100644
--- a/mace/kernels/winograd_transform.h
+++ b/mace/kernels/winograd_transform.h
@@ -8,6 +8,7 @@
 #include "mace/core/future.h"
 #include "mace/core/tensor.h"
 #include "mace/kernels/conv_pool_2d_util.h"
+#include "mace/kernels/activation.h"
 
 namespace mace {
 namespace kernels {
@@ -47,22 +48,37 @@ struct WinogradTransformFunctor<DeviceType::OPENCL, T> : WinogradTransformFuncto
 struct WinogradInverseTransformFunctorBase {
   WinogradInverseTransformFunctorBase(const int batch,
                                       const int height,
-                                      const int width)
-      : batch_(batch), height_(height), width_(width) {}
+                                      const int width,
+                                      const ActivationType activation,
+                                      const float relux_max_limit,
+                                      const float prelu_alpha)
+      : batch_(batch),
+        height_(height),
+        width_(width),
+        activation_(activation),
+        relux_max_limit_(relux_max_limit),
+        prelu_alpha_(prelu_alpha) {}
 
   const int batch_;
   const int height_;
   const int width_;
+  const ActivationType activation_;
+  const float relux_max_limit_;
+  const float prelu_alpha_;
 };
 
 template <DeviceType D, typename T>
 struct WinogradInverseTransformFunctor : WinogradInverseTransformFunctorBase {
   WinogradInverseTransformFunctor(const int batch,
                                   const int height,
-                                  const int width)
-      : WinogradInverseTransformFunctorBase(batch, height, width) {}
+                                  const int width,
+                                  const ActivationType activation,
+                                  const float relux_max_limit,
+                                  const float prelu_alpha)
+      : WinogradInverseTransformFunctorBase(batch, height, width, activation, relux_max_limit, prelu_alpha) {}
 
   void operator()(const Tensor *input,
+                  const Tensor *bias,
                   Tensor *output,
                   StatsFuture *future) {
     MACE_NOT_IMPLEMENTED;
@@ -74,10 +90,14 @@ template <typename T>
 struct WinogradInverseTransformFunctor<DeviceType::OPENCL, T> : WinogradInverseTransformFunctorBase {
   WinogradInverseTransformFunctor(const int batch,
                                   const int height,
-                                  const int width)
-      : WinogradInverseTransformFunctorBase(batch, height, width) {}
+                                  const int width,
+                                  const ActivationType activation,
+                                  const float relux_max_limit,
+                                  const float prelu_alpha)
+      : WinogradInverseTransformFunctorBase(batch, height, width, activation, relux_max_limit, prelu_alpha) {}
 
   void operator()(const Tensor *input,
+                  const Tensor *bias,
                   Tensor *output,
                   StatsFuture *future);
 };
diff --git a/mace/ops/activation_benchmark.cc b/mace/ops/activation_benchmark.cc
index 63d0cf7fa3dd6545d98db7a3c834ac065268eead..8010bc24dea8effe2750826e9d1c2bc8bb99fe9e 100644
--- a/mace/ops/activation_benchmark.cc
+++ b/mace/ops/activation_benchmark.cc
@@ -20,7 +20,7 @@ static void ReluBenchmark(
   if (D == DeviceType::OPENCL) {
     BufferToImage<D, T>(net, "Input", "InputImage",
-                        kernels::BufferType::IN_OUT);
+                        kernels::BufferType::IN_OUT_CHANNEL);
 
     OpDefBuilder("Activation", "ReluBM")
         .Input("InputImage")
@@ -79,7 +79,7 @@ static void ReluxBenchmark(
   if (D == DeviceType::OPENCL) {
     BufferToImage<D, T>(net, "Input", "InputImage",
-                        kernels::BufferType::IN_OUT);
+                        kernels::BufferType::IN_OUT_CHANNEL);
 
     OpDefBuilder("Activation", "ReluxBM")
         .Input("InputImage")
@@ -140,7 +140,7 @@ static void PreluBenchmark(
   if (D == DeviceType::OPENCL) {
     BufferToImage<D, T>(net, "Input", "InputImage",
-                        kernels::BufferType::IN_OUT);
+                        kernels::BufferType::IN_OUT_CHANNEL);
 
     OpDefBuilder("Activation", "PreluBM")
         .Input("InputImage")
@@ -201,7 +201,7 @@ static void TanhBenchmark(
   if (D == DeviceType::OPENCL) {
     BufferToImage<D, T>(net, "Input", "InputImage",
-                        kernels::BufferType::IN_OUT);
+                        kernels::BufferType::IN_OUT_CHANNEL);
 
     OpDefBuilder("Activation", "TanhBM")
         .Input("InputImage")
@@ -260,7 +260,7 @@ static void SigmoidBenchmark(
   if (D == DeviceType::OPENCL) {
     BufferToImage<D, T>(net, "Input", "InputImage",
-                        kernels::BufferType::IN_OUT);
+                        kernels::BufferType::IN_OUT_CHANNEL);
 
     OpDefBuilder("Activation", "SigmoidBM")
         .Input("InputImage")
diff --git a/mace/ops/activation_test.cc b/mace/ops/activation_test.cc
index e99579ab4bd7be3bc9f4af17351174284ef53acd..2fd1078c88ef3151268c2ff548a281bfb1bf3b3e 100644
--- a/mace/ops/activation_test.cc
+++ b/mace/ops/activation_test.cc
@@ -20,7 +20,7 @@ void TestSimpleRelu() {
   if (D == DeviceType::OPENCL) {
     BufferToImage<D, float>(net, "Input", "InputImage",
-                            kernels::BufferType::IN_OUT);
+                            kernels::BufferType::IN_OUT_CHANNEL);
 
     OpDefBuilder("Activation", "ReluTest")
         .Input("InputImage")
@@ -33,7 +33,7 @@ void TestSimpleRelu() {
 
     // Transfer output
     ImageToBuffer<D, float>(net, "OutputImage", "Output",
-                            kernels::BufferType::IN_OUT);
+                            kernels::BufferType::IN_OUT_CHANNEL);
   } else {
     OpDefBuilder("Activation", "ReluTest")
         .Input("Input")
@@ -70,7 +70,7 @@ void TestUnalignedSimpleRelu() {
   if (D == DeviceType::OPENCL) {
     BufferToImage<D, float>(net, "Input", "InputImage",
-                            kernels::BufferType::IN_OUT);
+                            kernels::BufferType::IN_OUT_CHANNEL);
 
     OpDefBuilder("Activation", "ReluTest")
         .Input("InputImage")
@@ -83,7 +83,7 @@ void TestUnalignedSimpleRelu() {
 
     // Transfer output
     ImageToBuffer<D, float>(net, "OutputImage", "Output",
-                            kernels::BufferType::IN_OUT);
+                            kernels::BufferType::IN_OUT_CHANNEL);
   } else {
     OpDefBuilder("Activation", "ReluTest")
        .Input("Input")
@@ -125,7 +125,7 @@ void TestSimpleRelux() {
   if (D == DeviceType::OPENCL) {
     BufferToImage<D, float>(net, "Input", "InputImage",
-                            kernels::BufferType::IN_OUT);
+                            kernels::BufferType::IN_OUT_CHANNEL);
 
     OpDefBuilder("Activation", "ReluxTest")
         .Input("InputImage")
@@ -139,7 +139,7 @@ void TestSimpleRelux() {
 
     // Transfer output
     ImageToBuffer<D, float>(net, "OutputImage", "Output",
-                            kernels::BufferType::IN_OUT);
+                            kernels::BufferType::IN_OUT_CHANNEL);
   } else {
     OpDefBuilder("Activation", "ReluxTest")
         .Input("Input")
@@ -179,7 +179,7 @@ void TestSimpleReluRelux() {
   if (D == DeviceType::OPENCL) {
     BufferToImage<D, float>(net, "Input", "InputImage",
-                            kernels::BufferType::IN_OUT);
+                            kernels::BufferType::IN_OUT_CHANNEL);
 
     OpDefBuilder("Activation", "ReluxTest")
         .Input("InputImage")
@@ -193,7 +193,7 @@ void TestSimpleReluRelux() {
 
     // Transfer output
     ImageToBuffer<D, float>(net, "OutputImage", "Output",
-                            kernels::BufferType::IN_OUT);
+                            kernels::BufferType::IN_OUT_CHANNEL);
   } else {
     OpDefBuilder("Activation", "ReluxTest")
         .Input("Input")
@@ -237,7 +237,7 @@ void TestSimplePrelu() {
   if (D == DeviceType::OPENCL) {
     BufferToImage<D, float>(net, "Input", "InputImage",
-                            kernels::BufferType::IN_OUT);
+                            kernels::BufferType::IN_OUT_CHANNEL);
 
     OpDefBuilder("Activation", "PreluTest")
         .Input("InputImage")
@@ -251,7 +251,7 @@ void TestSimplePrelu() {
 
     // Transfer output
     ImageToBuffer<D, float>(net, "OutputImage", "Output",
-                            kernels::BufferType::IN_OUT);
+                            kernels::BufferType::IN_OUT_CHANNEL);
   } else {
     OpDefBuilder("Activation", "PreluTest")
         .Input("Input")
@@ -293,7 +293,7 @@ void TestSimpleTanh() {
   if (D == DeviceType::OPENCL) {
     BufferToImage<D, float>(net, "Input", "InputImage",
-                            kernels::BufferType::IN_OUT);
+                            kernels::BufferType::IN_OUT_CHANNEL);
 
     OpDefBuilder("Activation", "TanhTest")
         .Input("InputImage")
@@ -306,7 +306,7 @@ void TestSimpleTanh() {
 
     // Transfer output
     ImageToBuffer<D, float>(net, "OutputImage", "Output",
-                            kernels::BufferType::IN_OUT);
+                            kernels::BufferType::IN_OUT_CHANNEL);
   } else {
     OpDefBuilder("Activation", "TanhTest")
         .Input("Input")
@@ -348,7 +348,7 @@ void TestSimpleSigmoid() {
   if (D == DeviceType::OPENCL) {
     BufferToImage<D, float>(net, "Input", "InputImage",
-                            kernels::BufferType::IN_OUT);
+                            kernels::BufferType::IN_OUT_CHANNEL);
 
     OpDefBuilder("Activation", "SigmoidTest")
         .Input("InputImage")
@@ -361,7 +361,7 @@ void TestSimpleSigmoid() {
 
     // Transfer output
     ImageToBuffer<D, float>(net, "OutputImage", "Output",
-                            kernels::BufferType::IN_OUT);
+                            kernels::BufferType::IN_OUT_CHANNEL);
   } else {
     OpDefBuilder("Activation", "SigmoidTest")
         .Input("Input")
diff --git a/mace/ops/conv_2d_benchmark.cc b/mace/ops/conv_2d_benchmark.cc
index 713e08ebb48d7455881209122a601b6b8e7d7c3a..b02eb17e63d5e1e3cf126b6be0284bdb9f73d954 100644
--- a/mace/ops/conv_2d_benchmark.cc
+++ b/mace/ops/conv_2d_benchmark.cc
@@ -96,17 +96,18 @@ static void Conv2d(int iters,
   BM_CONV_2D_MACRO(N, C, H, W, KH, KW, S, P, OC, TYPE, OPENCL);
 
 // ICNet
-//BM_CONV_2D(1, 512, 15, 15, 1, 1, 1, VALID, 1024, half);
+BM_CONV_2D(1, 512, 15, 15, 1, 1, 1, VALID, 1024, half);
 //// SNPE GPU ExecutionDuration = 448us, % ALU Utilization = 105
-//BM_CONV_2D(1, 64, 60, 60, 1, 1, 1, VALID, 128, half);
+BM_CONV_2D(1, 64, 60, 60, 1, 1, 1, VALID, 128, half);
 //// SNPE GPU ExecutionDuration = 258us, % ALU Utilization = 108
-//BM_CONV_2D(1, 32, 60, 60, 1, 1, 1, VALID, 128, half);
-//
-//BM_CONV_2D(1, 128, 60, 60, 3, 3, 1, VALID, 128, half);
+BM_CONV_2D(1, 32, 60, 60, 1, 1, 1, VALID, 128, half);
+
+BM_CONV_2D(1, 128, 60, 60, 3, 3, 1, VALID, 128, half);
 //// SNPE GPU ExecutionDuration = 506us, % ALU Utilization = 106.8
-//BM_CONV_2D(1, 32, 60, 60, 3, 3, 1, SAME, 32, half);
-//BM_CONV_2D(1, 3, 512, 512, 7, 7, 2, SAME, 64, half);
-//BM_CONV_2D(1, 512, 64, 64, 1, 1, 1, SAME, 256, half);
+BM_CONV_2D(1, 32, 60, 60, 3, 3, 1, SAME, 32, half);
+BM_CONV_2D(1, 3, 512, 512, 7, 7, 2, SAME, 64, half);
+BM_CONV_2D(1, 512, 64, 64, 1, 1, 1, SAME, 256, half);
+
 BM_CONV_2D(1, 128, 16, 16, 3, 3, 1, VALID, 32, half);
 BM_CONV_2D(1, 128, 64, 64, 3, 3, 1, VALID, 32, half);
 BM_CONV_2D(1, 128, 128, 128, 3, 3, 1, VALID, 32, half);
diff --git a/mace/ops/folded_batch_norm.cc b/mace/ops/folded_batch_norm.cc
index 5a04c48dd8f2000c9a33b175ec5c67f4c4aebe81..9915bee4128f1e3766a91070d1cae48e044f459f 100644
--- a/mace/ops/folded_batch_norm.cc
+++ b/mace/ops/folded_batch_norm.cc
@@ -7,10 +7,11 @@ namespace mace {
 
 void Register_FoldedBatchNorm(OperatorRegistry *op_registry) {
-  REGISTER_OPERATOR(op_registry, OpKeyBuilder("FoldedBatchNorm")
-                        .Device(DeviceType::CPU)
-                        .TypeConstraint<float>("T")
-                        .Build(),
+  REGISTER_OPERATOR(op_registry,
+                    OpKeyBuilder("FoldedBatchNorm")
+                        .Device(DeviceType::CPU)
+                        .TypeConstraint<float>("T")
+                        .Build(),
                     FoldedBatchNormOp<DeviceType::CPU, float>);
 
 #if MACE_ENABLE_NEON
@@ -21,16 +22,18 @@ void Register_FoldedBatchNorm(OperatorRegistry *op_registry) {
                     FoldedBatchNormOp<DeviceType::NEON, float>);
 #endif  // MACE_ENABLE_NEON
 
-  REGISTER_OPERATOR(op_registry, OpKeyBuilder("FoldedBatchNorm")
-                        .Device(DeviceType::OPENCL)
-                        .TypeConstraint<float>("T")
-                        .Build(),
+  REGISTER_OPERATOR(op_registry,
+                    OpKeyBuilder("FoldedBatchNorm")
+                        .Device(DeviceType::OPENCL)
+                        .TypeConstraint<float>("T")
+                        .Build(),
                     FoldedBatchNormOp<DeviceType::OPENCL, float>);
 
-  REGISTER_OPERATOR(op_registry, OpKeyBuilder("FoldedBatchNorm")
-                        .Device(DeviceType::OPENCL)
-                        .TypeConstraint<half>("T")
-                        .Build(),
+  REGISTER_OPERATOR(op_registry,
+                    OpKeyBuilder("FoldedBatchNorm")
+                        .Device(DeviceType::OPENCL)
+                        .TypeConstraint<half>("T")
+                        .Build(),
                     FoldedBatchNormOp<DeviceType::OPENCL, half>);
 }
diff --git a/mace/ops/winograd_convolution_test.cc b/mace/ops/winograd_convolution_test.cc
index 3aa058a560742f0eb27c792541e55a8f8dc03f8a..d5e54a9746186cf64a9164d90b2e8d863af87a42 100644
--- a/mace/ops/winograd_convolution_test.cc
+++ b/mace/ops/winograd_convolution_test.cc
@@ -11,7 +11,6 @@ namespace mace {
 
 class WinogradConvlutionTest : public OpsTestBase {};
 
-
 void TransposeFilter(const std::vector<float> &input,
                      const std::vector<index_t> &input_shape,
                      std::vector<float> &output) {
@@ -48,14 +47,18 @@ void WinogradConvolution(const index_t batch,
   GenerateRandomRealTypeData(filter_shape, filter_data);
   net.AddRandomInput<D, float>("Input", {batch, height, width, in_channels});
   net.AddInputFromArray<D, float>("Filter", filter_shape, filter_data);
+  net.AddRandomInput<D, float>("Bias", {out_channels});
 
   BufferToImage<D, T>(net, "Input", "InputImage",
                       kernels::BufferType::IN_OUT_CHANNEL);
   BufferToImage<D, T>(net, "Filter", "FilterImage",
                       kernels::BufferType::FILTER);
+  BufferToImage<D, T>(net, "Bias", "BiasImage",
+                      kernels::BufferType::ARGUMENT);
 
   OpDefBuilder("Conv2D", "Conv2dTest")
       .Input("InputImage")
       .Input("FilterImage")
+      .Input("BiasImage")
       .Output("OutputImage")
       .AddIntsArg("strides", {1, 1})
       .AddIntArg("padding", padding)
@@ -102,6 +105,7 @@ void WinogradConvolution(const index_t batch,
   // Inverse transform
   OpDefBuilder("WinogradInverseTransform", "WinogradInverseTransformTest")
       .Input("WinoGemm")
+      .Input("BiasImage")
       .AddIntArg("batch", batch)
       .AddIntArg("height", output_shape[1])
       .AddIntArg("width", output_shape[2])
@@ -113,7 +117,7 @@ void WinogradConvolution(const index_t batch,
   net.Sync();
 
   ImageToBuffer<D, T>(net, "WinoOutputImage", "WinoOutput",
-                     kernels::BufferType::IN_OUT_CHANNEL);
+                      kernels::BufferType::IN_OUT_CHANNEL);
   if (DataTypeToEnum<T>::value == DataType::DT_HALF) {
     ExpectTensorNear<float>(expected, *net.GetOutput("WinoOutput"), 1e-1);
   } else {
@@ -121,7 +125,6 @@ void WinogradConvolution(const index_t batch,
   }
 }
 
-
 TEST_F(WinogradConvlutionTest, AlignedConvolution) {
   WinogradConvolution<DeviceType::OPENCL, float>(1, 32, 32, 32, 16, Padding::VALID);
   WinogradConvolution<DeviceType::OPENCL, float>(1, 32, 32, 32, 16, Padding::SAME);
diff --git a/mace/ops/winograd_inverse_transform.h b/mace/ops/winograd_inverse_transform.h
index c620246cd74b1c2d63488701cfd3e36151f5ed8b..4c20769f1fd461f393c1c57e58bc5f089197ed7c 100644
--- a/mace/ops/winograd_inverse_transform.h
+++ b/mace/ops/winograd_inverse_transform.h
@@ -9,6 +9,7 @@
 
 #include "mace/core/operator.h"
 #include "mace/kernels/winograd_transform.h"
+#include "mace/kernels/activation.h"
 
 namespace mace {
 
@@ -19,13 +20,18 @@ class WinogradInverseTransformOp : public Operator<D, T> {
       : Operator<D, T>(op_def, ws),
        functor_(OperatorBase::GetSingleArgument<int>("batch", 1),
                 OperatorBase::GetSingleArgument<int>("height", 0),
-                 OperatorBase::GetSingleArgument<int>("width", 0)) {}
+                 OperatorBase::GetSingleArgument<int>("width", 0),
+                 kernels::StringToActivationType(
+                     OperatorBase::GetSingleArgument<std::string>("activation",
+                                                                  "NOOP")),
+                 OperatorBase::GetSingleArgument<float>("max_limit", 0.0f),
+                 OperatorBase::GetSingleArgument<float>("alpha", 0.0f)) {}
 
   bool Run(StatsFuture *future) override {
     const Tensor *input_tensor = this->Input(INPUT);
+    const Tensor *bias = this->InputSize() == 2 ? this->Input(BIAS) : nullptr;
     Tensor *output_tensor = this->Output(OUTPUT);
-
-    functor_(input_tensor, output_tensor, future);
+    functor_(input_tensor, bias, output_tensor, future);
     return true;
   }
 
@@ -33,7 +39,7 @@ class WinogradInverseTransformOp : public Operator<D, T> {
   kernels::WinogradInverseTransformFunctor<D, T> functor_;
 
  protected:
-  OP_INPUT_TAGS(INPUT);
+  OP_INPUT_TAGS(INPUT, BIAS);
   OP_OUTPUT_TAGS(OUTPUT);
 };
diff --git a/mace/proto/BUILD b/mace/proto/BUILD
index 5222b06bda6e1681b15ac7f60317376c5d34fa3d..8649197b94508615dd395a991bccfe5205042804 100644
--- a/mace/proto/BUILD
+++ b/mace/proto/BUILD
@@ -10,15 +10,6 @@ licenses(["notice"])  # Apache 2.0
 
 load("@com_google_protobuf//:protobuf.bzl", "py_proto_library")
 
-py_proto_library(
-    name = "mace_py",
-    srcs = ["mace.proto"],
-    default_runtime = "@com_google_protobuf//:protobuf_python",
-    protoc = "@com_google_protobuf//:protoc",
-    srcs_version = "PY2AND3",
-    deps = ["@com_google_protobuf//:protobuf_python"],
-)
-
 py_proto_library(
     name = "caffe_py",
     srcs = ["caffe.proto"],