diff --git a/mace/ops/quantization_util.cc b/mace/ops/quantization_util.cc
index d34e77455b7389ff0bfc30fe85196d5128a6991d..9df5c6fdf2b0e87a08088799793941f47eb6f922 100644
--- a/mace/ops/quantization_util.cc
+++ b/mace/ops/quantization_util.cc
@@ -1,4 +1,4 @@
-// Copyright 2018 Xiaomi, Inc. All rights reserved.
+// Copyright 2018 The MACE Authors. All Rights Reserved.
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
diff --git a/mace/ops/quantization_util.h b/mace/ops/quantization_util.h
index 2e8806efc67b173d7c5845ecf3c244ba5fca0579..3e6beeb0eea9439b54e8d9f90ecd8b3a74ac675b 100644
--- a/mace/ops/quantization_util.h
+++ b/mace/ops/quantization_util.h
@@ -1,4 +1,4 @@
-// Copyright 2018 Xiaomi, Inc. All rights reserved.
+// Copyright 2018 The MACE Authors. All Rights Reserved.
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
diff --git a/mace/ops/reduce.cc b/mace/ops/reduce.cc
index 88e909c0f5a52705b2d3a9a486ca4c5445bbf91f..f4a147cc7b8191f5323cf38acd532830a44948c9 100644
--- a/mace/ops/reduce.cc
+++ b/mace/ops/reduce.cc
@@ -73,6 +73,9 @@ class ReduceOp : public ReduceOpBase {
     const Tensor *input = this->Input(0);
     Tensor *output = this->Output(0);
     Simplify(input);
+    // Use the same scale and zero point as the input for the output.
+    output->SetScale(input->scale());
+    output->SetZeroPoint(input->zero_point());
     output->Resize(out_shape_);
     Compute(input, output);
     return MaceStatus::MACE_SUCCESS;
@@ -92,7 +95,8 @@ class ReduceOp : public ReduceOpBase {
                         axis_[i] + input->dim_size();
       auto df = static_cast<DataFormat>(Operation::GetOptionalArg<int>(
           "data_format", DataFormat::DF_NONE));
-      if (df == DataFormat::NHWC && input->dim_size() == 4) {
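+      // Quantized (uint8) tensors stay in NHWC layout, so their axes
+      // need no NCHW remapping.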
+      if (df == DataFormat::NHWC && DataTypeToEnum<T>::value != DT_UINT8
+          && input->dim_size() == 4) {
         if (index == 1 || index == 2) index = index + 1;
         else if (index == 3) index = 1;
       }
@@ -132,7 +136,7 @@ class ReduceOp : public ReduceOpBase {
     }
   }
 
-  void compute_reduce_1(const T *input, ReduceType type, T *output) {
+  void Reduce1Dims(const T *input, ReduceType type, T *output) {
     if (reduce_first_axis_) {
       if (type == ReduceType::MEAN) {
         T tmp = 0;
@@ -166,7 +170,7 @@ class ReduceOp : public ReduceOpBase {
     }
   }
 
-  void compute_reduce_2(const T *input, ReduceType type, T *output) {
+  void Reduce2Dims(const T *input, ReduceType type, T *output) {
     if (reduce_first_axis_) {
       if (type == ReduceType::MEAN) {
 #pragma omp parallel for schedule(runtime)
@@ -250,7 +254,7 @@ class ReduceOp : public ReduceOpBase {
     }
   }
 
-  void compute_reduce_3(const T *input, ReduceType type, T *output) {
+  void Reduce3Dims(const T *input, ReduceType type, T *output) {
     if (reduce_first_axis_) {
       if (type == ReduceType::MEAN) {
 #pragma omp parallel for collapse(1) schedule(runtime)
@@ -364,7 +368,7 @@ class ReduceOp : public ReduceOpBase {
     }
   }
 
-  void compute_reduce_4(const T *input, ReduceType type, T *output) {
+  void Reduce4Dims(const T *input, ReduceType type, T *output) {
     if (reduce_first_axis_) {
       if (type == ReduceType::MEAN) {
 #pragma omp parallel for collapse(2) schedule(runtime)
@@ -498,7 +502,6 @@ class ReduceOp : public ReduceOpBase {
     }
   }
 
-
   void Compute(const Tensor *input, Tensor *output) {
     Tensor::MappingGuard input_mapper(input);
     const T *input_ptr = input->data<T>();
@@ -507,16 +510,16 @@ class ReduceOp : public ReduceOpBase {
     memset(output_ptr, 0, output->size() * sizeof(T));
     switch (data_reshape_.size()) {
       case 1:
-        compute_reduce_1(input_ptr, reduce_type_, output_ptr);
+        Reduce1Dims(input_ptr, reduce_type_, output_ptr);
         break;
       case 2:
-        compute_reduce_2(input_ptr, reduce_type_, output_ptr);
+        Reduce2Dims(input_ptr, reduce_type_, output_ptr);
         break;
       case 3:
-        compute_reduce_3(input_ptr, reduce_type_, output_ptr);
+        Reduce3Dims(input_ptr, reduce_type_, output_ptr);
         break;
       case 4:
-        compute_reduce_4(input_ptr, reduce_type_, output_ptr);
+        Reduce4Dims(input_ptr, reduce_type_, output_ptr);
         break;
       default:
         MACE_CHECK(false, "not implemented in mace")
@@ -532,6 +535,311 @@ class ReduceOp : public ReduceOpBase {
   std::vector<index_t> out_shape_;
 };
 
+#ifdef MACE_ENABLE_QUANTIZE
+template <>
+void ReduceOp<DeviceType::CPU, uint8_t>::Reduce1Dims(
+    const uint8_t *input, ReduceType type, uint8_t *output) {
+  if (reduce_first_axis_) {
+    if (type == ReduceType::MEAN) {
+      uint32_t tmp = 0;
+      for (int i = 0; i < data_reshape_[0]; ++i) {
+        tmp = tmp + input[i];
+      }
+      output[0] = static_cast<uint8_t>(
+          (tmp + data_reshape_[0] / 2) / data_reshape_[0]);
+    } else if (type == ReduceType::MIN) {
+      uint8_t tmp = input[0];
+      for (int i = 1; i < data_reshape_[0]; ++i) {
+        tmp = std::min(tmp, input[i]);
+      }
+      output[0] = tmp;
+    } else if (type == ReduceType::MAX) {
+      uint8_t tmp = input[0];
+      for (int i = 1; i < data_reshape_[0]; ++i) {
+        tmp = std::max(tmp, input[i]);
+      }
+      output[0] = tmp;
+    } else {
+      MACE_NOT_IMPLEMENTED;
+    }
+  } else {
+    memcpy(output, input, data_reshape_[0] * sizeof(uint8_t));
+  }
+}
+
+template <>
+void ReduceOp<DeviceType::CPU, uint8_t>::Reduce2Dims(
+    const uint8_t *input, ReduceType type, uint8_t *output) {
+  if (reduce_first_axis_) {
+    if (type == ReduceType::MEAN) {
+#pragma omp parallel for schedule(runtime)
+      for (int i = 0; i < data_reshape_[1]; ++i) {
+        uint32_t tmp = 0;
+        for (int j = 0; j < data_reshape_[0]; ++j) {
+          tmp += input[j * data_reshape_[1] + i];
+        }
+        output[i] = static_cast<uint8_t>(
+            (tmp + data_reshape_[0] / 2) / data_reshape_[0]);
+      }
+    } else if (type == ReduceType::MIN) {
+#pragma omp parallel for schedule(runtime)
+      for (int i = 0; i < data_reshape_[1]; ++i) {
+        uint8_t tmp = input[i];
+        for (int j = 1; j < data_reshape_[0]; ++j) {
+          tmp = std::min(tmp, input[j * data_reshape_[1] + i]);
+        }
+        output[i] = tmp;
+      }
+    } else if (type == ReduceType::MAX) {
+#pragma omp parallel for schedule(runtime)
+      for (int i = 0; i < data_reshape_[1]; ++i) {
+        uint8_t tmp = input[i];
+        for (int j = 1; j < data_reshape_[0]; ++j) {
+          tmp = std::max(tmp, input[j * data_reshape_[1] + i]);
+        }
+        output[i] = tmp;
+      }
+    } else {
+      MACE_NOT_IMPLEMENTED;
+    }
+  } else {
+    if (type == ReduceType::MEAN) {
+#pragma omp parallel for schedule(runtime)
+      for (int i = 0; i < data_reshape_[0]; ++i) {
+        uint32_t tmp = 0;
+        for (int j = 0; j < data_reshape_[1]; ++j) {
+          tmp += input[i * data_reshape_[1] + j];
+        }
+        output[i] = static_cast<uint8_t>(
+            (tmp + data_reshape_[1] / 2) / data_reshape_[1]);
+      }
+    } else if (type == ReduceType::MIN) {
+#pragma omp parallel for schedule(runtime)
+      for (int i = 0; i < data_reshape_[0]; ++i) {
+        uint8_t tmp = input[i * data_reshape_[1]];
+        for (int j = 1; j < data_reshape_[1]; ++j) {
+          tmp = std::min(tmp, input[i * data_reshape_[1] + j]);
+        }
+        output[i] = tmp;
+      }
+    } else if (type == ReduceType::MAX) {
+#pragma omp parallel for schedule(runtime)
+      for (int i = 0; i < data_reshape_[0]; ++i) {
+        uint8_t tmp = input[i * data_reshape_[1]];
+        for (int j = 1; j < data_reshape_[1]; ++j) {
+          tmp = std::max(tmp, input[i * data_reshape_[1] + j]);
+        }
+        output[i] = tmp;
+      }
+    } else {
+      MACE_NOT_IMPLEMENTED;
+    }
+  }
+}
+
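+// For the 3-D reshaped case, reduce_first_axis_ means axes 0 and 2 are
+// reduced; otherwise only axis 1 is. MEAN accumulates into uint32 and
+// rounds the final integer division.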
+template <>
+void ReduceOp<DeviceType::CPU, uint8_t>::Reduce3Dims(
+    const uint8_t *input, ReduceType type, uint8_t *output) {
+  if (reduce_first_axis_) {
+    if (type == ReduceType::MEAN) {
+#pragma omp parallel for collapse(1) schedule(runtime)
+      for (int i = 0; i < data_reshape_[1]; ++i) {
+        uint32_t tmp = 0;
+        for (int j = 0; j < data_reshape_[2]; ++j) {
+          for (int k = 0; k < data_reshape_[0]; ++k) {
+            tmp += input[(k * data_reshape_[1] + i) * data_reshape_[2] + j];
+          }
+        }
+        index_t dim = data_reshape_[0] * data_reshape_[2];
+        output[i] = static_cast<uint8_t>((tmp + dim / 2) / dim);
+      }
+    } else if (type == ReduceType::MIN) {
+#pragma omp parallel for collapse(1) schedule(runtime)
+      for (int i = 0; i < data_reshape_[1]; ++i) {
+        uint8_t tmp = input[i * data_reshape_[2]];
+        for (int j = 0; j < data_reshape_[2]; ++j) {
+          for (int k = 0; k < data_reshape_[0]; ++k) {
+            tmp = std::min(tmp,
+                           input[(k * data_reshape_[1] + i) * data_reshape_[2]
+                               + j]);
+          }
+        }
+        output[i] = tmp;
+      }
+    } else if (type == ReduceType::MAX) {
+#pragma omp parallel for collapse(1) schedule(runtime)
+      for (int i = 0; i < data_reshape_[1]; ++i) {
+        uint8_t tmp = input[i * data_reshape_[2]];
+        for (int j = 0; j < data_reshape_[2]; ++j) {
+          for (int k = 0; k < data_reshape_[0]; ++k) {
+            tmp =
+                std::max(tmp,
+                         input[(k * data_reshape_[1] + i)
+                             * data_reshape_[2] + j]);
+          }
+        }
+        output[i] = tmp;
+      }
+    } else {
+      MACE_NOT_IMPLEMENTED;
+    }
+  } else {
+    if (type == ReduceType::MEAN) {
+#pragma omp parallel for collapse(2) schedule(runtime)
+      for (int i = 0; i < data_reshape_[0]; ++i) {
+        for (int j = 0; j < data_reshape_[2]; ++j) {
+          uint32_t tmp = 0;
+          for (int k = 0; k < data_reshape_[1]; ++k) {
+            tmp += input[(i * data_reshape_[1] + k) * data_reshape_[2] + j];
+          }
+          output[i * data_reshape_[2] + j] =
+              static_cast<uint8_t>((tmp + data_reshape_[1] / 2) /
+                  data_reshape_[1]);
+        }
+      }
+    } else if (type == ReduceType::MIN) {
+#pragma omp parallel for collapse(2) schedule(runtime)
+      for (int i = 0; i < data_reshape_[0]; ++i) {
+        for (int j = 0; j < data_reshape_[2]; ++j) {
+          uint8_t tmp = input[i * data_reshape_[1] * data_reshape_[2] + j];
+          for (int k = 1; k < data_reshape_[1]; ++k) {
+            tmp = std::min(tmp,
+                           input[(i * data_reshape_[1] + k) *
+                               data_reshape_[2] + j]);
+          }
+          output[i * data_reshape_[2] + j] = tmp;
+        }
+      }
+    } else if (type == ReduceType::MAX) {
+#pragma omp parallel for collapse(2) schedule(runtime)
+      for (int i = 0; i < data_reshape_[0]; ++i) {
+        for (int j = 0; j < data_reshape_[2]; ++j) {
+          uint8_t tmp = input[i * data_reshape_[1] * data_reshape_[2] + j];
+          for (int k = 1; k < data_reshape_[1]; ++k) {
+            tmp = std::max(tmp,
+                           input[(i * data_reshape_[1] + k) *
+                               data_reshape_[2] + j]);
+          }
+          output[i * data_reshape_[2] + j] = tmp;
+        }
+      }
+    } else {
+      MACE_NOT_IMPLEMENTED;
+    }
+  }
+}
+
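+// For the 4-D reshaped case, reduce_first_axis_ selects axes {0, 2};
+// otherwise axes {1, 3} are reduced.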
+template <>
+void ReduceOp<DeviceType::CPU, uint8_t>::Reduce4Dims(
+    const uint8_t *input, ReduceType type, uint8_t *output) {
+  if (reduce_first_axis_) {
+    if (type == ReduceType::MEAN) {
+#pragma omp parallel for collapse(2) schedule(runtime)
+      for (int i = 0; i < data_reshape_[1]; ++i) {
+        for (int j = 0; j < data_reshape_[3]; ++j) {
+          uint32_t tmp = 0;
+          for (int k = 0; k < data_reshape_[2]; ++k) {
+            for (int t = 0; t < data_reshape_[0]; ++t) {
+              tmp += input[((t * data_reshape_[1] + i) *
+                  data_reshape_[2] + k) * data_reshape_[3] + j];
+            }
+          }
+          index_t dim = data_reshape_[0] * data_reshape_[2];
+          output[i * data_reshape_[3] + j] =
+              static_cast<uint8_t>((tmp + dim / 2) / dim);
+        }
+      }
+    } else if (type == ReduceType::MIN) {
+#pragma omp parallel for collapse(2) schedule(runtime)
+      for (int i = 0; i < data_reshape_[1]; ++i) {
+        for (int j = 0; j < data_reshape_[3]; ++j) {
+          uint8_t tmp = input[i * data_reshape_[2] * data_reshape_[3] + j];
+          for (int k = 0; k < data_reshape_[2]; ++k) {
+            for (int t = 0; t < data_reshape_[0]; ++t) {
+              tmp = std::min(tmp,
+                             input[((t * data_reshape_[1] + i) *
+                                 data_reshape_[2] + k) * data_reshape_[3] + j]);
+            }
+          }
+          output[i * data_reshape_[3] + j] = tmp;
+        }
+      }
+    } else if (type == ReduceType::MAX) {
+#pragma omp parallel for collapse(2) schedule(runtime)
+      for (int i = 0; i < data_reshape_[1]; ++i) {
+        for (int j = 0; j < data_reshape_[3]; ++j) {
+          uint8_t tmp = input[i * data_reshape_[2] * data_reshape_[3] + j];
+          for (int k = 0; k < data_reshape_[2]; ++k) {
+            for (int t = 0; t < data_reshape_[0]; ++t) {
+              tmp = std::max(tmp,
+                             input[((t * data_reshape_[1] + i) *
+                                 data_reshape_[2] + k) * data_reshape_[3] + j]);
+            }
+          }
+          output[i * data_reshape_[3] + j] = tmp;
+        }
+      }
+    } else {
+      MACE_NOT_IMPLEMENTED;
+    }
+  } else {
+    if (type == ReduceType::MEAN) {
+#pragma omp parallel for collapse(2) schedule(runtime)
+      for (int i = 0; i < data_reshape_[0]; ++i) {
+        for (int j = 0; j < data_reshape_[2]; ++j) {
+          uint32_t tmp = 0;
+          for (int k = 0; k < data_reshape_[1]; ++k) {
+            for (int t = 0; t < data_reshape_[3]; ++t) {
+              tmp += input[((i * data_reshape_[1] + k) *
+                  data_reshape_[2] + j) * data_reshape_[3] + t];
+            }
+          }
+          index_t dim = data_reshape_[1] * data_reshape_[3];
+          output[i * data_reshape_[2] + j] =
+              static_cast<uint8_t>((tmp + dim / 2) / dim);
+        }
+      }
+    } else if (type == ReduceType::MIN) {
+#pragma omp parallel for collapse(2) schedule(runtime)
+      for (int i = 0; i < data_reshape_[0]; ++i) {
+        for (int j = 0; j < data_reshape_[2]; ++j) {
+          uint8_t tmp = input[(i * data_reshape_[1] *
+              data_reshape_[2] + j) * data_reshape_[3]];
+          for (int k = 0; k < data_reshape_[1]; ++k) {
+            for (int t = 0; t < data_reshape_[3]; ++t) {
+              tmp =
+                  std::min(tmp,
+                           input[((i * data_reshape_[1] + k) *
+                               data_reshape_[2] + j) * data_reshape_[3] + t]);
+            }
+          }
+          output[i * data_reshape_[2] + j] = tmp;
+        }
+      }
+    } else if (type == ReduceType::MAX) {
+#pragma omp parallel for collapse(2) schedule(runtime)
+      for (int i = 0; i < data_reshape_[0]; ++i) {
+        for (int j = 0; j < data_reshape_[2]; ++j) {
+          uint8_t tmp = input[(i * data_reshape_[1] *
+              data_reshape_[2] + j) * data_reshape_[3]];
+          for (int k = 0; k < data_reshape_[1]; ++k) {
+            for (int t = 0; t < data_reshape_[3]; ++t) {
+              tmp =
+                  std::max(tmp,
+                           input[((i * data_reshape_[1] + k) *
+                               data_reshape_[2] + j) * data_reshape_[3] + t]);
+            }
+          }
+          output[i * data_reshape_[2] + j] = tmp;
+        }
+      }
+    } else {
+      MACE_NOT_IMPLEMENTED;
+    }
+  }
+}
+#endif  // MACE_ENABLE_QUANTIZE
+
 #ifdef MACE_ENABLE_OPENCL
 template <typename T>
 class ReduceOp<DeviceType::GPU, T> : public ReduceOpBase {
@@ -562,7 +870,10 @@ class ReduceOp<DeviceType::GPU, T> : public ReduceOpBase {
 void RegisterReduce(OpRegistryBase *op_registry) {
   MACE_REGISTER_OP(op_registry, "Reduce", ReduceOp,
                    DeviceType::CPU, float);
-
+#ifdef MACE_ENABLE_QUANTIZE
+  MACE_REGISTER_OP(op_registry, "Reduce", ReduceOp,
+                   DeviceType::CPU, uint8_t);
+#endif  // MACE_ENABLE_QUANTIZE
 #ifdef MACE_ENABLE_OPENCL
   MACE_REGISTER_OP(op_registry, "Reduce", ReduceOp,
                    DeviceType::GPU, float);
diff --git a/mace/ops/reduce_test.cc b/mace/ops/reduce_test.cc
index 62d6cf4c23b96a508eef9c98ff2f61ddecd7904e..78a9f9345a8ca4da9eae0a0beedcb8dd1fbed49c 100644
--- a/mace/ops/reduce_test.cc
+++ b/mace/ops/reduce_test.cc
@@ -644,6 +644,89 @@ TEST_F(ReduceOpTest, GPURandomHalf) {
   RandomTest<DeviceType::GPU, half>({1, 511, 561, 11}, {1, 2});
 }
 
+namespace {
+
+void TestQuant(const std::vector<index_t> &input_shape,
+               const std::vector<int> &axis) {
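+  // Runs the float Reduce as a reference, then the uint8 path on the
+  // quantized input, and checks the dequantized result against it.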
+  auto func = [&](ReduceType type) {
+    OpsTestNet net;
+    net.AddRandomInput<DeviceType::CPU, float>(
+        "Input", input_shape, false, false);
+    net.TransformDataFormat<DeviceType::CPU, float>(
+        "Input", NHWC, "InputNCHW", NCHW);
+    net.AddRandomInput<DeviceType::CPU, float>(
+        "OutputNCHW", input_shape, false, true, true);
+
+    OpDefBuilder("Reduce", "ReduceTest")
+        .Input("InputNCHW")
+        .AddIntsArg("axis", axis)
+        .AddIntArg("keepdims", 1)
+        .AddIntArg("reduce_type", type)
+        .AddIntArg("data_format", DataFormat::NHWC)
+        .Output("OutputNCHW")
+        .AddIntArg("T", DT_FLOAT)
+        .Finalize(net.NewOperatorDef());
+    net.RunOp(CPU);
+    net.TransformDataFormat<DeviceType::CPU, float>(
+        "OutputNCHW", NCHW, "Output", NHWC);
+
+    OpDefBuilder("Quantize", "QuantizeInput")
+        .Input("Input")
+        .Output("QuantizedInput")
+        .OutputType({DT_UINT8})
+        .AddIntArg("T", DT_UINT8)
+        .AddIntArg("non_zero", true)
+        .Finalize(net.NewOperatorDef());
+    net.RunOp();
+
+    net.AddRandomInput<DeviceType::CPU, uint8_t>("QuantizedOutput",
+                                                 input_shape);
+    OpDefBuilder("Reduce", "ReduceTest")
+        .Input("QuantizedInput")
+        .Output("QuantizedOutput")
+        .AddIntsArg("axis", axis)
+        .AddIntArg("keepdims", 1)
+        .AddIntArg("reduce_type", type)
+        .AddIntArg("data_format", DataFormat::NHWC)
+        .AddIntArg("T", DT_UINT8)
+        .Finalize(net.NewOperatorDef());
+    net.RunOp();
+
+    OpDefBuilder("Dequantize", "DeQuantizeTest")
+        .Input("QuantizedOutput")
+        .Output("DequantizedOutput")
+        .OutputType({DT_FLOAT})
+        .AddIntArg("T", DT_UINT8)
+        .Finalize(net.NewOperatorDef());
+    net.RunOp();
+
+    // Check
+    ExpectTensorSimilar<float>(*net.GetOutput("Output"),
+                               *net.GetTensor("DequantizedOutput"), 0.01);
+  };
+
+  for (ReduceType type : {MEAN, MIN, MAX}) {
+    func(type);
+  }
+}
+}  // namespace
+
+TEST_F(ReduceOpTest, Quant) {
+  // reduce 1, first axis
+  TestQuant({1, 1, 3, 4}, {2, 3});
+  // reduce 2, first axis
+  TestQuant({1, 4, 4, 320}, {1, 2});
+  // reduce 2, not first axis
+  TestQuant({16, 320, 4, 4}, {2, 3});
+  // reduce 3, first axis
+  TestQuant({1, 4, 323, 4}, {1, 3});
+  // reduce 3, not first axis
+  TestQuant({15, 117, 15, 32}, {2});
+  // reduce 4, first axis
+  TestQuant({4, 323, 4, 4}, {0, 2});
+  // reduce 4, not first axis
+  TestQuant({32, 4, 323, 16}, {1, 3});
+}
+
 }  // namespace test
 }  // namespace ops
 }  // namespace mace
diff --git a/mace/python/tools/converter_tool/hexagon_converter.py b/mace/python/tools/converter_tool/hexagon_converter.py
index f37ab7ba15027cc021d55d381352c6ec436ddfb3..c3e590f658b6b24227d2cfb8d4e01bf8b60f30e3 100644
--- a/mace/python/tools/converter_tool/hexagon_converter.py
+++ b/mace/python/tools/converter_tool/hexagon_converter.py
@@ -25,6 +25,7 @@ from mace.python.tools.converter_tool.base_converter import MaceKeyword
 from mace.python.tools.converter_tool.base_converter import MaceOp
 from mace.python.tools.converter_tool.base_converter import PaddingMode
 from mace.python.tools.converter_tool.base_converter import PoolingType
+from mace.python.tools.converter_tool.base_converter import ReduceType
 from mace.python.tools.convert_util import mace_check
 from mace.python.tools import graph_util
 
@@ -63,6 +64,7 @@ class HexagonOps(object):
             MaceOp.Quantize.name: HexagonOp.QuantizeINPUT_f_to_8.name,
             MaceOp.Pooling.name: [HexagonOp.QuantizedAvgPool_8.name,
                                   HexagonOp.QuantizedMaxPool_8.name],
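+            # Reduce(MEAN) lowers to a quantized average pool whose window
+            # covers the reduced spatial dimensions.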
+            MaceOp.Reduce.name: HexagonOp.QuantizedAvgPool_8.name,
             MaceOp.ResizeBilinear.name:
                 HexagonOp.QuantizedResizeBilinear_8.name,
             MaceOp.SpaceToBatchND.name: HexagonOp.SpaceToBatchND_8.name,
@@ -222,6 +224,43 @@ class HexagonConverter(base_converter.ConverterInterface):
                 strides_tensor.dims.extend(
                     [1, strides_arg.ints[0], strides_arg.ints[1], 1])
                 op.input.extend([window_tensor.name, strides_tensor.name])
+            elif op.type == MaceOp.Reduce.name:
+                self.add_min_max_const_node(op, op.input[0])
+                reduce_type_arg = ConverterUtil.get_arg(
+                    op, MaceKeyword.mace_reduce_type_str)
+                mace_check(reduce_type_arg.i == ReduceType.MEAN.value,
+                           "Hexagon Reduce only supports Mean now.")
+                keep_dims_arg = ConverterUtil.get_arg(
+                    op, MaceKeyword.mace_keepdims_str)
+                mace_check(keep_dims_arg.i == 1,
+                           "Hexagon Reduce Mean only supports keep dims now.")
+                axis_arg = ConverterUtil.get_arg(op, MaceKeyword.mace_axis_str)
+                mace_check(1 <= len(axis_arg.ints) <= 2,
+                           "Hexagon Reduce Mean only supports spatial now.")
+                for i in axis_arg.ints:
+                    mace_check(1 <= i <= 2,
+                               "Hexagon Reduce Mean only supports spatial now.")
+                producer_op_name, _ = get_op_and_port_from_tensor(op.input[0])
+                input_dims = None
+                for producer_op in self._model.op:
+                    if producer_op.name == producer_op_name:
+                        input_dims = producer_op.output_shape[0].dims
+                        break
+                mace_check(input_dims is not None, "Missing input shape.")
+                window_tensor = self._model.tensors.add()
+                window_tensor.name = op.name + '/window:0'
+                window_tensor.data_type = mace_pb2.DT_INT32
+                if len(axis_arg.ints) == 1:
+                    dim1, dim2 = (input_dims[1], 1) \
+                        if axis_arg.ints[0] == 1 else (1, input_dims[2])
+                else:
+                    dim1, dim2 = input_dims[1], input_dims[2]
+                window_tensor.dims.extend([1, dim1, dim2, 1])
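+                # Strides equal the window so each reduced region is pooled
+                # exactly once.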
+                strides_tensor = self._model.tensors.add()
+                strides_tensor.name = op.name + '/strides:0'
+                strides_tensor.data_type = mace_pb2.DT_INT32
+                strides_tensor.dims.extend([1, dim1, dim2, 1])
+                op.input.extend([window_tensor.name, strides_tensor.name])
             elif op.type == MaceOp.ResizeBilinear.name:
                 newdim_arg = ConverterUtil.get_arg(
                     op, MaceKeyword.mace_resize_size_str)
diff --git a/mace/python/tools/converter_tool/transformer.py b/mace/python/tools/converter_tool/transformer.py
index 9ea6b6d83628cfef70c58db0b401472d29565776..cf426941b933add7e5f5e5b7e6627a9a290c99de 100644
--- a/mace/python/tools/converter_tool/transformer.py
+++ b/mace/python/tools/converter_tool/transformer.py
@@ -113,7 +113,6 @@ class Transformer(base_converter.ConverterInterface):
         self._consts = {}
         self._consumers = {}
         self._producer = {}
-        self._target_data_format = DataFormat.NHWC
         self._quantize_activation_info = {}
         self._quantized_tensor = set()
 
@@ -996,8 +995,7 @@ class Transformer(base_converter.ConverterInterface):
                 if arg.name == MaceKeyword.mace_paddings_str:
                     mace_check(len(arg.ints) == 8,
                                "pad dim rank should be 8.")
-                    if ConverterUtil.data_format(op) == DataFormat.NCHW \
-                            and self._target_data_format == DataFormat.NHWC:  # noqa
+                    if ConverterUtil.data_format(op) == DataFormat.NCHW:
                         print("Transpose pad args: %s(%s)"
                               % (op.name, op.type))
                         self.transpose_shape(arg.ints,
@@ -1006,7 +1004,6 @@ class Transformer(base_converter.ConverterInterface):
             for arg in op.arg:
                 if arg.name == MaceKeyword.mace_axis_str:
                     if (ConverterUtil.data_format(op) == DataFormat.NCHW
-                            and self._target_data_format == DataFormat.NHWC
                             and len(op.output_shape[0].dims) == 4):
                         print("Transpose concat/split args: %s(%s)"
                               % (op.name, op.type))
@@ -1023,8 +1020,7 @@ class Transformer(base_converter.ConverterInterface):
                         len(input_shape) == 2:
                     axis_arg = ConverterUtil.get_arg(
                         op, MaceKeyword.mace_axis_str)
-                    if axis_arg.i == 1 \
-                            and self._target_data_format == DataFormat.NHWC:  # noqa
+                    if axis_arg.i == 1:
                         axis_arg.i = 3
 
             elif op.type == MaceOp.Squeeze.name:
@@ -1041,8 +1037,7 @@ class Transformer(base_converter.ConverterInterface):
             for arg in op.arg:
                 if arg.name == MaceKeyword.mace_axis_str:
                     if ConverterUtil.data_format(
-                            op) == DataFormat.NCHW \
-                            and self._target_data_format == DataFormat.NHWC:  # noqa
+                            op) == DataFormat.NCHW:
                         print("Transpose reduce args: %s(%s)"
                               % (op.name, op.type))
                         reduce_axises = list(arg.ints)
@@ -1062,15 +1057,12 @@ class Transformer(base_converter.ConverterInterface):
         # transpose op output shape
         data_format = ConverterUtil.data_format(op)
         if data_format is not None \
-                and data_format != self._target_data_format:
+                and data_format != DataFormat.NHWC:
             print("Transpose output shapes: %s(%s)" % (op.name, op.type))
             for output_shape in op.output_shape:
                 if len(output_shape.dims) == 4:
                     self.transpose_shape(output_shape.dims,
                                          [0, 2, 3, 1])
-            ConverterUtil.get_arg(op,
-                                  MaceKeyword.mace_data_format_str).i = \
-                self._target_data_format.value
 
         return False
 
@@ -1683,6 +1675,7 @@ class Transformer(base_converter.ConverterInterface):
         print("Add default quantize info for ops like Pooling, Softmax")
         for op in self._model.op:
             if op.type in [MaceOp.Pooling.name,
+                           MaceOp.Reduce.name,
                            MaceOp.Squeeze.name,
                            MaceOp.Reshape.name,
                            MaceOp.ResizeBilinear.name,