Commit 0caade30 authored by Bin Li

Add Reduce for quantized CPU and DSP

Parent c23719f2
- // Copyright 2018 Xiaomi, Inc. All rights reserved.
+ // Copyright 2018 The MACE Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
......
- // Copyright 2018 Xiaomi, Inc. All rights reserved.
+ // Copyright 2018 The MACE Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
......
......@@ -73,6 +73,9 @@ class ReduceOp<DeviceType::CPU, T> : public ReduceOpBase {
const Tensor *input = this->Input(0);
Tensor *output = this->Output(0);
Simplify(input);
// Use the same scale and zero point for input and output.
output->SetScale(input->scale());
output->SetZeroPoint(input->zero_point());
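// Mean, min and max of uint8 values always fall inside the input's value
// range, so the input's quantization parameters stay valid for the output
// and no requantization pass is needed.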
output->Resize(out_shape_);
Compute(input, output);
return MaceStatus::MACE_SUCCESS;
......@@ -92,7 +95,8 @@ class ReduceOp<DeviceType::CPU, T> : public ReduceOpBase {
axis_[i] + input->dim_size();
auto df = static_cast<DataFormat>(Operation::GetOptionalArg<int>(
"data_format", DataFormat::DF_NONE));
- if (df == DataFormat::NHWC && input->dim_size() == 4) {
+ if (df == DataFormat::NHWC && DataTypeToEnum<T>::value != DT_UINT8
+     && input->dim_size() == 4) {
if (index == 1 || index == 2) index = index + 1;
else if (index == 3) index = 1;
}
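// Quantized (uint8) tensors are kept in NHWC, so the NCHW axis remapping
// above does not apply to them.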
......@@ -132,7 +136,7 @@ class ReduceOp<DeviceType::CPU, T> : public ReduceOpBase {
}
}
- void compute_reduce_1(const T *input, ReduceType type, T *output) {
+ void Reduce1Dims(const T *input, ReduceType type, T *output) {
if (reduce_first_axis_) {
if (type == ReduceType::MEAN) {
T tmp = 0;
......@@ -166,7 +170,7 @@ class ReduceOp<DeviceType::CPU, T> : public ReduceOpBase {
}
}
- void compute_reduce_2(const T *input, ReduceType type, T *output) {
+ void Reduce2Dims(const T *input, ReduceType type, T *output) {
if (reduce_first_axis_) {
if (type == ReduceType::MEAN) {
#pragma omp parallel for schedule(runtime)
......@@ -250,7 +254,7 @@ class ReduceOp<DeviceType::CPU, T> : public ReduceOpBase {
}
}
- void compute_reduce_3(const T *input, ReduceType type, T *output) {
+ void Reduce3Dims(const T *input, ReduceType type, T *output) {
if (reduce_first_axis_) {
if (type == ReduceType::MEAN) {
#pragma omp parallel for collapse(1) schedule(runtime)
......@@ -364,7 +368,7 @@ class ReduceOp<DeviceType::CPU, T> : public ReduceOpBase {
}
}
- void compute_reduce_4(const T *input, ReduceType type, T *output) {
+ void Reduce4Dims(const T *input, ReduceType type, T *output) {
if (reduce_first_axis_) {
if (type == ReduceType::MEAN) {
#pragma omp parallel for collapse(2) schedule(runtime)
......@@ -498,7 +502,6 @@ class ReduceOp<DeviceType::CPU, T> : public ReduceOpBase {
}
}
void Compute(const Tensor *input, Tensor *output) {
Tensor::MappingGuard input_mapper(input);
const T *input_ptr = input->data<T>();
......@@ -507,16 +510,16 @@ class ReduceOp<DeviceType::CPU, T> : public ReduceOpBase {
memset(output_ptr, 0, output->size() * sizeof(T));
switch (data_reshape_.size()) {
case 1:
- compute_reduce_1(input_ptr, reduce_type_, output_ptr);
+ Reduce1Dims(input_ptr, reduce_type_, output_ptr);
break;
case 2:
- compute_reduce_2(input_ptr, reduce_type_, output_ptr);
+ Reduce2Dims(input_ptr, reduce_type_, output_ptr);
break;
case 3:
- compute_reduce_3(input_ptr, reduce_type_, output_ptr);
+ Reduce3Dims(input_ptr, reduce_type_, output_ptr);
break;
case 4:
- compute_reduce_4(input_ptr, reduce_type_, output_ptr);
+ Reduce4Dims(input_ptr, reduce_type_, output_ptr);
break;
default:
MACE_CHECK(false, "not implemented in mace")
......@@ -532,6 +535,311 @@ class ReduceOp<DeviceType::CPU, T> : public ReduceOpBase {
std::vector<index_t> out_shape_;
};
#ifdef MACE_ENABLE_QUANTIZE
template <>
void ReduceOp<DeviceType::CPU, uint8_t>::Reduce1Dims(
const uint8_t *input, ReduceType type, uint8_t *output) {
if (reduce_first_axis_) {
if (type == ReduceType::MEAN) {
uint32_t tmp = 0;
for (int i = 0; i < data_reshape_[0]; ++i) {
tmp = tmp + input[i];
}
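// Adding half the divisor before dividing rounds the integer mean to the
// nearest value instead of truncating.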
output[0] = static_cast<uint8_t>(
(tmp + data_reshape_[0] / 2) / data_reshape_[0]);
} else if (type == ReduceType::MIN) {
uint8_t tmp = input[0];
for (int i = 1; i < data_reshape_[0]; ++i) {
tmp = std::min<uint8_t>(tmp, input[i]);
}
output[0] = tmp;
} else if (type == ReduceType::MAX) {
uint8_t tmp = input[0];
for (int i = 1; i < data_reshape_[0]; ++i) {
tmp = std::max<uint8_t>(tmp, input[i]);
}
output[0] = tmp;
} else {
MACE_NOT_IMPLEMENTED;
}
} else {
memcpy(output, input, data_reshape_[0] * sizeof(uint8_t));
}
}
template <>
void ReduceOp<DeviceType::CPU, uint8_t>::Reduce2Dims(
const uint8_t *input, ReduceType type, uint8_t *output) {
if (reduce_first_axis_) {
if (type == ReduceType::MEAN) {
#pragma omp parallel for schedule(runtime)
for (int i = 0; i < data_reshape_[1]; ++i) {
uint32_t tmp = 0;
for (int j = 0; j < data_reshape_[0]; ++j) {
tmp += input[j * data_reshape_[1] + i];
}
output[i] = static_cast<uint8_t>(
(tmp + data_reshape_[0] / 2) / data_reshape_[0]);
}
} else if (type == ReduceType::MIN) {
#pragma omp parallel for schedule(runtime)
for (int i = 0; i < data_reshape_[1]; ++i) {
uint8_t tmp = input[i];
for (int j = 1; j < data_reshape_[0]; ++j) {
tmp = std::min(tmp, input[j * data_reshape_[1] + i]);
}
output[i] = tmp;
}
} else if (type == ReduceType::MAX) {
#pragma omp parallel for schedule(runtime)
for (int i = 0; i < data_reshape_[1]; ++i) {
uint8_t tmp = input[i];
for (int j = 1; j < data_reshape_[0]; ++j) {
tmp = std::max(tmp, input[j * data_reshape_[1] + i]);
}
output[i] = tmp;
}
} else {
MACE_NOT_IMPLEMENTED;
}
} else {
if (type == ReduceType::MEAN) {
#pragma omp parallel for schedule(runtime)
for (int i = 0; i < data_reshape_[0]; ++i) {
uint32_t tmp = 0;
for (int j = 0; j < data_reshape_[1]; ++j) {
tmp += input[i * data_reshape_[1] + j];
}
output[i] = static_cast<uint8_t>(
(tmp + data_reshape_[1] / 2) / data_reshape_[1]);
}
} else if (type == ReduceType::MIN) {
#pragma omp parallel for schedule(runtime)
for (int i = 0; i < data_reshape_[0]; ++i) {
uint8_t tmp = input[i * data_reshape_[1]];
for (int j = 1; j < data_reshape_[1]; ++j) {
tmp = std::min(tmp, input[i * data_reshape_[1] + j]);
}
output[i] = tmp;
}
} else if (type == ReduceType::MAX) {
#pragma omp parallel for schedule(runtime)
for (int i = 0; i < data_reshape_[0]; ++i) {
uint8_t tmp = input[i * data_reshape_[1]];
for (int j = 1; j < data_reshape_[1]; ++j) {
tmp = std::max(tmp, input[i * data_reshape_[1] + j]);
}
output[i] = tmp;
}
} else {
MACE_NOT_IMPLEMENTED;
}
}
}
template <>
void ReduceOp<DeviceType::CPU, uint8_t>::Reduce3Dims(
const uint8_t *input, ReduceType type, uint8_t *output) {
if (reduce_first_axis_) {
if (type == ReduceType::MEAN) {
#pragma omp parallel for collapse(1) schedule(runtime)
for (int i = 0; i < data_reshape_[1]; ++i) {
uint32_t tmp = 0;
for (int j = 0; j < data_reshape_[2]; ++j) {
for (int k = 0; k < data_reshape_[0]; ++k) {
tmp += input[(k * data_reshape_[1] + i) * data_reshape_[2] + j];
}
}
index_t dim = data_reshape_[0] * data_reshape_[2];
output[i] = static_cast<uint8_t>((tmp + dim / 2) / dim);
}
} else if (type == ReduceType::MIN) {
#pragma omp parallel for collapse(1) schedule(runtime)
for (int i = 0; i < data_reshape_[1]; ++i) {
uint8_t tmp = input[i * data_reshape_[2]];
for (int j = 0; j < data_reshape_[2]; ++j) {
for (int k = 0; k < data_reshape_[0]; ++k) {
tmp = std::min(tmp,
input[(k * data_reshape_[1] + i) * data_reshape_[2]
+ j]);
}
}
output[i] = tmp;
}
} else if (type == ReduceType::MAX) {
#pragma omp parallel for collapse(1) schedule(runtime)
for (int i = 0; i < data_reshape_[1]; ++i) {
uint8_t tmp = input[i * data_reshape_[2]];
for (int j = 0; j < data_reshape_[2]; ++j) {
for (int k = 0; k < data_reshape_[0]; ++k) {
tmp =
std::max(tmp,
input[(k * data_reshape_[1] + i)
* data_reshape_[2] + j]);
}
}
output[i] = tmp;
}
} else {
MACE_NOT_IMPLEMENTED;
}
} else {
if (type == ReduceType::MEAN) {
#pragma omp parallel for collapse(2) schedule(runtime)
for (int i = 0; i < data_reshape_[0]; ++i) {
for (int j = 0; j < data_reshape_[2]; ++j) {
uint32_t tmp = 0;
for (int k = 0; k < data_reshape_[1]; ++k) {
tmp += input[(i * data_reshape_[1] + k) * data_reshape_[2] + j];
}
output[i * data_reshape_[2] + j] =
static_cast<uint8_t>((tmp + data_reshape_[1] / 2) /
data_reshape_[1]);
}
}
} else if (type == ReduceType::MIN) {
#pragma omp parallel for collapse(2) schedule(runtime)
for (int i = 0; i < data_reshape_[0]; ++i) {
for (int j = 0; j < data_reshape_[2]; ++j) {
uint8_t tmp = input[i * data_reshape_[1] * data_reshape_[2] + j];
for (int k = 1; k < data_reshape_[1]; ++k) {
tmp = std::min(tmp,
input[(i * data_reshape_[1] + k) *
data_reshape_[2] + j]);
}
output[i * data_reshape_[2] + j] = tmp;
}
}
} else if (type == ReduceType::MAX) {
#pragma omp parallel for collapse(2) schedule(runtime)
for (int i = 0; i < data_reshape_[0]; ++i) {
for (int j = 0; j < data_reshape_[2]; ++j) {
uint8_t tmp = input[i * data_reshape_[1] * data_reshape_[2] + j];
for (int k = 1; k < data_reshape_[1]; ++k) {
tmp = std::max(tmp,
input[(i * data_reshape_[1] + k) *
data_reshape_[2] + j]);
}
output[i * data_reshape_[2] + j] = tmp;
}
}
} else {
MACE_NOT_IMPLEMENTED;
}
}
}
template <>
void ReduceOp<DeviceType::CPU, uint8_t>::Reduce4Dims(
const uint8_t *input, ReduceType type, uint8_t *output) {
if (reduce_first_axis_) {
if (type == ReduceType::MEAN) {
#pragma omp parallel for collapse(2) schedule(runtime)
for (int i = 0; i < data_reshape_[1]; ++i) {
for (int j = 0; j < data_reshape_[3]; ++j) {
uint32_t tmp = 0;
for (int k = 0; k < data_reshape_[2]; ++k) {
for (int t = 0; t < data_reshape_[0]; ++t) {
tmp += input[((t * data_reshape_[1] + i) *
data_reshape_[2] + k)*data_reshape_[3] + j];
}
}
index_t dim = data_reshape_[0] * data_reshape_[2];
output[i * data_reshape_[3] + j] =
static_cast<uint8_t>((tmp + dim / 2) / dim);
}
}
} else if (type == ReduceType::MIN) {
#pragma omp parallel for collapse(2) schedule(runtime)
for (int i = 0; i < data_reshape_[1]; ++i) {
for (int j = 0; j < data_reshape_[3]; ++j) {
uint8_t tmp = input[i * data_reshape_[2] * data_reshape_[3] + j];
for (int k = 0; k < data_reshape_[2]; ++k) {
for (int t = 0; t < data_reshape_[0]; ++t) {
tmp = std::min(tmp,
input[((t * data_reshape_[1] + i) *
data_reshape_[2] + k)*data_reshape_[3] + j]);
}
}
output[i * data_reshape_[3] + j] = tmp;
}
}
} else if (type == ReduceType::MAX) {
#pragma omp parallel for collapse(2) schedule(runtime)
for (int i = 0; i < data_reshape_[1]; ++i) {
for (int j = 0; j < data_reshape_[3]; ++j) {
uint8_t tmp = input[i * data_reshape_[2] * data_reshape_[3] + j];
for (int k = 0; k < data_reshape_[2]; ++k) {
for (int t = 0; t < data_reshape_[0]; ++t) {
tmp = std::max(tmp,
input[((t * data_reshape_[1] + i) *
data_reshape_[2] + k)*data_reshape_[3] + j]);
}
}
output[i * data_reshape_[3] + j] = tmp;
}
}
} else {
MACE_NOT_IMPLEMENTED;
}
} else {
if (type == ReduceType::MEAN) {
#pragma omp parallel for collapse(2) schedule(runtime)
for (int i = 0; i < data_reshape_[0]; ++i) {
for (int j = 0; j < data_reshape_[2]; ++j) {
uint32_t tmp = 0;
for (int k = 0; k < data_reshape_[1]; ++k) {
for (int t = 0; t < data_reshape_[3]; ++t) {
tmp += input[((i * data_reshape_[1] + k) *
data_reshape_[2] + j)*data_reshape_[3] + t];
}
}
index_t dim = data_reshape_[1] * data_reshape_[3];
output[i * data_reshape_[2] + j] =
static_cast<uint8_t>((tmp + dim / 2) / dim);
}
}
} else if (type == ReduceType::MIN) {
#pragma omp parallel for collapse(2) schedule(runtime)
for (int i = 0; i < data_reshape_[0]; ++i) {
for (int j = 0; j < data_reshape_[2]; ++j) {
uint8_t tmp = input[(i * data_reshape_[1] *
data_reshape_[2] + j)*data_reshape_[3]];
for (int k = 0; k < data_reshape_[1]; ++k) {
for (int t = 0; t < data_reshape_[3]; ++t) {
tmp =
std::min(tmp,
input[((i * data_reshape_[1] + k) *
data_reshape_[2] + j)*data_reshape_[3] + t]);
}
}
output[i * data_reshape_[2] + j] = tmp;
}
}
} else if (type == ReduceType::MAX) {
#pragma omp parallel for collapse(2) schedule(runtime)
for (int i = 0; i < data_reshape_[0]; ++i) {
for (int j = 0; j < data_reshape_[2]; ++j) {
uint8_t tmp = input[(i * data_reshape_[1] *
data_reshape_[2] + j)*data_reshape_[3]];
for (int k = 0; k < data_reshape_[1]; ++k) {
for (int t = 0; t < data_reshape_[3]; ++t) {
tmp =
std::max(tmp,
input[((i * data_reshape_[1] + k) *
data_reshape_[2] + j)*data_reshape_[3] + t]);
}
}
output[i * data_reshape_[2] + j] = tmp;
}
}
} else {
MACE_NOT_IMPLEMENTED;
}
}
}
#endif // MACE_ENABLE_QUANTIZE
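For reference, a minimal standalone sketch (not part of this commit) of the round-to-nearest integer division used by the quantized MEAN specializations above; the helper name rounded_mean_u8 is hypothetical:

#include <cstdint>
#include <cstdio>
#include <vector>

// Hypothetical helper mirroring the quantized MEAN reduction: accumulate
// in 32 bits, then add half the divisor so the division rounds to the
// nearest integer instead of truncating. Assumes v is non-empty.
static uint8_t rounded_mean_u8(const std::vector<uint8_t> &v) {
  uint32_t sum = 0;
  for (uint8_t x : v) sum += x;
  return static_cast<uint8_t>((sum + v.size() / 2) / v.size());
}

int main() {
  const std::vector<uint8_t> v = {1, 2, 2};  // exact mean is 5/3 ~= 1.67
  std::printf("truncating: %u\n", 5u / 3u);                                    // 1
  std::printf("rounding:   %u\n", static_cast<unsigned>(rounded_mean_u8(v)));  // 2
  return 0;
}

Because the output reuses the input's scale and zero point, this integer rounding is the only extra quantization error the MEAN path introduces (at most half a quantization step).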
#ifdef MACE_ENABLE_OPENCL
template <typename T>
class ReduceOp<DeviceType::GPU, T> : public ReduceOpBase {
......@@ -562,7 +870,10 @@ class ReduceOp<DeviceType::GPU, T> : public ReduceOpBase {
void RegisterReduce(OpRegistryBase *op_registry) {
MACE_REGISTER_OP(op_registry, "Reduce", ReduceOp,
DeviceType::CPU, float);
#ifdef MACE_ENABLE_QUANTIZE
MACE_REGISTER_OP(op_registry, "Reduce", ReduceOp,
DeviceType::CPU, uint8_t);
#endif // MACE_ENABLE_QUANTIZE
#ifdef MACE_ENABLE_OPENCL
MACE_REGISTER_OP(op_registry, "Reduce", ReduceOp,
DeviceType::GPU, float);
......
......@@ -644,6 +644,89 @@ TEST_F(ReduceOpTest, GPURandomHalf) {
RandomTest<DeviceType::GPU, half>({1, 511, 561, 11}, {1, 2});
}
namespace {
void TestQuant(const std::vector<index_t> &input_shape,
const std::vector<int> &axis) {
auto func = [&](ReduceType type) {
OpsTestNet net;
net.AddRandomInput<CPU, float>(
"Input", input_shape, false, false);
net.TransformDataFormat<DeviceType::CPU, float>(
"Input", NHWC, "InputNCHW", NCHW);
net.AddRandomInput<DeviceType::CPU, float>(
"OutputNCHW", input_shape, false, true, true);
OpDefBuilder("Reduce", "ReduceTest")
.Input("InputNCHW")
.AddIntsArg("axis", axis)
.AddIntArg("keepdims", 1)
.AddIntArg("reduce_type", type)
.AddIntArg("data_format", DataFormat::NHWC)
.Output("OutputNCHW")
.AddIntArg("T", DT_FLOAT)
.Finalize(net.NewOperatorDef());
net.RunOp(CPU);
net.TransformDataFormat<DeviceType::CPU, float>(
"OutputNCHW", NCHW, "Output", NHWC);
OpDefBuilder("Quantize", "QuantizeInput")
.Input("Input")
.Output("QuantizedInput")
.OutputType({DT_UINT8})
.AddIntArg("T", DT_UINT8)
.AddIntArg("non_zero", true)
.Finalize(net.NewOperatorDef());
net.RunOp();
net.AddRandomInput<DeviceType::CPU, uint8_t>("QuantizedOutput",
input_shape);
OpDefBuilder("Reduce", "ReduceTest")
.Input("QuantizedInput")
.Output("QuantizedOutput")
.AddIntsArg("axis", axis)
.AddIntArg("keepdims", 1)
.AddIntArg("reduce_type", type)
.AddIntArg("data_format", DataFormat::NHWC)
.AddIntArg("T", DT_UINT8)
.Finalize(net.NewOperatorDef());
net.RunOp();
OpDefBuilder("Dequantize", "DeQuantizeTest")
.Input("QuantizedOutput")
.Output("DequantizedOutput")
.OutputType({DT_FLOAT})
.AddIntArg("T", DT_UINT8)
.Finalize(net.NewOperatorDef());
net.RunOp();
// Check
ExpectTensorSimilar<float>(*net.GetOutput("Output"),
*net.GetTensor("DequantizedOutput"), 0.01);
};
for (ReduceType type : {MEAN, MIN, MAX}) {
func(type);
}
}
} // namespace
TEST_F(ReduceOpTest, Quant) {
// reduce 1, first axis
TestQuant({1, 1, 3, 4}, {2, 3});
// reduce 2, first axis
TestQuant({1, 4, 4, 320}, {1, 2});
// reduce 2, not first axis
TestQuant({16, 320, 4, 4}, {2, 3});
// reduce 3, first axis
TestQuant({1, 4, 323, 4}, {1, 3});
// reduce 3, not first axis
TestQuant({15, 117, 15, 32}, {2});
// reduce 4, first axis
TestQuant({4, 323, 4, 4}, {0, 2});
// reduce 4, not first axis
TestQuant({32, 4, 323, 16}, {1, 3});
}
} // namespace test
} // namespace ops
} // namespace mace
......@@ -25,6 +25,7 @@ from mace.python.tools.converter_tool.base_converter import MaceKeyword
from mace.python.tools.converter_tool.base_converter import MaceOp
from mace.python.tools.converter_tool.base_converter import PaddingMode
from mace.python.tools.converter_tool.base_converter import PoolingType
from mace.python.tools.converter_tool.base_converter import ReduceType
from mace.python.tools.convert_util import mace_check
from mace.python.tools import graph_util
......@@ -63,6 +64,7 @@ class HexagonOps(object):
MaceOp.Quantize.name: HexagonOp.QuantizeINPUT_f_to_8.name,
MaceOp.Pooling.name: [HexagonOp.QuantizedAvgPool_8.name,
HexagonOp.QuantizedMaxPool_8.name],
MaceOp.Reduce.name: HexagonOp.QuantizedAvgPool_8.name,
MaceOp.ResizeBilinear.name:
HexagonOp.QuantizedResizeBilinear_8.name,
MaceOp.SpaceToBatchND.name: HexagonOp.SpaceToBatchND_8.name,
......@@ -222,6 +224,43 @@ class HexagonConverter(base_converter.ConverterInterface):
strides_tensor.dims.extend(
[1, strides_arg.ints[0], strides_arg.ints[1], 1])
op.input.extend([window_tensor.name, strides_tensor.name])
elif op.type == MaceOp.Reduce.name:
self.add_min_max_const_node(op, op.input[0])
reduce_type_arg = ConverterUtil.get_arg(
op, MaceKeyword.mace_reduce_type_str)
mace_check(reduce_type_arg.i == ReduceType.MEAN.value,
"Hexagon Reduce only supports Mean now.")
keep_dims_arg = ConverterUtil.get_arg(
op, MaceKeyword.mace_keepdims_str)
mace_check(keep_dims_arg.i == 1,
"Hexagon Reduce Mean only supports keep dims now.")
axis_arg = ConverterUtil.get_arg(op, MaceKeyword.mace_axis_str)
mace_check(1 <= len(axis_arg.ints) <= 2,
"Hexagon Reduce Mean only supports spatial now.")
for i in axis_arg.ints:
mace_check(1 <= i <= 2,
"Hexagon Reduce Mean only supports spatial now")
producer_op_name, _ = get_op_and_port_from_tensor(op.input[0])
input_dims = None
for producer_op in self._model.op:
if producer_op.name == producer_op_name:
input_dims = producer_op.output_shape[0].dims
break
mace_check(input_dims is not None, "Missing input shape.")
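# A spatial mean is lowered to quantized average pooling: the window
# covers the reduced spatial dims and the strides equal the window, so a
# single pooled value is produced per reduced region.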
window_tensor = self._model.tensors.add()
window_tensor.name = op.name + '/window:0'
window_tensor.data_type = mace_pb2.DT_INT32
if len(axis_arg.ints) == 1:
dim1, dim2 = (input_dims[1], 1) \
if axis_arg.ints[0] == 1 else (1, input_dims[2])
else:
dim1, dim2 = input_dims[1], input_dims[2]
window_tensor.dims.extend([1, dim1, dim2, 1])
strides_tensor = self._model.tensors.add()
strides_tensor.name = op.name + '/strides:0'
strides_tensor.data_type = mace_pb2.DT_INT32
strides_tensor.dims.extend([1, dim1, dim2, 1])
op.input.extend([window_tensor.name, strides_tensor.name])
elif op.type == MaceOp.ResizeBilinear.name:
newdim_arg = ConverterUtil.get_arg(
op, MaceKeyword.mace_resize_size_str)
......
......@@ -113,7 +113,6 @@ class Transformer(base_converter.ConverterInterface):
self._consts = {}
self._consumers = {}
self._producer = {}
- self._target_data_format = DataFormat.NHWC
self._quantize_activation_info = {}
self._quantized_tensor = set()
......@@ -996,8 +995,7 @@ class Transformer(base_converter.ConverterInterface):
if arg.name == MaceKeyword.mace_paddings_str:
mace_check(len(arg.ints) == 8,
"pad dim rank should be 8.")
- if ConverterUtil.data_format(op) == DataFormat.NCHW \
-         and self._target_data_format == DataFormat.NHWC:  # noqa
+ if ConverterUtil.data_format(op) == DataFormat.NCHW:
print("Transpose pad args: %s(%s)"
% (op.name, op.type))
self.transpose_shape(arg.ints,
......@@ -1006,7 +1004,6 @@ class Transformer(base_converter.ConverterInterface):
for arg in op.arg:
if arg.name == MaceKeyword.mace_axis_str:
if (ConverterUtil.data_format(op) == DataFormat.NCHW
-         and self._target_data_format == DataFormat.NHWC
and len(op.output_shape[0].dims) == 4):
print("Transpose concat/split args: %s(%s)"
% (op.name, op.type))
......@@ -1023,8 +1020,7 @@ class Transformer(base_converter.ConverterInterface):
len(input_shape) == 2:
axis_arg = ConverterUtil.get_arg(
op, MaceKeyword.mace_axis_str)
- if axis_arg.i == 1 \
-         and self._target_data_format == DataFormat.NHWC:  # noqa
+ if axis_arg.i == 1:
axis_arg.i = 3
elif op.type == MaceOp.Squeeze.name:
......@@ -1041,8 +1037,7 @@ class Transformer(base_converter.ConverterInterface):
for arg in op.arg:
if arg.name == MaceKeyword.mace_axis_str:
if ConverterUtil.data_format(
-         op) == DataFormat.NCHW \
-         and self._target_data_format == DataFormat.NHWC:  # noqa
+         op) == DataFormat.NCHW:
print("Transpose reduce args: %s(%s)"
% (op.name, op.type))
reduce_axises = list(arg.ints)
......@@ -1062,15 +1057,12 @@ class Transformer(base_converter.ConverterInterface):
# transpose op output shape
data_format = ConverterUtil.data_format(op)
if data_format is not None \
-         and data_format != self._target_data_format:
+         and data_format != DataFormat.NHWC:
print("Transpose output shapes: %s(%s)" % (op.name, op.type))
for output_shape in op.output_shape:
if len(output_shape.dims) == 4:
self.transpose_shape(output_shape.dims,
[0, 2, 3, 1])
- ConverterUtil.get_arg(op,
-                       MaceKeyword.mace_data_format_str).i = \
-     self._target_data_format.value
return False
......@@ -1683,6 +1675,7 @@ class Transformer(base_converter.ConverterInterface):
print("Add default quantize info for ops like Pooling, Softmax")
for op in self._model.op:
if op.type in [MaceOp.Pooling.name,
MaceOp.Reduce.name,
MaceOp.Squeeze.name,
MaceOp.Reshape.name,
MaceOp.ResizeBilinear.name,
......