提交 56572de5 编写于 作者: 刘托

Merge branch 'master' into 'revert-d1d7a29c'

# Conflicts:
#   tools/python/transform/hexagon_converter.py
#   tools/python/transform/tensorflow_converter.py
#   tools/python/transform/transformer.py
...@@ -432,7 +432,7 @@ bool HexagonDSPWrapper::ExecuteGraph(const Tensor &input_tensor, ...@@ -432,7 +432,7 @@ bool HexagonDSPWrapper::ExecuteGraph(const Tensor &input_tensor,
} }
MACE_CHECK(output_bytes == output_tensor->raw_size(), MACE_CHECK(output_bytes == output_tensor->raw_size(),
"wrong output bytes inferred."); "wrong output bytes inferred.");
return res == 0; return true;
} }
bool HexagonDSPWrapper::ExecuteGraphNew( bool HexagonDSPWrapper::ExecuteGraphNew(
...@@ -495,6 +495,7 @@ bool HexagonDSPWrapper::ExecuteGraphNew( ...@@ -495,6 +495,7 @@ bool HexagonDSPWrapper::ExecuteGraphNew(
num_inputs * kNumMetaData, num_inputs * kNumMetaData,
outputs.data(), outputs.data(),
num_outputs * kNumMetaData); num_outputs * kNumMetaData);
MACE_CHECK(res == 0, "execute error");
// handle hexagon output // handle hexagon output
for (size_t i = 0; i < num_outputs; ++i) { for (size_t i = 0; i < num_outputs; ++i) {
...@@ -504,12 +505,12 @@ bool HexagonDSPWrapper::ExecuteGraphNew( ...@@ -504,12 +505,12 @@ bool HexagonDSPWrapper::ExecuteGraphNew(
outputs[index].depth}; outputs[index].depth};
MACE_CHECK(output_shape.size() == output_info_[i].shape.size(), MACE_CHECK(output_shape.size() == output_info_[i].shape.size(),
output_shape.size(), " vs ", output_info_[i].shape.size(), output_shape.size(), " vs ", output_info_[i].shape.size(),
"wrong output shape inferred"); " wrong output shape inferred");
for (size_t j = 0; j < output_shape.size(); ++j) { for (size_t j = 0; j < output_shape.size(); ++j) {
MACE_CHECK(static_cast<index_t>(output_shape[j]) MACE_CHECK(static_cast<index_t>(output_shape[j])
== output_info_[i].shape[j], == output_info_[i].shape[j],
output_shape[j], " vs ", output_info_[i].shape[j], output_shape[j], " vs ", output_info_[i].shape[j],
"wrong output shape inferred"); " wrong output shape[", j, "] inferred");
} }
auto output_tensor = output_tensors->at(output_info_[i].name); auto output_tensor = output_tensors->at(output_info_[i].name);
MACE_CHECK(static_cast<index_t>(outputs[index].data_valid_len) MACE_CHECK(static_cast<index_t>(outputs[index].data_valid_len)
...@@ -518,7 +519,7 @@ bool HexagonDSPWrapper::ExecuteGraphNew( ...@@ -518,7 +519,7 @@ bool HexagonDSPWrapper::ExecuteGraphNew(
" wrong output bytes inferred."); " wrong output bytes inferred.");
} }
return res == 0; return true;
} }
} // namespace mace } // namespace mace
...@@ -76,7 +76,8 @@ void CalcPaddingAndOutputSize(const index_t *input_shape, ...@@ -76,7 +76,8 @@ void CalcPaddingAndOutputSize(const index_t *input_shape,
output_height = (input_height - k_extent_height) / strides[0] + 1; output_height = (input_height - k_extent_height) / strides[0] + 1;
output_width = (input_width - k_extent_width) / strides[1] + 1; output_width = (input_width - k_extent_width) / strides[1] + 1;
break; break;
case SAME:output_height = (input_height - 1) / strides[0] + 1; case SAME:
output_height = (input_height - 1) / strides[0] + 1;
output_width = (input_width - 1) / strides[1] + 1; output_width = (input_width - 1) / strides[1] + 1;
break; break;
case FULL: case FULL:
......
...@@ -17,7 +17,8 @@ __kernel void conv_2d(OUT_OF_RANGE_PARAMS ...@@ -17,7 +17,8 @@ __kernel void conv_2d(OUT_OF_RANGE_PARAMS
__private const int out_width, __private const int out_width,
__private const int filter_height, __private const int filter_height,
__private const int filter_width, __private const int filter_width,
__private const int stride, __private const int stride_h,
__private const int stride_w,
__private const int padding_top, __private const int padding_top,
__private const int padding_left, __private const int padding_left,
__private const int dilation_h, __private const int dilation_h,
...@@ -47,12 +48,12 @@ __kernel void conv_2d(OUT_OF_RANGE_PARAMS ...@@ -47,12 +48,12 @@ __kernel void conv_2d(OUT_OF_RANGE_PARAMS
DATA_TYPE4 out3 = 0; DATA_TYPE4 out3 = 0;
#endif #endif
int in_width_stride = mul24(out_w_blks, stride); int in_width_stride = mul24(out_w_blks, stride_w);
int in_width0 = mad24(out_w_blk, stride, -padding_left); int in_width0 = mad24(out_w_blk, stride_w, -padding_left);
int in_width1 = in_width0 + in_width_stride; int in_width1 = in_width0 + in_width_stride;
int in_width2 = in_width1 + in_width_stride; int in_width2 = in_width1 + in_width_stride;
int in_width3 = in_width2 + in_width_stride; int in_width3 = in_width2 + in_width_stride;
const int height_start = mad24((out_hb % out_height), stride, -padding_top); const int height_start = mad24((out_hb % out_height), stride_h, -padding_top);
int in_height_gap = select( int in_height_gap = select(
0, 0,
(-height_start + dilation_h - 1) / dilation_h, (-height_start + dilation_h - 1) / dilation_h,
......
...@@ -15,7 +15,8 @@ __kernel void conv_2d_1x1(OUT_OF_RANGE_PARAMS ...@@ -15,7 +15,8 @@ __kernel void conv_2d_1x1(OUT_OF_RANGE_PARAMS
__private const int in_ch_blks, __private const int in_ch_blks,
__private const int height, __private const int height,
__private const int width, __private const int width,
__private const int stride) { __private const int stride_h,
__private const int stride_w) {
const int out_ch_blk = get_global_id(0); const int out_ch_blk = get_global_id(0);
const int out_w_blk = get_global_id(1); const int out_w_blk = get_global_id(1);
const int out_hb = get_global_id(2); const int out_hb = get_global_id(2);
...@@ -41,14 +42,14 @@ __kernel void conv_2d_1x1(OUT_OF_RANGE_PARAMS ...@@ -41,14 +42,14 @@ __kernel void conv_2d_1x1(OUT_OF_RANGE_PARAMS
#endif #endif
int4 w; int4 w;
int in_width_stride = mul24(out_w_blks, stride); int in_width_stride = mul24(out_w_blks, stride_w);
w.x = mul24(out_w_blk, stride); w.x = mul24(out_w_blk, stride_w);
w.y = w.x + in_width_stride; w.y = w.x + in_width_stride;
w.z = w.y + in_width_stride; w.z = w.y + in_width_stride;
w.w = w.z + in_width_stride; w.w = w.z + in_width_stride;
int batch = out_hb / height; int batch = out_hb / height;
int h_idx = out_hb - mul24(batch, height); int h_idx = out_hb - mul24(batch, height);
int out_hb_idx = mul24(h_idx, stride); int out_hb_idx = mul24(h_idx, stride_h);
w.x = select(w.x, INT_MIN, w.x >= in_width); w.x = select(w.x, INT_MIN, w.x >= in_width);
w.y = select(w.y, INT_MIN, w.y >= in_width); w.y = select(w.y, INT_MIN, w.y >= in_width);
......
...@@ -15,7 +15,8 @@ __kernel void conv_2d_3x3(OUT_OF_RANGE_PARAMS ...@@ -15,7 +15,8 @@ __kernel void conv_2d_3x3(OUT_OF_RANGE_PARAMS
__private const int in_ch_blks, __private const int in_ch_blks,
__private const int out_height, __private const int out_height,
__private const int out_width, __private const int out_width,
__private const int stride, __private const int stride_h,
__private const int stride_w,
__private const int padding_top, __private const int padding_top,
__private const int padding_left, __private const int padding_left,
__private const int dilation_h, __private const int dilation_h,
...@@ -47,13 +48,13 @@ __kernel void conv_2d_3x3(OUT_OF_RANGE_PARAMS ...@@ -47,13 +48,13 @@ __kernel void conv_2d_3x3(OUT_OF_RANGE_PARAMS
DATA_TYPE4 out4 = 0; DATA_TYPE4 out4 = 0;
#endif #endif
int in_width_stride = mul24(out_w_blks, stride); int in_width_stride = mul24(out_w_blks, stride_w);
int in_width0 = mad24(out_w_blk, stride, -padding_left); int in_width0 = mad24(out_w_blk, stride_w, -padding_left);
int in_width1 = in_width0 + in_width_stride; int in_width1 = in_width0 + in_width_stride;
int in_width2 = in_width1 + in_width_stride; int in_width2 = in_width1 + in_width_stride;
int in_width3 = in_width2 + in_width_stride; int in_width3 = in_width2 + in_width_stride;
int in_width4 = in_width3 + in_width_stride; int in_width4 = in_width3 + in_width_stride;
const int height_start = mad24((out_hb % out_height), stride, -padding_top); const int height_start = mad24((out_hb % out_height), stride_h, -padding_top);
int in_height_gap = select( int in_height_gap = select(
0, 0,
(-height_start + dilation_h - 1) / dilation_h, (-height_start + dilation_h - 1) / dilation_h,
......
...@@ -73,8 +73,7 @@ MaceStatus Conv2dKernel::Compute( ...@@ -73,8 +73,7 @@ MaceStatus Conv2dKernel::Compute(
Tensor *output) { Tensor *output) {
index_t kernel_h = filter->dim(2); index_t kernel_h = filter->dim(2);
index_t kernel_w = filter->dim(3); index_t kernel_w = filter->dim(3);
if (strides[0] != strides[1] || if (dilations[0] > 1 && (strides[0] > 1 || kernel_h == 1)) {
(dilations[0] > 1 && (strides[0] > 1 || kernel_h == 1))) {
LOG(WARNING) << "OpenCL conv2d kernel with " LOG(WARNING) << "OpenCL conv2d kernel with "
<< "filter" << kernel_h << "x" << kernel_w << "," << "filter" << kernel_h << "x" << kernel_w << ","
<< " stride " << strides[0] << "x" << strides[1] << " stride " << strides[0] << "x" << strides[1]
...@@ -131,6 +130,7 @@ MaceStatus Conv2dKernel::Compute( ...@@ -131,6 +130,7 @@ MaceStatus Conv2dKernel::Compute(
filter, filter,
bias, bias,
strides[0], strides[0],
strides[1],
paddings.data(), paddings.data(),
dilations, dilations,
activation, activation,
...@@ -148,6 +148,7 @@ MaceStatus Conv2dKernel::Compute( ...@@ -148,6 +148,7 @@ MaceStatus Conv2dKernel::Compute(
filter, filter,
bias, bias,
strides[0], strides[0],
strides[1],
paddings.data(), paddings.data(),
dilations, dilations,
activation, activation,
...@@ -165,6 +166,7 @@ MaceStatus Conv2dKernel::Compute( ...@@ -165,6 +166,7 @@ MaceStatus Conv2dKernel::Compute(
filter, filter,
bias, bias,
strides[0], strides[0],
strides[1],
paddings.data(), paddings.data(),
dilations, dilations,
activation, activation,
......
...@@ -33,7 +33,8 @@ extern MaceStatus Conv2dK1x1(OpContext *context, ...@@ -33,7 +33,8 @@ extern MaceStatus Conv2dK1x1(OpContext *context,
const Tensor *input, const Tensor *input,
const Tensor *filter, const Tensor *filter,
const Tensor *bias, const Tensor *bias,
const int stride, const int stride_h,
const int stride_w,
const int *padding, const int *padding,
const int *dilations, const int *dilations,
const ActivationType activation, const ActivationType activation,
...@@ -48,7 +49,8 @@ extern MaceStatus Conv2dK3x3(OpContext *context, ...@@ -48,7 +49,8 @@ extern MaceStatus Conv2dK3x3(OpContext *context,
const Tensor *input, const Tensor *input,
const Tensor *filter, const Tensor *filter,
const Tensor *bias, const Tensor *bias,
const int stride, const int stride_h,
const int stride_w,
const int *padding, const int *padding,
const int *dilations, const int *dilations,
const ActivationType activation, const ActivationType activation,
...@@ -63,7 +65,8 @@ extern MaceStatus Conv2d(OpContext *context, ...@@ -63,7 +65,8 @@ extern MaceStatus Conv2d(OpContext *context,
const Tensor *input, const Tensor *input,
const Tensor *filter, const Tensor *filter,
const Tensor *bias, const Tensor *bias,
const int stride, const int stride_h,
const int stride_w,
const int *padding, const int *padding,
const int *dilations, const int *dilations,
const ActivationType activation, const ActivationType activation,
......
...@@ -71,7 +71,8 @@ MaceStatus Conv2dK1x1(OpContext *context, ...@@ -71,7 +71,8 @@ MaceStatus Conv2dK1x1(OpContext *context,
const Tensor *input, const Tensor *input,
const Tensor *filter, const Tensor *filter,
const Tensor *bias, const Tensor *bias,
const int stride, const int stride_h,
const int stride_w,
const int *padding, const int *padding,
const int *dilations, const int *dilations,
const ActivationType activation, const ActivationType activation,
...@@ -170,7 +171,8 @@ MaceStatus Conv2dK1x1(OpContext *context, ...@@ -170,7 +171,8 @@ MaceStatus Conv2dK1x1(OpContext *context,
kernel->setArg(idx++, static_cast<int>(input_channel_blocks)); kernel->setArg(idx++, static_cast<int>(input_channel_blocks));
kernel->setArg(idx++, static_cast<int>(height)); kernel->setArg(idx++, static_cast<int>(height));
kernel->setArg(idx++, static_cast<int>(width)); kernel->setArg(idx++, static_cast<int>(width));
kernel->setArg(idx++, stride); kernel->setArg(idx++, stride_h);
kernel->setArg(idx++, stride_w);
*prev_input_shape = input->shape(); *prev_input_shape = input->shape();
} }
......
...@@ -64,7 +64,8 @@ MaceStatus Conv2dK3x3(OpContext *context, ...@@ -64,7 +64,8 @@ MaceStatus Conv2dK3x3(OpContext *context,
const Tensor *input, const Tensor *input,
const Tensor *filter, const Tensor *filter,
const Tensor *bias, const Tensor *bias,
const int stride, const int stride_h,
const int stride_w,
const int *padding, const int *padding,
const int *dilations, const int *dilations,
const ActivationType activation, const ActivationType activation,
...@@ -154,7 +155,8 @@ MaceStatus Conv2dK3x3(OpContext *context, ...@@ -154,7 +155,8 @@ MaceStatus Conv2dK3x3(OpContext *context,
kernel->setArg(idx++, static_cast<int>(input_channel_blocks)); kernel->setArg(idx++, static_cast<int>(input_channel_blocks));
kernel->setArg(idx++, static_cast<int>(height)); kernel->setArg(idx++, static_cast<int>(height));
kernel->setArg(idx++, static_cast<int>(width)); kernel->setArg(idx++, static_cast<int>(width));
kernel->setArg(idx++, stride); kernel->setArg(idx++, stride_h);
kernel->setArg(idx++, stride_w);
kernel->setArg(idx++, padding[0] / 2); kernel->setArg(idx++, padding[0] / 2);
kernel->setArg(idx++, padding[1] / 2); kernel->setArg(idx++, padding[1] / 2);
kernel->setArg(idx++, dilations[0]); kernel->setArg(idx++, dilations[0]);
......
...@@ -72,7 +72,8 @@ MaceStatus Conv2d(OpContext *context, ...@@ -72,7 +72,8 @@ MaceStatus Conv2d(OpContext *context,
const Tensor *input, const Tensor *input,
const Tensor *filter, const Tensor *filter,
const Tensor *bias, const Tensor *bias,
const int stride, const int stride_h,
const int stride_w,
const int *padding, const int *padding,
const int *dilations, const int *dilations,
const ActivationType activation, const ActivationType activation,
...@@ -164,7 +165,8 @@ MaceStatus Conv2d(OpContext *context, ...@@ -164,7 +165,8 @@ MaceStatus Conv2d(OpContext *context,
kernel->setArg(idx++, static_cast<uint32_t>(width)); kernel->setArg(idx++, static_cast<uint32_t>(width));
kernel->setArg(idx++, static_cast<uint32_t>(filter->dim(2))); kernel->setArg(idx++, static_cast<uint32_t>(filter->dim(2)));
kernel->setArg(idx++, static_cast<uint32_t>(filter->dim(3))); kernel->setArg(idx++, static_cast<uint32_t>(filter->dim(3)));
kernel->setArg(idx++, static_cast<uint32_t>(stride)); kernel->setArg(idx++, static_cast<uint32_t>(stride_h));
kernel->setArg(idx++, static_cast<uint32_t>(stride_w));
kernel->setArg(idx++, padding[0] / 2); kernel->setArg(idx++, padding[0] / 2);
kernel->setArg(idx++, padding[1] / 2); kernel->setArg(idx++, padding[1] / 2);
kernel->setArg(idx++, dilations[0]); kernel->setArg(idx++, dilations[0]);
......
...@@ -42,7 +42,8 @@ void TestNHWCSimple3x3VALID(int wino_blk_size = 0) { ...@@ -42,7 +42,8 @@ void TestNHWCSimple3x3VALID(int wino_blk_size = 0) {
net.AddInputFromArray<D, float>( net.AddInputFromArray<D, float>(
"Filter", {1, 2, 3, 3}, "Filter", {1, 2, 3, 3},
{1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, {1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f,
1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f}, true); 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f},
true);
net.AddInputFromArray<D, float>("Bias", {1}, {0.1f}, true); net.AddInputFromArray<D, float>("Bias", {1}, {0.1f}, true);
const std::vector<index_t> output_shape = {1, 1, 1, 1}; const std::vector<index_t> output_shape = {1, 1, 1, 1};
...@@ -100,7 +101,8 @@ void TestNHWCSimple3x3SAME(int wino_blk_size = 0) { ...@@ -100,7 +101,8 @@ void TestNHWCSimple3x3SAME(int wino_blk_size = 0) {
net.AddInputFromArray<D, float>( net.AddInputFromArray<D, float>(
"Filter", {1, 2, 3, 3}, "Filter", {1, 2, 3, 3},
{1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, {1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f,
1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f}, true); 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f},
true);
net.AddInputFromArray<D, float>("Bias", {1}, {0.1f}, true); net.AddInputFromArray<D, float>("Bias", {1}, {0.1f}, true);
const std::vector<index_t> output_shape = {1, 3, 3, 1}; const std::vector<index_t> output_shape = {1, 3, 3, 1};
...@@ -149,6 +151,65 @@ void TestNHWCSimple3x3SAME(int wino_blk_size = 0) { ...@@ -149,6 +151,65 @@ void TestNHWCSimple3x3SAME(int wino_blk_size = 0) {
ExpectTensorNear<float>(*expected, *net.GetOutput("Output"), 1e-3, 1e-3); ExpectTensorNear<float>(*expected, *net.GetOutput("Output"), 1e-3, 1e-3);
} }
} }
// Runs a 3x3 Conv2D with unequal strides ({1, 2}) and SAME padding over an
// all-ones 1x3x3x2 input, using an all-ones filter and a 0.1 bias, and
// compares the result against hard-coded expected values.
//
// D selects the device under test: the CPU path converts the NHWC input to
// NCHW and converts the output back, while the GPU path feeds the NHWC tensor
// directly and forwards T and wino_blk_size as operator arguments.
template <DeviceType D, typename T>
void TestNHWCSimple3x3NeqStride(int wino_blk_size = 0) {
  const std::vector<index_t> output_shape = {1, 3, 2, 1};
  OpsTestNet net;

  // Register input, filter and bias tensors.
  net.AddInputFromArray<D, float>(
      "Input", {1, 3, 3, 2},
      {1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1});
  net.AddInputFromArray<D, float>(
      "Filter", {1, 2, 3, 3},
      {1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f,
       1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f},
      true);
  net.AddInputFromArray<D, float>("Bias", {1}, {0.1f}, true);

  switch (D) {
    case DeviceType::CPU: {
      // The CPU operator is fed an NCHW copy of the input.
      net.TransformDataFormat<DeviceType::CPU, float>(
          "Input", DataFormat::NHWC, "InputNCHW", DataFormat::NCHW);
      OpDefBuilder("Conv2D", "Conv2dTest")
          .Input("InputNCHW")
          .Input("Filter")
          .Input("Bias")
          .Output("OutputNCHW")
          .AddIntsArg("strides", {1, 2})
          .AddIntArg("padding", Padding::SAME)
          .AddIntsArg("dilations", {1, 1})
          .Finalize(net.NewOperatorDef());
      net.RunOp(D);
      // Bring the result back to NHWC so it can be compared below.
      net.TransformDataFormat<DeviceType::CPU, float>(
          "OutputNCHW", DataFormat::NCHW, "Output", DataFormat::NHWC);
      break;
    }
    case DeviceType::GPU: {
      OpDefBuilder("Conv2D", "Conv2dTest")
          .Input("Input")
          .Input("Filter")
          .Input("Bias")
          .Output("Output")
          .OutputShape(output_shape)
          .AddIntsArg("strides", {1, 2})
          .AddIntArg("padding", Padding::SAME)
          .AddIntsArg("dilations", {1, 1})
          .AddIntArg("T", static_cast<int>(DataTypeToEnum<T>::value))
          .AddIntArg("wino_block_size", wino_blk_size)
          .Finalize(net.NewOperatorDef());
      net.RunOp(D);
      break;
    }
    default:
      MACE_NOT_IMPLEMENTED;
  }

  // One pair of values per output row (output shape is 1x3x2x1).
  auto golden = net.CreateTensor<float>(
      output_shape, {8.1f, 8.1f,
                     12.1f, 12.1f,
                     8.1f, 8.1f});

  // Non-float data types are compared with looser tolerances.
  const bool is_float = DataTypeToEnum<T>::value == DataType::DT_FLOAT;
  if (!is_float) {
    ExpectTensorNear<float>(*golden, *net.GetOutput("Output"), 1e-3, 1e-3);
  } else {
    ExpectTensorNear<float>(*golden, *net.GetOutput("Output"), 1e-5);
  }
}
} // namespace } // namespace
TEST_F(Conv2dOpTest, CPUSimple) { TEST_F(Conv2dOpTest, CPUSimple) {
...@@ -171,7 +232,12 @@ TEST_F(Conv2dOpTest, OPENCLSimpleWinograd) { ...@@ -171,7 +232,12 @@ TEST_F(Conv2dOpTest, OPENCLSimpleWinograd) {
TestNHWCSimple3x3VALID<DeviceType::GPU, float>(2); TestNHWCSimple3x3VALID<DeviceType::GPU, float>(2);
TestNHWCSimple3x3VALID<DeviceType::GPU, half>(2); TestNHWCSimple3x3VALID<DeviceType::GPU, half>(2);
// TODO(liutuo): the precision of the last value is not normal. // TODO(liutuo): the precision of the last value is not normal.
// TestNHWCSimple3x3SAME<DeviceType::GPU, half>(4); // TestNHWCSimple3x3SAME<DeviceType::GPU, half>(4);
}
TEST_F(Conv2dOpTest, NotEqualStrideSimple) {
TestNHWCSimple3x3NeqStride<DeviceType::CPU, float>();
TestNHWCSimple3x3NeqStride<DeviceType::GPU, float>();
} }
namespace { namespace {
...@@ -186,7 +252,8 @@ void TestNHWCSimple3x3WithoutBias() { ...@@ -186,7 +252,8 @@ void TestNHWCSimple3x3WithoutBias() {
net.AddInputFromArray<D, T>( net.AddInputFromArray<D, T>(
"Filter", {1, 2, 3, 3}, "Filter", {1, 2, 3, 3},
{1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, {1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f,
1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f}, true); 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f},
true);
if (D == DeviceType::CPU) { if (D == DeviceType::CPU) {
net.TransformDataFormat<DeviceType::CPU, float>( net.TransformDataFormat<DeviceType::CPU, float>(
...@@ -316,7 +383,8 @@ void TestFusedNHWCSimple3x3VALID(int wino_blk_size = 0) { ...@@ -316,7 +383,8 @@ void TestFusedNHWCSimple3x3VALID(int wino_blk_size = 0) {
net.AddInputFromArray<D, float>( net.AddInputFromArray<D, float>(
"Filter", {1, 2, 3, 3}, "Filter", {1, 2, 3, 3},
{1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, {1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f,
1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f}, true); 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f},
true);
net.AddInputFromArray<D, float>("Bias", {1}, {-0.1f}, true); net.AddInputFromArray<D, float>("Bias", {1}, {-0.1f}, true);
const std::vector<index_t> output_shape = {1, 1, 1, 1}; const std::vector<index_t> output_shape = {1, 1, 1, 1};
...@@ -372,7 +440,8 @@ void TestFusedNHWCSimple3x3WithoutBias(int wino_blk_size = 0) { ...@@ -372,7 +440,8 @@ void TestFusedNHWCSimple3x3WithoutBias(int wino_blk_size = 0) {
net.AddInputFromArray<D, float>( net.AddInputFromArray<D, float>(
"Filter", {1, 2, 3, 3}, "Filter", {1, 2, 3, 3},
{1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, {1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f,
1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f}, true); 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f},
true);
const std::vector<index_t> output_shape = {1, 1, 1, 1}; const std::vector<index_t> output_shape = {1, 1, 1, 1};
if (D == DeviceType::CPU) { if (D == DeviceType::CPU) {
...@@ -502,16 +571,86 @@ void TestConv1x1() { ...@@ -502,16 +571,86 @@ void TestConv1x1() {
ExpectTensorNear<float>(*expected, *net.GetOutput("Output"), 1e-5); ExpectTensorNear<float>(*expected, *net.GetOutput("Output"), 1e-5);
} }
// Runs a 1x1 Conv2D with unequal strides ({1, 2}) and VALID padding over an
// all-ones 1x3x10x5 input with two output channels (filter weights 1 and 2,
// biases 0.1 and 0.2), and compares the result with hard-coded expectations.
//
// D selects the device under test: the CPU path converts the NHWC input to
// NCHW and converts the output back, while the GPU path uses the NHWC tensor
// directly.
template <DeviceType D>
void TestConv1x1NeqStride() {
  OpsTestNet net;

  // Register input (150 ones), filter and bias tensors.
  net.AddInputFromArray<D, float>(
      "Input", {1, 3, 10, 5},
      {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1});
  net.AddInputFromArray<D, float>(
      "Filter", {2, 5, 1, 1},
      {1.0f, 1.0f, 1.0f, 1.0f, 1.0f,
       2.0f, 2.0f, 2.0f, 2.0f, 2.0f},
      true);
  net.AddInputFromArray<D, float>("Bias", {2}, {0.1f, 0.2f}, true);

  switch (D) {
    case DeviceType::CPU: {
      // The CPU operator is fed an NCHW copy of the input.
      net.TransformDataFormat<DeviceType::CPU, float>(
          "Input", DataFormat::NHWC, "InputNCHW", DataFormat::NCHW);
      OpDefBuilder("Conv2D", "Conv2DTest")
          .Input("InputNCHW")
          .Input("Filter")
          .Input("Bias")
          .Output("OutputNCHW")
          .AddIntsArg("strides", {1, 2})
          .AddIntArg("padding", Padding::VALID)
          .AddIntsArg("dilations", {1, 1})
          .Finalize(net.NewOperatorDef());
      net.RunOp(D);
      // Bring the result back to NHWC so it can be compared below.
      net.TransformDataFormat<DeviceType::CPU, float>(
          "OutputNCHW", DataFormat::NCHW, "Output", DataFormat::NHWC);
      break;
    }
    case DeviceType::GPU: {
      OpDefBuilder("Conv2D", "Conv2DTest")
          .Input("Input")
          .Input("Filter")
          .Input("Bias")
          .Output("Output")
          .AddIntsArg("strides", {1, 2})
          .AddIntArg("padding", Padding::VALID)
          .AddIntsArg("dilations", {1, 1})
          .Finalize(net.NewOperatorDef());
      net.RunOp(D);
      break;
    }
    default:
      MACE_NOT_IMPLEMENTED;
  }

  // One line per output row: five positions, two channels each (1x3x5x2).
  auto golden = net.CreateTensor<float>(
      {1, 3, 5, 2},
      {5.1f, 10.2f, 5.1f, 10.2f, 5.1f, 10.2f, 5.1f, 10.2f, 5.1f, 10.2f,
       5.1f, 10.2f, 5.1f, 10.2f, 5.1f, 10.2f, 5.1f, 10.2f, 5.1f, 10.2f,
       5.1f, 10.2f, 5.1f, 10.2f, 5.1f, 10.2f, 5.1f, 10.2f, 5.1f, 10.2f});

  ExpectTensorNear<float>(*golden, *net.GetOutput("Output"), 1e-5);
}
} // namespace } // namespace
TEST_F(Conv2dOpTest, CPUConv1x1) { TestConv1x1<DeviceType::CPU>(); } TEST_F(Conv2dOpTest, CPUConv1x1) { TestConv1x1<DeviceType::CPU>(); }
TEST_F(Conv2dOpTest, OPENCLConv1x1) { TestConv1x1<DeviceType::GPU>(); } TEST_F(Conv2dOpTest, OPENCLConv1x1) { TestConv1x1<DeviceType::GPU>(); }
// Runs the 1x1 convolution test with unequal strides on the CPU device.
TEST_F(Conv2dOpTest, CPUConv1x1NotEqualStride) {
TestConv1x1NeqStride<DeviceType::CPU>();
}
// Runs the 1x1 convolution test with unequal strides on the GPU device.
TEST_F(Conv2dOpTest, OPENCLConv1x1NotEqualStride) {
TestConv1x1NeqStride<DeviceType::GPU>();
}
namespace { namespace {
template <DeviceType D, typename T> template <DeviceType D, typename T>
void TestComplexConvNxNS12(const std::vector<index_t> &shape, void TestComplexConvNxN(const std::vector<index_t> &shape,
const int stride, const std::vector<int> strides,
const int wino_blk_size = 0) { const int wino_blk_size = 0) {
testing::internal::LogToStderr(); testing::internal::LogToStderr();
auto func = [&](int kernel_h, int kernel_w, int stride_h, int stride_w, auto func = [&](int kernel_h, int kernel_w, int stride_h, int stride_w,
...@@ -581,35 +720,40 @@ void TestComplexConvNxNS12(const std::vector<index_t> &shape, ...@@ -581,35 +720,40 @@ void TestComplexConvNxNS12(const std::vector<index_t> &shape,
}; };
for (int kernel_size : {1, 3, 5, 7}) { for (int kernel_size : {1, 3, 5, 7}) {
func(kernel_size, kernel_size, stride, stride, VALID); func(kernel_size, kernel_size, strides[0], strides[1], VALID);
func(kernel_size, kernel_size, stride, stride, SAME); func(kernel_size, kernel_size, strides[0], strides[1], SAME);
} }
} }
} // namespace } // namespace
TEST_F(Conv2dOpTest, OPENCLAlignedConvNxNS12) { TEST_F(Conv2dOpTest, OPENCLAlignedConvNxNS12) {
TestComplexConvNxNS12<DeviceType::GPU, float>({32, 16, 16, 32}, 1); TestComplexConvNxN<DeviceType::GPU, float>({32, 16, 16, 32}, {1, 1});
TestComplexConvNxNS12<DeviceType::GPU, float>({32, 16, 16, 32}, 2); TestComplexConvNxN<DeviceType::GPU, float>({32, 16, 16, 32}, {2, 2});
} }
TEST_F(Conv2dOpTest, OPENCLAlignedConvNxNWinograd) { TEST_F(Conv2dOpTest, OPENCLAlignedConvNxNWinograd) {
TestComplexConvNxNS12<DeviceType::GPU, float>({32, 16, 16, 32}, 1, 2); TestComplexConvNxN<DeviceType::GPU, float>({32, 16, 16, 32}, {1, 1}, 2);
TestComplexConvNxNS12<DeviceType::GPU, float>({32, 16, 16, 32}, 1, 4); TestComplexConvNxN<DeviceType::GPU, float>({32, 16, 16, 32}, {1, 1}, 4);
} }
TEST_F(Conv2dOpTest, OPENCLUnalignedConvNxNS12) { TEST_F(Conv2dOpTest, OPENCLUnalignedConvNxNS12) {
TestComplexConvNxNS12<DeviceType::GPU, float>({17, 113, 5, 7}, 1); TestComplexConvNxN<DeviceType::GPU, float>({17, 113, 5, 7}, {1, 1});
TestComplexConvNxNS12<DeviceType::GPU, float>({17, 113, 5, 7}, 2); TestComplexConvNxN<DeviceType::GPU, float>({17, 113, 5, 7}, {2, 2});
} }
TEST_F(Conv2dOpTest, OPENCLUnalignedConvNxNWinograd) { TEST_F(Conv2dOpTest, OPENCLUnalignedConvNxNWinograd) {
TestComplexConvNxNS12<DeviceType::GPU, float>({17, 113, 5, 7}, 1, 4); TestComplexConvNxN<DeviceType::GPU, float>({17, 113, 5, 7}, {1, 1}, 4);
TestComplexConvNxNS12<DeviceType::GPU, float>({17, 113, 5, 7}, 1, 2); TestComplexConvNxN<DeviceType::GPU, float>({17, 113, 5, 7}, {1, 1}, 2);
} }
TEST_F(Conv2dOpTest, OPENCLUnalignedConvNxNS34) { TEST_F(Conv2dOpTest, OPENCLUnalignedConvNxNS34) {
TestComplexConvNxNS12<DeviceType::GPU, float>({31, 113, 13, 17}, 3); TestComplexConvNxN<DeviceType::GPU, float>({31, 113, 13, 17}, {3, 3});
TestComplexConvNxNS12<DeviceType::GPU, float>({32, 32, 13, 17}, 4); TestComplexConvNxN<DeviceType::GPU, float>({32, 32, 13, 17}, {4, 4});
}
TEST_F(Conv2dOpTest, OPENCLUnalignedConvNxNWithNotEqualStride) {
TestComplexConvNxN<DeviceType::GPU, float>({31, 113, 13, 17}, {1, 2});
TestComplexConvNxN<DeviceType::GPU, float>({32, 32, 13, 17}, {3, 4});
} }
namespace { namespace {
...@@ -646,9 +790,7 @@ void TestHalfComplexConvNxNS12(const std::vector<index_t> &input_shape, ...@@ -646,9 +790,7 @@ void TestHalfComplexConvNxNS12(const std::vector<index_t> &input_shape,
net.AddInputFromArray<D, float>( net.AddInputFromArray<D, float>(
"Filter", {output_channels, input_channels, kernel_h, kernel_w}, "Filter", {output_channels, input_channels, kernel_h, kernel_w},
float_filter_data, true); float_filter_data, true);
net.AddInputFromArray<D, float>("Bias", net.AddInputFromArray<D, float>("Bias", {output_channels}, float_bias_data,
{output_channels},
float_bias_data,
true); true);
net.TransformDataFormat<DeviceType::CPU, float>( net.TransformDataFormat<DeviceType::CPU, float>(
...@@ -713,11 +855,11 @@ TEST_F(Conv2dOpTest, OPENCLHalfAlignedConv3x3S12) { ...@@ -713,11 +855,11 @@ TEST_F(Conv2dOpTest, OPENCLHalfAlignedConv3x3S12) {
} }
TEST_F(Conv2dOpTest, OPENCLHalfAlignedConv3x3Winograd) { TEST_F(Conv2dOpTest, OPENCLHalfAlignedConv3x3Winograd) {
TestHalfComplexConvNxNS12<DeviceType::GPU>({32, 32}, {3, 3, 32, 64}, TestHalfComplexConvNxNS12<DeviceType::GPU>({32, 32}, {3, 3, 32, 64}, {1, 1},
{1, 1}, 2); 2);
// TODO(liutuo) : the precision error is large. // TODO(liutuo) : the precision error is large.
// TestHalfComplexConvNxNS12<DeviceType::GPU>({32, 32}, {3, 3, 32, 64}, // TestHalfComplexConvNxNS12<DeviceType::GPU>({32, 32}, {3, 3, 32, 64},
// {1, 1}, 4); // {1, 1}, 4);
} }
TEST_F(Conv2dOpTest, OPENCLHalfAlignedConv5x5S12) { TEST_F(Conv2dOpTest, OPENCLHalfAlignedConv5x5S12) {
...@@ -769,10 +911,10 @@ TEST_F(Conv2dOpTest, OPENCLHalfUnalignedConv3x3S12) { ...@@ -769,10 +911,10 @@ TEST_F(Conv2dOpTest, OPENCLHalfUnalignedConv3x3S12) {
TEST_F(Conv2dOpTest, OPENCLHalfUnalignedConv3x3Winograd) { TEST_F(Conv2dOpTest, OPENCLHalfUnalignedConv3x3Winograd) {
// TODO(liutuo) : the precision error is large. // TODO(liutuo) : the precision error is large.
// TestHalfComplexConvNxNS12<DeviceType::GPU>({107, 113}, {3, 3, 5, 7}, // TestHalfComplexConvNxNS12<DeviceType::GPU>({107, 113}, {3, 3, 5, 7},
// {1, 1}, 4); // {1, 1}, 4);
TestHalfComplexConvNxNS12<DeviceType::GPU>({107, 113}, {3, 3, 5, 7}, TestHalfComplexConvNxNS12<DeviceType::GPU>({107, 113}, {3, 3, 5, 7}, {1, 1},
{1, 1}, 2); 2);
} }
TEST_F(Conv2dOpTest, OPENCLHalfConv5x5Dilation2) { TEST_F(Conv2dOpTest, OPENCLHalfConv5x5Dilation2) {
...@@ -1045,10 +1187,10 @@ TEST_F(Conv2dOpTest, OPENCLAlignedPad2) { ...@@ -1045,10 +1187,10 @@ TEST_F(Conv2dOpTest, OPENCLAlignedPad2) {
} }
TEST_F(Conv2dOpTest, OPENCLAlignedPad2Winograd) { TEST_F(Conv2dOpTest, OPENCLAlignedPad2Winograd) {
TestArbitraryPadConvNxN<DeviceType::GPU, float>({128, 128, 16, 16}, TestArbitraryPadConvNxN<DeviceType::GPU, float>({128, 128, 16, 16}, {2, 2},
{2, 2}, 2); 2);
TestArbitraryPadConvNxN<DeviceType::GPU, float>({128, 128, 16, 16}, TestArbitraryPadConvNxN<DeviceType::GPU, float>({128, 128, 16, 16}, {2, 2},
{2, 2}, 4); 4);
} }
TEST_F(Conv2dOpTest, OPENCLUnalignedPad4) { TEST_F(Conv2dOpTest, OPENCLUnalignedPad4) {
...@@ -1071,15 +1213,15 @@ void TestQuantSimple3x3() { ...@@ -1071,15 +1213,15 @@ void TestQuantSimple3x3() {
// Add input data // Add input data
net.AddInputFromArray<DeviceType::CPU, uint8_t>( net.AddInputFromArray<DeviceType::CPU, uint8_t>(
"Filter", {1, 3, 3, 2}, "Filter", {1, 3, 3, 2}, {102, 150, 123, 135, 1, 216, 137, 47, 53, 75, 145,
{102, 150, 123, 135, 1, 216, 137, 47, 53, 75, 145, 130, 171, 62, 255, 130, 171, 62, 255, 122, 72, 211},
122, 72, 211}, true, 0.0226, 127); true, 0.0226, 127);
net.AddInputFromArray<DeviceType::CPU, uint8_t>( net.AddInputFromArray<DeviceType::CPU, uint8_t>(
"Input", {1, 3, 3, 2}, "Input", {1, 3, 3, 2}, {1, 75, 117, 161, 127, 119, 94, 151, 203, 151, 84,
{1, 75, 117, 161, 127, 119, 94, 151, 203, 151, 84, 61, 55, 142, 113, 139, 61, 55, 142, 113, 139, 3, 255},
3, 255}, false, 0.0204, 93); false, 0.0204, 93);
net.AddInputFromArray<DeviceType::CPU, int32_t>( net.AddInputFromArray<DeviceType::CPU, int32_t>("Bias", {1}, {2}, true,
"Bias", {1}, {2}, true, 0.00046104, 0); 0.00046104, 0);
OpDefBuilder("Conv2D", "Conv2dTest") OpDefBuilder("Conv2D", "Conv2dTest")
.Input("Input") .Input("Input")
...@@ -1113,17 +1255,15 @@ void TestQuant(const index_t batch, ...@@ -1113,17 +1255,15 @@ void TestQuant(const index_t batch,
enum Padding padding_type, enum Padding padding_type,
const std::vector<int> &strides) { const std::vector<int> &strides) {
OpsTestNet net; OpsTestNet net;
net.AddRandomInput<CPU, float>("Input", {batch, in_height, in_width, net.AddRandomInput<CPU, float>("Input",
in_channels}); {batch, in_height, in_width, in_channels});
net.AddRandomInput<CPU, float>("Filter", {out_channels, k_height, k_width, net.AddRandomInput<CPU, float>(
in_channels}, true); "Filter", {out_channels, k_height, k_width, in_channels}, true);
net.AddRandomInput<CPU, float>("Bias", {out_channels}, true); net.AddRandomInput<CPU, float>("Bias", {out_channels}, true);
net.TransformDataFormat<DeviceType::CPU, float>( net.TransformDataFormat<DeviceType::CPU, float>(
"Input", DataFormat::NHWC, "InputNCHW", DataFormat::NCHW); "Input", DataFormat::NHWC, "InputNCHW", DataFormat::NCHW);
net.TransformFilterDataFormat<DeviceType::CPU, float>("Filter", net.TransformFilterDataFormat<DeviceType::CPU, float>(
DataFormat::OHWI, "Filter", DataFormat::OHWI, "FilterOIHW", DataFormat::OIHW);
"FilterOIHW",
DataFormat::OIHW);
OpDefBuilder("Conv2D", "Conv2dTest") OpDefBuilder("Conv2D", "Conv2dTest")
.Input("InputNCHW") .Input("InputNCHW")
...@@ -1172,10 +1312,10 @@ void TestQuant(const index_t batch, ...@@ -1172,10 +1312,10 @@ void TestQuant(const index_t batch,
auto bias_data = bias->data<float>(); auto bias_data = bias->data<float>();
float bias_scale = q_input->scale() * q_filter->scale(); float bias_scale = q_input->scale() * q_filter->scale();
std::vector<int32_t> q_bias(bias->size()); std::vector<int32_t> q_bias(bias->size());
QuantizeUtil<float, int32_t> QuantizeUtil<float, int32_t> quantize_util(
quantize_util(OpTestContext::Get()->thread_pool()); OpTestContext::Get()->thread_pool());
quantize_util.QuantizeWithScaleAndZeropoint( quantize_util.QuantizeWithScaleAndZeropoint(bias_data, bias->size(),
bias_data, bias->size(), bias_scale, 0, q_bias.data()); bias_scale, 0, q_bias.data());
net.AddInputFromArray<DeviceType::CPU, int32_t>( net.AddInputFromArray<DeviceType::CPU, int32_t>(
"QuantizedBias", {out_channels}, q_bias, true, bias_scale, 0); "QuantizedBias", {out_channels}, q_bias, true, bias_scale, 0);
......
...@@ -180,7 +180,6 @@ MaceTransposableDataFormatOps = [MaceOp.Activation, ...@@ -180,7 +180,6 @@ MaceTransposableDataFormatOps = [MaceOp.Activation,
MaceOp.Eltwise, MaceOp.Eltwise,
MaceOp.Pad, MaceOp.Pad,
MaceOp.Reduce, MaceOp.Reduce,
MaceOp.Reshape,
MaceOp.Softmax, MaceOp.Softmax,
MaceOp.Split, MaceOp.Split,
MaceOp.Squeeze, MaceOp.Squeeze,
......
...@@ -121,9 +121,9 @@ class HexagonConverter(base_converter.ConverterInterface): ...@@ -121,9 +121,9 @@ class HexagonConverter(base_converter.ConverterInterface):
# convert op node # convert op node
self.convert_ops() self.convert_ops()
self.convert_input_output_node() model_inputs = self.convert_input_output_node()
self.add_node_id() self.add_node_id(model_inputs)
return self._model return self._model
...@@ -235,8 +235,11 @@ class HexagonConverter(base_converter.ConverterInterface): ...@@ -235,8 +235,11 @@ class HexagonConverter(base_converter.ConverterInterface):
for input_node in self._option.input_nodes.values(): for input_node in self._option.input_nodes.values():
op_name = normalize_name( op_name = normalize_name(
MaceKeyword.mace_input_node_name + '_' + input_node.name) MaceKeyword.mace_input_node_name + '_' + input_node.name)
op = first_quantize_input_op \ if op_name == first_quantize_input_op.name:
if op_name == first_quantize_input_op.name else ops[op_name] op = first_quantize_input_op
quantize_input_op.name = MaceKeyword.mace_input_node_name
else:
op = ops[op_name]
mace_check(op.type == HexagonOp.QuantizeINPUT_f_to_8.name, mace_check(op.type == HexagonOp.QuantizeINPUT_f_to_8.name,
"input node type is: %s" % op.type) "input node type is: %s" % op.type)
quantize_input_op.output.extend(op.output) quantize_input_op.output.extend(op.output)
...@@ -276,7 +279,9 @@ class HexagonConverter(base_converter.ConverterInterface): ...@@ -276,7 +279,9 @@ class HexagonConverter(base_converter.ConverterInterface):
dequantize_output_op.type = HexagonOp.OUTPUT.name dequantize_output_op.type = HexagonOp.OUTPUT.name
del dequantize_output_op.input[1:] del dequantize_output_op.input[1:]
def add_node_id(self): return quantize_input_op.output
def add_node_id(self, model_inputs):
node_id_counter = 0 node_id_counter = 0
node_id_map = {} node_id_map = {}
for tensor in self._model.tensors: for tensor in self._model.tensors:
...@@ -305,7 +310,11 @@ class HexagonConverter(base_converter.ConverterInterface): ...@@ -305,7 +310,11 @@ class HexagonConverter(base_converter.ConverterInterface):
node_id = node_id_map[tensor_name] node_id = node_id_map[tensor_name]
node_input = op.node_input.add() node_input = op.node_input.add()
node_input.node_id = node_id node_input.node_id = node_id
node_input.output_port = int(port) if tensor_name in model_inputs:
for i in range(len(model_inputs)):
if model_inputs[i] == tensor_name:
port += i * 3
node_input.output_port = port
def convert_ops(self): def convert_ops(self):
print("Convert mace graph to hexagon.") print("Convert mace graph to hexagon.")
......
...@@ -1075,6 +1075,7 @@ class TensorflowConverter(base_converter.ConverterInterface): ...@@ -1075,6 +1075,7 @@ class TensorflowConverter(base_converter.ConverterInterface):
if tf_op.type == TFOpType.FakeQuantWithMinMaxVars.name: if tf_op.type == TFOpType.FakeQuantWithMinMaxVars.name:
self._skip_tensor.add(tf_op.inputs[1].name) self._skip_tensor.add(tf_op.inputs[1].name)
self._skip_tensor.add(tf_op.inputs[2].name) self._skip_tensor.add(tf_op.inputs[2].name)
del op.input[1:]
def convert_cumsum(self, tf_op): def convert_cumsum(self, tf_op):
op = self.convert_general_op(tf_op) op = self.convert_general_op(tf_op)
......
...@@ -1734,6 +1734,7 @@ class Transformer(base_converter.ConverterInterface): ...@@ -1734,6 +1734,7 @@ class Transformer(base_converter.ConverterInterface):
for op in net.op: for op in net.op:
if op.type == 'FakeQuantWithMinMaxVars' or \ if op.type == 'FakeQuantWithMinMaxVars' or \
op.type == 'FakeQuantWithMinMaxArgs': op.type == 'FakeQuantWithMinMaxArgs':
if op.input[0] not in self._consts:
producer_op = self._producer[op.input[0]] producer_op = self._producer[op.input[0]]
minval = ConverterUtil.get_arg(op, 'min').f minval = ConverterUtil.get_arg(op, 'min').f
maxval = ConverterUtil.get_arg(op, 'max').f maxval = ConverterUtil.get_arg(op, 'max').f
...@@ -1741,7 +1742,8 @@ class Transformer(base_converter.ConverterInterface): ...@@ -1741,7 +1742,8 @@ class Transformer(base_converter.ConverterInterface):
self.add_quantize_info(producer_op, minval, maxval) self.add_quantize_info(producer_op, minval, maxval)
self._quantize_activation_info[op.input[0]] = quantize_info self._quantize_activation_info[op.input[0]] = quantize_info
# for add -> fakequant pattern # for add -> fakequant pattern
self._quantize_activation_info[op.output[0]] = quantize_info self._quantize_activation_info[op.output[0]] = \
quantize_info
print(op.input[0], op.output[0]) print(op.input[0], op.output[0])
op.type = MaceOp.Identity.name op.type = MaceOp.Identity.name
...@@ -1850,6 +1852,8 @@ class Transformer(base_converter.ConverterInterface): ...@@ -1850,6 +1852,8 @@ class Transformer(base_converter.ConverterInterface):
quantize_info.scale = scale quantize_info.scale = scale
quantize_info.zero_point = zero quantize_info.zero_point = zero
self._quantize_activation_info[new_input_name] = quantize_info self._quantize_activation_info[new_input_name] = quantize_info
input_op = self._producer[input_node.name]
input_op.quantize_info.extend([quantize_info])
print("Add default quantize info for ops like Pooling, Softmax") print("Add default quantize info for ops like Pooling, Softmax")
for op in self._model.op: for op in self._model.op:
...@@ -1904,8 +1908,8 @@ class Transformer(base_converter.ConverterInterface): ...@@ -1904,8 +1908,8 @@ class Transformer(base_converter.ConverterInterface):
elif (op.type == MaceOp.Eltwise.name elif (op.type == MaceOp.Eltwise.name
and not op.quantize_info and not op.quantize_info
and len(op.input) == 2 and len(op.input) == 2
and len(op.input[0]) not in self._consts and op.input[0] not in self._consts
and len(op.input[1]) not in self._consts): and op.input[1] not in self._consts):
producer_op0 = self._producer[op.input[0]] producer_op0 = self._producer[op.input[0]]
producer_op1 = self._producer[op.input[1]] producer_op1 = self._producer[op.input[1]]
if ConverterUtil.get_arg( if ConverterUtil.get_arg(
......
...@@ -65,8 +65,8 @@ class MaceLogger: ...@@ -65,8 +65,8 @@ class MaceLogger:
+ CMDColors.ENDC) + CMDColors.ENDC)
@staticmethod @staticmethod
def error(message): def error(message, level=2):
print(CMDColors.RED + 'ERROR: ' + get_frame_info() + str(message) print(CMDColors.RED + 'ERROR: ' + get_frame_info(level) + str(message)
+ CMDColors.ENDC) + CMDColors.ENDC)
exit(1) exit(1)
...@@ -76,7 +76,7 @@ def mace_check(condition, message): ...@@ -76,7 +76,7 @@ def mace_check(condition, message):
for line in traceback.format_stack(): for line in traceback.format_stack():
print(line.strip()) print(line.strip())
MaceLogger.error(message) MaceLogger.error(message, level=3)
################################ ################################
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册