Unverified · Commit 42d17538 authored by Wojciech Uss, committed by GitHub

Add support for (de/re)quantization with shift (#27481)

Parent 8da2b16d
@@ -31,9 +31,10 @@ framework::OpKernelType DeQuantOp::GetExpectedKernelType(
 }
 
 void DeQuantOpMaker::Make() {
-  AddInput("Input", "input data");
-  AddOutput("Output", "output data");
-  AddAttr<float>("Scale", "scale data").SetDefault({1.0f});
+  AddInput("Input", "Input data");
+  AddOutput("Output", "Output data");
+  AddAttr<float>("Scale", "Scale data").SetDefault({1.0f});
+  AddAttr<float>("Shift", "Shift data").SetDefault({0.0f});
   AddComment(R"DOC(This op will dequantize data from INT8 to FP32)DOC");
 }
...
@@ -16,6 +16,7 @@ limitations under the License. */
 #include "paddle/fluid/framework/data_layout_transform.h"
 #include "paddle/fluid/framework/tensor.h"
 #include "paddle/fluid/operators/dequantize_op.h"
+#include "paddle/fluid/platform/errors.h"
 #include "paddle/fluid/platform/mkldnn_helper.h"
 #include "paddle/fluid/platform/mkldnn_reuse.h"
@@ -37,14 +38,29 @@ class DeQuantOpKernel : public framework::OpKernel<T> {
   void Compute(const framework::ExecutionContext& ctx) const override {
     auto* input = ctx.Input<Tensor>("Input");
     auto scale_data = ctx.Attr<float>("Scale");
+    auto scale_shift = ctx.Attr<float>("Shift");
+    bool with_shift = scale_shift != 0.0f;
     auto* output = ctx.Output<Tensor>("Output");
+
+    PADDLE_ENFORCE_NE(scale_data, 0.0f,
+                      platform::errors::InvalidArgument(
+                          "Dequantization scale cannot be 0.0"));
+    PADDLE_ENFORCE_GE(scale_shift, 0,
+                      platform::errors::Unimplemented(
+                          "Dequantization shift must be nonnegative."));
+    PADDLE_ENFORCE_LE(
+        scale_shift, 255,
+        platform::errors::Unimplemented(
+            "Dequantization shift must be less than or equal to 255."));
+
     auto& dev_ctx =
         ctx.template device_context<platform::MKLDNNDeviceContext>();
     const auto& engine = dev_ctx.GetEngine();
 
     const T* input_data = input->data<T>();
     float* output_data = output->mutable_data<float>(ctx.GetPlace());
-    std::vector<float> reorder_scale = {1.0f / scale_data};
+
+    float reorder_shift = -scale_shift / scale_data;
 
     auto src_tz = paddle::framework::vectorize<int64_t>(input->dims());
     auto dst_tz = paddle::framework::vectorize<int64_t>(output->dims());
@@ -65,7 +81,15 @@ class DeQuantOpKernel : public framework::OpKernel<T> {
     if (reorder_p == nullptr) {
       mkldnn::primitive_attr attri;
       int mask = 0;
-      attri.set_output_scales(mask, reorder_scale);
+      float reorder_scale = 1. / scale_data;
+      attri.set_output_scales(mask, {reorder_scale});
+
+      if (with_shift) {
+        mkldnn::post_ops post_operations;
+        post_operations.append_sum();
+        attri.set_post_ops(post_operations);
+        std::fill(output_data, output_data + output->numel(), reorder_shift);
+      }
 
       auto src_md = platform::MKLDNNMemDesc({src_tz}, src_dt, src_fmt);
       src_memory = std::make_shared<mkldnn::memory>(
@@ -92,6 +116,8 @@ class DeQuantOpKernel : public framework::OpKernel<T> {
       dst_memory = std::static_pointer_cast<mkldnn::memory>(
           dev_ctx.GetBlob(key_dst_mem));
+      if (with_shift)
+        std::fill(output_data, output_data + output->numel(), reorder_shift);
       dst_memory->set_data_handle(output->mutable_data<float>(ctx.GetPlace()));
     }
...
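The dequantize kernel above folds the shift into a single oneDNN reorder: the output scale handles the division by `Scale`, and the `sum` post-op accumulates onto the output buffer, which is pre-filled with `-Shift/Scale`, so the result is `(x - Shift) / Scale`. A minimal NumPy sketch of that arithmetic (the function name is illustrative, not part of this PR):

import numpy as np

def dequantize_with_shift(x, scale, shift):
    # The output buffer is pre-filled with -shift/scale; the reorder then
    # accumulates x * (1/scale) onto it via the sum post-op, giving
    # out = x/scale - shift/scale = (x - shift) / scale.
    acc = np.full(x.shape, -shift / scale, dtype=np.float32)
    return x.astype(np.float32) * (1.0 / scale) + acc

# e.g. uint8 data quantized with scale=255, shift=128 maps back into
# roughly [-0.5, 0.5)
x = np.array([0, 128, 255], dtype=np.uint8)
print(dequantize_with_shift(x, scale=255.0, shift=128.0))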
@@ -36,7 +36,21 @@ class QuantOpKernel : public framework::OpKernel<T> {
   void Compute(const framework::ExecutionContext& ctx) const override {
     auto* input = ctx.Input<Tensor>("Input");
     auto scale_data = ctx.Attr<float>("Scale");
+    auto scale_shift = ctx.Attr<float>("Shift");
+    bool with_shift = scale_shift != 0.0f;
     auto* output = ctx.Output<Tensor>("Output");
+
+    PADDLE_ENFORCE_NE(
+        scale_data, 0.0f,
+        platform::errors::InvalidArgument("Quantization scale cannot be 0.0"));
+    PADDLE_ENFORCE_GE(scale_shift, 0,
+                      platform::errors::Unimplemented(
+                          "Quantization shift must be nonnegative."));
+    PADDLE_ENFORCE_LE(
+        scale_shift, 255,
+        platform::errors::Unimplemented(
+            "Quantization shift must be less than or equal to 255."));
+
     auto& dev_ctx =
         ctx.template device_context<platform::MKLDNNDeviceContext>();
     const auto& engine = dev_ctx.GetEngine();
@@ -47,11 +61,12 @@ class QuantOpKernel : public framework::OpKernel<T> {
     const T* input_data = input->data<T>();
 
-    bool is_negative = ctx.Attr<bool>("is_negative_input");
+    bool is_negative_input = ctx.Attr<bool>("is_negative_input");
     bool bfloat16 = ctx.Attr<bool>("bfloat16");
-    std::string key =
-        platform::CreateKey(platform::ThreadIDasStr(), src_tz, scale_data,
-                            is_negative, ctx.OutputName("Output"));
+    std::string key = platform::CreateKey(
+        platform::ThreadIDasStr(), src_tz, scale_data, scale_shift,
+        is_negative_input, ctx.OutputName("Output"));
     const std::string key_prim = key + "@r";
     const std::string key_src_mem = key + "@s";
     const std::string key_dst_mem = key + "@d";
@@ -69,6 +84,15 @@ class QuantOpKernel : public framework::OpKernel<T> {
       int mask = 0;
       attri.set_output_scales(mask, {scale_data});
 
+      if (with_shift) {
+        mkldnn::post_ops post_operations;
+        post_operations.append_sum();
+        attri.set_post_ops(post_operations);
+        uint8_t* output_data = output->mutable_data<uint8_t>(ctx.GetPlace());
+        // memset casts scale_shift to unsigned char (uint8_t) internally
+        std::memset(output_data, scale_shift, output->numel());
+      }
+
       auto src_md = platform::MKLDNNMemDesc({src_tz}, memory::data_type::f32,
                                             input->format());
       src_memory = std::make_shared<mkldnn::memory>(
@@ -78,7 +102,7 @@ class QuantOpKernel : public framework::OpKernel<T> {
       if (bfloat16) {
         platform::SetDstMemoryQuantized<paddle::platform::bfloat16>(
             ctx, output, dst_tz, engine, dst_md, dst_memory, out_format);
-      } else if (is_negative) {
+      } else if (is_negative_input && !with_shift) {
         platform::SetDstMemoryQuantized<int8_t>(ctx, output, dst_tz, engine,
                                                 dst_md, dst_memory, out_format);
       } else {
@@ -104,10 +128,13 @@ class QuantOpKernel : public framework::OpKernel<T> {
       if (bfloat16) {
         dst_memory->set_data_handle(
             output->mutable_data<paddle::platform::bfloat16>(place));
-      } else if (is_negative) {
-        dst_memory->set_data_handle(output->mutable_data<int8_t>(place));
+      } else if (with_shift || !is_negative_input) {
+        uint8_t* output_data = output->mutable_data<uint8_t>(ctx.GetPlace());
+        if (with_shift) std::memset(output_data, scale_shift, output->numel());
+        dst_memory->set_data_handle(output_data);
       } else {
-        dst_memory->set_data_handle(output->mutable_data<uint8_t>(place));
+        dst_memory->set_data_handle(
+            output->mutable_data<int8_t>(ctx.GetPlace()));
       }
     }
...
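Conversely, the quantize kernel computes `round(x * Scale + Shift)`, and a nonzero `Shift` forces unsigned int8 output: the shift is memset into the destination buffer and the reorder result is accumulated onto it by the `sum` post-op. A rough NumPy equivalent (the function name is illustrative; the clipping models oneDNN's saturating conversion to int types):

import numpy as np

def quantize_with_shift(x, scale, shift, is_negative_input):
    # A nonzero shift implies u8 output: the buffer is memset to shift and
    # the reorder result x * scale is summed onto it.
    if shift != 0.0:
        return np.clip(np.rint(x * scale + shift), 0, 255).astype(np.uint8)
    if is_negative_input:
        return np.clip(np.rint(x * scale), -128, 127).astype(np.int8)
    return np.clip(np.rint(x * scale), 0, 255).astype(np.uint8)

x = np.array([-1.0, 0.0, 1.0], dtype=np.float32)
print(quantize_with_shift(x, scale=127.0, shift=128.0, is_negative_input=True))
# -> [  1 128 255] (uint8)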
@@ -26,20 +26,45 @@ using dnnl::reorder;
 using platform::to_void_cast;
 using Tensor = framework::Tensor;
 
+namespace {
+
+inline uint8_t clip_to_uint8(float x) {
+  return std::max(0L, std::min(255L, std::lround(x)));
+}
+
+}  // namespace
+
 template <typename T>
 class ReQuantOpKernel : public framework::OpKernel<T> {
  public:
   void Compute(const framework::ExecutionContext& ctx) const override {
     auto* input = ctx.Input<Tensor>("Input");
     auto scale_in = ctx.Attr<float>("Scale_in");
+    auto shift_in = ctx.Attr<float>("Shift_in");
     auto scale_out = ctx.Attr<float>("Scale_out");
+    auto shift_out = ctx.Attr<float>("Shift_out");
+    bool with_shift = shift_in != 0.0f || shift_out != 0.0f;
     auto* output = ctx.Output<Tensor>("Output");
+
+    PADDLE_ENFORCE_NE(scale_in, 0.0f, platform::errors::InvalidArgument(
+                                          "Scale of input cannot be 0.0"));
+    PADDLE_ENFORCE_NE(scale_out, 0.0f, platform::errors::InvalidArgument(
+                                           "Scale of output cannot be 0.0"));
+    if (shift_in != 0.0f) {
+      PADDLE_ENFORCE_EQ(
+          input->type(), framework::proto::VarType::UINT8,
+          platform::errors::Unimplemented("Requantize does not support nonzero "
+                                          "shift for signed input."));
+    }
+
     auto& dev_ctx =
         ctx.template device_context<platform::MKLDNNDeviceContext>();
     const auto& engine = dev_ctx.GetEngine();
 
     auto src_tz = paddle::framework::vectorize(input->dims());
 
+    float reorder_scale = scale_out / scale_in;
+
     std::string key =
         platform::CreateKey(platform::ThreadIDasStr(), src_tz, scale_in,
                             scale_out, ctx.OutputName("Output"));
@@ -53,28 +78,37 @@ class ReQuantOpKernel : public framework::OpKernel<T> {
     reorder_p = std::static_pointer_cast<reorder>(dev_ctx.GetBlob(key_prim));
 
     const T* input_data = input->data<T>();
-    T* output_data = output->mutable_data<T>(ctx.GetPlace());
 
     if (reorder_p == nullptr) {
-      dnnl::primitive_attr attri;
-      int mask = 0;
-      float scale_shift = scale_out / scale_in;
-      attri.set_output_scales(mask, {scale_shift});
-
-      auto dst_tz = paddle::framework::vectorize(output->dims());
-      dnnl::memory::data_type src_dt =
-          paddle::framework::ToMKLDNNDataType(input->type());
-      dnnl::memory::data_type dst_dt = src_dt;
+      auto dst_tz = framework::vectorize(output->dims());
+      auto src_dt = framework::ToMKLDNNDataType(input->type());
+      auto dst_dt = with_shift ? framework::MKLDNNDataType::u8 : src_dt;
 
       auto src_md =
           platform::MKLDNNMemDesc({src_tz}, src_dt, MKLDNNMemoryFormat::nhwc);
       src_memory = std::make_shared<dnnl::memory>(src_md, engine,
                                                   to_void_cast<T>(input_data));
       auto dst_md =
          platform::MKLDNNMemDesc({dst_tz}, dst_dt, MKLDNNMemoryFormat::nhwc);
-      dst_memory = std::make_shared<dnnl::memory>(dst_md, engine,
-                                                  to_void_cast<T>(output_data));
+
+      dnnl::primitive_attr attri;
+      int mask = 0;
+      attri.set_output_scales(mask, {reorder_scale});
+      if (with_shift) {
+        mkldnn::post_ops post_operations;
+        post_operations.append_sum();
+        attri.set_post_ops(post_operations);
+        uint8_t* output_data = output->mutable_data<uint8_t>(ctx.GetPlace());
+        uint8_t reorder_shift =
+            clip_to_uint8(shift_out - reorder_scale * shift_in);
+        std::memset(output_data, reorder_shift, output->numel());
+        dst_memory = std::make_shared<dnnl::memory>(
+            dst_md, engine, to_void_cast<uint8_t>(output_data));
+      } else {
+        T* output_data = output->mutable_data<T>(ctx.GetPlace());
+        dst_memory = std::make_shared<dnnl::memory>(
+            dst_md, engine, to_void_cast<T>(output_data));
+      }
 
       auto reorder_pd =
           reorder::primitive_desc(*src_memory, *dst_memory, attri);
@@ -90,7 +124,17 @@ class ReQuantOpKernel : public framework::OpKernel<T> {
     dst_memory =
         std::static_pointer_cast<dnnl::memory>(dev_ctx.GetBlob(key_dst_mem));
-    dst_memory->set_data_handle(output_data);
+    if (with_shift) {
+      uint8_t* output_data = output->mutable_data<uint8_t>(ctx.GetPlace());
+      uint8_t reorder_shift =
+          clip_to_uint8(shift_out - reorder_scale * shift_in);
+      std::memset(output_data, reorder_shift, output->numel());
+      dst_memory->set_data_handle(output_data);
+    } else {
+      T* output_data = output->mutable_data<T>(ctx.GetPlace());
+      dst_memory->set_data_handle(output_data);
+    }
   }
 
   dnnl::stream astream(engine);
...
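Requantize combines both operations: with `ratio = Scale_out / Scale_in`, it computes `x * ratio + (Shift_out - ratio * Shift_in)`, clipping the combined shift to the uint8 range via the `clip_to_uint8` helper above. A NumPy sketch of the shifted (u8-output) path, mirroring the math in the new tests below (the function name is illustrative):

import numpy as np

def requantize_with_shift(x, scale_in, shift_in, scale_out, shift_out):
    # Dequantize (x - shift_in)/scale_in, then requantize * scale_out +
    # shift_out; folded into one reorder this is x * ratio + new_shift,
    # where new_shift is memset into the buffer and added by the sum post-op.
    ratio = scale_out / scale_in
    new_shift = np.clip(np.rint(shift_out - ratio * shift_in), 0, 255)
    return np.clip(np.rint(x.astype(np.float32) * ratio + new_shift),
                   0, 255).astype(np.uint8)

x = np.array([0, 128, 255], dtype=np.uint8)  # u8 data with shift_in = 128
print(requantize_with_shift(x, 127.0, 128.0, 60.0, 128.0))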
@@ -31,12 +31,16 @@ framework::OpKernelType QuantOp::GetExpectedKernelType(
 }
 
 void QuantOpMaker::Make() {
-  AddInput("Input", "input data");
-  AddOutput("Output", "output data");
+  AddInput("Input", "Input data");
+  AddOutput("Output", "Output data");
   AddAttr<bool>("is_negative_input",
                 "(bool, default false) Only used in mkldnn INT8 kernel")
       .SetDefault(false);
-  AddAttr<float>("Scale", "scale data").SetDefault({1.0f});
+  AddAttr<float>("Scale", "Scale data").SetDefault({1.0f});
+  AddAttr<float>(
+      "Shift",
+      "Shift data. When Shift is non-zero, data is quantized to unsigned int8.")
+      .SetDefault({0.0f});
   AddAttr<std::string>("output_format",
                        "Convert format to NHWC or NCHW during quantization.")
       .SetDefault("NHWC");
...
@@ -31,10 +31,12 @@ framework::OpKernelType ReQuantOp::GetExpectedKernelType(
 }
 
 void ReQuantOpMaker::Make() {
-  AddInput("Input", "input data");
-  AddOutput("Output", "output data");
-  AddAttr<float>("Scale_in", "scale in data").SetDefault({1.0f});
-  AddAttr<float>("Scale_out", "scale out data").SetDefault({1.0f});
+  AddInput("Input", "Input data");
+  AddOutput("Output", "Output data");
+  AddAttr<float>("Scale_in", "Scale in data").SetDefault({1.0f});
+  AddAttr<float>("Scale_out", "Scale out data").SetDefault({1.0f});
+  AddAttr<float>("Shift_in", "Shift in data").SetDefault({1.0f});
+  AddAttr<float>("Shift_out", "Shift out data").SetDefault({1.0f});
   AddComment(
       R"DOC(This op will re-quantize data from INT8 with scale_in to INT8 with scale_out)DOC");
 }
...
@@ -22,40 +22,69 @@ from paddle.fluid.tests.unittests.op_test import OpTest, convert_float_to_uint16
 class TestDeQuantizeOp(OpTest):
     def setUp(self):
         self.op_type = 'dequantize'
-        self.scale = 2.0
-        self.input_size = [1, 1, 5, 5]  # Naive nChw16c
+        self.scale = 127.0
+        self.shift = 0.0
+        self.input_size = [1, 1, 5, 5]  # Naive nChw16c
         self.data_type = 'int8'
         self.set_scale()
+        self.set_shift()
         self.set_data_type()
+        self.set_input_size()
+
+        if self.data_type == 'uint16':
+            self.prepare_input_output_bf16()
+        else:
+            self.prepare_input_int8()
+            self.prepare_output_int8()
+
+    def prepare_input_output_bf16(self):
+        output = np.random.random(self.input_size).astype(np.float32)
+        input = convert_float_to_uint16(output)
+        self.inputs = {'Input': OpTest.np_dtype_to_fluid_dtype(input)}
+        self.outputs = {'Output': output}
 
+    def prepare_input_int8(self):
         if self.data_type == 'int8':
-            input = (np.random.randint(0, 100, self.input_size) - 50
-                     ).astype(self.data_type)
-            output = (input * (1 / self.scale)).astype('float')
-        elif self.data_type == 'uint16':
-            output = np.random.random(self.input_size).astype(np.float32)
-            input = convert_float_to_uint16(output)
+            # input data values are integers from interval [-128, 128)
+            self.input = (np.random.randint(0, 256, self.input_size) - 128
+                          ).astype(self.data_type)
         else:
-            input = (np.random.randint(0, 100,
-                                       self.input_size)).astype(self.data_type)
-            output = (input * (1 / self.scale)).astype('float')
-        self.inputs = {'Input': OpTest.np_dtype_to_fluid_dtype(input)}
+            # input data values are integers from interval [0, 256)
+            self.input = (np.random.randint(
+                0, 256, self.input_size)).astype(self.data_type)
+        self.inputs = {'Input': OpTest.np_dtype_to_fluid_dtype(self.input)}
+        self.attrs = {'Scale': self.scale, 'Shift': self.shift}
 
+    def prepare_output_int8(self):
+        output = (self.input / self.scale -
+                  (self.shift / self.scale)).astype('float')
         self.outputs = {'Output': output}
-        self.attrs = {'Scale': self.scale, }
 
     def test_check_output(self):
         # TODO(wangzhongpu): support mkldnn op in dygraph mode
         self.check_output(check_dygraph=False)
 
+    def check_raise_error(self, msg):
+        try:
+            self.check_output()
+        except Exception as e:
+            if msg in str(e):
+                raise AttributeError
+            else:
+                print(e)
+
     def set_scale(self):
         pass
 
+    def set_shift(self):
+        pass
+
     def set_data_type(self):
         pass
 
+    def set_input_size(self):
+        pass
+
 
 class TestDeQuantizeOp1(TestDeQuantizeOp):
     def set_scale(self):
@@ -81,5 +110,95 @@ class TestDeQuantizeOpBf16(TestDeQuantizeOp):
         self.data_type = 'uint16'
 
 
+class TestDeQuantizeOp_ZeroScale(TestDeQuantizeOp):
+    def set_scale(self):
+        self.scale = 0.0
+
+    def prepare_output_int8(self):
+        self.output = np.zeros(self.input_size)
+        self.outputs = {'Output': self.output}
+
+    def test_check_output(self):
+        self.assertRaises(AttributeError, self.check_raise_error,
+                          'Dequantization scale cannot be 0.0')
+
+
+# 2-dim input
+# P - positive input, with shift
+class TestDeQuantizeOpShift_2_P(TestDeQuantizeOp):
+    def set_data_type(self):
+        self.data_type = 'uint8'
+
+    def set_scale(self):
+        self.scale = 255.0
+
+    def set_shift(self):
+        self.shift = 128.0
+
+    def set_input_size(self):
+        self.input_size = [2, 3]
+
+
+# 2-dim input
+# N - negative input, with shift
+class TestDeQuantizeOpShift_2_N(TestDeQuantizeOpShift_2_P):
+    def set_data_type(self):
+        self.data_type = 'int8'
+
+    def set_scale(self):
+        self.scale = 127.0
+
+    def set_shift(self):
+        self.shift = 10.0
+
+    def set_input_size(self):
+        self.input_size = [2, 3]
+
+
+# 3-dim input
+class TestDeQuantizeOpShift_3_P(TestDeQuantizeOpShift_2_P):
+    def set_input_size(self):
+        self.input_size = [2, 3, 4]
+
+
+class TestDeQuantizeOpShift_3_N(TestDeQuantizeOpShift_2_N):
+    def set_input_size(self):
+        self.input_size = [2, 3, 4]
+
+
+# 4-dim input
+class TestDeQuantizeOpShift_4_P(TestDeQuantizeOpShift_2_P):
+    def set_input_size(self):
+        self.input_size = [2, 3, 4, 5]
+
+
+class TestDeQuantizeOpShift_4_N(TestDeQuantizeOpShift_2_N):
+    def set_input_size(self):
+        self.input_size = [2, 3, 4, 5]
+
+
+class TestDeQuantizeOp_NegativeShift(TestDeQuantizeOp):
+    def set_shift(self):
+        self.shift = -10.0
+
+    def prepare_output_int8(self):
+        self.output = np.zeros(self.input_size)
+        self.outputs = {'Output': self.output}
+
+    def test_check_output(self):
+        self.assertRaises(AttributeError, self.check_raise_error,
+                          'Dequantization shift must be nonnegative.')
+
+
+class TestDeQuantizeOp_TooBigShift(TestDeQuantizeOp_NegativeShift):
+    def set_shift(self):
+        self.shift = 300.0
+
+    def test_check_output(self):
+        self.assertRaises(
+            AttributeError, self.check_raise_error,
+            'Dequantization shift must be less than or equal to 255.')
+
 
 if __name__ == '__main__':
     unittest.main()
@@ -22,44 +22,75 @@ from paddle.fluid.tests.unittests.op_test import OpTest
 class TestQuantizeOp(OpTest):
     def setUp(self):
         self.op_type = 'quantize'
-        self.scale = 2.0
-        self.input_size = [1, 1, 5, 5]  # Naive nChw16c
+        self.scale = 255.0
+        self.shift = 0.0
+        self.input_size = [1, 1, 5, 5]  # Naive nChw16c
         self.is_negative = False
+        self.output_format = 'NCHW'
         self.set_scale()
+        self.set_shift()
         self.set_is_negative()
+        self.set_input_size()
+        self.set_output_format()
+        self.prepare_input()
+        self.prepare_output()
 
+    def prepare_input(self):
         if self.is_negative:
-            input = (100 * np.random.random_sample(self.input_size) - 50
-                     ).astype('float32')
-            output = np.round(input * self.scale).astype('int8')
+            # input data values are from interval [-1.0, 1.0)
+            self.input = (2 * np.random.random_sample(self.input_size) - 1
+                          ).astype('float32')
         else:
-            input = (100 *
-                     np.random.random_sample(self.input_size)).astype('float32')
-            output = np.round(input * self.scale).astype('uint8')
-        self.inputs = {'Input': OpTest.np_dtype_to_fluid_dtype(input)}
-        self.outputs = {'Output': output}
+            # input data values are from interval [0.0, 1.0)
+            self.input = (
+                np.random.random_sample(self.input_size)).astype('float32')
 
+        self.inputs = {'Input': OpTest.np_dtype_to_fluid_dtype(self.input)}
         self.attrs = {
             'Scale': self.scale,
-            'is_negative_input': self.is_negative
+            'Shift': self.shift,
+            'is_negative_input': self.is_negative,
+            'output_format': self.output_format
         }
 
+    def prepare_output(self):
+        input_data_type = 'int8' if self.is_negative else 'uint8'
+        output = np.rint(self.input * self.scale + self.shift).astype(
+            input_data_type)
+        self.outputs = {'Output': output}
+
     def test_check_output(self):
         # TODO(wangzhongpu): support mkldnn op in dygraph mode
         self.check_output(check_dygraph=False)
 
+    def check_raise_error(self, msg):
+        try:
+            self.check_output()
+        except Exception as e:
+            if msg in str(e):
+                raise AttributeError
+            else:
+                print(e)
+
     def set_scale(self):
         pass
 
+    def set_shift(self):
+        pass
+
     def set_is_negative(self):
         pass
 
+    def set_input_size(self):
+        pass
+
+    def set_output_format(self):
+        pass
+
 
 class TestQuantizeOp1(TestQuantizeOp):
     def set_scale(self):
-        self.scale = 1.5
+        self.scale = 127.0
 
     def set_is_negative(self):
         self.is_negative = True
@@ -67,11 +98,137 @@ class TestQuantizeOp1(TestQuantizeOp):
 class TestQuantizeOp2(TestQuantizeOp):
     def set_scale(self):
-        self.scale = 0.1
+        self.scale = 255.0
 
     def set_is_negative(self):
         self.is_negative = False
 
 
+class TestQuantizeOp_ZeroScale(TestQuantizeOp):
+    def set_scale(self):
+        self.scale = 0.0
+
+    def prepare_output(self):
+        self.output = np.zeros(self.input_size)
+        self.outputs = {'Output': self.output}
+
+    def test_check_output(self):
+        self.assertRaises(AttributeError, self.check_raise_error,
+                          'Quantization scale cannot be 0.0')
+
+
+# 2-dim input
+# P - positive input
+class TestQuantizeOpShift_NCHW_2_P(TestQuantizeOp):
+    def set_output_format(self):
+        self.output_format = 'NCHW'
+
+    def set_is_negative(self):
+        self.is_negative = False
+
+    def set_scale(self):
+        self.scale = 255.0
+
+    def set_shift(self):
+        self.shift = 0.0
+
+    def set_input_size(self):
+        self.input_size = [2, 3]
+
+
+# 2-dim input
+# N - negative input
+class TestQuantizeOpShift_NCHW_2_N(TestQuantizeOpShift_NCHW_2_P):
+    def set_is_negative(self):
+        self.is_negative = True
+
+    def set_scale(self):
+        self.scale = 127.0
+
+    def set_shift(self):
+        self.shift = 128.0
+
+
+class TestQuantizeOpShift_NHWC_2_P(TestQuantizeOpShift_NCHW_2_P):
+    def set_output_format(self):
+        self.output_format = 'NHWC'
+
+
+class TestQuantizeOpShift_NHWC_2_N(TestQuantizeOpShift_NCHW_2_N):
+    def set_output_format(self):
+        self.output_format = 'NHWC'
+
+
+# 3-dim input
+class TestQuantizeOpShift_NCHW_3_P(TestQuantizeOpShift_NCHW_2_P):
+    def set_input_size(self):
+        self.input_size = [2, 3, 4]
+
+
+class TestQuantizeOpShift_NCHW_3_N(TestQuantizeOpShift_NCHW_2_N):
+    def set_input_size(self):
+        self.input_size = [2, 3, 4]
+
+
+class TestQuantizeOpShift_NHWC_3_P(TestQuantizeOpShift_NCHW_3_P):
+    def set_output_format(self):
+        self.output_format = 'NHWC'
+
+
+class TestQuantizeOpShift_NHWC_3_N(TestQuantizeOpShift_NCHW_3_N):
+    def set_output_format(self):
+        self.output_format = 'NHWC'
+
+
+# 4-dim input
+class TestQuantizeOpShift_NCHW_4_P(TestQuantizeOpShift_NCHW_2_P):
+    def set_input_size(self):
+        self.input_size = [2, 3, 4, 5]
+
+
+class TestQuantizeOpShift_NCHW_4_N(TestQuantizeOpShift_NCHW_2_N):
+    def set_input_size(self):
+        self.input_size = [2, 3, 4, 5]
+
+
+class TestQuantizeOpShift_NHWC_4_P(TestQuantizeOpShift_NCHW_4_P):
+    def set_output_format(self):
+        self.output_format = 'NHWC'
+
+
+class TestQuantizeOpShift_NHWC_4_N(TestQuantizeOpShift_NCHW_4_N):
+    def set_output_format(self):
+        self.output_format = 'NHWC'
+
+
+class TestQuantizeOp_NegativeShift(TestQuantizeOp):
+    def set_is_negative(self):
+        self.is_negative = False
+
+    def set_scale(self):
+        self.scale = 100.0
+
+    def set_shift(self):
+        self.shift = -10.0
+
+    def prepare_output(self):
+        self.output = np.zeros(self.input_size)
+        self.outputs = {'Output': self.output}
+
+    def test_check_output(self):
+        self.assertRaises(AttributeError, self.check_raise_error,
+                          'Quantization shift must be nonnegative.')
+
+
+class TestQuantizeOp_TooBigShift(TestQuantizeOp_NegativeShift):
+    def set_shift(self):
+        self.shift = 300.0
+
+    def test_check_output(self):
+        self.assertRaises(
+            AttributeError, self.check_raise_error,
+            'Quantization shift must be less than or equal to 255.')
+
 
 if __name__ == '__main__':
     unittest.main()
@@ -25,88 +25,271 @@ from mkldnn_op_test import format_reorder
 class TestReQuantizeOp(OpTest):
     def setUp(self):
         self.op_type = 'requantize'
-        self.scale_in = 2.0
-        self.scale_out = 1.5
+        self.scale_in = 127.0
+        self.shift_in = 0.0
+        self.scale_out = 100.0
+        self.shift_out = 0.0
         self.input_size = [1, 1, 10, 10]
-        self.data_type = 'int8'
-        self.set_scale()
-        self.set_data_type()
-        self.prepare_inputs()
-
-    def prepare_inputs(self):
-        scale_shift = self.scale_out / self.scale_in
+        self.input_data_type = 'int8'
+        self.set_scales()
+        self.set_shifts()
+        self.set_input_data_type()
+        self.prepare_input()
+        self.prepare_output()
 
-        if self.data_type == 'int8':
-            self.input = (np.random.randint(0, 100, self.input_size) - 50
-                          ).astype(self.data_type)
-            output_tmp = np.round(self.input.astype('float32') *
-                                  scale_shift).astype('int8')
+    def prepare_input(self):
+        if self.input_data_type == 'int8':
+            # input data values are integers from interval [-128, 128)
+            self.input = (np.random.randint(0, 256, self.input_size) - 128
+                          ).astype(self.input_data_type)
         else:
-            self.input = (np.random.randint(
-                0, 100, self.input_size)).astype(self.data_type)
-            output_tmp = np.round(self.input.astype('float32') *
-                                  scale_shift).astype('uint8')
-
-        self.output = format_reorder(output_tmp, self.input_size)
+            # input data values are integers from interval [0, 256)
+            self.input = (np.random.randint(
+                0, 256, self.input_size)).astype(self.input_data_type)
 
         self.inputs = {'Input': OpTest.np_dtype_to_fluid_dtype(self.input)}
+        self.attrs = {
+            'Scale_in': self.scale_in,
+            'Scale_out': self.scale_out,
+            'Shift_in': self.shift_in,
+            'Shift_out': self.shift_out
+        }
 
-        self.outputs = {'Output': self.output}
+    def prepare_output(self):
+        scale_ratio = self.scale_out / self.scale_in
+        with_shift = (self.shift_in != 0.0 or self.shift_out != 0.0)
 
-        self.attrs = {'Scale_in': self.scale_in, 'Scale_out': self.scale_out}
+        if with_shift or self.input_data_type == 'uint8':
+            dst_type = 'uint8'
+            type_min = 0
+            type_max = 255
+            new_shift = np.clip(
+                np.rint(self.shift_out - scale_ratio * self.shift_in), type_min,
+                type_max)
+        else:
+            dst_type = 'int8'
+            type_min = -128
+            type_max = 127
+            new_shift = 0
+
+        output_tmp = np.clip(
+            np.rint(self.input.astype('float32') * scale_ratio + new_shift),
+            type_min, type_max).astype(dst_type)
+
+        self.output = format_reorder(output_tmp, self.input_size)
+        self.outputs = {'Output': self.output}
 
     def test_check_output(self):
         # TODO(wangzhongpu): support mkldnn op in dygraph mode
+        self.assertTrue(self.input_data_type == 'uint8' or self.shift_in == 0.0,
+                        'Input data must be unsigned if it has nonzero shift.')
         self.check_output(check_dygraph=False)
 
-    def set_scale(self):
+    def check_raise_error(self, msg):
+        try:
+            self.check_output()
+        except Exception as e:
+            if msg in str(e):
+                raise AttributeError
+            else:
+                print(e)
+
+    def set_scales(self):
         pass
 
-    def set_data_type(OpTest):
+    def set_shifts(self):
         pass
 
+    def set_input_data_type(self):
+        pass
+
 
-#--------------------test requantize with s8 input--------------------
+# ---------------test requantize with s8 input, no shift--------------------
 
 
-class TestReQuantizeOp1(TestReQuantizeOp):
-    def set_scale(self):
-        self.scale_in = 1.5
-        self.scale_out = 1.5
+class TestReQuantizeOp_S8_SameScales(TestReQuantizeOp):
+    def set_scales(self):
+        self.scale_in = 127.0
+        self.scale_out = 127.0
 
 
-class TestReQuantizeOp2(TestReQuantizeOp):
-    def set_scale(self):
-        self.scale_in = 0.1
-        self.scale_out = 0.2
+class TestReQuantizeOp_S8_DifferentScales_1(TestReQuantizeOp):
+    def set_scales(self):
+        self.scale_in = 127.0
+        self.scale_out = 100.0
 
 
-#--------------------test requantize with u8 input--------------------
+class TestReQuantizeOp_S8_DifferentScales_2(TestReQuantizeOp):
+    def set_scales(self):
+        self.scale_in = 100.0
+        self.scale_out = 127.0
 
 
-class TestReQuantizeOp3(TestReQuantizeOp1):
-    def set_data_type(self):
-        self.data_type = 'uint8'
+class TestReQuantizeOp_S8_ZeroInputScale(TestReQuantizeOp):
+    def set_scales(self):
+        self.scale_in = 0.0
+        self.scale_out = 127.0
+
+    def prepare_output(self):
+        self.output = np.zeros(self.input_size)
+        self.outputs = {'Output': self.output}
+
+    def test_check_output(self):
+        self.assertRaises(AttributeError, self.check_raise_error,
+                          'Scale of input cannot be 0.0')
 
 
-class TestReQuantizeOp4(TestReQuantizeOp2):
-    def set_data_type(self):
-        self.data_type = 'uint8'
+class TestReQuantizeOp_S8_ZeroOutputScale(TestReQuantizeOp):
+    def set_scales(self):
+        self.scale_in = 127.0
+        self.scale_out = 0.0
+
+    def prepare_output(self):
+        self.output = np.zeros(self.input_size)
+        self.outputs = {'Output': self.output}
+
+    def test_check_output(self):
+        self.assertRaises(AttributeError, self.check_raise_error,
+                          'Scale of output cannot be 0.0')
+
+
+# ---------------test requantize with u8 input, no shift--------------------
+
+
+class TestReQuantizeOp_U8_SameScales(TestReQuantizeOp_S8_SameScales):
+    def set_input_data_type(self):
+        self.input_data_type = 'uint8'
+
+
+class TestReQuantizeOp_U8_DifferentScales_1(
+        TestReQuantizeOp_S8_DifferentScales_1):
+    def set_input_data_type(self):
+        self.input_data_type = 'uint8'
+
+
+class TestReQuantizeOp_U8_DifferentScales_2(
+        TestReQuantizeOp_S8_DifferentScales_2):
+    def set_input_data_type(self):
+        self.input_data_type = 'uint8'
+
+
+# ---------------test requantize with s8 input, with shift------------------
+
+
+class TestReQuantizeOp_S8_WithShift(TestReQuantizeOp):
+    def set_scales(self):
+        self.scale_in = 60.0
+        self.scale_out = 127.0
+
+    def set_shifts(self):
+        self.shift_in = 128.0
+        self.shift_out = 128.0
+
+    def test_check_output(self):
+        self.assertRaises(
+            AttributeError, self.check_raise_error,
+            'Requantize does not support nonzero shift for signed input.')
+
+
+class TestReQuantizeOp_S8_WithOutputShift(TestReQuantizeOp):
+    def set_scales(self):
+        self.scale_in = 127.0
+        self.scale_out = 60.0
+
+    def set_shifts(self):
+        self.shift_in = 0.0
+        self.shift_out = 120.0
+
+
+# ---------------test requantize with u8 input, with shift------------------
+
+
+class TestReQuantizeOp_U8_SameScales_SameShift(TestReQuantizeOp_U8_SameScales):
+    def set_shifts(self):
+        self.shift_in = 128.0
+        self.shift_out = 128.0
+
+
+class TestReQuantizeOp_U8_SameScales_DifferentShift_1(
+        TestReQuantizeOp_U8_SameScales):
+    def set_shifts(self):
+        self.shift_in = 60.0
+        self.shift_out = 128.0
+
+
+class TestReQuantizeOp_U8_SameScales_DifferentShift_2(
+        TestReQuantizeOp_U8_SameScales):
+    def set_shifts(self):
+        self.shift_in = 128.0
+        self.shift_out = 60.0
+
+
+class TestReQuantizeOp_U8_DifferentScales_1_SameShift(
+        TestReQuantizeOp_U8_DifferentScales_1):
+    def set_shifts(self):
+        self.shift_in = 128.0
+        self.shift_out = 128.0
+
+
+class TestReQuantizeOp_U8_DifferentScales_2_SameShift(
+        TestReQuantizeOp_U8_DifferentScales_2):
+    def set_shifts(self):
+        self.shift_in = 128.0
+        self.shift_out = 128.0
+
+
+class TestReQuantizeOp_U8_DifferentScales_1_DifferentShift_1(
+        TestReQuantizeOp_U8_DifferentScales_1):
+    def set_shifts(self):
+        self.shift_in = 128.0
+        self.shift_out = 60.0
+
+
+class TestReQuantizeOp_U8_DifferentScales_2_DifferentShift_1(
+        TestReQuantizeOp_U8_DifferentScales_2):
+    def set_shifts(self):
+        self.shift_in = 128.0
+        self.shift_out = 60.0
+
+
+class TestReQuantizeOp_U8_DifferentScales_1_DifferentShift_2(
+        TestReQuantizeOp_U8_DifferentScales_1):
+    def set_shifts(self):
+        self.shift_in = 60.0
+        self.shift_out = 128.0
+
+
+class TestReQuantizeOp_U8_DifferentScales_2_DifferentShift_2(
+        TestReQuantizeOp_U8_DifferentScales_2):
+    def set_shifts(self):
+        self.shift_in = 60.0
+        self.shift_out = 128.0
 
 
-#-------------------test reused requantize op---------------------------
+# ---------------test reused requantize op, no shift------------------------
 
 
 class TestReQuantizeOpReused(TestReQuantizeOp):
     def setUp(self):
-        self.input_size = [1, 1, 10, 10]
-        self.data_type = 'int8'
-        self.set_scale()
-        self.prepare_inputs()
+        # self.input_size = [1, 1, 10, 10]
+        self.input_size = [1, 1, 2, 2]
+        self.input_data_type = 'int8'
+        self.set_scales()
+        self.set_shifts()
+        self.set_input_data_type()
+        self.prepare_input()
+        self.prepare_output()
 
-    def set_scale(self):
-        self.scale_in = 0.1
-        self.scale_out = 0.2
+    def set_scales(self):
+        self.scale_in = 100.0
+        self.scale_out = 120.0
+
+    def set_shifts(self):
+        self.shift_in = 0.0
+        self.shift_out = 0.0
+
+    def set_input_data_type(self):
+        pass
 
     def test_check_output(self):
         variables = {
@@ -119,12 +302,16 @@ class TestReQuantizeOpReused(TestReQuantizeOp):
         for name in variables:
             block.create_var(
                 name=name, dtype="int8", shape=variables[name].shape)
-        requant_op = block.append_op(
+        block.append_op(
            type="requantize",
            inputs={'Input': block.var('input'), },
            outputs={"Output": block.var('output')},
-            attrs={'Scale_in': self.scale_in,
-                   'Scale_out': self.scale_out})
+            attrs={
+                'Scale_in': self.scale_in,
+                'Scale_out': self.scale_out,
+                'Shift_in': self.shift_in,
+                'Shift_out': self.shift_out
+            })
         place = core.CPUPlace()
         exe = fluid.Executor(place)
         for i in range(2):
@@ -137,5 +324,17 @@ class TestReQuantizeOpReused(TestReQuantizeOp):
             variables['output'], out[0], atol=1e-4), 'output')
 
 
+# ---------------test reused requantize op, with shift----------------------
+
+
+class TestReQuantizeOpReused_WithShift(TestReQuantizeOpReused):
+    def set_input_data_type(self):
+        self.input_data_type = 'uint8'
+
+    def set_shifts(self):
+        self.shift_in = 128
+        self.shift_out = 60
+
 
 if __name__ == '__main__':
     unittest.main()