diff --git a/paddle/fluid/operators/mkldnn/activation_mkldnn_op.cc b/paddle/fluid/operators/mkldnn/activation_mkldnn_op.cc
index ecee094de346e6ae231ef31ddeec16b565648c0f..393247644c2e88aa9295d24933bc0048e116e31a 100644
--- a/paddle/fluid/operators/mkldnn/activation_mkldnn_op.cc
+++ b/paddle/fluid/operators/mkldnn/activation_mkldnn_op.cc
@@ -44,14 +44,6 @@ class MKLDNNActivationKernel
     : public framework::OpKernel<typename Functor::ELEMENT_TYPE> {
  public:
   void Compute(const framework::ExecutionContext &ctx) const override {
-    const auto *x = ctx.Input<Tensor>("X");
-    PADDLE_ENFORCE_EQ(
-        x->layout(), DataLayout::kMKLDNN,
-        platform::errors::InvalidArgument("Wrong layout set for X tensor"));
-    PADDLE_ENFORCE_NE(
-        x->format(), MKLDNNMemoryFormat::undef,
-        platform::errors::InvalidArgument("Wrong format set for X tensor"));
-
     Functor functor;
     functor(ctx);
   }
@@ -62,14 +54,6 @@ class MKLDNNActivationGradKernel
     : public framework::OpKernel<typename Functor::ELEMENT_TYPE> {
  public:
   void Compute(const framework::ExecutionContext &ctx) const override {
-    const auto *diff_y = ctx.Input<Tensor>(framework::GradVarName("Out"));
-    PADDLE_ENFORCE_EQ(diff_y->layout(), DataLayout::kMKLDNN,
-                      platform::errors::InvalidArgument(
-                          "Wrong layout set for Input OutGrad tensor"));
-    PADDLE_ENFORCE_NE(diff_y->format(), MKLDNNMemoryFormat::undef,
-                      platform::errors::InvalidArgument(
-                          "Wrong format set for Input OutGrad tensor"));
-
     Functor functor;
     functor(ctx);
   }
diff --git a/paddle/fluid/operators/mkldnn/dequantize_mkldnn_op.cc b/paddle/fluid/operators/mkldnn/dequantize_mkldnn_op.cc
index b10572edf6f273d56f82f106c4e5120db2b26ab7..747e4603d7fe7774b039d60200d5db7cdf0952d6 100644
--- a/paddle/fluid/operators/mkldnn/dequantize_mkldnn_op.cc
+++ b/paddle/fluid/operators/mkldnn/dequantize_mkldnn_op.cc
@@ -36,100 +36,58 @@ template <typename T>
 class DeQuantOpKernel : public framework::OpKernel<T> {
  public:
   void Compute(const framework::ExecutionContext& ctx) const override {
-    auto* input = ctx.Input<framework::Tensor>("Input");
-    auto scale_data = ctx.Attr<float>("Scale");
-    auto scale_shift = ctx.Attr<float>("Shift");
-    bool with_shift = scale_shift != 0.0f;
-    auto* output = ctx.Output<framework::Tensor>("Output");
-
-    PADDLE_ENFORCE_NE(scale_data, 0.0f,
-                      platform::errors::InvalidArgument(
-                          "Dequantization scale cannot be 0.0"));
-    PADDLE_ENFORCE_GE(scale_shift, 0,
-                      platform::errors::Unimplemented(
-                          "Dequantization shift must be nonnegative."));
-    PADDLE_ENFORCE_LE(
-        scale_shift, 255,
-        platform::errors::Unimplemented(
-            "Dequantization shift must be less than or equal to 255."));
+    auto* x = ctx.Input<framework::Tensor>("Input");
+    const auto quantization_scale = ctx.Attr<float>("Scale");
+    const auto quantization_shift = ctx.Attr<float>("Shift");
+    const bool with_shift = quantization_shift != 0.0f;
+    auto* out = ctx.Output<framework::Tensor>("Output");
+
+    PADDLE_ENFORCE(quantization_scale != 0.0f,
+                   platform::errors::InvalidArgument(
+                       "Dequantization scale must be different from 0.0f"));
+
+    PADDLE_ENFORCE(
+        quantization_shift <= 255 && quantization_shift >= 0,
+        platform::errors::InvalidArgument(
+            "Dequantization shift must be less than or equal to 255 and "
+            "greater than or equal to 0, but got %f", quantization_shift));
 
     auto& dev_ctx =
         ctx.template device_context<platform::MKLDNNDeviceContext>();
-    const auto& engine = dev_ctx.GetEngine();
-
-    const T* input_data = input->data<T>();
-    float* output_data = output->mutable_data<float>(ctx.GetPlace());
-
-    float reorder_shift = -scale_shift / scale_data;
-
-    auto src_tz = phi::vectorize(input->dims());
-    auto dst_tz = phi::vectorize(output->dims());
-    dnnl::memory::data_type src_dt = paddle::framework::ToMKLDNNDataType(
-        framework::TransToProtoVarType(input->dtype()));
-    MKLDNNMemoryFormat src_fmt = input->format();
-
-    std::string key =
-        platform::CreateKey(dev_ctx, src_dt, src_tz, ctx.OutputName("Output"));
-    key = platform::ExtendKeyWithThreadInfoIfNeeded(dev_ctx, key);
-
-    const std::string key_prim = key + "@r";
-    const std::string key_src_mem = key + "@s";
-    const std::string key_dst_mem = key + "@d";
-
-    std::shared_ptr<dnnl::memory> src_memory;
-    std::shared_ptr<dnnl::memory> dst_memory;
-    std::shared_ptr<reorder> reorder_p;
-    reorder_p = std::static_pointer_cast<reorder>(dev_ctx.GetBlob(key_prim));
-
-    if (reorder_p == nullptr) {
-      dnnl::primitive_attr attri;
-      int mask = 0;
-      float reorder_scale = 1. / scale_data;
-      attri.set_output_scales(mask, {reorder_scale});
-
-      if (with_shift) {
-        dnnl::post_ops post_operations;
-        post_operations.append_sum();
-        attri.set_post_ops(post_operations);
-        std::fill(output_data, output_data + output->numel(), reorder_shift);
-      }
-
-      auto src_md = platform::MKLDNNMemDesc({src_tz}, src_dt, src_fmt);
-      src_memory = std::make_shared<dnnl::memory>(src_md, engine,
-                                                  to_void_cast<T>(input_data));
-
-      auto dst_md =
-          platform::MKLDNNMemDesc({dst_tz}, memory::data_type::f32,
-                                  platform::MKLDNNFormatForSize(
-                                      dst_tz.size(), MKLDNNMemoryFormat::nchw));
-
-      dst_memory = std::make_shared<dnnl::memory>(
-          dst_md, engine, to_void_cast<float>(output_data));
-
-      auto reorder_pd = std::shared_ptr<reorder::primitive_desc>(
-          new reorder::primitive_desc(*src_memory, *dst_memory, attri));
-      reorder_p = std::shared_ptr<reorder>(new reorder(*reorder_pd));
-      dev_ctx.SetBlob(key_prim, reorder_p);
-      dev_ctx.SetBlob(key_src_mem, src_memory);
-      dev_ctx.SetBlob(key_dst_mem, dst_memory);
-    } else {
-      src_memory =
-          std::static_pointer_cast<dnnl::memory>(dev_ctx.GetBlob(key_src_mem));
-      src_memory->set_data_handle(to_void_cast<T>(input_data));
-
-      dst_memory =
-          std::static_pointer_cast<dnnl::memory>(dev_ctx.GetBlob(key_dst_mem));
-      if (with_shift)
-        std::fill(output_data, output_data + output->numel(), reorder_shift);
-      dst_memory->set_data_handle(output->mutable_data<float>(ctx.GetPlace()));
+
+    auto x_tz = phi::vectorize(x->dims());
+    auto x_paddle_dtype = framework::TransToProtoVarType(x->dtype());
+    auto out_paddle_dtype = framework::TransToProtoVarType(out->dtype());
+
+    dnnl::primitive_attr attrs;
+    static constexpr int32_t mask = 0;  // same shift and scale for whole tensor
+
+    const float reorder_scale = 1. / quantization_scale;
+    attrs.set_output_scales(mask, {reorder_scale});
+
+    if (with_shift) {
+      attrs.set_zero_points(DNNL_ARG_SRC, mask,
+                            {static_cast<int32_t>(quantization_shift)});
     }
 
+    platform::ReorderMKLDNNHandler reorder_handler(
+        x_tz, x_paddle_dtype, framework::ToMKLDNNDataType(x_paddle_dtype),
+        out_paddle_dtype, framework::ToMKLDNNDataType(out_paddle_dtype),
+        dev_ctx.GetEngine());
+
+    auto reorder_src_memory_p = reorder_handler.AcquireSrcMemory(
+        x->mem_desc(), platform::to_void_cast(x->data<T>()));
+    auto reorder_dst_memory_p = reorder_handler.AcquireDstMemory(
+        out, x->mem_desc(), dev_ctx.GetPlace());
+
+    auto reorder_p = reorder_handler.AcquireReorder(
+        reorder_dst_memory_p, reorder_src_memory_p, attrs);
+
     auto& astream = platform::MKLDNNDeviceContext::tls().get_stream();
-    reorder_p->execute(astream, *src_memory, *dst_memory);
+    reorder_p->execute(astream, *reorder_src_memory_p, *reorder_dst_memory_p);
     astream.wait();
 
-    output->set_layout(DataLayout::kMKLDNN);
-    output->set_format(GetMKLDNNFormat(*dst_memory));
+    out->set_mem_desc(reorder_dst_memory_p->get_desc());
   }
 };
diff --git a/paddle/fluid/operators/mkldnn/quantize_mkldnn_op.cc b/paddle/fluid/operators/mkldnn/quantize_mkldnn_op.cc
index 4cae3f0c73711556ac9aeaf77a9876c89647db2f..8cbe46bee481abd896683d82107efa273127dedc 100644
--- a/paddle/fluid/operators/mkldnn/quantize_mkldnn_op.cc
+++ b/paddle/fluid/operators/mkldnn/quantize_mkldnn_op.cc
@@ -13,6 +13,7 @@ See the License for the specific language governing permissions and
 limitations under the License. */
 
 #include "dnnl.hpp"
+#include "paddle/fluid/framework/data_layout_transform.h"
 #include "paddle/fluid/framework/tensor.h"
 #include "paddle/fluid/operators/quantize_op.h"
 #include "paddle/fluid/platform/mkldnn_helper.h"
@@ -34,83 +35,73 @@ template <typename T>
 class QuantOpKernel : public framework::OpKernel<T> {
  public:
   void Compute(const framework::ExecutionContext& ctx) const override {
-    auto* input = ctx.Input<framework::Tensor>("Input");
-    auto scale_data = ctx.Attr<float>("Scale");
-    auto scale_shift = ctx.Attr<float>("Shift");
-    bool with_shift = scale_shift != 0.0f;
-    auto* output = ctx.Output<framework::Tensor>("Output");
-
-    PADDLE_ENFORCE_NE(
-        scale_data, 0.0f,
-        platform::errors::InvalidArgument("Quantization scale cannot be 0.0"));
-    PADDLE_ENFORCE_GE(scale_shift, 0,
-                      platform::errors::Unimplemented(
-                          "Quantization shift must be nonnegative."));
-    PADDLE_ENFORCE_LE(
-        scale_shift, 255,
-        platform::errors::Unimplemented(
-            "Quantization shift must be less than or equal to 255."));
+    auto* x = ctx.Input<framework::Tensor>("Input");
+    auto* out = ctx.Output<framework::Tensor>("Output");
+
+    const auto quantization_scale = ctx.Attr<float>("Scale");
+    const auto quantization_shift = ctx.Attr<float>("Shift");
+    const bool with_scale = quantization_scale != 1.0f;
+    const bool with_shift = quantization_shift != 0.0f;
+
+    PADDLE_ENFORCE_NE(quantization_scale, 0.0f,
+                      platform::errors::InvalidArgument(
+                          "Quantization scale must be different from 0.0f"));
+    PADDLE_ENFORCE(
+        quantization_shift <= 255 && quantization_shift >= 0,
+        platform::errors::InvalidArgument(
+            "Quantization shift must be less than or equal to 255 and "
+            "greater than or equal to 0, but got %f", quantization_shift));
 
     auto& dev_ctx =
         ctx.template device_context<platform::MKLDNNDeviceContext>();
-    const auto& engine = dev_ctx.GetEngine();
 
-    std::vector<primitive> pipeline;
-    auto src_tz = phi::vectorize(input->dims());
-    auto dst_tz = phi::vectorize(output->dims());
+    auto x_tz = phi::vectorize(x->dims());
 
-    const T* input_data = input->data<T>();
+    const bool is_negative_input = ctx.Attr<bool>("is_negative_input");
+    const bool bfloat16 = ctx.Attr<bool>("bfloat16");
 
-    bool is_negative_input = ctx.Attr<bool>("is_negative_input");
-    bool bfloat16 = ctx.Attr<bool>("bfloat16");
+    dnnl::primitive_attr attrs;
+    static constexpr int32_t mask = 0;
 
-    // TODO(jczaja): Refactor with Acquire API
-    std::shared_ptr<dnnl::memory> src_memory;
-    std::shared_ptr<dnnl::memory> dst_memory;
-    std::shared_ptr<reorder> reorder_p;
-
-    std::string out_layout = ctx.Attr<std::string>("output_format");
-    MKLDNNMemoryFormat out_format =
-        platform::data_format_to_memory_format(out_layout);
-    dnnl::primitive_attr attri;
-    int mask = 0;
-    attri.set_output_scales(mask, {scale_data});
+    if (with_scale) {
+      attrs.set_output_scales(mask, {quantization_scale});
+    }
 
     if (with_shift) {
-      dnnl::post_ops post_operations;
-      post_operations.append_sum();
-      attri.set_post_ops(post_operations);
-      uint8_t* output_data = output->mutable_data<uint8_t>(ctx.GetPlace());
-      // memset casts scale_shift to unsigned char (uint8_t) internally
-      std::memset(output_data, scale_shift, output->numel());
+      attrs.set_zero_points(DNNL_ARG_DST, mask,
+                            {static_cast<int32_t>(quantization_shift)});
     }
 
-    auto src_md = platform::MKLDNNMemDesc({src_tz}, memory::data_type::f32,
-                                          input->format());
-    src_memory = std::make_shared<dnnl::memory>(src_md, engine,
-                                                to_void_cast<T>(input_data));
+    framework::proto::VarType::Type x_paddle_dtype =
+        framework::TransToProtoVarType(x->dtype());
+    framework::proto::VarType::Type out_paddle_dtype;
 
-    std::shared_ptr<dnnl::memory::desc> dst_md;
     if (bfloat16) {
-      platform::SetDstMemoryQuantized<paddle::platform::bfloat16>(
-          ctx, output, dst_tz, engine, dst_md, dst_memory, out_format);
+      out_paddle_dtype = framework::proto::VarType::BF16;
     } else if (is_negative_input && !with_shift) {
-      platform::SetDstMemoryQuantized<int8_t>(ctx, output, dst_tz, engine,
-                                              dst_md, dst_memory, out_format);
+      out_paddle_dtype = framework::proto::VarType::INT8;
     } else {
-      platform::SetDstMemoryQuantized<uint8_t>(ctx, output, dst_tz, engine,
-                                               dst_md, dst_memory, out_format);
+      out_paddle_dtype = framework::proto::VarType::UINT8;
     }
-    auto reorder_pd = std::shared_ptr<reorder::primitive_desc>(
-        new reorder::primitive_desc(*src_memory, *dst_memory, attri));
-    reorder_p = std::shared_ptr<reorder>(new reorder(*reorder_pd));
+
+    platform::ReorderMKLDNNHandler reorder_handler(
+        x_tz, x_paddle_dtype, framework::ToMKLDNNDataType(x_paddle_dtype),
+        out_paddle_dtype, framework::ToMKLDNNDataType(out_paddle_dtype),
+        dev_ctx.GetEngine());
+
+    auto reorder_src_memory_p = reorder_handler.AcquireSrcMemory(
+        x->mem_desc(), platform::to_void_cast(x->data<T>()));
+    auto reorder_dst_memory_p = reorder_handler.AcquireDstMemory(
+        out, x->mem_desc(), dev_ctx.GetPlace());
+
+    auto reorder_p = reorder_handler.AcquireReorder(
+        reorder_dst_memory_p, reorder_src_memory_p, attrs);
 
     auto& astream = platform::MKLDNNDeviceContext::tls().get_stream();
-    reorder_p->execute(astream, *src_memory, *dst_memory);
+    reorder_p->execute(astream, *reorder_src_memory_p, *reorder_dst_memory_p);
     astream.wait();
 
-    output->set_layout(DataLayout::kMKLDNN);
-    output->set_format(GetMKLDNNFormat(*dst_memory));
+    out->set_mem_desc(reorder_dst_memory_p->get_desc());
   }
 };
 
 }  // namespace operators
diff --git a/paddle/fluid/platform/mkldnn_reuse.h b/paddle/fluid/platform/mkldnn_reuse.h
index 12fa933701ef461bbd5dfb1a1d7d91437ed475c4..13b5005a30fa05aef99dd213a3c003cf7d4517be 100644
--- a/paddle/fluid/platform/mkldnn_reuse.h
+++ b/paddle/fluid/platform/mkldnn_reuse.h
@@ -1057,6 +1057,14 @@ class ReorderMKLDNNHandler {
     return std::make_shared<dnnl::reorder>(*(src_memory_p), *(dst_memory_p));
   }
 
+  std::shared_ptr<dnnl::reorder> AcquireReorder(
+      std::shared_ptr<dnnl::memory> dst_memory_p,
+      std::shared_ptr<dnnl::memory> src_memory_p,
+      const dnnl::primitive_attr& attrs) {
+    return std::make_shared<dnnl::reorder>(*(src_memory_p), *(dst_memory_p),
+                                           attrs);
+  }
+
  private:
   std::vector<int64_t> dims_;
   framework::proto::VarType::Type vtype_, vtype_dst_;
diff --git a/python/paddle/fluid/tests/unittests/mkldnn/test_dequantize_mkldnn_op.py b/python/paddle/fluid/tests/unittests/mkldnn/test_dequantize_mkldnn_op.py
index a0836c959c84b99be981987ddfa5dd745ed29221..fae52ab833b9d4c71f5a4be3622156c229e92ee6 100644
--- a/python/paddle/fluid/tests/unittests/mkldnn/test_dequantize_mkldnn_op.py
+++ b/python/paddle/fluid/tests/unittests/mkldnn/test_dequantize_mkldnn_op.py
@@ -17,6 +17,7 @@ from __future__ import print_function
 import unittest
 import numpy as np
 from paddle.fluid.tests.unittests.op_test import OpTest, convert_float_to_uint16
+import paddle
 
 
 class TestDeQuantizeOp(OpTest):
@@ -110,19 +111,6 @@ class TestDeQuantizeOpBf16(TestDeQuantizeOp):
         self.data_type = 'uint16'
 
 
-class TestDeQuantizeOp_ZeroScale(TestDeQuantizeOp):
-    def set_scale(self):
-        self.scale = 0.0
-
-    def prepare_output_int8(self):
-        self.output = np.zeros(self.input_size)
-        self.outputs = {'Output': self.output}
-
-    def test_check_output(self):
-        self.assertRaises(AttributeError, self.check_raise_error,
-                          'Dequantization scale cannot be 0.0')
-
-
 # 2-dim input
 # P - positive input, with shift
 class TestDeQuantizeOpShift_2_P(TestDeQuantizeOp):
@@ -177,28 +165,6 @@ class TestDeQuantizeOpShift_4_N(TestDeQuantizeOpShift_2_N):
         self.input_size = [2, 3, 4, 5]
 
 
-class TestDeQuantizeOp_NegativeShift(TestDeQuantizeOp):
-    def set_shift(self):
-        self.shift = -10.0
-
-    def prepare_output_int8(self):
-        self.output = np.zeros(self.input_size)
-        self.outputs = {'Output': self.output}
-
-    def test_check_output(self):
-        self.assertRaises(AttributeError, self.check_raise_error,
-                          'Dequantization shift must be nonnegative.')
-
-
-class TestDeQuantizeOp_TooBigShift(TestDeQuantizeOp_NegativeShift):
-    def set_shift(self):
-        self.shift = 300.0
-
-    def test_check_output(self):
-        self.assertRaises(
-            AttributeError, self.check_raise_error,
-            'Dequantization shift must be less than or equal to 255.')
-
-
 if __name__ == '__main__':
+    paddle.enable_static()
     unittest.main()
diff --git a/python/paddle/fluid/tests/unittests/mkldnn/test_quantize_mkldnn_op.py b/python/paddle/fluid/tests/unittests/mkldnn/test_quantize_mkldnn_op.py
index a7acc5f3f9bf327146df804949e5428c999edd12..c92d870565fbc93ee25337e2eee78e48b9b01e40 100644
--- a/python/paddle/fluid/tests/unittests/mkldnn/test_quantize_mkldnn_op.py
+++ b/python/paddle/fluid/tests/unittests/mkldnn/test_quantize_mkldnn_op.py
@@ -17,6 +17,7 @@ from __future__ import print_function
 import unittest
 import numpy as np
 from paddle.fluid.tests.unittests.op_test import OpTest
+import paddle
 
 
 class TestQuantizeOp(OpTest):
@@ -104,19 +105,6 @@ class TestQuantizeOp2(TestQuantizeOp):
         self.is_nagative = False
 
 
-class TestQuantizeOp_ZeroScale(TestQuantizeOp):
-    def set_scale(self):
-        self.scale = 0.0
-
-    def prepare_output(self):
-        self.output = np.zeros(self.input_size)
-        self.outputs = {'Output': self.output}
-
-    def test_check_output(self):
-        self.assertRaises(AttributeError, self.check_raise_error,
-                          'Quantization scale cannot be 0.0')
-
-
 # 2-dim input
 # P - positive input
 class TestQuantizeOpShift_NCHW_2_P(TestQuantizeOp):
@@ -201,34 +189,6 @@ class TestQuantizeOpShift_NHWC_4_N(TestQuantizeOpShift_NCHW_4_N):
         self.output_format = 'NHWC'
 
 
-class TestQuantizeOp_NegativeShift(TestQuantizeOp):
-    def set_is_negative(self):
-        self.is_nagative = False
-
-    def set_scale(self):
-        self.scale = 100.0
-
-    def set_shift(self):
-        self.shift = -10.0
-
-    def prepare_output(self):
-        self.output = np.zeros(self.input_size)
-        self.outputs = {'Output': self.output}
-
-    def test_check_output(self):
-        self.assertRaises(AttributeError, self.check_raise_error,
-                          'Quantization shift must be nonnegative.')
-
-
-class TestQuantizeOp_TooBigShift(TestQuantizeOp_NegativeShift):
-    def set_shift(self):
-        self.shift = 300.0
-
-    def test_check_output(self):
-        self.assertRaises(
-            AttributeError, self.check_raise_error,
-            'Quantization shift must be less than or equal to 255.')
-
-
 if __name__ == '__main__':
+    paddle.enable_static()
     unittest.main()
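
The net effect of this patch is that both kernels now express (de)quantization as a single oneDNN reorder whose primitive_attr carries the scale (set_output_scales) and the shift (set_zero_points), instead of pre-filling the destination buffer with the shift and appending a sum post-op. Below is a minimal standalone sketch of that attribute-driven reorder pattern against the plain oneDNN v2.x C++ API, outside any Paddle scaffolding; the tensor shape and the scale/shift values (127.0f and 128) are illustrative assumptions, not values taken from the patch.

#include <cstdint>
#include <vector>

#include "dnnl.hpp"

int main() {
  dnnl::engine eng(dnnl::engine::kind::cpu, 0);
  dnnl::stream strm(eng);

  // Four f32 values to quantize into u8.
  std::vector<float> src_data = {-1.0f, 0.0f, 0.5f, 1.0f};
  std::vector<uint8_t> dst_data(src_data.size());

  const dnnl::memory::dims tz = {1, static_cast<int64_t>(src_data.size())};
  auto src_md = dnnl::memory::desc(tz, dnnl::memory::data_type::f32,
                                   dnnl::memory::format_tag::nc);
  auto dst_md = dnnl::memory::desc(tz, dnnl::memory::data_type::u8,
                                   dnnl::memory::format_tag::nc);
  dnnl::memory src_mem(src_md, eng, src_data.data());
  dnnl::memory dst_mem(dst_md, eng, dst_data.data());

  // dst = saturate_u8(scale * src + shift); mask 0 means one scale and one
  // zero point for the whole tensor, the same convention the kernels use for
  // their "Scale" and "Shift" attributes.
  dnnl::primitive_attr attrs;
  attrs.set_output_scales(/*mask=*/0, {127.0f});
  attrs.set_zero_points(DNNL_ARG_DST, /*mask=*/0, {128});

  // reorder(src, dst, attr) builds the primitive descriptor internally;
  // this is the convenience constructor the new AcquireReorder overload in
  // ReorderMKLDNNHandler relies on.
  auto quantize = dnnl::reorder(src_mem, dst_mem, attrs);
  quantize.execute(strm, src_mem, dst_mem);
  strm.wait();
  return 0;
}

Dequantization is the mirror image: attach the zero point to DNNL_ARG_SRC and set the output scale to 1 / quantization_scale, which is exactly what DeQuantOpKernel configures before calling AcquireReorder.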