From d9262145350a81256a17e43451677460c7b0a4c6 Mon Sep 17 00:00:00 2001 From: lidanqing Date: Tue, 18 Feb 2020 06:30:49 +0100 Subject: [PATCH] [UT coverage] improve the mul_mkldnn_op line coverage (#22408) * improve the mul_mkldnn_op line coverage test=develop * remove fp32 mul mkldnn kernel test=develop * locally refactoring test=develop * change according to reviews test=develop --- .../fluid/operators/mkldnn/mul_mkldnn_op.cc | 305 +++++++----------- .../mkldnn/test_mul_int8_mkldnn_op.py | 6 +- 2 files changed, 122 insertions(+), 189 deletions(-) diff --git a/paddle/fluid/operators/mkldnn/mul_mkldnn_op.cc b/paddle/fluid/operators/mkldnn/mul_mkldnn_op.cc index 96f4fe9e6d0..862d6508c51 100644 --- a/paddle/fluid/operators/mkldnn/mul_mkldnn_op.cc +++ b/paddle/fluid/operators/mkldnn/mul_mkldnn_op.cc @@ -40,17 +40,24 @@ class MulPrimitiveFactory { explicit MulPrimitiveFactory(const mkldnn::engine &engine) : engine_(engine) {} - virtual ~MulPrimitiveFactory() {} - - virtual inner_product_forward CreateMulPrimitive( - const Tensor *input_x, const Tensor *input_y, Tensor *output, - const ExecutionContext &ctx) { - /* check format and reorder if need */ + inner_product_forward CreateMulPrimitive(const Tensor *x_input, + const Tensor *y_input, + Tensor *output, + const ExecutionContext &ctx) { + /* check data format and reorder if need */ int x_num_col_dims = ctx.Attr("x_num_col_dims"); int y_num_col_dims = ctx.Attr("y_num_col_dims"); - auto x_matrix = UpdateDataFormat(input_x, x_num_col_dims, ctx); - auto y_matrix = UpdateDataFormat(input_y, y_num_col_dims, ctx); + // TODO(intel-minghui) : Remove the restriction that only supports Input(Y) + // as weights + PADDLE_ENFORCE_EQ( + (std::is_same::value), true, + platform::errors::InvalidArgument( + "Input(Y) must be fp32 data type since only fp32 data type is " + "supported in the current design of MKLDNN INT8.")); + + auto x_matrix = UpdateDataFormat(x_input, x_num_col_dims, ctx); + auto y_matrix = UpdateDataFormat(y_input, y_num_col_dims, ctx); auto output_dim = output->dims(); if (output_dim.size() != 2) { @@ -60,17 +67,110 @@ class MulPrimitiveFactory { if (mul_) { UpdateDataPointers(ctx, output, &x_matrix); Execute(); - return *mul_; + return *(mul_); } auto src_desc = CreateMemDescriptor(&x_matrix, MKLDNNMemoryFormat::nc); x_input_ = CreateMemory(src_desc, &x_matrix); - y_input_ = TransposeInputY(&y_matrix); + + if (is_int8_) { + const auto trans_y = TransposeInputY(&y_matrix); + auto scale_y = ctx.Attr>("scale_y"); + y_input_ = QuantInputY(trans_y, scale_y); + } else { + y_input_ = TransposeInputY(&y_matrix); + } + auto dst_desc = CreateMemDescriptor(output, MKLDNNMemoryFormat::any); mul_ = CreateMulPrimitive(*x_input_, *y_input_, dst_desc, output, ctx); Execute(); - return *mul_; + return *(mul_); + } + + private: + memory ReorderWithScale(const memory::desc &src_desc, + const memory::desc &dst_desc, void *src_data, + const std::vector &scale) { + auto mask = scale.size() > 1 ? 1 : 0; + mkldnn::primitive_attr attr; + attr.set_output_scales(mask, scale); + + auto src_mem = memory(src_desc, engine_, src_data); + auto dst_mem = memory(dst_desc, engine_); + + auto reorder_pd = mkldnn::reorder::primitive_desc(src_mem, dst_mem, attr); + + auto reorder = mkldnn::reorder(reorder_pd); + + mkldnn::stream astream(engine_); + reorder.execute(astream, src_mem, dst_mem); + astream.wait(); + + return dst_mem; + } + + memory QuantInputY(memory input_y, const std::vector &scale_y) { + const auto &dims = input_y.get_desc().data.dims; + auto ndims = input_y.get_desc().data.ndims; + auto y_dims = std::vector(dims, dims + ndims); + + auto user_y_desc = CreateMemDescriptor(y_dims, MKLDNNMemoryFormat::oi); + auto y_desc = CreateMemDescriptor(y_dims, MKLDNNMemoryFormat::oi); + + return ReorderWithScale(user_y_desc, y_desc, input_y.get_data_handle(), + scale_y); + } + + mkldnn::primitive_attr CreateMulAttr(const ExecutionContext &ctx, + bool force_fp32_output) { + mkldnn::primitive_attr mul_attr; + + auto scale_y_data = ctx.Attr>("scale_y"); + auto scale_x_data = ctx.Attr("scale_x"); + auto scale_out_data = + force_fp32_output ? 1.0f : ctx.Attr("scale_out"); + + bool is_multi_channel = scale_y_data.size() > 1; + int count = is_multi_channel ? scale_y_data.size() : 1; + std::vector output_shift_scale(count); + for (int i = 0; i < count; i++) { + if (scale_y_data[i] == 0.0) + output_shift_scale[i] = scale_out_data; + else + output_shift_scale[i] = + scale_out_data / (scale_x_data * scale_y_data[i]); + } + int mul_mask = is_multi_channel ? 1 : 0; + mul_attr.set_output_scales(mul_mask, output_shift_scale); + + return mul_attr; + } + + inner_product_forward CreateMulPrimitive(const memory &x_memory, + const memory &y_memory, + const memory::desc &dst_desc, + Tensor *output, + const ExecutionContext &ctx) { + const auto x_desc = x_memory.get_desc(); + const auto y_desc = y_memory.get_desc(); + inner_product_forward::primitive_desc mul_prim_desc; + + const auto &mul_desc = inner_product_forward::desc( + prop_kind::forward, x_desc, y_desc, dst_desc); + + if (is_int8_) { + bool force_fp32_output = ctx.Attr("force_fp32_output"); + auto mul_attr = CreateMulAttr(ctx, force_fp32_output); + mul_prim_desc = + inner_product_forward::primitive_desc(mul_desc, mul_attr, engine_); + } else { + mul_prim_desc = inner_product_forward::primitive_desc(mul_desc, engine_); + } + + output_ = CreateDstMemory(mul_prim_desc, ctx, output); + + return inner_product_forward(mul_prim_desc); } void Execute() { @@ -81,7 +181,6 @@ class MulPrimitiveFactory { astream.wait(); } - protected: template Tensor UpdateDataFormat(const Tensor *data, int num_col_dims, const ExecutionContext &ctx) { @@ -176,176 +275,13 @@ class MulPrimitiveFactory { return Reorder(src_desc, dst_desc, to_void_cast(input_y->data())); } - inner_product_forward CreateMulPrimitive(const memory &x_memory, - const memory &y_memory, - const memory::desc &dst_desc, - Tensor *output, - const ExecutionContext &ctx) { - const auto y_desc = y_memory.get_desc(); - const auto x_desc = x_memory.get_desc(); - - auto mul_prim_desc = CreateMulPrimDesc(x_desc, y_desc, dst_desc); - output_ = CreateDstMemory(mul_prim_desc, ctx, output); - - return inner_product_forward(mul_prim_desc); - } - - inner_product_forward::primitive_desc CreateMulPrimDesc( - const memory::desc &x_desc, const memory::desc &y_desc, - const memory::desc &dst_desc) { - auto mul_desc = inner_product_forward::desc(prop_kind::forward, x_desc, - y_desc, dst_desc); - - return inner_product_forward::primitive_desc(mul_desc, engine_); - } - - protected: const mkldnn::engine &engine_; boost::optional x_input_; boost::optional y_input_; boost::optional output_; boost::optional mul_; -}; // namespace operators - -template -class QuantMulPrimitiveFactory : public MulPrimitiveFactory { - public: - using MulPrimitiveFactory::MulPrimitiveFactory; - - virtual inner_product_forward CreateMulPrimitive( - const Tensor *x_input, const Tensor *y_input, Tensor *output, - const ExecutionContext &ctx) { - /* check data format and reorder if need */ - int x_num_col_dims = ctx.Attr("x_num_col_dims"); - int y_num_col_dims = ctx.Attr("y_num_col_dims"); - auto scale_y = ctx.Attr>("scale_y"); - - // TODO(intel-minghui) : Remove the restriction that only supports Input(Y) - // as weights - bool enforce = std::is_same::value; - PADDLE_ENFORCE( - enforce == true, - "Input(Y) supposed to be fp32 data type since only fp32 data type is " - "supported in the current design of MKLDNN INT8."); - - auto x_matrix = - this->template UpdateDataFormat(x_input, x_num_col_dims, ctx); - auto y_matrix = - this->template UpdateDataFormat(y_input, y_num_col_dims, ctx); - - auto output_dim = output->dims(); - if (output_dim.size() != 2) { - output->Resize({x_matrix.dims()[0], y_matrix.dims()[1]}); - } - - if (this->mul_) { - this->UpdateDataPointers(ctx, output, &x_matrix); - this->Execute(); - return *(this->mul_); - } - - auto src_desc = this->template CreateMemDescriptor( - &x_matrix, MKLDNNMemoryFormat::nc); - this->x_input_ = this->template CreateMemory(src_desc, &x_matrix); - - const auto trans_y = this->TransposeInputY(&y_matrix); - this->y_input_ = QuantInputY(trans_y, scale_y); - - auto dst_desc = - this->template CreateMemDescriptor(output, MKLDNNMemoryFormat::any); - - this->mul_ = CreateMulPrimitive(*(this->x_input_), *(this->y_input_), - dst_desc, output, ctx); - this->Execute(); - return *(this->mul_); - } - - memory ReorderWithScale(const memory::desc &src_desc, - const memory::desc &dst_desc, void *src_data, - const std::vector &scale) { - auto mask = scale.size() > 1 ? 1 : 0; - mkldnn::primitive_attr attr; - attr.set_output_scales(mask, scale); - - auto src_mem = memory(src_desc, this->engine_, src_data); - auto dst_mem = memory(dst_desc, this->engine_); - - auto reorder_pd = mkldnn::reorder::primitive_desc(src_mem, dst_mem, attr); - - auto reorder = mkldnn::reorder(reorder_pd); - - mkldnn::stream astream(this->engine_); - reorder.execute(astream, src_mem, dst_mem); - astream.wait(); - - return dst_mem; - } - - memory QuantInputY(memory input_y, const std::vector &scale_y) { - const auto &dims = input_y.get_desc().data.dims; - auto ndims = input_y.get_desc().data.ndims; - auto y_dims = std::vector(dims, dims + ndims); - - auto user_y_desc = - this->template CreateMemDescriptor(y_dims, MKLDNNMemoryFormat::oi); - auto y_desc = this->template CreateMemDescriptor( - y_dims, MKLDNNMemoryFormat::oi); - - return ReorderWithScale(user_y_desc, y_desc, input_y.get_data_handle(), - scale_y); - } - - mkldnn::primitive_attr CreateMulAttr(const ExecutionContext &ctx, - bool force_fp32_output) { - mkldnn::primitive_attr mul_attr; - - auto scale_y_data = ctx.Attr>("scale_y"); - auto scale_x_data = ctx.Attr("scale_x"); - auto scale_out_data = - force_fp32_output ? 1.0f : ctx.Attr("scale_out"); - - bool is_multi_channel = scale_y_data.size() > 1; - int count = is_multi_channel ? scale_y_data.size() : 1; - std::vector output_shift_scale(count); - for (int i = 0; i < count; i++) { - if (scale_y_data[i] == 0.0) - output_shift_scale[i] = scale_out_data; - else - output_shift_scale[i] = - scale_out_data / (scale_x_data * scale_y_data[i]); - } - int mul_mask = is_multi_channel ? 1 : 0; - mul_attr.set_output_scales(mul_mask, output_shift_scale); - - return mul_attr; - } - - inner_product_forward CreateMulPrimitive(const memory &x_memory, - const memory &y_memory, - const memory::desc &dst_desc, - Tensor *output, - const ExecutionContext &ctx) { - const auto x_desc = x_memory.get_desc(); - const auto y_desc = y_memory.get_desc(); - bool force_fp32_output = ctx.Attr("force_fp32_output"); - - mkldnn::primitive_attr mul_attr = CreateMulAttr(ctx, force_fp32_output); - auto mul_prim_desc = CreateMulPrimDesc(x_desc, y_desc, dst_desc, mul_attr); - - this->output_ = this->CreateDstMemory(mul_prim_desc, ctx, output); - - return inner_product_forward(mul_prim_desc); - } - - inner_product_forward::primitive_desc CreateMulPrimDesc( - const memory::desc &x_desc, const memory::desc &y_desc, - const memory::desc &dst_desc, const mkldnn::primitive_attr &mul_attr) { - const auto &mul_desc = inner_product_forward::desc( - prop_kind::forward, x_desc, y_desc, dst_desc); - - return inner_product_forward::primitive_desc(mul_desc, mul_attr, - this->engine_); - } + static constexpr bool is_int8_ = + std::is_same::value || std::is_same::value; }; /* OT: output data type */ @@ -353,7 +289,7 @@ template std::shared_ptr> GetPrimitiveFactory( const MKLDNNDeviceContext &dev_ctx, const ExecutionContext &ctx, const Tensor *input_x, const Tensor *input_y, - const mkldnn::engine &mkldnn_engine, bool enable_quant) { + const mkldnn::engine &mkldnn_engine) { const std::string key = platform::CreateKey( input_x->type(), framework::vectorize(input_x->dims()), input_y->type(), framework::vectorize(input_y->dims()), ctx.OutputName("Out")); @@ -363,10 +299,7 @@ std::shared_ptr> GetPrimitiveFactory( if (prim_creator == nullptr) { prim_creator = - enable_quant - ? std::make_shared>( - mkldnn_engine) - : std::make_shared>(mkldnn_engine); + std::make_shared>(mkldnn_engine); dev_ctx.SetBlob(key, prim_creator); } @@ -379,18 +312,18 @@ inner_product_forward GetMulPrimitive(const MKLDNNDeviceContext &dev_ctx, const Tensor *input_x, const Tensor *input_y, Tensor *output, const mkldnn::engine &mkldnn_engine) { - bool enable_quant = + constexpr bool is_int8 = std::is_same::value || std::is_same::value; bool force_fp32_output = ctx.Attr("force_fp32_output"); - if (enable_quant && !force_fp32_output) { + if (is_int8 && !force_fp32_output) { return GetPrimitiveFactory(dev_ctx, ctx, input_x, input_y, - mkldnn_engine, enable_quant) + mkldnn_engine) ->CreateMulPrimitive(input_x, input_y, output, ctx); } else { return GetPrimitiveFactory(dev_ctx, ctx, input_x, input_y, - mkldnn_engine, enable_quant) + mkldnn_engine) ->CreateMulPrimitive(input_x, input_y, output, ctx); } } diff --git a/python/paddle/fluid/tests/unittests/mkldnn/test_mul_int8_mkldnn_op.py b/python/paddle/fluid/tests/unittests/mkldnn/test_mul_int8_mkldnn_op.py index 1d4ae90e90e..9280bf886fb 100644 --- a/python/paddle/fluid/tests/unittests/mkldnn/test_mul_int8_mkldnn_op.py +++ b/python/paddle/fluid/tests/unittests/mkldnn/test_mul_int8_mkldnn_op.py @@ -52,11 +52,11 @@ class TestMKLDNNMulOpS8S8(OpTest): # limit random range inside |-127, 127| to avoid overflow on SKL if self.srctype == np.int8: - A_data = np.random.randint(-127, 127, (2, 5)).astype(np.int8) + A_data = np.random.randint(-127, 127, (20, 5)).astype(np.int8) else: - A_data = np.random.randint(0, 127, (2, 5)).astype(np.uint8) + A_data = np.random.randint(0, 127, (20, 5)).astype(np.uint8) - B_data = np.random.uniform(-127, 127, (5, 3)).astype(np.float32) + B_data = np.random.uniform(-127, 127, (5, 20)).astype(np.float32) quant_B = np.round(B_data * self.scale_y[0]).astype(np.int) output = np.dot(A_data, quant_B) -- GitLab