From dde9cec0b606ee8ab5203c2c3bdffad23fc5dfc9 Mon Sep 17 00:00:00 2001
From: Jacek Czaja
Date: Tue, 15 Mar 2022 08:48:18 +0100
Subject: [PATCH] oneDNN NHWC fixes (#40049)

* - Prototype of third solution
- fix
- compilation fixes
- fix
- fix
- fix
- fix
- compilation fix
- comment fix
- lint
update mkldnn conv_elementwise_add_fuse_pass ut
- NHWC changes to prelu
- alpha dims
- UT fix
- fix to UT
- lint
- Some fixes
- added to BWD of prelu NHWC support
- reverted removal of resetting cu_layout in clearing of caching
* - Small changes
* - compilation fix
* - fix
* - fix
* lint
* - fixes after internal review
* - compilation fix
* - lint
---
 paddle/fluid/framework/executor.cc            |   3 +-
 .../conv_elementwise_add_mkldnn_fuse_pass.cc  |   2 +-
 paddle/fluid/framework/naive_executor.cc      |   1 +
 paddle/fluid/operators/lrn_op.cc              |   2 +-
 .../fluid/operators/mkldnn/prelu_mkldnn_op.cc |   9 +-
 .../operators/mkldnn/test_mkldnn_op_nhwc.cc   |   2 +-
 paddle/fluid/operators/prelu_op.cc            |  34 ++++-
 paddle/fluid/platform/mkldnn_helper.h         |  28 ++++
 ...t_mkldnn_conv_elementwise_add_fuse_pass.py | 125 ++++++++++++++++--
 9 files changed, 183 insertions(+), 23 deletions(-)

diff --git a/paddle/fluid/framework/executor.cc b/paddle/fluid/framework/executor.cc
index 48850d4624..f951b5d0f5 100644
--- a/paddle/fluid/framework/executor.cc
+++ b/paddle/fluid/framework/executor.cc
@@ -174,10 +174,11 @@ void Executor::Run(const ProgramDesc& pdesc, Scope* scope, int block_id,
                     bool force_disable_gc, bool keep_kid_scopes) {
   platform::RecordBlock b(block_id);
   if (FLAGS_use_mkldnn) EnableMKLDNN(pdesc);
+  auto ctx = Prepare(pdesc, block_id, skip_ref_cnt_vars, force_disable_gc);
 #ifdef PADDLE_WITH_MKLDNN
   platform::AttachPointerHashToMKLDNNKey(this, place_);
+  platform::RegisterModelLayout(ctx->ops_, place_);
 #endif
-  auto ctx = Prepare(pdesc, block_id, skip_ref_cnt_vars, force_disable_gc);
   RunPreparedContext(ctx.get(), scope, create_local_scope, create_vars,
                      keep_kid_scopes);
 }
diff --git a/paddle/fluid/framework/ir/mkldnn/conv_elementwise_add_mkldnn_fuse_pass.cc b/paddle/fluid/framework/ir/mkldnn/conv_elementwise_add_mkldnn_fuse_pass.cc
index 2403e60df3..0f3f37320b 100644
--- a/paddle/fluid/framework/ir/mkldnn/conv_elementwise_add_mkldnn_fuse_pass.cc
+++ b/paddle/fluid/framework/ir/mkldnn/conv_elementwise_add_mkldnn_fuse_pass.cc
@@ -118,7 +118,7 @@ ResidualConnectionMKLDNNFusePass::ResidualConnectionMKLDNNFusePass() {
       .IsType<std::vector<int>>()
       .End()
       .AddAttr("data_format")
-      .IsStringIn({"NCHW", "AnyLayout"})
+      .IsStringIn({"NHWC", "NCHW", "AnyLayout"})
       .End();
 
   AddOpCompat(OpCompat("elementwise_add"))
diff --git a/paddle/fluid/framework/naive_executor.cc b/paddle/fluid/framework/naive_executor.cc
index ece4815858..f30d1ea1b8 100644
--- a/paddle/fluid/framework/naive_executor.cc
+++ b/paddle/fluid/framework/naive_executor.cc
@@ -41,6 +41,7 @@ void NaiveExecutor::Prepare(Scope *scope, const ProgramDesc &program_desc,
 void NaiveExecutor::Run() {
 #ifdef PADDLE_WITH_MKLDNN
   platform::AttachPointerHashToMKLDNNKey(this, place_);
+  platform::RegisterModelLayout(ops_, place_);
 #endif
   platform::ScopedFlushDenormal flush;
   for (auto &op : ops_) {
diff --git a/paddle/fluid/operators/lrn_op.cc b/paddle/fluid/operators/lrn_op.cc
index 65297abe3e..88d70d9bb7 100644
--- a/paddle/fluid/operators/lrn_op.cc
+++ b/paddle/fluid/operators/lrn_op.cc
@@ -221,7 +221,7 @@ class LRNOp : public framework::OperatorWithKernel {
       auto ar = paddle::framework::AttrReader(attrs);
       const std::string data_format = ar.Get<std::string>("data_format");
       auto dl = framework::StringToDataLayout(data_format);
-      // Some models may have intentionally set "AnyLayout" for pool
+      // Some models may have intentionally set "AnyLayout" for lrn
       // op. Treat this as NCHW (default data_format value)
       if (dl != framework::DataLayout::kAnyLayout) {
         return framework::OpKernelType(expected_kernel_type.data_type_,
diff --git a/paddle/fluid/operators/mkldnn/prelu_mkldnn_op.cc b/paddle/fluid/operators/mkldnn/prelu_mkldnn_op.cc
index bdb4fe1198..86ecb01c89 100644
--- a/paddle/fluid/operators/mkldnn/prelu_mkldnn_op.cc
+++ b/paddle/fluid/operators/mkldnn/prelu_mkldnn_op.cc
@@ -50,13 +50,8 @@ class PReluMKLDNNHandler
     if (weights->dims().size() != x->dims().size()) {
       auto new_weights_dims = std::vector<int64_t>(x->dims().size(), 1);
       if (mode == "channel") {
-        if (data_format == "NHWC") {
-          new_weights_dims[x->dims().size() - 1] =
-              *std::max_element(weights_dims.begin(), weights_dims.end());
-        } else {
-          new_weights_dims[1] =
-              *std::max_element(weights_dims.begin(), weights_dims.end());
-        }
+        new_weights_dims[1] =
+            *std::max_element(weights_dims.begin(), weights_dims.end());
       }
       weights_dims = std::move(new_weights_dims);
     }
diff --git a/paddle/fluid/operators/mkldnn/test_mkldnn_op_nhwc.cc b/paddle/fluid/operators/mkldnn/test_mkldnn_op_nhwc.cc
index 717af61b85..0e988557df 100644
--- a/paddle/fluid/operators/mkldnn/test_mkldnn_op_nhwc.cc
+++ b/paddle/fluid/operators/mkldnn/test_mkldnn_op_nhwc.cc
@@ -98,7 +98,7 @@ TEST(test_pool2d_transpose_nhwc, cpu_place) {
 
 TEST(test_pool2d_relu_relu_nhwc, cpu_place) {
   framework::DDim dims({1, 4, 8, 512});           // NHWC shape
-  framework::DDim expected_dims({1, 512, 3, 7});  // NHWC expected shape
+  framework::DDim expected_dims({1, 512, 3, 7});  // NCHW expected shape
   platform::CPUPlace p;
   framework::Scope scope;
 
diff --git a/paddle/fluid/operators/prelu_op.cc b/paddle/fluid/operators/prelu_op.cc
index 9bd6ae8bab..4d2a2e23b3 100644
--- a/paddle/fluid/operators/prelu_op.cc
+++ b/paddle/fluid/operators/prelu_op.cc
@@ -17,6 +17,26 @@ limitations under the License. */
 namespace paddle {
 namespace operators {
 
+framework::OpKernelType innerGetKernelTypeForVar(
+    const Tensor &tensor, const framework::OpKernelType &expected_kernel_type) {
+#ifdef PADDLE_WITH_MKLDNN
+  auto isOneDNNKernelChosen =
+      (expected_kernel_type.data_layout_ == framework::DataLayout::kMKLDNN);
+  auto isNotOneDNNTensor = (tensor.layout() != framework::DataLayout::kMKLDNN);
+  auto isModelNHWC =
+      (paddle::platform::MKLDNNDeviceContext::tls()
+           .get_cur_paddle_data_layout() == framework::DataLayout::kNHWC);
+  // All inputs (including alpha) need shape rotating
+  if (isOneDNNKernelChosen && isNotOneDNNTensor && isModelNHWC) {
+    return framework::OpKernelType(expected_kernel_type.data_type_,
+                                   tensor.place(),
+                                   framework::DataLayout::kNHWC);
+  }
+#endif
+  return framework::OpKernelType(expected_kernel_type.data_type_,
+                                 tensor.place(), tensor.layout());
+}
+
 class PReluOp : public framework::OperatorWithKernel {
  public:
   PReluOp(const std::string &type, const framework::VariableNameMap &inputs,
@@ -53,7 +73,7 @@ class PReluOp : public framework::OperatorWithKernel {
                 "For mode 'channel', data_format must be one of "
                 "NCHW and NHWC. But recevied data_format: %s",
                data_format_str));
-        if (data_format_str == "NCHW") {
+        if (data_format_str == "NCHW" || ctx->IsRunMKLDNNKernel()) {
           PADDLE_ENFORCE_EQ(
               product(ctx->GetInputDim("Alpha")) == x_dim[1], true,
               platform::errors::InvalidArgument(
@@ -128,6 +148,12 @@ class PReluOp : public framework::OperatorWithKernel {
 #endif
     return framework::OpKernelType(input_data_type, ctx.GetPlace());
   }
+
+  framework::OpKernelType GetKernelTypeForVar(
+      const std::string &var_name, const Tensor &tensor,
+      const framework::OpKernelType &expected_kernel_type) const {
+    return innerGetKernelTypeForVar(tensor, expected_kernel_type);
+  }
 };
 
 class PReluOpMaker : public framework::OpProtoAndCheckerMaker {
@@ -212,6 +238,12 @@ class PReluGradOp : public framework::OperatorWithKernel {
 #endif
     return framework::OpKernelType(input_data_type, ctx.GetPlace());
   }
+
+  framework::OpKernelType GetKernelTypeForVar(
+      const std::string &var_name, const Tensor &tensor,
+      const framework::OpKernelType &expected_kernel_type) const {
+    return innerGetKernelTypeForVar(tensor, expected_kernel_type);
+  }
 };
 
 template <typename T>
diff --git a/paddle/fluid/platform/mkldnn_helper.h b/paddle/fluid/platform/mkldnn_helper.h
index ce2dba4db0..4001fd744e 100644
--- a/paddle/fluid/platform/mkldnn_helper.h
+++ b/paddle/fluid/platform/mkldnn_helper.h
@@ -559,6 +559,34 @@ inline void GetGroupConvWeightsTz(std::vector<int64_t>& weights_tz,  // NOLINT
   }
 }
 
+inline void RegisterModelLayout(
+    std::vector<std::unique_ptr<framework::OperatorBase>>& ops,
+    const platform::Place& place) {
+  if (platform::is_cpu_place(place)) {
+    auto check_attrib = [](std::unique_ptr<framework::OperatorBase>& op,
+                           const std::string& attrib_name) -> bool {
+      if (op->HasAttr(attrib_name)) {
+        auto data_format = op->Attr<std::string>(attrib_name);
+        platform::MKLDNNDeviceContext::tls().set_cur_paddle_data_layout(
+            data_format.compare("NHWC") == 0 ? framework::DataLayout::kNHWC
+                                             : framework::DataLayout::kNCHW);
+        return true;
+      } else {
+        return false;
+      }
+    };
+
+    for (auto& op : ops) {
+      if (check_attrib(op, std::string("data_format"))) {
+        return;
+      }
+      if (check_attrib(op, std::string("data_layout"))) {
+        return;
+      }
+    }
+  }
+}
+
 inline bool HasOpINT8DataType(const paddle::framework::OpDesc* op) {
   return (op->GetAttrIfExists<std::string>("mkldnn_data_type") == "int8" ||
           op->GetAttrIfExists<bool>("use_quantizer"));
diff --git a/python/paddle/fluid/tests/unittests/ir/inference/test_mkldnn_conv_elementwise_add_fuse_pass.py b/python/paddle/fluid/tests/unittests/ir/inference/test_mkldnn_conv_elementwise_add_fuse_pass.py
index 66c547de2c..2e84607e2f 100644
--- a/python/paddle/fluid/tests/unittests/ir/inference/test_mkldnn_conv_elementwise_add_fuse_pass.py
+++ b/python/paddle/fluid/tests/unittests/ir/inference/test_mkldnn_conv_elementwise_add_fuse_pass.py
@@ -25,17 +25,120 @@ from hypothesis import given, settings, seed, example, assume
 import hypothesis.strategies as st
 
 
-class TestConvElementwiseAddMkldnnFusePass(PassAutoScanTest):
+# the two inputs of elementwise_add are tensor
+class TestConvElementwiseAddMkldnnFusePass1(PassAutoScanTest):
     def is_program_valid(self, program_config: ProgramConfig) -> bool:
         attrs = [
             program_config.ops[i].attrs
             for i in range(len(program_config.ops))
         ]
-        # If the problem has been fixed, the judgment
-        # needs to be deleted!!!
-        if attrs[1]['data_format'] == "NHWC":
+        if attrs[1]['data_format'] == "NHWC" and attrs[3]['axis'] == 0:
+            return False
+        if attrs[1]['data_format'] == "NCHW" and attrs[3]['axis'] == -1:
             return False
+        return True
+
+    def sample_program_config(self, draw):
+        data_format = draw(st.sampled_from(["NCHW", "NHWC"]))
+        dilations = draw(st.sampled_from([[1, 1], [2, 2], [1, 2]]))
+        padding_algorithm = draw(st.sampled_from(["EXPLICIT", "SAME", "VALID"]))
+        groups = draw(st.sampled_from([1, 2, 4]))
+        paddings = draw(st.sampled_from([[0, 3], [1, 1], [1, 2, 3, 4]]))
+        strides = draw(st.sampled_from([[1, 1], [2, 2], [1, 2]]))
+        axis = draw(st.sampled_from([-1, 0]))
+        batch_size = draw(st.integers(min_value=1, max_value=4))
+
+        def generate_input():
+            if data_format == "NCHW":
+                return np.random.random(
+                    [batch_size, 48, 64, 64]).astype(np.float32)
+            else:
+                return np.random.random(
+                    [batch_size, 64, 64, 48]).astype(np.float32)
+
+        def generate_weight():
+            return np.random.random(
+                [48, int(48 / groups), 3, 3]).astype(np.float32)
+
+        relu_op = OpConfig(
+            type="relu",
+            inputs={"X": ["input_data"]},
+            outputs={"Out": ["relu_out"]},
+            attrs={})
+
+        conv2d_op1 = OpConfig(
+            type="conv2d",
+            inputs={"Input": ["relu_out"],
+                    "Filter": ["conv_weight1"]},
+            outputs={"Output": ["conv_output1"]},
+            attrs={
+                "data_format": data_format,
+                "dilations": dilations,
+                "padding_algorithm": padding_algorithm,
+                "groups": groups,
+                "paddings": paddings,
+                "strides": strides
+            })
+
+        conv2d_op2 = OpConfig(
+            type="conv2d",
+            inputs={"Input": ["input_data"],
+                    "Filter": ["conv_weight2"]},
+            outputs={"Output": ["conv_output2"]},
+            attrs={
+                "data_format": data_format,
+                "dilations": dilations,
+                "padding_algorithm": padding_algorithm,
+                "groups": groups,
+                "paddings": paddings,
+                "strides": strides
+            })
+
+        elt_op = OpConfig(
+            type="elementwise_add",
+            inputs={"X": ["conv_output1"],
+                    "Y": ["conv_output2"]},
+            outputs={"Out": ["elementwise_output"]},
+            attrs={'axis': axis})
+        model_net = [relu_op, conv2d_op1, conv2d_op2, elt_op]
+
+        program_config = ProgramConfig(
+            ops=model_net,
+            weights={
+                "conv_weight1": TensorConfig(data_gen=partial(generate_weight)),
+                "conv_weight2": TensorConfig(data_gen=partial(generate_weight))
+            },
+            inputs={
+                "input_data": TensorConfig(data_gen=partial(generate_input))
+            },
+            outputs=["elementwise_output"])
+
+        return program_config
+
+    def sample_predictor_configs(self, program_config):
+        config = self.create_inference_config(use_mkldnn=True)
+        yield config, ["relu", "conv2d", "conv2d"], (1e-5, 1e-5)
+
+    def test(self):
+        self.run_and_statis(
+            quant=False, passes=["conv_elementwise_add_mkldnn_fuse_pass"])
+
+
+'''
+class TestConvElementwiseAddMkldnnFusePass(PassAutoScanTest):
+    def is_program_valid(self, program_config: ProgramConfig) -> bool:
+        attrs = [
+            program_config.ops[i].attrs
+            for i in range(len(program_config.ops))
+        ]
+        if "elementwise_weight" in program_config.weights:
+            if program_config.weights["elementwise_weight"].shape[0] == program_config.inputs["input_data1"].shape[1]:
+                if attrs[2]['axis'] != 1:
+                    return False
+            if program_config.weights["elementwise_weight"].shape[0] == program_config.inputs["input_data1"].shape[3]:
+                if attrs[2]['axis'] != -1:
+                    return False
         return True
 
     def sample_program_config(self, draw):
@@ -101,7 +204,7 @@ class TestConvElementwiseAddMkldnnFusePass(PassAutoScanTest):
                 "strides": strides
             })
 
-        if axis == -1 or axis == 0:
+        if axis == 0:
             elt_op = OpConfig(
                 type="elementwise_add",
                 inputs={"X": ["input_data1"],
@@ -118,14 +118,12 @@ class TestConvElementwiseAddMkldnnFusePass(PassAutoScanTest):
 
         model_net = [relu_op, conv2d_op, elt_op]
 
-        if axis == 1:
+        if axis == 0:
             program_config = ProgramConfig(
                 ops=model_net,
                 weights={
                     "conv_weight":
-                    TensorConfig(data_gen=partial(generate_weight1)),
-                    "elementwise_weight":
-                    TensorConfig(data_gen=partial(generate_weight2))
+                    TensorConfig(data_gen=partial(generate_weight1))
                 },
                 inputs={
                     "input_data1":
@@ -137,7 +238,9 @@ class TestConvElementwiseAddMkldnnFusePass(PassAutoScanTest):
                 ops=model_net,
                 weights={
                     "conv_weight":
-                    TensorConfig(data_gen=partial(generate_weight1))
+                    TensorConfig(data_gen=partial(generate_weight1)),
+                    "elementwise_weight":
+                    TensorConfig(data_gen=partial(generate_weight2))
                 },
                 inputs={
                     "input_data1":
@@ -154,7 +257,7 @@ class TestConvElementwiseAddMkldnnFusePass(PassAutoScanTest):
     def test(self):
         self.run_and_statis(
             quant=False, passes=["conv_elementwise_add_mkldnn_fuse_pass"])
-
+'''
 
 if __name__ == "__main__":
     unittest.main()
-- 
GitLab