diff --git a/paddle/fluid/eager/auto_code_generator/generator/eager_gen.py b/paddle/fluid/eager/auto_code_generator/generator/eager_gen.py
index b7dba03ff46a9e10305f4f06b39135b0f9fea421..9bbe26d0f8a4e1eb2ad8af2ddea4fde7256a27b7 100644
--- a/paddle/fluid/eager/auto_code_generator/generator/eager_gen.py
+++ b/paddle/fluid/eager/auto_code_generator/generator/eager_gen.py
@@ -1233,6 +1233,7 @@ class DygraphForwardFunctionGenerator(DygraphFunctionGeneratorBase):
             'end',
             'stop',
             'perm',
+            'paddings',
         ]
         heavily_sensitive_attr = ['data_format', 'data_layout']
         layout_autotune_attr = []
diff --git a/paddle/fluid/eager/eager_layout_auto_tune.h b/paddle/fluid/eager/eager_layout_auto_tune.h
index f5e77113ba8eab91dded862f792a64f074318ed7..6cb4a9ea464bf086e2a95d3804771cfb1c785a9e 100644
--- a/paddle/fluid/eager/eager_layout_auto_tune.h
+++ b/paddle/fluid/eager/eager_layout_auto_tune.h
@@ -67,7 +67,7 @@ inline std::shared_ptr<EagerLayoutTransformer> EagerLayoutAutotune(
                                kSlotSmallVectorSize>& tensors_vector,
     T* attr UNUSED) {
   // For lightly op like reduce
-  if (!(DesiredLayout() == phi::DataLayout::UNDEFINED)) {
+  if ((DesiredLayout() == phi::DataLayout::UNDEFINED)) {
     VLOG(4) << "LayoutAutotune was unstarted. Current op :" << op_name;
     return std::make_shared<EagerLayoutTransformer>(
         op_name, tensors_vector, tensors_vector[0][0].layout());
@@ -85,7 +85,21 @@ inline std::shared_ptr<EagerLayoutTransformer> EagerLayoutAutotune(
   // For lightly op like argmax
   return EagerLayoutAutotune<T1>(op_name, tensors_vector, axis);
 }
-
+template <>
+inline std::shared_ptr<EagerLayoutTransformer> EagerLayoutAutotune(
+    const std::string& op_name,
+    const paddle::small_vector<std::vector<paddle::Tensor>,
+                               kSlotSmallVectorSize>& tensors_vector,
+    paddle::experimental::IntArray* padddings,
+    std::string* attr) {
+  // for pad
+  if ((DesiredLayout() == phi::DataLayout::UNDEFINED)) {
+    VLOG(4) << "LayoutAutotune was unstarted. Current op :" << op_name;
+    return std::make_shared<EagerLayoutTransformer>(
+        op_name, tensors_vector, tensors_vector[0][0].layout());
+  }
+  return std::make_shared<EagerLightlyLayoutSensitiveOpTransformer>(op_name);
+}
 template <>
 inline std::shared_ptr<EagerLayoutTransformer> EagerLayoutAutotune(
     const std::string& op_name,
@@ -104,8 +118,9 @@ inline std::shared_ptr<EagerLayoutTransformer> EagerLayoutAutotune(
       auto data_type = tensors_vector[0][0].dtype();
       bool is_tune_fp32 =
          (data_type == phi::DataType::FLOAT32) && (*attr == "NHWC");
-      bool is_tune_fp16 =
-          (data_type == phi::DataType::FLOAT16) && (*attr == "NCHW");
+      bool is_tune_fp16 = (data_type == phi::DataType::FLOAT16 ||
+                           data_type == phi::DataType::BFLOAT16) &&
+                          (*attr == "NCHW");
       VLOG(4) << "LayoutAutoTune assert with dtype and layout, Current op : "
               << op_name;
       if (is_tune_fp32) {
diff --git a/paddle/fluid/eager/eager_layout_transformer.h b/paddle/fluid/eager/eager_layout_transformer.h
index 8131c362d2af996aed6b03f735e23c99df614f48..d707b14be416e07c41cc9ea0f7c452b3dffcd035 100644
--- a/paddle/fluid/eager/eager_layout_transformer.h
+++ b/paddle/fluid/eager/eager_layout_transformer.h
@@ -199,7 +199,7 @@ class EagerHeavilyLayoutSensitiveOpTransformer : public EagerLayoutTransformer {
   explicit EagerHeavilyLayoutSensitiveOpTransformer(const std::string& op_name,
                                                     std::string* layout)
       : op_name_(op_name), desired_layout_(DesiredLayout()) {
-    VLOG(4) << "Heavily op: " << op_name;
+    VLOG(4) << "Heavily op: " << op_name << " layout " << *layout;
     *layout = phi::DataLayoutToString(DesiredLayout());
   }
 
diff --git a/paddle/fluid/imperative/layout_autotune.cc b/paddle/fluid/imperative/layout_autotune.cc
index 33a340664f03b70bb06599c88ae51db12259a587..7d74c3a7e822901b995a18b943cd0124c9a2af60 100644
--- a/paddle/fluid/imperative/layout_autotune.cc
+++ b/paddle/fluid/imperative/layout_autotune.cc
@@ -159,7 +159,8 @@ paddle::imperative::NameVarMap<VarType> AutoTuneLayout(
     const paddle::imperative::NameVarMap<VarType>& outs,
     paddle::framework::AttributeMap* attrs,
     const std::shared_ptr<imperative::Tracer>& tracer) {
-  if (!tracer->UseLayoutAutoTune()) {
+  if (!tracer->UseLayoutAutoTune() ||
+      op_type.find("_grad") != std::string::npos) {
     return ins;
   }
   // When layout autotuning is enabled, the tuner will check the desired layout.
@@ -191,7 +192,8 @@ paddle::imperative::NameVarMap<VarType> AutoTuneLayout(
           (conv_in_type == framework::proto::VarType::FP32);
       bool is_tune_fp16 =
           (PADDLE_GET_CONST(std::string, (*attrs)["data_format"]) == "NCHW") &&
-          (conv_in_type == framework::proto::VarType::FP16);
+          (conv_in_type == framework::proto::VarType::FP16 ||
+           conv_in_type == framework::proto::VarType::BF16);
       if (is_tune_fp32) {
         LayoutAutoTune::Instance().SetDesiredLayout(DataLayout::NCHW);
         LayoutAutoTune::Instance().SetDefaultLayout(DataLayout::NHWC);
diff --git a/test/legacy_test/test_layout_autotune.py b/test/legacy_test/test_layout_autotune.py
index 70c283a549a09bb6c0466928a1cdaa9ce8bbc0ba..b4297e98817a8bb0f79a6ad56de113405e387f30 100644
--- a/test/legacy_test/test_layout_autotune.py
+++ b/test/legacy_test/test_layout_autotune.py
@@ -167,6 +167,21 @@ class LayoutAutoTune(unittest.TestCase):
         self.assertEqual(conv_out1.shape, [1, 8, 14, 12])
         self.assertEqual(out.shape, [2, 8, 14, 12])
 
+    def test_padding_tranpose(self):
+        conv = paddle.nn.Conv2D(3, 8, (3, 3))
+        data = paddle.rand([1, 3, 16, 14])
+        mode = "constant"
+        pad = [1, 0, 1, 2]
+        padding = paddle.nn.Pad2D(padding=pad, mode=mode, data_format='NCHW')
+        with paddle.amp.auto_cast(level="O2", dtype="bfloat16"):
+            conv_out = conv(data)
+            # conv_out.shape = [1, 14, 12, 8] with NHWC
+            out = padding(conv_out)
+            # from NHWC to NCHW
+
+        self.assertEqual(conv_out.shape, [1, 8, 14, 12])
+        self.assertEqual(out.shape, [1, 8, 17, 13])
+
 
 class TestAutoTuneAPI(unittest.TestCase):
     def test_set_config_warnings(self):