From 7901abe8cc0f78c67e3f1e032b69c0f5a3c8fd73 Mon Sep 17 00:00:00 2001
From: niuliling123 <51102941+niuliling123@users.noreply.github.com>
Date: Mon, 14 Aug 2023 20:24:42 +0800
Subject: [PATCH] Add padding parameter for layout lightly op check (#55937)

---
 .../generator/eager_gen.py                    |  1 +
 paddle/fluid/eager/eager_layout_auto_tune.h   | 23 +++++++++++++++----
 paddle/fluid/eager/eager_layout_transformer.h |  2 +-
 paddle/fluid/imperative/layout_autotune.cc    |  6 +++--
 test/legacy_test/test_layout_autotune.py      | 15 ++++++++++++
 5 files changed, 40 insertions(+), 7 deletions(-)

diff --git a/paddle/fluid/eager/auto_code_generator/generator/eager_gen.py b/paddle/fluid/eager/auto_code_generator/generator/eager_gen.py
index b7dba03ff46..9bbe26d0f8a 100644
--- a/paddle/fluid/eager/auto_code_generator/generator/eager_gen.py
+++ b/paddle/fluid/eager/auto_code_generator/generator/eager_gen.py
@@ -1233,6 +1233,7 @@ class DygraphForwardFunctionGenerator(DygraphFunctionGeneratorBase):
             'end',
             'stop',
             'perm',
+            'paddings',
         ]
         heavily_sensitive_attr = ['data_format', 'data_layout']
         layout_autotune_attr = []
diff --git a/paddle/fluid/eager/eager_layout_auto_tune.h b/paddle/fluid/eager/eager_layout_auto_tune.h
index f5e77113ba8..6cb4a9ea464 100644
--- a/paddle/fluid/eager/eager_layout_auto_tune.h
+++ b/paddle/fluid/eager/eager_layout_auto_tune.h
@@ -67,7 +67,7 @@ inline std::shared_ptr<EagerLayoutTransformer> EagerLayoutAutotune(
                                kSlotSmallVectorSize>& tensors_vector,
     T* attr UNUSED) {
   // For lightly op like reduce
-  if (!(DesiredLayout() == phi::DataLayout::UNDEFINED)) {
+  if ((DesiredLayout() == phi::DataLayout::UNDEFINED)) {
     VLOG(4) << "LayoutAutotune was unstarted. Current op :" << op_name;
     return std::make_shared<EagerLayoutTransformer>(
         op_name, tensors_vector, tensors_vector[0][0].layout());
@@ -85,7 +85,21 @@ inline std::shared_ptr<EagerLayoutTransformer> EagerLayoutAutotune(
   // For lightly op like argmax
   return EagerLayoutAutotune(op_name, tensors_vector, axis);
 }
-
+template <>
+inline std::shared_ptr<EagerLayoutTransformer> EagerLayoutAutotune(
+    const std::string& op_name,
+    const paddle::small_vector<std::vector<paddle::Tensor>,
+                               kSlotSmallVectorSize>& tensors_vector,
+    paddle::experimental::IntArray* paddings,
+    std::string* attr) {
+  // for pad
+  if ((DesiredLayout() == phi::DataLayout::UNDEFINED)) {
+    VLOG(4) << "LayoutAutotune was unstarted. Current op :" << op_name;
+    return std::make_shared<EagerLayoutTransformer>(
+        op_name, tensors_vector, tensors_vector[0][0].layout());
+  }
+  return std::make_shared<EagerLightlyLayoutSensitiveOpTransformer>(op_name);
+}
 template <>
 inline std::shared_ptr<EagerLayoutTransformer> EagerLayoutAutotune(
     const std::string& op_name,
@@ -104,8 +118,9 @@ inline std::shared_ptr<EagerLayoutTransformer> EagerLayoutAutotune(
   auto data_type = tensors_vector[0][0].dtype();
   bool is_tune_fp32 =
       (data_type == phi::DataType::FLOAT32) && (*attr == "NHWC");
-  bool is_tune_fp16 =
-      (data_type == phi::DataType::FLOAT16) && (*attr == "NCHW");
+  bool is_tune_fp16 = (data_type == phi::DataType::FLOAT16 ||
+                       data_type == phi::DataType::BFLOAT16) &&
+                      (*attr == "NCHW");
   VLOG(4) << "LayoutAutoTune assert with dtype and layout, Current op : "
           << op_name;
   if (is_tune_fp32) {
diff --git a/paddle/fluid/eager/eager_layout_transformer.h b/paddle/fluid/eager/eager_layout_transformer.h
index 8131c362d2a..d707b14be41 100644
--- a/paddle/fluid/eager/eager_layout_transformer.h
+++ b/paddle/fluid/eager/eager_layout_transformer.h
@@ -199,7 +199,7 @@ class EagerHeavilyLayoutSensitiveOpTransformer : public EagerLayoutTransformer {
   explicit EagerHeavilyLayoutSensitiveOpTransformer(const std::string& op_name,
                                                     std::string* layout)
       : op_name_(op_name), desired_layout_(DesiredLayout()) {
-    VLOG(4) << "Heavily op: " << op_name;
+    VLOG(4) << "Heavily op: " << op_name << " layout " << *layout;
     *layout = phi::DataLayoutToString(DesiredLayout());
   }
 
diff --git a/paddle/fluid/imperative/layout_autotune.cc b/paddle/fluid/imperative/layout_autotune.cc
index 33a340664f0..7d74c3a7e82 100644
--- a/paddle/fluid/imperative/layout_autotune.cc
+++ b/paddle/fluid/imperative/layout_autotune.cc
@@ -159,7 +159,8 @@ paddle::imperative::NameVarMap<VarType> AutoTuneLayout(
     const paddle::imperative::NameVarMap<VarType>& outs,
     paddle::framework::AttributeMap* attrs,
     const std::shared_ptr<imperative::Tracer>& tracer) {
-  if (!tracer->UseLayoutAutoTune()) {
+  if (!tracer->UseLayoutAutoTune() ||
+      op_type.find("_grad") != std::string::npos) {
     return ins;
   }
   // When layout autotuning is enabled, the tuner will check the desired layout.
@@ -191,7 +192,8 @@ paddle::imperative::NameVarMap<VarType> AutoTuneLayout(
           (conv_in_type == framework::proto::VarType::FP32);
       bool is_tune_fp16 =
           (PADDLE_GET_CONST(std::string, (*attrs)["data_format"]) == "NCHW") &&
-          (conv_in_type == framework::proto::VarType::FP16);
+          (conv_in_type == framework::proto::VarType::FP16 ||
+           conv_in_type == framework::proto::VarType::BF16);
       if (is_tune_fp32) {
         LayoutAutoTune::Instance().SetDesiredLayout(DataLayout::NCHW);
         LayoutAutoTune::Instance().SetDefaultLayout(DataLayout::NHWC);
diff --git a/test/legacy_test/test_layout_autotune.py b/test/legacy_test/test_layout_autotune.py
index 70c283a549a..b4297e98817 100644
--- a/test/legacy_test/test_layout_autotune.py
+++ b/test/legacy_test/test_layout_autotune.py
@@ -167,6 +167,21 @@ class LayoutAutoTune(unittest.TestCase):
         self.assertEqual(conv_out1.shape, [1, 8, 14, 12])
         self.assertEqual(out.shape, [2, 8, 14, 12])
 
+    def test_padding_transpose(self):
+        conv = paddle.nn.Conv2D(3, 8, (3, 3))
+        data = paddle.rand([1, 3, 16, 14])
+        mode = "constant"
+        pad = [1, 0, 1, 2]
+        padding = paddle.nn.Pad2D(padding=pad, mode=mode, data_format='NCHW')
+        with paddle.amp.auto_cast(level="O2", dtype="bfloat16"):
+            conv_out = conv(data)
+            # conv_out.shape = [1, 14, 12, 8] with NHWC
+            out = padding(conv_out)
+            # transposed from NHWC back to NCHW
+
+        self.assertEqual(conv_out.shape, [1, 8, 14, 12])
+        self.assertEqual(out.shape, [1, 8, 17, 13])
+
 
 class TestAutoTuneAPI(unittest.TestCase):
     def test_set_config_warnings(self):
-- 
GitLab
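
Usage sketch (illustrative, not part of the patch): with this change, paddings is treated as a lightly layout-sensitive attribute, so once layout autotune has switched a conv output to NHWC under AMP, a following Pad2D goes through the lightly-op path added above and its input is transposed back to NCHW before padding. The snippet below mirrors the new test; it assumes a GPU build and that layout autotune is enabled through paddle.incubate.autotune.set_config (that setup call is an assumption, not part of this patch).

    import paddle

    # Assumed setup: turn on layout autotune (requires a GPU device).
    paddle.incubate.autotune.set_config({"layout": {"enable": True}})

    conv = paddle.nn.Conv2D(3, 8, (3, 3))
    pad2d = paddle.nn.Pad2D(padding=[1, 0, 1, 2], mode="constant", data_format="NCHW")
    data = paddle.rand([1, 3, 16, 14])

    with paddle.amp.auto_cast(level="O2", dtype="bfloat16"):
        conv_out = conv(data)   # internally tuned to NHWC under AMP
        out = pad2d(conv_out)   # lightly op: input transposed back to NCHW first

    print(conv_out.shape)  # [1, 8, 14, 12], reported in NCHW
    print(out.shape)       # [1, 8, 17, 13]  (H: 14+1+2, W: 12+1+0)

The expected shapes match the assertions in the new test_padding_transpose case; without this change the pad op would be fed an NHWC tensor while its paddings are still given in NCHW order.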