From a2dbb0c2ef72cdafebc660645aac743dfcc7f0ec Mon Sep 17 00:00:00 2001
From: WangXi
Date: Wed, 8 Sep 2021 15:35:08 +0800
Subject: [PATCH] [hybrid] check pipeline persist var which changed in forward
 and used in backward (#35453)

---
 python/paddle/fluid/optimizer.py              | 34 +++++++++++++++++++
 .../test_fleet_pipeline_meta_optimizer.py     |  2 ++
 2 files changed, 36 insertions(+)

diff --git a/python/paddle/fluid/optimizer.py b/python/paddle/fluid/optimizer.py
index 676642a9c93..c0b99b3a270 100755
--- a/python/paddle/fluid/optimizer.py
+++ b/python/paddle/fluid/optimizer.py
@@ -5715,6 +5715,35 @@ class PipelineOptimizer(object):
                 backward_insert_index += 1
         block._sync_with_cpp()
 
+    def _check_pipeline_persist_var(self, program):
+        """
+        Warn about persistable vars changed in forward and used in backward.
+        """
+        block = program.global_block()
+
+        persist_output = set()
+        used_in_backward = set()
+        for op in block.ops:
+            if self._is_forward_op(op):
+                for var_name in op.output_arg_names:
+                    var = block.vars[var_name]
+                    if var.persistable:
+                        persist_output.add(var_name)
+            elif self._is_backward_op(op):
+                for var_name in op.input_arg_names:
+                    if var_name in persist_output:
+                        used_in_backward.add(var_name)
+        if len(used_in_backward) == 0:
+            return
+        warnings.warn(
+            "The pipeline runs multiple forward calculations before backward, "
+            "so a persistable var that is changed in forward may cause errors "
+            "in the backward ops that use it. "
+            "Backward ops that do not need this var (NoNeedBufferVars) "
+            "are not affected.\n"
+            "Please check these persistable vars, which are changed in "
+            "forward and used in backward:\n{}".format(used_in_backward))
+
     def minimize(self,
                  loss,
                  startup_program=None,
@@ -5831,6 +5860,11 @@ class PipelineOptimizer(object):
         # A pass to move the recv op to the beginning of
         # the forward/backward phase
         self._mv_head_recv(program_list[self.local_rank])
+
+        # A pass to check persistable vars that are changed in
+        # forward and used in backward
+        self._check_pipeline_persist_var(program_list[self.local_rank])
+
         main_program._pipeline_opt = {
             "trainer": "PipelineTrainer",
             "device_worker": "Section",
diff --git a/python/paddle/fluid/tests/unittests/test_fleet_pipeline_meta_optimizer.py b/python/paddle/fluid/tests/unittests/test_fleet_pipeline_meta_optimizer.py
index 3f8d994ad19..263c578a571 100644
--- a/python/paddle/fluid/tests/unittests/test_fleet_pipeline_meta_optimizer.py
+++ b/python/paddle/fluid/tests/unittests/test_fleet_pipeline_meta_optimizer.py
@@ -45,6 +45,8 @@ class TestFleetMetaOptimizer(unittest.TestCase):
 
         with static.device_guard("gpu:1"):
             fc_2 = paddle.fluid.layers.fc(input=fc_1, size=64, act='tanh')
+            # for pipeline _check_pipeline_persist_var coverage
+            fc_2.persistable = True
             fc_2 = fc_2 * input_z
             prediction = paddle.fluid.layers.fc(input=[fc_2],
                                                 size=2,
--
GitLab
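
For readers skimming the patch, the following is a minimal standalone sketch (not part of the commit) of the scan that `_check_pipeline_persist_var` performs: walk the block's ops once, record persistable vars written by forward ops, and warn when a backward op reads one of them. `ToyOp`, its `is_backward` flag, and `check_pipeline_persist_var` are hypothetical stand-ins for Paddle's `Operator`, `_is_forward_op`/`_is_backward_op`, and the new method; the real code works on `block.ops`, `op.output_arg_names`/`op.input_arg_names`, and `Variable.persistable`.

```python
import warnings
from dataclasses import dataclass, field
from typing import List, Set


@dataclass
class ToyOp:
    """Hypothetical stand-in for a Paddle Operator in a program block."""
    is_backward: bool  # plays the role of PipelineOptimizer._is_backward_op
    inputs: List[str] = field(default_factory=list)
    persistable_outputs: List[str] = field(default_factory=list)


def check_pipeline_persist_var(ops: List[ToyOp]) -> Set[str]:
    """Collect persistable vars written by forward ops and read by backward ops."""
    persist_output: Set[str] = set()
    used_in_backward: Set[str] = set()
    for op in ops:
        if not op.is_backward:
            # Forward op: remember every persistable var it overwrites.
            persist_output.update(op.persistable_outputs)
        else:
            # Backward op: flag any input that a forward op overwrote.
            used_in_backward.update(set(op.inputs) & persist_output)
    if used_in_backward:
        warnings.warn("persistable vars changed in forward and used in "
                      "backward: {}".format(used_in_backward))
    return used_in_backward


# Mirrors the unit test change: fc_2 is made persistable, written in forward,
# then read by its (toy) gradient op in backward, so it gets reported.
ops = [
    ToyOp(is_backward=False, persistable_outputs=["fc_2"]),
    ToyOp(is_backward=True, inputs=["fc_2", "fc_2@GRAD"]),
]
assert check_pipeline_persist_var(ops) == {"fc_2"}
```

The check only warns rather than raises because, as the added message notes, a backward op that lists the var in its NoNeedBufferVars does not actually read the buffer and is therefore unaffected by the overwrite.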