Unverified commit 3adee6c9 authored by xiongkun, committed by GitHub

[bugfix] fix segmentation fault caused by unused ("unuseful") inputs. (#50531)

Parent 9e73be65
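Background for the fix: when a tensor is passed into a to_static sub-layer that never actually uses it, the run_program grad node previously registered a gradient edge for that unused input, and the backward pass could crash with a segmentation fault. The sketch below is a condensed, hedged version of the unit test added at the end of this commit; the class names Outer and Inner are illustrative and not part of the patch.

import paddle
import paddle.nn as nn
from paddle.jit import to_static


class Inner(nn.Layer):
    def __init__(self):
        super().__init__()
        self._linear = nn.Linear(5, 2)

    def forward(self, x):
        # Only x[0] is consumed; x[1] is an unused ("unuseful") input.
        return self._linear(x[0])


class Outer(nn.Layer):
    def __init__(self):
        super().__init__()
        self._linear1 = nn.Linear(10, 5)
        self._linear2 = nn.Linear(10, 5)
        self.inner = to_static(Inner())

    def forward(self, x):
        out1 = self._linear1(x)
        out2 = self._linear2(x)  # feeds an input the static program never uses
        return self.inner([out1, out2])


layer = Outer()
loss = layer(paddle.rand([10, 10])).mean()
# Before this fix the backward pass could hit a segmentation fault, because
# grad edges were also created for the unused input out2.
loss.backward()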
@@ -326,6 +326,68 @@ void GradNodeBase::SetGradOutMeta(
  }
}

void GradNodeBase::SetGradOutMeta(
    const std::vector<const paddle::experimental::Tensor*>& fwd_in,
    size_t slot_rank) {
  size_t slot_size = fwd_in.size();
  PADDLE_ENFORCE_LE(
      slot_rank,
      (bwd_out_meta_.size() - 1),
      paddle::platform::errors::InvalidArgument(
          "Slot Rank should less equal than bwd_out_meta_ size, "
          "since bwd_out_meta_ is designed to hold as same num as "
          "backward outputs."));
  auto& metas = bwd_out_meta_.at(slot_rank);
  // Init stop gradient vector before use to avoid push back
  if (metas.size() < slot_size) {
    metas.resize(slot_size);
  }
  for (size_t i = 0; i < slot_size; i++) {
    const auto& fwd_in_tensor = (*fwd_in[i]);
    auto& meta = metas[i];
    auto* fwd_in_meta = egr::EagerUtils::nullable_autograd_meta(fwd_in_tensor);
    // Set Stop_gradient
    if (fwd_in_meta) {
      meta.SetStopGradient(fwd_in_meta->StopGradient());
    }
    // Set Adj Edges
    if (fwd_in_meta && !fwd_in_meta->StopGradient()) {
      auto node = fwd_in_meta->GetMutableGradNode();
      if (!node || !node.get()) {
        fwd_in_meta->SetGradNode(
            std::make_shared<egr::GradNodeAccumulation>(fwd_in_meta));
      }
      VLOG(3) << "Add Edges for slot: " << slot_rank << ", the Edge is from "
              << this->name() << " (addr: " << this << ") "
              << " to " << fwd_in_meta->GetMutableGradNode()->name()
              << " (addr: " << fwd_in_meta->GetMutableGradNode().get() << ")";
      meta.SetEdge(fwd_in_meta->GetMutableGradNode(),
                   fwd_in_meta->OutRankInfo());
    }
    // Record TensorMeta
    if (fwd_in_tensor.impl() && fwd_in_tensor.impl().get()) {
      if (phi::DenseTensor::classof(fwd_in_tensor.impl().get())) {
        // Only Copy Meta
        phi::DenseTensor* dense_tensor =
            static_cast<phi::DenseTensor*>(fwd_in_tensor.impl().get());
        PADDLE_ENFORCE_NE(dense_tensor->dtype(),
                          phi::DataType::UNDEFINED,
                          paddle::platform::errors::Fatal(
                              "Attempting to copy DenseTensorMeta "
                              "with phi::DataType::UNDEFINED,"
                              "which is illegal."));
        meta.SetTensorMeta(dense_tensor->meta());
        meta.SetPlace(fwd_in_tensor.place());
      }
    } else {
      VLOG(7)
          << "Unable to initialize the DenseTensorMeta of GradSlotMeta with "
             "non-DenseTensor argument.";
    }
  }
}

void GradNodeBase::SetDefaultGradInOutMeta() {
  PADDLE_ENFORCE((bwd_out_meta_.size() == 1) && (bwd_in_meta_.size() == 1),
                 paddle::platform::errors::PreconditionNotMet(
......
@@ -223,6 +223,9 @@ class GradNodeBase {
  void SetGradOutMeta(const std::vector<paddle::experimental::Tensor>& fwd_in,
                      size_t slot_rank);
  void SetGradOutMeta(
      const std::vector<const paddle::experimental::Tensor*>& fwd_in,
      size_t slot_rank);
  void SetGradOutMeta(const paddle::experimental::Tensor& fwd_in,
                      size_t slot_rank);
  /**
......
@@ -93,7 +93,23 @@ inline void run_program_ad_func(
    grad_node->SetStepScope(step_scope);
    // Set Grad out rank as same as fwd input and set stop gradient to bwd
    grad_node->SetGradOutMeta(x, /*slot id*/ 0);
    // NOTE(@xiongkun): Not every tensor in x (a list of tensors) requires a
    // gradient. For example, if x[1] is not used by any output, x[1] is
    // ignored.
    auto* forward_global_block = PADDLE_GET_CONST(
        paddle::framework::BlockDesc*, attrs.at("forward_global_block"));
    auto* backward_global_block = PADDLE_GET_CONST(
        paddle::framework::BlockDesc*, attrs.at("backward_global_block"));
    std::vector<const paddle::experimental::Tensor*> x_require_grad;
    for (size_t i = 0; i < x.size(); ++i) {
      auto& name = x[i].name();
      if (forward_global_block->HasVar(name) ||
          backward_global_block->HasVar(name)) {
        x_require_grad.push_back(&x[i]);
      }
    }
    grad_node->SetGradOutMeta(x_require_grad, /*slot id*/ 0);
    grad_node->SetGradOutMeta(params, /*slot id*/ 1);
    VLOG(2) << "clear_no_grad_edges.";
......
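As the NOTE in the hunk above explains, gradient edges are now created only for inputs whose variable names appear in the forward or backward program. A hedged illustration of the consequence at the Python level, reusing the Outer sketch from above (the expectation that an unreached parameter keeps grad == None is an assumption, not something this commit asserts):

layer = Outer()
loss = layer(paddle.rand([10, 10])).mean()
loss.backward()

# The used branch receives gradients as usual.
assert layer._linear1.weight.grad is not None
# The branch that only feeds the unused input gets no gradient flow;
# assumption: Paddle leaves .grad as None when nothing is accumulated.
assert layer._linear2.weight.grad is None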
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import unittest
import numpy as np
import paddle
import paddle.nn as nn
from paddle.jit import to_static
np.random.seed(1)
def apply_to_static(support_to_static, model, image_shape=None):
    if support_to_static:
        specs = None
        model = to_static(model, input_spec=specs)
    return model
class Layer0(nn.Layer):
    def __init__(self, level):
        super(Layer0, self).__init__()
        self._linear1 = nn.Linear(10, 5)
        self._linear2 = nn.Linear(10, 5)
        self.layer1 = Layer1(level)
        apply_to_static(True, self.layer1)

    def forward(self, x):
        out1 = self._linear1(x)
        out2 = self._linear2(x)
        # out2.stop_gradient = True  # no error occurs if stop_gradient is set
        a = [out1, out2]
        b = self.layer1(a)
        # self.layer1(out1, out2) also fails
        return b
class Layer1(nn.Layer):
    def __init__(self, level):
        super(Layer1, self).__init__()
        self.level = level
        self._linear = nn.Linear(5, 2)

    def forward(self, x):
        inp = x[self.level]
        val = self._linear(inp)
        return val
class TestDuplicateOutput(unittest.TestCase):
    """
    Regression test for #50531: passing a tensor into a to_static sub-layer
    that never uses it must not crash the backward pass.
    """

    def test_case(self):
        # create network
        layer = Layer0(0)
        a = paddle.rand(shape=[10, 10])
        out = layer(a)
        loss = out.mean()
        loss.backward()


if __name__ == '__main__':
    unittest.main()