From 3adee6c9e8a5c112e8dd988c4d796fb0e8f4b83a Mon Sep 17 00:00:00 2001
From: xiongkun
Date: Fri, 17 Feb 2023 10:59:28 +0800
Subject: [PATCH] [bugfix] fix unuseful inputs causes segment error. (#50531)

---
 paddle/fluid/eager/grad_node_info.cc          | 62 ++++++++++++++
 paddle/fluid/eager/grad_node_info.h           |  3 +
 .../eager/to_static/run_program_op_func.h     | 18 ++++-
 .../dygraph_to_static/test_unuseful_inputs.py | 80 +++++++++++++++++++
 4 files changed, 162 insertions(+), 1 deletion(-)
 create mode 100644 python/paddle/fluid/tests/unittests/dygraph_to_static/test_unuseful_inputs.py

diff --git a/paddle/fluid/eager/grad_node_info.cc b/paddle/fluid/eager/grad_node_info.cc
index 43da47436e2..d0a1d45b443 100644
--- a/paddle/fluid/eager/grad_node_info.cc
+++ b/paddle/fluid/eager/grad_node_info.cc
@@ -326,6 +326,68 @@ void GradNodeBase::SetGradOutMeta(
   }
 }
 
+void GradNodeBase::SetGradOutMeta(
+    const std::vector<const paddle::experimental::Tensor*>& fwd_in,
+    size_t slot_rank) {
+  size_t slot_size = fwd_in.size();
+  PADDLE_ENFORCE_LE(
+      slot_rank,
+      (bwd_out_meta_.size() - 1),
+      paddle::platform::errors::InvalidArgument(
+          "Slot Rank should be less than or equal to the size of "
+          "bwd_out_meta_, since bwd_out_meta_ is designed to hold the "
+          "same number of slots as backward outputs."));
+  auto& metas = bwd_out_meta_.at(slot_rank);
+  // Init stop gradient vector before use to avoid push back
+  if (metas.size() < slot_size) {
+    metas.resize(slot_size);
+  }
+  for (size_t i = 0; i < slot_size; i++) {
+    const auto& fwd_in_tensor = (*fwd_in[i]);
+    auto& meta = metas[i];
+    auto* fwd_in_meta = egr::EagerUtils::nullable_autograd_meta(fwd_in_tensor);
+    // Set Stop_gradient
+    if (fwd_in_meta) {
+      meta.SetStopGradient(fwd_in_meta->StopGradient());
+    }
+    // Set Adj Edges
+    if (fwd_in_meta && !fwd_in_meta->StopGradient()) {
+      auto node = fwd_in_meta->GetMutableGradNode();
+      if (!node || !node.get()) {
+        fwd_in_meta->SetGradNode(
+            std::make_shared<egr::GradNodeAccumulation>(fwd_in_meta));
+      }
+      VLOG(3) << "Add Edges for slot: " << slot_rank << ", the Edge is from "
+              << this->name() << " (addr: " << this << ") "
+              << " to " << fwd_in_meta->GetMutableGradNode()->name()
+              << " (addr: " << fwd_in_meta->GetMutableGradNode().get() << ")";
+
+      meta.SetEdge(fwd_in_meta->GetMutableGradNode(),
+                   fwd_in_meta->OutRankInfo());
+    }
+    // Record TensorMeta
+    if (fwd_in_tensor.impl() && fwd_in_tensor.impl().get()) {
+      if (phi::DenseTensor::classof(fwd_in_tensor.impl().get())) {
+        // Only Copy Meta
+        phi::DenseTensor* dense_tensor =
+            static_cast<phi::DenseTensor*>(fwd_in_tensor.impl().get());
+        PADDLE_ENFORCE_NE(dense_tensor->dtype(),
+                          phi::DataType::UNDEFINED,
+                          paddle::platform::errors::Fatal(
+                              "Attempting to copy DenseTensorMeta "
+                              "with phi::DataType::UNDEFINED, "
+                              "which is illegal."));
+        meta.SetTensorMeta(dense_tensor->meta());
+        meta.SetPlace(fwd_in_tensor.place());
+      }
+    } else {
+      VLOG(7)
+          << "Unable to initialize the DenseTensorMeta of GradSlotMeta with "
+             "non-DenseTensor argument.";
+    }
+  }
+}
+
 void GradNodeBase::SetDefaultGradInOutMeta() {
   PADDLE_ENFORCE((bwd_out_meta_.size() == 1) && (bwd_in_meta_.size() == 1),
                  paddle::platform::errors::PreconditionNotMet(
diff --git a/paddle/fluid/eager/grad_node_info.h b/paddle/fluid/eager/grad_node_info.h
index f20bad71c51..de458db1234 100644
--- a/paddle/fluid/eager/grad_node_info.h
+++ b/paddle/fluid/eager/grad_node_info.h
@@ -223,6 +223,9 @@ class GradNodeBase {
   void SetGradOutMeta(const std::vector<paddle::experimental::Tensor>& fwd_in,
                       size_t slot_rank);
 
+  void SetGradOutMeta(
+      const std::vector<const paddle::experimental::Tensor*>& fwd_in,
+      size_t slot_rank);
   void SetGradOutMeta(const paddle::experimental::Tensor& fwd_in,
                       size_t slot_rank);
 
   /**
diff --git a/paddle/fluid/eager/to_static/run_program_op_func.h b/paddle/fluid/eager/to_static/run_program_op_func.h
index 6bbf62ea6c0..f37e323ef80 100644
--- a/paddle/fluid/eager/to_static/run_program_op_func.h
+++ b/paddle/fluid/eager/to_static/run_program_op_func.h
@@ -93,7 +93,23 @@ inline void run_program_ad_func(
     grad_node->SetStepScope(step_scope);
 
     // Set Grad out rank as same as fwd input and set stop gradient to bwd
-    grad_node->SetGradOutMeta(x, /*slot id*/ 0);
+    // NOTE(@xiongkun): Not every tensor in x (a list of tensors) requires a
+    // gradient. For example, if x[1] is not used to compute the output, x[1]
+    // is ignored.
+    auto* forward_global_block = PADDLE_GET_CONST(
+        paddle::framework::BlockDesc*, attrs.at("forward_global_block"));
+    auto* backward_global_block = PADDLE_GET_CONST(
+        paddle::framework::BlockDesc*, attrs.at("backward_global_block"));
+    std::vector<const paddle::experimental::Tensor*> x_require_grad;
+    for (size_t i = 0; i < x.size(); ++i) {
+      auto& name = x[i].name();
+      if (forward_global_block->HasVar(name) ||
+          backward_global_block->HasVar(name)) {
+        x_require_grad.push_back(&x[i]);
+      }
+    }
+
+    grad_node->SetGradOutMeta(x_require_grad, /*slot id*/ 0);
     grad_node->SetGradOutMeta(params, /*slot id*/ 1);
 
     VLOG(2) << "clear_no_grad_edges.";
diff --git a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_unuseful_inputs.py b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_unuseful_inputs.py
new file mode 100644
index 00000000000..2e4d12ac4dc
--- /dev/null
+++ b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_unuseful_inputs.py
@@ -0,0 +1,80 @@
+# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import unittest
+
+import numpy as np
+
+import paddle
+import paddle.nn as nn
+from paddle.jit import to_static
+
+np.random.seed(1)
+
+
+def apply_to_static(support_to_static, model, image_shape=None):
+    if support_to_static:
+        specs = None
+        model = to_static(model, input_spec=specs)
+
+    return model
+
+
+class Layer0(nn.Layer):
+    def __init__(self, level):
+        super(Layer0, self).__init__()
+        self._linear1 = nn.Linear(10, 5)
+        self._linear2 = nn.Linear(10, 5)
+        self.layer1 = Layer1(level)
+        apply_to_static(True, self.layer1)
+
+    def forward(self, x):
+        out1 = self._linear1(x)
+        out2 = self._linear2(x)
+        # out2.stop_gradient = True  # no error occurs if stop_gradient is set
+        a = [out1, out2]
+        b = self.layer1(a)
+        # self.layer1(out1, out2) also fails
+        return b
+
+
+class Layer1(nn.Layer):
+    def __init__(self, level):
+        super(Layer1, self).__init__()
+        self.level = level
+        self._linear = nn.Linear(5, 2)
+
+    def forward(self, x):
+        inp = x[self.level]
+        val = self._linear(inp)
+        return val
+
+
+class TestDuplicateOutput(unittest.TestCase):
+    """
+    TestCase checking that an input passed into a to_static sub-layer but
+    never used by it does not cause a segmentation fault during backward.
+    """
+
+    def test_case(self):
+        # create network
+        layer = Layer0(0)
+        a = paddle.rand(shape=[10, 10])
+        out = layer(a)
+        loss = out.mean()
+        loss.backward()
+
+
+if __name__ == '__main__':
+    unittest.main()
--
GitLab
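
For readers who want to reproduce the scenario this patch covers outside the unit-test harness, below is a minimal sketch that mirrors the added test. The Inner/Outer layer names are hypothetical and not part of the patch: a to_static sub-layer receives a list of tensors but reads only one of them, and backward() is run on the result, which the commit reports previously hit a segmentation fault.

# Minimal sketch (hypothetical names) of the "unused input" pattern fixed here.
import paddle
import paddle.nn as nn
from paddle.jit import to_static


class Inner(nn.Layer):
    def __init__(self):
        super().__init__()
        self._linear = nn.Linear(5, 2)

    def forward(self, xs):
        # Only xs[0] contributes to the output; xs[1] is an unused input.
        return self._linear(xs[0])


class Outer(nn.Layer):
    def __init__(self):
        super().__init__()
        self._a = nn.Linear(10, 5)
        self._b = nn.Linear(10, 5)
        # to_static routes Inner through the run_program path touched by this patch.
        self.inner = to_static(Inner())

    def forward(self, x):
        used = self._a(x)
        unused = self._b(x)  # produced in dygraph, never consumed inside Inner
        return self.inner([used, unused])


if __name__ == "__main__":
    layer = Outer()
    out = layer(paddle.rand(shape=[10, 10]))
    out.mean().backward()  # previously segfaulted; succeeds with this fix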