From f32ae272ecf433c55c4a1c1a74c745513e1bb4ee Mon Sep 17 00:00:00 2001
From: Zhen Wang
Date: Fri, 28 Aug 2020 16:49:38 +0800
Subject: [PATCH] Remove `sorted_sum_gradient_` from BasicEngine and
 PartialGradTask. (#26766)

Use `Tensor` instead of `Variable` in the doc of paddle.grad.
---
 paddle/fluid/imperative/basic_engine.cc      |  3 +--
 paddle/fluid/imperative/basic_engine.h       |  1 -
 .../fluid/imperative/partial_grad_engine.cc  |  3 +--
 python/paddle/fluid/dygraph/base.py          | 24 +++++++++----------
 4 files changed, 14 insertions(+), 17 deletions(-)

diff --git a/paddle/fluid/imperative/basic_engine.cc b/paddle/fluid/imperative/basic_engine.cc
index a91f14e56b7..9ad30506b2c 100644
--- a/paddle/fluid/imperative/basic_engine.cc
+++ b/paddle/fluid/imperative/basic_engine.cc
@@ -36,7 +36,6 @@ namespace paddle {
 namespace imperative {
 
 void BasicEngine::Init(VarBase* var, bool retain_graph) {
-  sorted_sum_gradient_ = FLAGS_sort_sum_gradient;
   retain_graph_ = retain_graph;
   init_node_ = var->GradVarBase()->GradNode();
   var->GradVarBase()->ClearGradNode();
@@ -106,7 +105,7 @@ void BasicEngine::PrepareGradAccumulators(const OpBase& op) {
 
       auto& accumulator = accumulators_[var.get()];
       if (!accumulator) {
-        if (sorted_sum_gradient_) {
+        if (FLAGS_sort_sum_gradient) {
           accumulator.reset(new SortedGradientAccumulator(var.get()));
         } else {
           accumulator.reset(new EagerGradientAccumulator(var.get()));
diff --git a/paddle/fluid/imperative/basic_engine.h b/paddle/fluid/imperative/basic_engine.h
index d1aa69f1686..0906dd4f923 100644
--- a/paddle/fluid/imperative/basic_engine.h
+++ b/paddle/fluid/imperative/basic_engine.h
@@ -44,7 +44,6 @@ class BasicEngine : public Engine {
 
  private:
   std::shared_ptr<GradOpNode> init_node_;
-  bool sorted_sum_gradient_;
   std::unordered_map<GradOpNode*, size_t> node_deps_;
   std::unordered_map<VariableWrapper*, std::unique_ptr<GradientAccumulator>>
       accumulators_;
diff --git a/paddle/fluid/imperative/partial_grad_engine.cc b/paddle/fluid/imperative/partial_grad_engine.cc
index 3afe5af7f63..5c717835e5c 100644
--- a/paddle/fluid/imperative/partial_grad_engine.cc
+++ b/paddle/fluid/imperative/partial_grad_engine.cc
@@ -578,7 +578,6 @@ class PartialGradTask {
   bool retain_graph_;
   bool allow_unused_;
   bool only_inputs_;
-  bool sorted_sum_gradient_{FLAGS_sort_sum_gradient};
 };
 
 PartialGradTask::PartialGradTask(
@@ -981,7 +980,7 @@ void PartialGradTask::PrepareInitialGradientAccumulators(const OpBase *op) {
 
       if (!accumulator) {
         accumulator.reset(new GradientAccumulationInfo(
-            var, sorted_sum_gradient_, create_graph_));
+            var, FLAGS_sort_sum_gradient, create_graph_));
       }
 
       accumulator->IncreaseTotalRefCnt();
diff --git a/python/paddle/fluid/dygraph/base.py b/python/paddle/fluid/dygraph/base.py
index 0c4a1964838..2174dbd31b8 100644
--- a/python/paddle/fluid/dygraph/base.py
+++ b/python/paddle/fluid/dygraph/base.py
@@ -327,19 +327,19 @@ def grad(outputs,
     This API computes the sum of gradients of `outputs` with respect to each `inputs` .
 
     Parameters:
-        outputs (Variable|list(Variable)|tuple(Variable)): the output Variable or
-            Variable list/tuple of the graph to compute gradients.
-        inputs (Variable|list(Variable)|tuple(Variable)): the input Variable or
-            Variable list/tuple of the graph to compute gradients. The returned
+        outputs (Tensor|list(Tensor)|tuple(Tensor)): the output Tensor or
+            Tensor list/tuple of the graph to compute gradients.
+        inputs (Tensor|list(Tensor)|tuple(Tensor)): the input Tensor or
+            Tensor list/tuple of the graph to compute gradients. The returned
             values of this API are the gradients of `inputs` .
- grad_outputs (Variable|list(Variable|None)|tuple(Variable|None), optional): + grad_outputs (Tensor|list(Tensor|None)|tuple(Tensor|None), optional): initial gradient values of `outputs` . If `grad_outputs` is None, the initial gradient values of `outputs` would be Tensors filled with 1; if `grad_outputs` is not None, it must have the same length as `outputs` , and in this case, the initial gradient value of the i-th `outputs` would be: (1) a Tensor filled with 1 when the i-th element of `grad_outputs` is None; (2) the i-th element of `grad_outputs` when the i-th element of - `grad_outputs` is a Variable. Default None. + `grad_outputs` is a Tensor. Default None. retain_graph (bool, optional): whether to retain the forward graph which is used to calculate the gradient. When it is True, the graph would be retained, in which way users can calculate backward twice for the @@ -351,21 +351,21 @@ def grad(outputs, computing process would be discarded. Default False. only_inputs (bool, optional): whether to only compute the gradients of `inputs` . If it is False, the gradients of all remaining leaf - Variables in the graph would be also computed and accumulated. + Tensors in the graph would be also computed and accumulated. If it is True, only the gradients of `inputs` would be computed. Default True. only_inputs=False is under development, and it is not supported yet. allow_unused (bool, optional): whether to raise error or return None if some - Variables of `inputs` are unreachable in the graph. If some Variables of + Tensors of `inputs` are unreachable in the graph. If some Tensors of `inputs` are unreachable in the graph (i.e., their gradients are None), error would be raised if allow_unused=False, or None would be returned as their gradients if allow_unused=True. Default False. - no_grad_vars (Variable|list(Variable)|tuple(Variable)|set(Variable), optional): - the Variables whose gradients are not needed to compute. Default None. + no_grad_vars (Tensor|list(Tensor)|tuple(Tensor)|set(Tensor), optional): + the Tensors whose gradients are not needed to compute. Default None. Returns: - tuple: a tuple of Variables, whose length is the same as the Variable number - inside `inputs`, and the i-th returned Variable is the sum of gradients of + tuple: a tuple of Tensors, whose length is the same as the Tensor number + inside `inputs`, and the i-th returned Tensor is the sum of gradients of `outputs` with respect to the i-th `inputs`. Examples 1: -- GitLab