Unverified commit f32ae272, authored by Zhen Wang, committed by GitHub

Remove `sorted_sum_gradient_` from BasicEngine and PartialGradTask. (#26766)

Use `Tensor` instead of `Variable` in the doc of paddle.grad.
Parent 7b78bfc0
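Context for the diff below: `FLAGS_sort_sum_gradient` chooses between the two gradient-accumulator implementations (`SortedGradientAccumulator` vs. `EagerGradientAccumulator`), and this patch makes both engines read the global flag directly at the point where accumulators are created instead of caching it in a `sorted_sum_gradient_` member. A minimal sketch of driving the flag from Python follows; it is hedged: it assumes the 2020-era dygraph API of the develop branch this commit targets (`paddle.disable_static`, `fluid.set_flags`), and the tensors are purely illustrative.

import paddle
import paddle.fluid as fluid

paddle.disable_static()

x = paddle.ones([2, 2], dtype='float32')
x.stop_gradient = False

# Flag off: gradients are summed eagerly (EagerGradientAccumulator).
fluid.set_flags({'FLAGS_sort_sum_gradient': False})
y = paddle.sum(x * x)
y.backward()

# Flag on: gradients are summed in sorted order (SortedGradientAccumulator).
# After this patch the engines consult the flag when creating accumulators
# rather than a value cached at engine initialization.
fluid.set_flags({'FLAGS_sort_sum_gradient': True})
z = paddle.sum(3 * x)
z.backward()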
@@ -36,7 +36,6 @@ namespace paddle {
 namespace imperative {
 
 void BasicEngine::Init(VarBase* var, bool retain_graph) {
-  sorted_sum_gradient_ = FLAGS_sort_sum_gradient;
   retain_graph_ = retain_graph;
   init_node_ = var->GradVarBase()->GradNode();
   var->GradVarBase()->ClearGradNode();
@@ -106,7 +105,7 @@ void BasicEngine::PrepareGradAccumulators(const OpBase& op) {
       auto& accumulator = accumulators_[var.get()];
       if (!accumulator) {
-        if (sorted_sum_gradient_) {
+        if (FLAGS_sort_sum_gradient) {
           accumulator.reset(new SortedGradientAccumulator(var.get()));
         } else {
           accumulator.reset(new EagerGradientAccumulator(var.get()));
......
@@ -44,7 +44,6 @@ class BasicEngine : public Engine {
  private:
   std::shared_ptr<GradOpNode> init_node_;
-  bool sorted_sum_gradient_;
   std::unordered_map<GradOpNode*, size_t> node_deps_;
   std::unordered_map<VariableWrapper*, std::unique_ptr<GradientAccumulator>>
       accumulators_;
......
@@ -578,7 +578,6 @@ class PartialGradTask {
   bool retain_graph_;
   bool allow_unused_;
   bool only_inputs_;
-  bool sorted_sum_gradient_{FLAGS_sort_sum_gradient};
 };
 
 PartialGradTask::PartialGradTask(
@@ -981,7 +980,7 @@ void PartialGradTask::PrepareInitialGradientAccumulators(const OpBase *op) {
       if (!accumulator) {
         accumulator.reset(new GradientAccumulationInfo(
-            var, sorted_sum_gradient_, create_graph_));
+            var, FLAGS_sort_sum_gradient, create_graph_));
       }
       accumulator->IncreaseTotalRefCnt();
......
@@ -327,19 +327,19 @@ def grad(outputs,
     This API computes the sum of gradients of `outputs` with respect to each `inputs` .
 
     Parameters:
-        outputs (Variable|list(Variable)|tuple(Variable)): the output Variable or
-            Variable list/tuple of the graph to compute gradients.
-        inputs (Variable|list(Variable)|tuple(Variable)): the input Variable or
-            Variable list/tuple of the graph to compute gradients. The returned
+        outputs (Tensor|list(Tensor)|tuple(Tensor)): the output Tensor or
+            Tensor list/tuple of the graph to compute gradients.
+        inputs (Tensor|list(Tensor)|tuple(Tensor)): the input Tensor or
+            Tensor list/tuple of the graph to compute gradients. The returned
             values of this API are the gradients of `inputs` .
-        grad_outputs (Variable|list(Variable|None)|tuple(Variable|None), optional):
+        grad_outputs (Tensor|list(Tensor|None)|tuple(Tensor|None), optional):
             initial gradient values of `outputs` . If `grad_outputs` is None,
             the initial gradient values of `outputs` would be Tensors filled with 1;
             if `grad_outputs` is not None, it must have the same length as `outputs` ,
             and in this case, the initial gradient value of the i-th `outputs` would
             be: (1) a Tensor filled with 1 when the i-th element of `grad_outputs`
             is None; (2) the i-th element of `grad_outputs` when the i-th element of
-            `grad_outputs` is a Variable. Default None.
+            `grad_outputs` is a Tensor. Default None.
         retain_graph (bool, optional): whether to retain the forward graph which
             is used to calculate the gradient. When it is True, the graph would
             be retained, in which way users can calculate backward twice for the
@@ -351,21 +351,21 @@ def grad(outputs,
             computing process would be discarded. Default False.
         only_inputs (bool, optional): whether to only compute the gradients of
             `inputs` . If it is False, the gradients of all remaining leaf
-            Variables in the graph would be also computed and accumulated.
+            Tensors in the graph would be also computed and accumulated.
             If it is True, only the gradients of `inputs` would be computed.
             Default True. only_inputs=False is under development, and it is
             not supported yet.
         allow_unused (bool, optional): whether to raise error or return None if some
-            Variables of `inputs` are unreachable in the graph. If some Variables of
+            Tensors of `inputs` are unreachable in the graph. If some Tensors of
             `inputs` are unreachable in the graph (i.e., their gradients are None),
             error would be raised if allow_unused=False, or None would be returned as
             their gradients if allow_unused=True. Default False.
-        no_grad_vars (Variable|list(Variable)|tuple(Variable)|set(Variable), optional):
-            the Variables whose gradients are not needed to compute. Default None.
+        no_grad_vars (Tensor|list(Tensor)|tuple(Tensor)|set(Tensor), optional):
+            the Tensors whose gradients are not needed to compute. Default None.
 
     Returns:
-        tuple: a tuple of Variables, whose length is the same as the Variable number
-            inside `inputs`, and the i-th returned Variable is the sum of gradients of
+        tuple: a tuple of Tensors, whose length is the same as the Tensor number
+            inside `inputs`, and the i-th returned Tensor is the sum of gradients of
             `outputs` with respect to the i-th `inputs`.
 
     Examples 1:
......
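To complement the updated docstring, here is a short usage sketch of paddle.grad. It is a hedged illustration assuming the 2.0 dygraph API; the values are made up for this note and are not the elided "Examples 1" block from the source.

import paddle

paddle.disable_static()

x = paddle.ones([3], dtype='float32')
x.stop_gradient = False
y = x * x  # elementwise, so dy_i/dx_i = 2 * x_i

# grad_outputs=None: the initial gradient of `y` is a Tensor filled with 1.
# retain_graph=True keeps the forward graph alive for the second call.
dx = paddle.grad(outputs=[y], inputs=[x], retain_graph=True)[0]
print(dx.numpy())  # [2. 2. 2.]

# An explicit grad_outputs Tensor scales the initial gradient of `y`.
w = paddle.full([3], 0.5, dtype='float32')
dx_scaled = paddle.grad(outputs=[y], inputs=[x], grad_outputs=[w])[0]
print(dx_scaled.numpy())  # [1. 1. 1.]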