Unverified · Commit f32ae272 · authored by Zhen Wang · committed by GitHub

Remove `sorted_sum_gradient_` from BasicEngine and PartialGradTask. (#26766)

Use `Tensor` instead of `Variable` in the doc of paddle.grad.
Parent 7b78bfc0
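With this change, `BasicEngine` and `PartialGradTask` no longer cache the flag in a `sorted_sum_gradient_` member; both read `FLAGS_sort_sum_gradient` directly at the moment each gradient accumulator is created. A minimal sketch of the user-visible effect, assuming a Paddle 2.x dynamic-graph build where `paddle.set_flags` can toggle this flag (the script is illustrative only and is not part of this commit):

    import paddle

    # Assumed toggle: because the engines now read the flag at
    # accumulator-creation time instead of caching it in Init(),
    # flipping it here affects the very next backward pass.
    paddle.set_flags({'FLAGS_sort_sum_gradient': True})

    x = paddle.ones([2, 2])
    x.stop_gradient = False
    y = (x * x).sum()
    y.backward()  # gradient sums now go through SortedGradientAccumulator
    print(x.grad)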
@@ -36,7 +36,6 @@ namespace paddle {
 namespace imperative {

 void BasicEngine::Init(VarBase* var, bool retain_graph) {
-  sorted_sum_gradient_ = FLAGS_sort_sum_gradient;
   retain_graph_ = retain_graph;
   init_node_ = var->GradVarBase()->GradNode();
   var->GradVarBase()->ClearGradNode();
@@ -106,7 +105,7 @@ void BasicEngine::PrepareGradAccumulators(const OpBase& op) {
       auto& accumulator = accumulators_[var.get()];
       if (!accumulator) {
-        if (sorted_sum_gradient_) {
+        if (FLAGS_sort_sum_gradient) {
          accumulator.reset(new SortedGradientAccumulator(var.get()));
        } else {
          accumulator.reset(new EagerGradientAccumulator(var.get()));
...
@@ -44,7 +44,6 @@ class BasicEngine : public Engine {
  private:
   std::shared_ptr<GradOpNode> init_node_;
-  bool sorted_sum_gradient_;
   std::unordered_map<GradOpNode*, size_t> node_deps_;
   std::unordered_map<VariableWrapper*, std::unique_ptr<GradientAccumulator>>
       accumulators_;
...
@@ -578,7 +578,6 @@ class PartialGradTask {
   bool retain_graph_;
   bool allow_unused_;
   bool only_inputs_;
-  bool sorted_sum_gradient_{FLAGS_sort_sum_gradient};
 };

 PartialGradTask::PartialGradTask(
@@ -981,7 +980,7 @@ void PartialGradTask::PrepareInitialGradientAccumulators(const OpBase *op) {
     if (!accumulator) {
       accumulator.reset(new GradientAccumulationInfo(
-          var, sorted_sum_gradient_, create_graph_));
+          var, FLAGS_sort_sum_gradient, create_graph_));
     }
     accumulator->IncreaseTotalRefCnt();
...
@@ -327,19 +327,19 @@ def grad(outputs,
     This API computes the sum of gradients of `outputs` with respect to each `inputs` .

     Parameters:
-        outputs (Variable|list(Variable)|tuple(Variable)): the output Variable or
-            Variable list/tuple of the graph to compute gradients.
-        inputs (Variable|list(Variable)|tuple(Variable)): the input Variable or
-            Variable list/tuple of the graph to compute gradients. The returned
+        outputs (Tensor|list(Tensor)|tuple(Tensor)): the output Tensor or
+            Tensor list/tuple of the graph to compute gradients.
+        inputs (Tensor|list(Tensor)|tuple(Tensor)): the input Tensor or
+            Tensor list/tuple of the graph to compute gradients. The returned
             values of this API are the gradients of `inputs` .
-        grad_outputs (Variable|list(Variable|None)|tuple(Variable|None), optional):
+        grad_outputs (Tensor|list(Tensor|None)|tuple(Tensor|None), optional):
             initial gradient values of `outputs` . If `grad_outputs` is None,
             the initial gradient values of `outputs` would be Tensors filled with 1;
             if `grad_outputs` is not None, it must have the same length as `outputs` ,
             and in this case, the initial gradient value of the i-th `outputs` would
             be: (1) a Tensor filled with 1 when the i-th element of `grad_outputs`
             is None; (2) the i-th element of `grad_outputs` when the i-th element of
-            `grad_outputs` is a Variable. Default None.
+            `grad_outputs` is a Tensor. Default None.
         retain_graph (bool, optional): whether to retain the forward graph which
             is used to calculate the gradient. When it is True, the graph would
             be retained, in which way users can calculate backward twice for the
@@ -351,21 +351,21 @@ def grad(outputs,
             computing process would be discarded. Default False.
         only_inputs (bool, optional): whether to only compute the gradients of
             `inputs` . If it is False, the gradients of all remaining leaf
-            Variables in the graph would be also computed and accumulated.
+            Tensors in the graph would be also computed and accumulated.
             If it is True, only the gradients of `inputs` would be computed.
             Default True. only_inputs=False is under development, and it is
             not supported yet.
         allow_unused (bool, optional): whether to raise error or return None if some
-            Variables of `inputs` are unreachable in the graph. If some Variables of
+            Tensors of `inputs` are unreachable in the graph. If some Tensors of
             `inputs` are unreachable in the graph (i.e., their gradients are None),
             error would be raised if allow_unused=False, or None would be returned as
             their gradients if allow_unused=True. Default False.
-        no_grad_vars (Variable|list(Variable)|tuple(Variable)|set(Variable), optional):
-            the Variables whose gradients are not needed to compute. Default None.
+        no_grad_vars (Tensor|list(Tensor)|tuple(Tensor)|set(Tensor), optional):
+            the Tensors whose gradients are not needed to compute. Default None.

     Returns:
-        tuple: a tuple of Variables, whose length is the same as the Variable number
-            inside `inputs`, and the i-th returned Variable is the sum of gradients of
+        tuple: a tuple of Tensors, whose length is the same as the Tensor number
+            inside `inputs`, and the i-th returned Tensor is the sum of gradients of
             `outputs` with respect to the i-th `inputs`.

     Examples 1:
...
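For reference, a hedged usage sketch of the documented API under the new `Tensor` wording, assuming a Paddle 2.x dynamic-graph session (the values follow from d(x*x)/dx = 2x; this example is not part of the diff):

    import paddle

    x = paddle.full([2], 2.0)
    x.stop_gradient = False
    y = x * x

    # Sum of gradients of `outputs` w.r.t. each `inputs`; with grad_outputs
    # omitted, the initial gradient of `y` is a Tensor filled with 1.
    dx = paddle.grad(outputs=[y], inputs=[x])[0]
    print(dx.numpy())  # [4. 4.]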