未验证 提交 0b4c3c20 作者: Zhanlue Yang 提交者: GitHub

[DoubleGrad] Enabled double grad test cases in eager_mode for test_imperative_double_grad (#41451)

* [DoubleGrad] Enabled double grad test cases in eager_mode for test_imperative_double_grad

* Fixed elementwise issue

* Addressed CI failures
上级 c448032b
...@@ -23,7 +23,7 @@ import os ...@@ -23,7 +23,7 @@ import os
######################## ########################
# Names of backward ops selected for zero-filling of empty grads.
# NOTE(review): the consumers of this set are outside this view; presumably
# the code generator tests membership by exact op name -- confirm.
# Every op name must be its own string literal: a single "a, b" literal adds
# one bogus element to the set and neither op name would ever match.
ops_to_fill_zero_for_empty_grads = set([
    "split_grad", "rnn_grad", "matmul_double_grad", "matmul_triple_grad",
    "sigmoid_triple_grad", "add_double_grad"
])
# For API dispatch used at python-level # For API dispatch used at python-level
......
...@@ -205,6 +205,7 @@ FORWARD_FUNCTION_TEMPLATE = \ ...@@ -205,6 +205,7 @@ FORWARD_FUNCTION_TEMPLATE = \
#endif #endif
}} }}
// Forward API Call // Forward API Call
VLOG(3) << \"Final State Running: \" << \"{}\";
{} {}
// Get Outputs // Get Outputs
{} {}
...@@ -505,15 +506,11 @@ class DygraphFunctionGeneratorBase(FunctionGeneratorBase): ...@@ -505,15 +506,11 @@ class DygraphFunctionGeneratorBase(FunctionGeneratorBase):
for i in range(len(forward_attrs_list)): for i in range(len(forward_attrs_list)):
orig_attr_type = orig_forward_attrs_list[i][1] orig_attr_type = orig_forward_attrs_list[i][1]
orig_attr_default = orig_forward_attrs_list[i][2]
orig_attr_pos = orig_forward_attrs_list[i][3] orig_attr_pos = orig_forward_attrs_list[i][3]
forward_attr_type = forward_attrs_list[i][1] forward_attr_type = forward_attrs_list[i][1]
forward_attr_default = forward_attrs_list[i][2]
forward_attr_pos = forward_attrs_list[i][3] forward_attr_pos = forward_attrs_list[i][3]
assert orig_attr_type == forward_attr_type, AssertMessage( assert orig_attr_type == forward_attr_type, AssertMessage(
orig_attr_type, forward_attr_type) orig_attr_type, forward_attr_type)
assert orig_attr_default == forward_attr_default, AssertMessage(
orig_attr_default, forward_attr_default)
assert orig_attr_pos == forward_attr_pos, AssertMessage( assert orig_attr_pos == forward_attr_pos, AssertMessage(
orig_attr_pos, forward_attr_pos) orig_attr_pos, forward_attr_pos)
...@@ -753,6 +750,15 @@ class DygraphFunctionGeneratorBase(FunctionGeneratorBase): ...@@ -753,6 +750,15 @@ class DygraphFunctionGeneratorBase(FunctionGeneratorBase):
set_grad_out_meta_list = [] set_grad_out_meta_list = []
set_edges_list = [] set_edges_list = []
for name, (_, pos) in forward_inputs_position_map.items(): for name, (_, pos) in forward_inputs_position_map.items():
# Has corresponding grad output
has_corresponding_grad_output = False
for _, (_, corresponding_pos,
_) in backward_grad_outputs_map.items():
if pos == corresponding_pos:
has_corresponding_grad_output = True
if not has_corresponding_grad_output:
continue
input_autograd_meta_name = GetAutoGradMetaName(name) input_autograd_meta_name = GetAutoGradMetaName(name)
is_optional = (name in self.optional_inputs) is_optional = (name in self.optional_inputs)
if is_optional: if is_optional:
...@@ -1063,9 +1069,10 @@ class DygraphForwardFunctionGenerator(DygraphFunctionGeneratorBase): ...@@ -1063,9 +1069,10 @@ class DygraphForwardFunctionGenerator(DygraphFunctionGeneratorBase):
self.forward_definition_str += FORWARD_FUNCTION_TEMPLATE.format( self.forward_definition_str += FORWARD_FUNCTION_TEMPLATE.format(
returns_type_str, forward_function_name, inputs_args_definition_str, returns_type_str, forward_function_name, inputs_args_definition_str,
dygraph_event_str, amp_logic_str, inputs_autograd_meta_str, dygraph_event_str, amp_logic_str, inputs_autograd_meta_str,
forward_call_str, get_outputs_str, outputs_autograd_meta_str, forward_function_name, forward_call_str, get_outputs_str,
compute_require_grad_args_str, check_inplace_str, outputs_autograd_meta_str, compute_require_grad_args_str,
bump_inplace_version_str, node_creation_str, returns_str) check_inplace_str, bump_inplace_version_str, node_creation_str,
returns_str)
self.forward_declaration_str += f"{returns_type_str} {forward_function_name}({inputs_args_declaration_str});\n" self.forward_declaration_str += f"{returns_type_str} {forward_function_name}({inputs_args_declaration_str});\n"
logging.info( logging.info(
...@@ -1439,28 +1446,18 @@ class DygraphNodeGenerator(DygraphFunctionGeneratorBase): ...@@ -1439,28 +1446,18 @@ class DygraphNodeGenerator(DygraphFunctionGeneratorBase):
compute_require_grad_str += f"{indent}bool require_any_grad = egr::EagerUtils::ComputeRequireGrad({compute_require_grad_args_str});" compute_require_grad_str += f"{indent}bool require_any_grad = egr::EagerUtils::ComputeRequireGrad({compute_require_grad_args_str});"
# Construct grad_api returns # Construct grad_api returns
num_bwd_outputs = len(backward_grad_outputs_map.keys())
slot_num_bwd_outputs = len(self.forward_inputs_position_map.keys()) slot_num_bwd_outputs = len(self.forward_inputs_position_map.keys())
returns_str = f"{indent}std::vector<std::vector<paddle::experimental::Tensor>> returns({slot_num_bwd_outputs});\n" returns_str = f"{indent}std::vector<std::vector<paddle::experimental::Tensor>> returns({slot_num_bwd_outputs});\n"
for name, (ttype, fwd_position, for name, (ttype, fwd_position,
grad_api_position) in backward_grad_outputs_map.items(): grad_api_position) in backward_grad_outputs_map.items():
transformed_tensor_name = self.TransformToNextGradName(name) transformed_tensor_name = self.TransformToNextGradName(name)
# Infer Grad API Return Type # Rearrange output order accordingly
if num_bwd_outputs == 1: if IsPlainTensorType(ttype):
# Single tensor output, return as is returns_str += f"{indent}returns[{fwd_position}] = {{ {transformed_tensor_name} }};\n"
if IsPlainTensorType(ttype):
returns_str += f"{indent}returns[0] = {{ {transformed_tensor_name} }};\n"
else:
assert IsVectorTensorType(ttype)
returns_str += f"{indent}returns[0] = {transformed_tensor_name};\n"
else: else:
# Rearrange output order accordingly assert IsVectorTensorType(ttype)
if IsPlainTensorType(ttype): returns_str += f"{indent}returns[{fwd_position}] = {transformed_tensor_name};\n"
returns_str += f"{indent}returns[{fwd_position}] = {{ {transformed_tensor_name} }};\n"
else:
assert IsVectorTensorType(ttype)
returns_str += f"{indent}returns[{fwd_position}] = {transformed_tensor_name};\n"
returns_str += f"{indent}if(NeedComplexToRealConversion()) HandleComplexGradToRealGrad(&returns);\n" returns_str += f"{indent}if(NeedComplexToRealConversion()) HandleComplexGradToRealGrad(&returns);\n"
returns_str += f"{indent}return returns;\n" returns_str += f"{indent}return returns;\n"
......
...@@ -485,6 +485,7 @@ std::unordered_map<GradNodeBase*, int> getInDegreeMap( ...@@ -485,6 +485,7 @@ std::unordered_map<GradNodeBase*, int> getInDegreeMap(
} }
} }
} }
return node_in_degree_map; return node_in_degree_map;
} }
...@@ -526,6 +527,7 @@ std::vector<paddle::experimental::Tensor> RunBackward( ...@@ -526,6 +527,7 @@ std::vector<paddle::experimental::Tensor> RunBackward(
bool allow_unused = false, bool allow_unused = false,
const std::vector<paddle::experimental::Tensor>& no_grad_vars = {}) { const std::vector<paddle::experimental::Tensor>& no_grad_vars = {}) {
VLOG(6) << "Start Backward"; VLOG(6) << "Start Backward";
// *Gradient Hook should happen at node-level // *Gradient Hook should happen at node-level
// *Inplace version check should perform at node-level // *Inplace version check should perform at node-level
// *Cross-batch accumulation happens at forward pass // *Cross-batch accumulation happens at forward pass
...@@ -729,6 +731,16 @@ std::vector<paddle::experimental::Tensor> RunBackward( ...@@ -729,6 +731,16 @@ std::vector<paddle::experimental::Tensor> RunBackward(
continue; continue;
} }
auto* next_node = next_node_shared.get();
if (!node_input_buffers_dict.count(next_node)) {
const auto& input_meta = next_node->InputMeta();
auto grad_tensor_holder =
std::make_unique<GradTensorHolder>(input_meta);
VLOG(6) << "Construct GradTensorHolder for grad node: "
<< next_node->name();
node_input_buffers_dict[next_node] = std::move(grad_tensor_holder);
}
PADDLE_ENFORCE_LT( PADDLE_ENFORCE_LT(
j, grad_output_tensors[i].size(), j, grad_output_tensors[i].size(),
paddle::platform::errors::Fatal( paddle::platform::errors::Fatal(
...@@ -748,15 +760,6 @@ std::vector<paddle::experimental::Tensor> RunBackward( ...@@ -748,15 +760,6 @@ std::vector<paddle::experimental::Tensor> RunBackward(
<< ", rank: " << j << ", rank: " << j
<< " 's name is: " << grad_output_tensor.name(); << " 's name is: " << grad_output_tensor.name();
auto* next_node = next_node_shared.get();
if (!node_input_buffers_dict.count(next_node)) {
const auto& input_meta = next_node->InputMeta();
auto grad_tensor_holder =
std::make_unique<GradTensorHolder>(input_meta);
VLOG(6) << "Construct GradTensorHolder for grad node: "
<< next_node->name();
node_input_buffers_dict[next_node] = std::move(grad_tensor_holder);
}
VLOG(6) << "Sum grad inputs for edge slot: " << edge_rank.first VLOG(6) << "Sum grad inputs for edge slot: " << edge_rank.first
<< ", rank: " << edge_rank.second; << ", rank: " << edge_rank.second;
node_input_buffers_dict[next_node]->add( node_input_buffers_dict[next_node]->add(
......
...@@ -63,9 +63,9 @@ void AddGradKernel(const Context& dev_ctx, ...@@ -63,9 +63,9 @@ void AddGradKernel(const Context& dev_ctx,
template <typename T, typename Context> template <typename T, typename Context>
void AddDoubleGradKernel(const Context& dev_ctx, void AddDoubleGradKernel(const Context& dev_ctx,
const DenseTensor& y, const DenseTensor& y,
const DenseTensor& dout,
paddle::optional<const DenseTensor&> ddx, paddle::optional<const DenseTensor&> ddx,
paddle::optional<const DenseTensor&> ddy, paddle::optional<const DenseTensor&> ddy,
const DenseTensor& dout,
int axis, int axis,
DenseTensor* ddout) { DenseTensor* ddout) {
phi::AddDoubleGradImpl<T>(dev_ctx, y, ddx, ddy, dout, axis, ddout); phi::AddDoubleGradImpl<T>(dev_ctx, y, ddx, ddy, dout, axis, ddout);
......
...@@ -31,9 +31,9 @@ void AddGradKernel(const Context& dev_ctx, ...@@ -31,9 +31,9 @@ void AddGradKernel(const Context& dev_ctx,
template <typename T, typename Context> template <typename T, typename Context>
void AddDoubleGradKernel(const Context& dev_ctx, void AddDoubleGradKernel(const Context& dev_ctx,
const DenseTensor& y, const DenseTensor& y,
const DenseTensor& dout,
paddle::optional<const DenseTensor&> ddx, paddle::optional<const DenseTensor&> ddx,
paddle::optional<const DenseTensor&> ddy, paddle::optional<const DenseTensor&> ddy,
const DenseTensor& dout,
int axis, int axis,
DenseTensor* ddout); DenseTensor* ddout);
......
...@@ -56,9 +56,9 @@ void AddGradKernel(const Context& dev_ctx, ...@@ -56,9 +56,9 @@ void AddGradKernel(const Context& dev_ctx,
template <typename T, typename Context> template <typename T, typename Context>
void AddDoubleGradKernel(const Context& dev_ctx, void AddDoubleGradKernel(const Context& dev_ctx,
const DenseTensor& y, const DenseTensor& y,
const DenseTensor& dout,
paddle::optional<const DenseTensor&> ddx, paddle::optional<const DenseTensor&> ddx,
paddle::optional<const DenseTensor&> ddy, paddle::optional<const DenseTensor&> ddy,
const DenseTensor& dout,
int axis, int axis,
DenseTensor* ddout) { DenseTensor* ddout) {
phi::AddDoubleGradImpl<T>(dev_ctx, y, ddx, ddy, dout, axis, ddout); phi::AddDoubleGradImpl<T>(dev_ctx, y, ddx, ddy, dout, axis, ddout);
......
...@@ -115,7 +115,7 @@ KernelSignature ElementwiseAddGradOpArgumentMapping( ...@@ -115,7 +115,7 @@ KernelSignature ElementwiseAddGradOpArgumentMapping(
KernelSignature ElementwiseAddDoubleGradOpArgumentMapping( KernelSignature ElementwiseAddDoubleGradOpArgumentMapping(
const ArgumentMappingContext& ctx) { const ArgumentMappingContext& ctx) {
return KernelSignature( return KernelSignature(
"add_double_grad", {"Y", "DDX", "DDY", "DOut"}, {"axis"}, {"DDOut"}); "add_double_grad", {"Y", "DOut", "DDX", "DDY"}, {"axis"}, {"DDOut"});
} }
KernelSignature ElementwiseAddTripleGradOpArgumentMapping( KernelSignature ElementwiseAddTripleGradOpArgumentMapping(
......
...@@ -15,7 +15,7 @@ ...@@ -15,7 +15,7 @@
from __future__ import print_function from __future__ import print_function
from .. import core from .. import core
from ..framework import Variable, convert_np_dtype_to_dtype_, _varbase_creator from ..framework import Variable, convert_np_dtype_to_dtype_, _varbase_creator, _in_legacy_dygraph, in_dygraph_mode
from ..layers.layer_function_generator import OpProtoHolder from ..layers.layer_function_generator import OpProtoHolder
from . import no_grad from . import no_grad
from .. import framework from .. import framework
...@@ -62,6 +62,15 @@ _complex_dtypes = [ ...@@ -62,6 +62,15 @@ _complex_dtypes = [
_already_patch_varbase = False _already_patch_varbase = False
_already_patch_eager_tensor = False _already_patch_eager_tensor = False
# Dispatch to final state Python-C functions
_final_state_op_type_mapping = {
"elementwise_add": "final_state_add",
"elementwise_sub": "final_state_subtract",
"elementwise_div": "final_state_divide",
"elementwise_mul": "final_state_multiply",
"matmul_v2": "final_state_matmul",
}
def monkey_patch_math_varbase(): def monkey_patch_math_varbase():
""" """
...@@ -105,10 +114,15 @@ def monkey_patch_math_varbase(): ...@@ -105,10 +114,15 @@ def monkey_patch_math_varbase():
""" """
if not isinstance(dtype, core.VarDesc.VarType): if not isinstance(dtype, core.VarDesc.VarType):
dtype = convert_np_dtype_to_dtype_(dtype) dtype = convert_np_dtype_to_dtype_(dtype)
return _C_ops.cast(self, 'in_dtype', self.dtype, 'out_dtype', dtype)
if _in_legacy_dygraph():
return _C_ops.cast(self, 'in_dtype', self.dtype, 'out_dtype', dtype)
return _C_ops.final_state_cast(self, dtype)
def _scalar_elementwise_op_(var, scale, bias): def _scalar_elementwise_op_(var, scale, bias):
return _C_ops.scale(var, 'scale', scale, 'bias', bias) if _in_legacy_dygraph():
return _C_ops.scale(var, 'scale', scale, 'bias', bias)
return _C_ops.final_state_scale(var, float(scale), bias, True)
def _neg_(var): def _neg_(var):
return _scalar_elementwise_op_(var, -1.0, 0.0) return _scalar_elementwise_op_(var, -1.0, 0.0)
...@@ -164,7 +178,10 @@ def monkey_patch_math_varbase(): ...@@ -164,7 +178,10 @@ def monkey_patch_math_varbase():
perm = [] perm = []
for i in range(len(var.shape)): for i in range(len(var.shape)):
perm.insert(0, i) perm.insert(0, i)
out, _ = _C_ops.transpose2(var, 'axis', perm) if _in_legacy_dygraph():
out, _ = _C_ops.transpose2(var, 'axis', perm)
else:
out = _C_ops.final_state_transpose(var, perm)
return out return out
def _scalar_add_(var, value): def _scalar_add_(var, value):
...@@ -270,11 +287,13 @@ def monkey_patch_math_varbase(): ...@@ -270,11 +287,13 @@ def monkey_patch_math_varbase():
# 4. calculation # 4. calculation
axis = -1 axis = -1
if framework._in_eager_mode_ and op_type == 'elementwise_add': if in_dygraph_mode(
math_op = getattr(_C_ops, 'final_state_add') ) and op_type in _final_state_op_type_mapping.keys():
math_op = getattr(_C_ops, _final_state_op_type_mapping[op_type])
return math_op(self, other_var)
else: else:
math_op = getattr(_C_ops, op_type) math_op = getattr(_C_ops, op_type)
return math_op(self, other_var, 'axis', axis) return math_op(self, other_var, 'axis', axis)
comment = OpProtoHolder.instance().get_op_proto(op_type).comment comment = OpProtoHolder.instance().get_op_proto(op_type).comment
......
...@@ -9036,7 +9036,10 @@ def relu(x, name=None): ...@@ -9036,7 +9036,10 @@ def relu(x, name=None):
# [[0. 0. ] # [[0. 0. ]
# [1. 2.6]] # [1. 2.6]]
""" """
if _non_static_mode():
if in_dygraph_mode():
return _C_ops.final_state_relu(x)
if _in_legacy_dygraph():
return _C_ops.relu(x) return _C_ops.relu(x)
check_variable_and_dtype(x, 'x', ['float16', 'float32', 'float64'], 'relu') check_variable_and_dtype(x, 'x', ['float16', 'float32', 'float64'], 'relu')
......
...@@ -385,26 +385,23 @@ class TestDygraphDoubleGrad(TestCase): ...@@ -385,26 +385,23 @@ class TestDygraphDoubleGrad(TestCase):
(x_np > 0) * 2).astype('float32') (x_np > 0) * 2).astype('float32')
self.assertTrue(np.allclose(dx_actual.numpy(), dx_expected)) self.assertTrue(np.allclose(dx_actual.numpy(), dx_expected))
if not _in_legacy_dygraph(): loss = fluid.layers.reduce_mean(dx_actual * dx_actual + x * x)
pass loss.backward(retain_graph=True)
else:
loss = fluid.layers.reduce_mean(dx_actual * dx_actual + x * x)
loss.backward(retain_graph=True)
x_grad_actual = x.gradient()
x_grad_expected = (2.0 / float(numel) *
(x_np + dx_expected *
(x_np > 0) * 2 / float(numel))).astype('float32')
self.assertTrue(np.allclose(x_grad_actual, x_grad_expected))
for i in range(5):
loss.backward(retain_graph=True)
x_grad_actual = x.gradient() x_grad_actual = x.gradient()
x_grad_expected = (2.0 / float(numel) * ( x_grad_expected = (i + 2) * (2.0 / float(numel) * (
x_np + dx_expected * x_np + dx_expected *
(x_np > 0) * 2 / float(numel))).astype('float32') (x_np > 0) * 2 / float(numel))).astype('float32')
self.assertTrue(np.allclose(x_grad_actual, x_grad_expected)) self.assertTrue(np.allclose(x_grad_actual, x_grad_expected))
for i in range(5):
loss.backward(retain_graph=True)
x_grad_actual = x.gradient()
x_grad_expected = (i + 2) * (2.0 / float(numel) * (
x_np + dx_expected *
(x_np > 0) * 2 / float(numel))).astype('float32')
self.assertTrue(np.allclose(x_grad_actual, x_grad_expected))
def test_example_with_gradient_accumulation_and_create_graph(self): def test_example_with_gradient_accumulation_and_create_graph(self):
with _test_eager_guard(): with _test_eager_guard():
self.func_example_with_gradient_accumulation_and_create_graph() self.func_example_with_gradient_accumulation_and_create_graph()
...@@ -426,7 +423,10 @@ class TestDygraphDoubleGrad(TestCase): ...@@ -426,7 +423,10 @@ class TestDygraphDoubleGrad(TestCase):
del y1, z, w del y1, z, w
dx_actual, = self.grad( dx_actual, = self.grad(
[w_mean], [x], create_graph=True, no_grad_vars=[y2]) [w_mean], [x],
retain_graph=True,
create_graph=True,
no_grad_vars=[y2])
self.assertFalse(y2.stop_gradient) self.assertFalse(y2.stop_gradient)
self.assertFalse(dx_actual.stop_gradient) self.assertFalse(dx_actual.stop_gradient)
...@@ -435,17 +435,14 @@ class TestDygraphDoubleGrad(TestCase): ...@@ -435,17 +435,14 @@ class TestDygraphDoubleGrad(TestCase):
(x_np > 0) * 2).astype('float32') (x_np > 0) * 2).astype('float32')
self.assertTrue(np.allclose(dx_actual.numpy(), dx_expected)) self.assertTrue(np.allclose(dx_actual.numpy(), dx_expected))
if not _in_legacy_dygraph(): loss = fluid.layers.reduce_mean(dx_actual * dx_actual + x * x)
pass loss.backward()
else:
loss = fluid.layers.reduce_mean(dx_actual * dx_actual + x * x)
loss.backward()
x_grad_actual = x.gradient() x_grad_actual = x.gradient()
x_grad_expected = (2.0 / float(numel) * ( x_grad_expected = (2.0 / float(numel) *
x_np + dx_expected * (x_np + dx_expected *
(x_np > 0) * 4 / float(numel))).astype('float32') (x_np > 0) * 4 / float(numel))).astype('float32')
self.assertTrue(np.allclose(x_grad_actual, x_grad_expected)) self.assertTrue(np.allclose(x_grad_actual, x_grad_expected))
def test_example_with_gradient_accumulation_and_no_grad_vars(self): def test_example_with_gradient_accumulation_and_no_grad_vars(self):
with _test_eager_guard(): with _test_eager_guard():
...@@ -476,15 +473,12 @@ class TestDygraphDoubleGrad(TestCase): ...@@ -476,15 +473,12 @@ class TestDygraphDoubleGrad(TestCase):
self.assertTrue(np.allclose(dx_actual.numpy(), dx_expected)) self.assertTrue(np.allclose(dx_actual.numpy(), dx_expected))
if not _in_legacy_dygraph(): loss = fluid.layers.reduce_mean(dx_actual * dx_actual + x * x)
pass loss.backward()
else:
loss = fluid.layers.reduce_mean(dx_actual * dx_actual + x * x)
loss.backward()
x_grad_actual = x.gradient() x_grad_actual = x.gradient()
x_grad_expected = (2.0 * x_np / float(numel)).astype('float32') x_grad_expected = (2.0 * x_np / float(numel)).astype('float32')
self.assertTrue(np.allclose(x_grad_actual, x_grad_expected)) self.assertTrue(np.allclose(x_grad_actual, x_grad_expected))
def test_example_with_gradient_accumulation_and_not_create_graph(self): def test_example_with_gradient_accumulation_and_not_create_graph(self):
with _test_eager_guard(): with _test_eager_guard():
......
...@@ -30,6 +30,18 @@ ...@@ -30,6 +30,18 @@
kernel : kernel :
func : acosh_grad func : acosh_grad
# add_double_grad: backward of add_grad (see the `forward` signature below).
# Produces a single output, grad_out_grad, whose meta is inferred by
# UnchangedInferMeta from grad_out. grad_x_grad and grad_y_grad are optional
# inputs. The entry chains on to add_triple_grad via `backward`.
# NOTE(review): the `args` order (y, grad_out, grad_x_grad, grad_y_grad)
# presumably has to match the add_double_grad kernel signature -- confirm.
- backward_api : add_double_grad
forward : add_grad (Tensor x, Tensor y, Tensor grad_out, int axis = -1) -> Tensor(grad_x), Tensor(grad_y)
args : (Tensor y, Tensor grad_out, Tensor grad_x_grad, Tensor grad_y_grad, int axis = -1)
output : Tensor(grad_out_grad)
infer_meta :
func : UnchangedInferMeta
param : [grad_out]
kernel :
func : add_double_grad
optional : grad_x_grad, grad_y_grad
backward : add_triple_grad
- backward_api : add_grad - backward_api : add_grad
forward : add (Tensor x, Tensor y) -> Tensor(out) forward : add (Tensor x, Tensor y) -> Tensor(out)
args : (Tensor x, Tensor y, Tensor out_grad, int axis = -1) args : (Tensor x, Tensor y, Tensor out_grad, int axis = -1)
...@@ -40,6 +52,7 @@ ...@@ -40,6 +52,7 @@
kernel : kernel :
func : add_grad func : add_grad
no_need_buffer : x, y no_need_buffer : x, y
backward : add_double_grad
- backward_api : add_n_grad - backward_api : add_n_grad
forward : add_n (Tensor[] x) -> Tensor(out) forward : add_n (Tensor[] x) -> Tensor(out)
...@@ -48,6 +61,16 @@ ...@@ -48,6 +61,16 @@
invoke : add_n_grad_impl(x, out_grad) invoke : add_n_grad_impl(x, out_grad)
no_need_buffer : x no_need_buffer : x
# add_triple_grad: backward of add_double_grad (see the `forward` signature
# below). Takes grad_grad_x, grad_grad_y and grad_grad_out_grad and produces
# grad_grad_x_grad and grad_grad_y_grad; output metas are inferred by
# GeneralBinaryGradInferMeta from [grad_grad_x, grad_grad_y].
# No further `backward` entry is declared here.
- backward_api : add_triple_grad
forward : add_double_grad (Tensor y, Tensor grad_out, Tensor grad_grad_x, Tensor grad_grad_y, int axis = -1) -> Tensor(grad_grad_out)
args : (Tensor grad_grad_x, Tensor grad_grad_y, Tensor grad_grad_out_grad, int axis = -1)
output : Tensor(grad_grad_x_grad), Tensor(grad_grad_y_grad)
infer_meta :
func : GeneralBinaryGradInferMeta
param : [grad_grad_x, grad_grad_y]
kernel :
func : add_triple_grad
- backward_api : addmm_grad - backward_api : addmm_grad
forward : addmm (Tensor input, Tensor x, Tensor y, float alpha, float beta) -> Tensor(out) forward : addmm (Tensor input, Tensor x, Tensor y, float alpha, float beta) -> Tensor(out)
args : (Tensor input, Tensor x, Tensor y, Tensor out_grad, float alpha, float beta) args : (Tensor input, Tensor x, Tensor y, Tensor out_grad, float alpha, float beta)
...@@ -934,6 +957,12 @@ ...@@ -934,6 +957,12 @@
kernel : kernel :
func : mean_all_grad func : mean_all_grad
# mean_double_grad: backward of mean_grad (see the `forward` signature below).
# Declares no kernel of its own; it is expressed by invoking `mean` on
# grad_x_grad with dims and keep_dim.
# NOTE(review): reduce_all is accepted in `args` but is not forwarded to the
# invoked mean call -- confirm this is intended.
- backward_api : mean_double_grad
forward: mean_grad (Tensor x, Tensor grad_out, int64_t[] dims={}, bool keep_dim=false, bool reduce_all = false) -> Tensor(grad_x)
args : (Tensor grad_x_grad, int64_t[] dims={}, bool keep_dim=false, bool reduce_all=false)
output : Tensor(grad_out_grad)
invoke : mean(grad_x_grad, dims, keep_dim)
- backward_api : mean_grad - backward_api : mean_grad
forward: mean (Tensor x, int64_t[] dims={}, bool keep_dim=false) -> Tensor(out) forward: mean (Tensor x, int64_t[] dims={}, bool keep_dim=false) -> Tensor(out)
args : (Tensor x, Tensor out_grad, int64_t[] dims={}, bool keep_dim=false, bool reduce_all=false) args : (Tensor x, Tensor out_grad, int64_t[] dims={}, bool keep_dim=false, bool reduce_all=false)
...@@ -943,6 +972,7 @@ ...@@ -943,6 +972,7 @@
param: [x] param: [x]
kernel : kernel :
func : mean_grad func : mean_grad
backward : mean_double_grad
no_need_buffer : x no_need_buffer : x
- backward_api : meshgrid_grad - backward_api : meshgrid_grad
...@@ -1025,6 +1055,17 @@ ...@@ -1025,6 +1055,17 @@
func : multiplex_grad func : multiplex_grad
param : [ids, out_grad] param : [ids, out_grad]
# multiply_double_grad: backward of multiply_grad (see the `forward`
# signature below). Produces three outputs -- x_grad, y_grad and
# grad_out_grad -- whose metas are inferred by GeneralTernaryGradInferMeta
# from [x, y, grad_out]. grad_x_grad and grad_y_grad are optional inputs.
# No further `backward` entry is declared here.
- backward_api : multiply_double_grad
forward : multiply_grad (Tensor x, Tensor y, Tensor grad_out, int axis = -1) -> Tensor(grad_x), Tensor(grad_y)
args : (Tensor x, Tensor y, Tensor grad_out, Tensor grad_x_grad, Tensor grad_y_grad, int axis = -1)
output : Tensor(x_grad), Tensor(y_grad), Tensor(grad_out_grad)
infer_meta :
func : GeneralTernaryGradInferMeta
param : [x, y, grad_out]
kernel :
func : multiply_double_grad
optional : grad_x_grad, grad_y_grad
- backward_api : multiply_grad - backward_api : multiply_grad
forward : multiply (Tensor x, Tensor y) -> Tensor(out) forward : multiply (Tensor x, Tensor y) -> Tensor(out)
args : (Tensor x, Tensor y, Tensor out_grad, int axis = -1) args : (Tensor x, Tensor y, Tensor out_grad, int axis = -1)
...@@ -1034,6 +1075,7 @@ ...@@ -1034,6 +1075,7 @@
param : [x, y] param : [x, y]
kernel : kernel :
func : multiply_grad func : multiply_grad
backward : multiply_double_grad
- backward_api : mv_grad - backward_api : mv_grad
forward : mv (Tensor x, Tensor vec) -> Tensor(out) forward : mv (Tensor x, Tensor vec) -> Tensor(out)
...@@ -1184,10 +1226,10 @@ ...@@ -1184,10 +1226,10 @@
- backward_api : relu_double_grad - backward_api : relu_double_grad
forward : relu_grad (Tensor out, Tensor grad_out) -> Tensor(grad_x) forward : relu_grad (Tensor out, Tensor grad_out) -> Tensor(grad_x)
args : (Tensor out, Tensor grad_x_grad) args : (Tensor out, Tensor grad_x_grad)
output : Tensor(out_grad), Tensor(grad_out_grad) output : Tensor(grad_out_grad)
infer_meta : infer_meta :
func : GeneralBinaryGradInferMeta func : UnchangedInferMeta
param : [out, out] param : [out]
kernel : kernel :
func : relu_double_grad func : relu_double_grad
...@@ -1270,11 +1312,25 @@ ...@@ -1270,11 +1312,25 @@
kernel : kernel :
func : rsqrt_grad func : rsqrt_grad
# scale_double_grad: backward of scale_grad (see the `forward` signature
# below). Declares no kernel of its own; it is expressed by invoking `scale`
# on grad_x_grad. The invoked call passes the literal 0.0 as bias rather than
# the `bias` argument. Chains on to scale_triple_grad via `backward`.
- backward_api : scale_double_grad
forward : scale_grad (Tensor grad_out, Scalar scale, float bias, bool bias_after_scale) -> Tensor(grad_x)
args : (Tensor grad_x_grad, Scalar scale=1.0, float bias=0.0, bool bias_after_scale=true)
output : Tensor(grad_out_grad)
invoke : scale(grad_x_grad, scale, 0.0, bias_after_scale)
backward : scale_triple_grad
- backward_api : scale_grad - backward_api : scale_grad
forward : scale (Tensor x, Scalar scale, float bias, bool bias_after_scale) -> Tensor(out) forward : scale (Tensor x, Scalar scale, float bias, bool bias_after_scale) -> Tensor(out)
args : (Tensor out_grad, Scalar scale=1.0, float bias=0.0, bool bias_after_scale=true) args : (Tensor out_grad, Scalar scale=1.0, float bias=0.0, bool bias_after_scale=true)
output : Tensor(x_grad) output : Tensor(x_grad)
invoke : scale(out_grad, scale, 0.0, bias_after_scale) invoke : scale(out_grad, scale, 0.0, bias_after_scale)
backward : scale_double_grad
# scale_triple_grad: backward of scale_double_grad (see the `forward`
# signature below). Declares no kernel of its own; it is expressed by
# invoking `scale` on grad_grad_out_grad, again with the literal 0.0 as bias
# rather than the `bias` argument. No further `backward` entry is declared.
- backward_api : scale_triple_grad
forward : scale_double_grad (Tensor grad_grad_x, Scalar scale, float bias, bool bias_after_scale) -> Tensor(grad_grad_out)
args : (Tensor grad_grad_out_grad, Scalar scale=1.0, float bias=0.0, bool bias_after_scale=true)
output : Tensor(grad_grad_x_grad)
invoke : scale(grad_grad_out_grad, scale, 0.0, bias_after_scale)
- backward_api : scatter_grad - backward_api : scatter_grad
forward : scatter (Tensor x, Tensor index, Tensor updates, bool overwrite) -> Tensor(out) forward : scatter (Tensor x, Tensor index, Tensor updates, bool overwrite) -> Tensor(out)
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册