Unverified commit 24ec6ed0, authored by YuanRisheng, committed by GitHub

Add some double/triple grad kernel yaml file (#42361)

* add double yaml

* add inline func
Parent 2bee99df
......@@ -22,17 +22,12 @@ import os
### Global Variables ###
########################
ops_to_fill_zero_for_empty_grads = set([
"split_grad",
"rnn_grad",
"matmul_double_grad",
"matmul_triple_grad",
"sigmoid_double_grad",
"sigmoid_triple_grad",
"add_double_grad",
"add_triple_grad",
"multiply_double_grad",
"multiply_triple_grad",
"conv2d_grad_grad",
"split_grad", "rnn_grad", "matmul_double_grad", "matmul_triple_grad",
"sigmoid_double_grad", "sigmoid_triple_grad", "add_double_grad",
"add_triple_grad", "multiply_double_grad", "multiply_triple_grad",
"conv2d_grad_grad", "batch_norm_double_grad", "tanh_double_grad",
"tanh_triple_grad", "subtract_double_grad", "divide_double_grad",
"log_double_grad", "elu_double_grad"
])
# For API dispatch used at python-level
......
......@@ -96,8 +96,7 @@ struct KernelKeyParser : ArgsIterator<KernelKeyParser> {
// TODO(chenweihang): deal with multiple diff input Tensors
// TODO(chenweihang): add global device guard method to set backend
void operator()(const Tensor& x) {
const phi::TensorBase& tensor = *x.impl();
inline void AssignKernelKeySet(const phi::TensorBase& tensor) {
key_set.backend_set =
key_set.backend_set | detail::GetTensorBackendSet(tensor);
// TODO(chenweihang): select multi layout and dtype
......@@ -110,6 +109,8 @@ struct KernelKeyParser : ArgsIterator<KernelKeyParser> {
}
}
void operator()(const Tensor& x) { AssignKernelKeySet(*x.impl()); }
void operator()(const std::vector<Tensor>& x) {
const phi::TensorBase& tensor = *x.at(0).impl();
key_set.backend_set =
......@@ -119,6 +120,13 @@ struct KernelKeyParser : ArgsIterator<KernelKeyParser> {
key_set.dtype = tensor.dtype();
}
void operator()(const paddle::optional<const Tensor&> x) {
if (x.get_ptr() != nullptr) {
const phi::TensorBase& tensor = *(x.get_ptr()->impl());
AssignKernelKeySet(tensor);
}
}
// skip other type args, these args don't used in kernel selection
template <typename T>
void operator()(const T& x) {
......
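
The hunk above pulls the per-tensor backend/dtype bookkeeping of `KernelKeyParser` into a single inline helper, `AssignKernelKeySet`, and adds an overload for `paddle::optional<const Tensor&>` so that an optional input only contributes to kernel selection when it actually holds a value. A rough Python analogue of that visitor pattern (stand-in names and fields, not Paddle's C++ types):

```python
# Rough analogue of the refactored visitor (stand-in names and fields only):
# one helper folds a tensor's backend/dtype into the key set, and an absent
# optional input is skipped instead of being dereferenced.
class KernelKeyParserSketch:
    def __init__(self):
        self.backend_set = set()
        self.dtype = None

    def assign_kernel_key_set(self, tensor):
        # Mirrors AssignKernelKeySet: accumulate the backend, record the dtype.
        self.backend_set.add(tensor["backend"])
        self.dtype = tensor["dtype"]

    def visit(self, arg):
        if arg is None:                      # empty optional input: no-op
            return
        if isinstance(arg, dict):            # a single tensor
            self.assign_kernel_key_set(arg)
        elif isinstance(arg, list) and arg:  # a vector of tensors: use the first
            self.assign_kernel_key_set(arg[0])
        # any other argument type is ignored for kernel selection


parser = KernelKeyParserSketch()
parser.visit({"backend": "GPU", "dtype": "float32"})
parser.visit(None)  # optional input that was not passed
print(parser.backend_set, parser.dtype)  # {'GPU'} float32
```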
......@@ -82,18 +82,18 @@ void ReluDoubleGradKernel(const Context& dev_ctx,
template <typename T, typename Context>
void TanhDoubleGradKernel(const Context& dev_ctx,
const DenseTensor& out,
const DenseTensor& ddx,
const DenseTensor& dout,
const DenseTensor& ddx,
DenseTensor* dout_new,
DenseTensor* ddout);
template <typename T, typename Context>
void TanhTripleGradKernel(const Context& dev_ctx,
const DenseTensor& out,
const DenseTensor& ddx,
const DenseTensor& dout,
const DenseTensor& d_ddout,
const DenseTensor& ddx,
const DenseTensor& d_dout_new,
const DenseTensor& d_ddout,
DenseTensor* d_out_new,
DenseTensor* d_dout,
DenseTensor* d_ddx);
......
......@@ -66,16 +66,16 @@ void BatchNormGradKernel(const Context& dev_ctx,
template <typename T, typename Context>
void BatchNormDoubleGradKernel(const Context& dev_ctx,
const DenseTensor& x_grad_grad,
const DenseTensor& scale_grad_grad,
const DenseTensor& bias_grad_grad,
const DenseTensor& y_grad,
const DenseTensor& x,
const DenseTensor& scale,
const DenseTensor& saved_mean,
const DenseTensor& saved_variance,
paddle::optional<const DenseTensor&> mean,
paddle::optional<const DenseTensor&> variance,
const DenseTensor& saved_mean,
const DenseTensor& saved_variance,
const DenseTensor& y_grad,
const DenseTensor& x_grad_grad,
const DenseTensor& scale_grad_grad,
const DenseTensor& bias_grad_grad,
float momentum,
float epsilon,
const std::string& data_layout,
......
......@@ -341,16 +341,16 @@ void BatchNormGradKernel(const Context& dev_ctx,
template <typename T, typename Context>
void BatchNormDoubleGradKernel(const Context& ctx,
const DenseTensor& x_grad_grad,
const DenseTensor& scale_grad_grad,
const DenseTensor& bias_grad_grad,
const DenseTensor& y_grad,
const DenseTensor& x,
const DenseTensor& scale,
const DenseTensor& saved_mean,
const DenseTensor& saved_variance,
paddle::optional<const DenseTensor&> mean,
paddle::optional<const DenseTensor&> variance,
const DenseTensor& saved_mean,
const DenseTensor& saved_variance,
const DenseTensor& y_grad,
const DenseTensor& x_grad_grad,
const DenseTensor& scale_grad_grad,
const DenseTensor& bias_grad_grad,
float momentum,
float epsilon,
const std::string& data_layout_str,
......
......@@ -38,9 +38,9 @@ void SubtractGradKernel(const Context& dev_ctx,
template <typename T, typename Context>
void SubtractDoubleGradKernel(const Context& dev_ctx,
const DenseTensor& y,
const DenseTensor& dout,
paddle::optional<const DenseTensor&> ddx,
paddle::optional<const DenseTensor&> ddy,
const DenseTensor& dout,
int axis,
DenseTensor* ddout) {
phi::SubtractDoubleGradImpl<T>(dev_ctx, y, ddx, ddy, dout, axis, ddout);
......
......@@ -30,9 +30,9 @@ void SubtractGradKernel(const Context& dev_ctx,
template <typename T, typename Context>
void SubtractDoubleGradKernel(const Context& dev_ctx,
const DenseTensor& y,
const DenseTensor& dout,
paddle::optional<const DenseTensor&> ddx,
paddle::optional<const DenseTensor&> ddy,
const DenseTensor& dout,
int axis,
DenseTensor* ddout);
......
......@@ -908,16 +908,16 @@ void BatchNormGradKernel(const Context &dev_ctx,
template <typename T, typename Context>
void BatchNormDoubleGradKernel(const Context &ctx,
const DenseTensor &x_grad_grad,
const DenseTensor &scale_grad_grad,
const DenseTensor &bias_grad_grad,
const DenseTensor &y_grad,
const DenseTensor &x,
const DenseTensor &scale,
const DenseTensor &saved_mean,
const DenseTensor &saved_variance,
paddle::optional<const DenseTensor &> mean,
paddle::optional<const DenseTensor &> variance,
const DenseTensor &saved_mean,
const DenseTensor &saved_variance,
const DenseTensor &y_grad,
const DenseTensor &x_grad_grad,
const DenseTensor &scale_grad_grad,
const DenseTensor &bias_grad_grad,
float momentum,
float epsilon,
const std::string &data_layout_str,
......
......@@ -46,9 +46,9 @@ void SubtractGradKernel(const Context& dev_ctx,
template <typename T, typename Context>
void SubtractDoubleGradKernel(const Context& dev_ctx,
const DenseTensor& y,
const DenseTensor& dout,
paddle::optional<const DenseTensor&> ddx,
paddle::optional<const DenseTensor&> ddy,
const DenseTensor& dout,
int axis,
DenseTensor* ddout) {
phi::SubtractDoubleGradImpl<T>(dev_ctx, y, ddx, ddy, dout, axis, ddout);
......
......@@ -152,8 +152,8 @@ void LeakyReluDoubleGradKernel(const Context& dev_ctx,
template <typename T, typename Context>
void TanhDoubleGradKernel(const Context& dev_ctx,
const DenseTensor& out,
const DenseTensor& ddx,
const DenseTensor& dout,
const DenseTensor& ddx,
DenseTensor* dout_new,
DenseTensor* ddout) {
if (dout_new) {
......@@ -171,10 +171,10 @@ void TanhDoubleGradKernel(const Context& dev_ctx,
template <typename T, typename Context>
void TanhTripleGradKernel(const Context& dev_ctx,
const DenseTensor& out,
const DenseTensor& ddx,
const DenseTensor& dout,
const DenseTensor& d_ddout,
const DenseTensor& ddx,
const DenseTensor& d_dout_new,
const DenseTensor& d_ddout,
DenseTensor* d_out_new,
DenseTensor* d_dout,
DenseTensor* d_ddx) {
......
......@@ -121,13 +121,13 @@ KernelSignature ReluDoubleGradOpArgumentMapping(
KernelSignature TanhDoubleGradOpArgumentMapping(
const ArgumentMappingContext& ctx) {
return KernelSignature(
"tanh_double_grad", {"Out", "DDX", "DOut"}, {}, {"DOutNew", "DDOut"});
"tanh_double_grad", {"Out", "DOut", "DDX"}, {}, {"DOutNew", "DDOut"});
}
KernelSignature TanhTripleGradOpArgumentMapping(
const ArgumentMappingContext& ctx) {
return KernelSignature("tanh_triple_grad",
{"Out", "DDX", "DOut", "D_DDOut", "D_DOut_New"},
{"Out", "DOut", "DDX", "D_DOut_New", "D_DDOut"},
{},
{"D_OutNew", "D_DOut", "D_DDx"});
}
......
......@@ -82,16 +82,16 @@ KernelSignature BatchNormGradOpArgumentMapping(
KernelSignature BatchNormGradGradOpArgumentMapping(
const ArgumentMappingContext& ctx) {
return KernelSignature("batch_norm_grad_grad",
{"DDX",
"DDScale",
"DDBias",
"DY",
"X",
{"X",
"Scale",
"Mean",
"Variance",
"SavedMean",
"SavedVariance",
"Mean",
"Variance"},
"DY",
"DDX",
"DDScale",
"DDBias"},
{"momentum",
"epsilon",
"data_layout",
......
......@@ -133,7 +133,7 @@ KernelSignature ElementwiseSubGradOpArgumentMapping(
KernelSignature ElementwiseSubDoubleGradOpArgumentMapping(
const ArgumentMappingContext& ctx) {
return KernelSignature(
"subtract_double_grad", {"Y", "DDX", "DDY", "DOut"}, {"axis"}, {"DDOut"});
"subtract_double_grad", {"Y", "DOut", "DDX", "DDY"}, {"axis"}, {"DDOut"});
}
KernelSignature ElementwiseDivGradOpArgumentMapping(
......
......@@ -560,7 +560,10 @@ def get_static_double_grad(x,
# so, they are also the input of second-order backward.
x += y_grads
x_init += dy_init
y = dx
# filter out None entries in dx, since DX/DY may be None in the kernel
filted_dx = [dxi for dxi in dx if dxi is not None]
y = filted_dx
# check input arguments
x = _as_list(x)
......@@ -619,6 +622,7 @@ def get_static_double_grad(x,
def get_eager_double_grad(func,
x_init=None,
dy_init=None,
place=None,
return_mid_result=False):
"""
Get Double Grad result of dygraph.
......@@ -627,6 +631,7 @@ def get_eager_double_grad(func,
func: A wrapped dygraph function that its logic is equal to static program
x_init (numpy.array|list[numpy.array]|None): the init value for input x.
dy_init (numpy.array|list[numpy.array]|None): the init value for gradient of output.
place (fluid.CPUPlace or fluid.CUDAPlace): the device.
return_mid_result (bool): A flag that controls the return content.
Returns:
If 'return_mid_result' set True.
......@@ -635,6 +640,10 @@ def get_eager_double_grad(func,
If 'return_mid_result' set False.
A list of numpy arrays that stores the second derivative result calculated by dygraph.
"""
if isinstance(place, fluid.CPUPlace):
paddle.set_device("cpu")
if isinstance(place, fluid.CUDAPlace):
paddle.set_device("gpu")
inputs = []
dys = []
for x in x_init:
......@@ -648,7 +657,12 @@ def get_eager_double_grad(func,
# calculate first derivative
outputs = func(inputs)
d_inputs = paddle.grad(
outputs=outputs, inputs=inputs, grad_outputs=dys, create_graph=True)
outputs=outputs,
inputs=inputs,
grad_outputs=dys,
create_graph=True,
allow_unused=True)
d_inputs = [d_input for d_input in d_inputs if d_input is not None]
# calculate second derivative
inputs = inputs + dys
......@@ -663,15 +677,20 @@ def get_eager_double_grad(func,
ddy = paddle.ones(shape=d_input.shape, dtype=d_input.dtype)
ddy.stop_gradient = False
ddys.append(ddy)
dd_inputs = paddle.grad(
outputs=d_inputs,
inputs=inputs,
grad_outputs=ddys,
create_graph=create_graph)
create_graph=create_graph,
allow_unused=True)
if return_mid_result:
return dd_inputs, inputs + ddys
else:
return [dd_input.numpy() for dd_input in dd_inputs]
return [
dd_input.numpy() for dd_input in dd_inputs if dd_input is not None
]
def double_grad_check_for_dygraph(func,
......@@ -693,7 +712,6 @@ def double_grad_check_for_dygraph(func,
y (Variable|list[Variable]): output variables to the program.
x_init (numpy.array|list[numpy.array]|None): the init value for input x.
place (fluid.CPUPlace or fluid.CUDAPlace): the device.
eps (float): perturbation for finite differences.
atol (float): absolute tolerance.
rtol (float): relative tolerance.
raise_exception (bool): whether to raise an exception if
......@@ -722,19 +740,25 @@ def double_grad_check_for_dygraph(func,
paddle.disable_static()
with _test_eager_guard():
eager_double_grad = get_eager_double_grad(func, x_init, y_grads_init)
eager_double_grad = get_eager_double_grad(func, x_init, y_grads_init,
place)
paddle.enable_static()
static_double_grad = get_static_double_grad(x, y, x_init, y_grads_init,
place)
if len(static_double_grad) != len(eager_double_grad):
msg = "The output grad tensor's number of static graph is different with dygraph, " \
"please check the python api unit test used."
raise RuntimeError(msg)
for i in six.moves.xrange(len(static_double_grad)):
if not np.allclose(static_double_grad[i], eager_double_grad[i], rtol,
atol):
msg = 'Check eager double result fail. Mismatch between static_graph double grad %s ' \
'and eager double grad %s on %s,\n' \
msg = 'Check eager double result fail. Mismatch between static_graph double grad ' \
'and eager double grad on %s, the output double grad tensor\'s index is : %d \n' \
'static:%s\n eager:%s\n' \
% (static_double_grad[i].name, eager_double_grad[i].name, str(place), static_double_grad[i], eager_double_grad[i])
% (str(place), i, static_double_grad[i], eager_double_grad[i])
return fail_test(msg)
......@@ -794,6 +818,7 @@ def get_static_triple_grad(x,
def get_eager_triple_grad(func,
x_init=None,
dy_init=None,
place=None,
return_mid_result=False):
"""
Get triple Grad result of dygraph.
......@@ -802,12 +827,13 @@ def get_eager_triple_grad(func,
func: A wrapped dygraph function that its logic is equal to static program
x_init (numpy.array|list[numpy.array]|None): the init value for input x.
dy_init (numpy.array|list[numpy.array]|None): the init value for gradient of output.
place (fluid.CPUPlace or fluid.CUDAPlace): the device.
return_mid_result (bool): A flag that controls the return content.
Returns:
A list of numpy arrays that stores the third derivative result calculated by dygraph
"""
dd_y, dd_x = get_eager_double_grad(
func, x_init, dy_init, return_mid_result=True)
func, x_init, dy_init, place, return_mid_result=True)
# calculate third derivative
dddys = []
......@@ -839,7 +865,6 @@ def triple_grad_check_for_dygraph(func,
y (Variable|list[Variable]): output variables to the program.
x_init (numpy.array|list[numpy.array]|None): the init value for input x.
place (fluid.CPUPlace or fluid.CUDAPlace): the device.
eps (float): perturbation for finite differences.
atol (float): absolute tolerance.
rtol (float): relative tolerance.
raise_exception (bool): whether to raise an exception if
......@@ -868,17 +893,23 @@ def triple_grad_check_for_dygraph(func,
paddle.disable_static()
with _test_eager_guard():
eager_triple_grad = get_eager_triple_grad(func, x_init, y_grads_init)
eager_triple_grad = get_eager_triple_grad(func, x_init, y_grads_init,
place)
paddle.enable_static()
static_triple_grad = get_static_triple_grad(x, y, x_init, y_grads_init,
place)
if len(static_triple_grad) != len(eager_triple_grad):
msg = "The output grad tensor's number of static graph is different with dygraph, " \
"please check the python api unit test used."
raise RuntimeError(msg)
for i in six.moves.xrange(len(static_triple_grad)):
if not np.allclose(static_triple_grad[i], eager_triple_grad[i], rtol,
atol):
msg = 'Check eager double result fail. Mismatch between static_graph double grad %s ' \
'and eager double grad %s on %s,\n' \
msg = 'Check eager double result fail. Mismatch between static_graph double grad ' \
'and eager double grad on %s, the output double grad tensor\'s index is : %d \n' \
'static:%s\n eager:%s\n' \
% (static_triple_grad[i].name, eager_triple_grad[i].name, str(place), static_triple_grad[i], eager_triple_grad[i])
% (str(place), i, static_triple_grad[i], eager_triple_grad[i])
return fail_test(msg)
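
The gradient-checker changes above follow one pattern: pin the device from `place` before building eager tensors, call `paddle.grad` with `allow_unused=True`, and drop the `None` entries before the next differentiation, because some double/triple-grad kernels legitimately produce no gradient for `DX` or `DY`. A minimal sketch of that pattern, with a toy function and a hypothetical helper name:

```python
# Minimal sketch (toy function, hypothetical helper name) of the checker's new
# pattern: set the device from `place`, request gradients with
# allow_unused=True, and filter out None results before differentiating again.
import numpy as np
import paddle
import paddle.fluid as fluid


def eager_double_grad_sketch(func, x_init, place=None):
    if isinstance(place, fluid.CPUPlace):
        paddle.set_device("cpu")
    elif isinstance(place, fluid.CUDAPlace):
        paddle.set_device("gpu")

    xs = []
    for arr in x_init:
        x = paddle.to_tensor(arr)
        x.stop_gradient = False
        xs.append(x)

    ys = func(xs)
    d_xs = paddle.grad(outputs=ys, inputs=xs,
                       create_graph=True, allow_unused=True)
    d_xs = [d for d in d_xs if d is not None]   # drop grads of unused inputs

    dd_xs = paddle.grad(outputs=d_xs, inputs=xs, allow_unused=True)
    return [dd.numpy() for dd in dd_xs if dd is not None]


# Example: second derivative of a toy tanh function on CPU.
result = eager_double_grad_sketch(lambda xs: paddle.tanh(xs[0]),
                                  [np.random.rand(2, 3).astype("float32")],
                                  place=fluid.CPUPlace())
```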
......@@ -52,6 +52,9 @@ class TestSigmoidTripleGradCheck(unittest.TestCase):
class TestSigmoidDoubleGradCheck(unittest.TestCase):
def sigmoid_wrapper(self, x):
return fluid.layers.sigmoid(x[0])
@prog_scope()
def func(self, place):
shape = [2, 3, 7, 9]
......@@ -64,6 +67,8 @@ class TestSigmoidDoubleGradCheck(unittest.TestCase):
x_arr[np.abs(x_arr) < 0.005] = 0.002
gradient_checker.double_grad_check(
[x], y, x_init=x_arr, place=place, eps=eps)
gradient_checker.double_grad_check_for_dygraph(
self.sigmoid_wrapper, [x], y, x_init=x_arr, place=place)
def test_grad(self):
paddle.enable_static()
......@@ -75,6 +80,9 @@ class TestSigmoidDoubleGradCheck(unittest.TestCase):
class TestTanhTripleGradCheck(unittest.TestCase):
def tanh_wrapper(self, x):
return paddle.tanh(x[0])
@prog_scope()
def func(self, place):
shape = [2, 3, 7, 9]
......@@ -87,6 +95,8 @@ class TestTanhTripleGradCheck(unittest.TestCase):
x_arr[np.abs(x_arr) < 0.005] = 0.002
gradient_checker.triple_grad_check(
[x], y, x_init=x_arr, place=place, eps=eps)
gradient_checker.triple_grad_check_for_dygraph(
self.tanh_wrapper, [x], y, x_init=x_arr, place=place)
def test_grad(self):
paddle.enable_static()
......@@ -98,6 +108,9 @@ class TestTanhTripleGradCheck(unittest.TestCase):
class TestTanhDoubleGradCheck(unittest.TestCase):
def tanh_wrapper(self, x):
return paddle.tanh(x[0])
@prog_scope()
def func(self, place):
shape = [2, 3, 7, 9]
......@@ -110,6 +123,8 @@ class TestTanhDoubleGradCheck(unittest.TestCase):
x_arr[np.abs(x_arr) < 0.005] = 0.002
gradient_checker.double_grad_check(
[x], y, x_init=x_arr, place=place, eps=eps)
gradient_checker.double_grad_check_for_dygraph(
self.tanh_wrapper, [x], y, x_init=x_arr, place=place)
def test_grad(self):
paddle.enable_static()
......@@ -173,6 +188,9 @@ class TestLeakyReluDoubleGradCheck(unittest.TestCase):
class TestELUDoubleGradCheck(unittest.TestCase):
def elu_wrapper(self, x):
return paddle.nn.functional.elu(x[0], alpha=0.2)
@prog_scope()
def func(self, place):
shape = [2, 4, 4, 4]
......@@ -189,6 +207,8 @@ class TestELUDoubleGradCheck(unittest.TestCase):
x_arr = np.random.uniform(-1, 1, shape).astype(dtype)
gradient_checker.double_grad_check(
[x], y, x_init=x_arr, place=place, eps=eps)
gradient_checker.double_grad_check_for_dygraph(
self.elu_wrapper, [x], y, x_init=x_arr, place=place)
def test_grad(self):
paddle.enable_static()
......
......@@ -139,6 +139,9 @@ class TestElementwiseAddBroadcastDoubleGradCheck(unittest.TestCase):
class TestElementwiseSubDoubleGradCheck(unittest.TestCase):
def subtract_wrapper(self, x):
return paddle.subtract(x[0], x[1])
@prog_scope()
def func(self, place):
# the shape of the input variable should be clearly specified, not include -1.
......@@ -156,6 +159,11 @@ class TestElementwiseSubDoubleGradCheck(unittest.TestCase):
gradient_checker.double_grad_check(
[x, y], out, x_init=[x_arr, y_arr], place=place, eps=eps)
gradient_checker.double_grad_check_for_dygraph(
self.subtract_wrapper, [x, y],
out,
x_init=[x_arr, y_arr],
place=place)
def test_grad(self):
paddle.enable_static()
......@@ -195,6 +203,9 @@ class TestElementwiseSubBroadcastDoubleGradCheck(unittest.TestCase):
class TestElementwiseDivDoubleGradCheck(unittest.TestCase):
def divide_wrapper(self, x):
return paddle.divide(x[0], x[1])
@prog_scope()
def func(self, place):
# the shape of the input variable should be clearly specified, not include -1.
......@@ -213,6 +224,12 @@ class TestElementwiseDivDoubleGradCheck(unittest.TestCase):
gradient_checker.double_grad_check(
[x, y], out, x_init=[x_arr, y_arr], place=place, eps=eps, atol=1e-3)
gradient_checker.double_grad_check_for_dygraph(
self.divide_wrapper, [x, y],
out,
x_init=[x_arr, y_arr],
place=place,
atol=1e-3)
def test_grad(self):
paddle.enable_static()
......
......@@ -112,7 +112,10 @@ def elu(x, alpha=1.0, name=None):
# [ 1. 15.6 ]]
"""
if in_dynamic_mode():
if in_dygraph_mode():
return _C_ops.final_state_elu(x, alpha)
if _in_legacy_dygraph():
return _C_ops.elu(x, 'alpha', alpha)
check_variable_and_dtype(x, 'x', ['float16', 'float32', 'float64'], 'elu')
......
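
With the eager branch above routing `elu` to `_C_ops.final_state_elu`, and the `elu_double_grad` entry registered later in this diff, a second-order gradient of `F.elu` can be taken directly in eager mode. A hypothetical usage sketch (shapes and values chosen arbitrarily):

```python
# Hypothetical usage sketch: in the new eager mode F.elu dispatches to
# _C_ops.final_state_elu, and the elu_double_grad entry added in this commit
# lets its first-order gradient be differentiated once more.
import paddle
import paddle.nn.functional as F

x = paddle.uniform([2, 3], min=-1.0, max=1.0)
x.stop_gradient = False
y = F.elu(x, alpha=0.2)

(dx,) = paddle.grad([y], [x], create_graph=True)  # elu_grad
(ddx,) = paddle.grad([dx], [x])                   # elu_double_grad
print(ddx.shape)  # [2, 3]
```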
......@@ -466,6 +466,7 @@
func : DeformableConvInferMeta
kernel :
func : deformable_conv
data_type : x
optional : mask
backward : deformable_conv_grad
......@@ -546,6 +547,7 @@
func : DropoutInferMeta
kernel :
func : dropout
data_type : x
optional : seed_tensor
backward : dropout_grad
......@@ -1065,6 +1067,7 @@
func : LayerNormInferMeta
kernel :
func : layer_norm
data_type : x
backward : layer_norm_grad
optional : scale, bias
......@@ -1608,6 +1611,7 @@
func : PsroiPoolInferMeta
kernel :
func : psroi_pool
data_type : x
optional : boxes_num
backward : psroi_pool_grad
......@@ -1713,6 +1717,7 @@
func : RoiAlignInferMeta
kernel :
func : roi_align
data_type : x
optional : boxes_num
backward : roi_align_grad
......@@ -1723,6 +1728,7 @@
func : RoiPoolInferMeta
kernel :
func : roi_pool
data_type : x
optional : boxes_num
intermediate : arg_max
backward : roi_pool_grad
......
......@@ -152,6 +152,18 @@
kernel :
func : atanh_grad
- backward_api : batch_norm_double_grad
forward : batch_norm_grad (Tensor x, Tensor scale, Tensor bias, Tensor out_mean, Tensor out_variance, Tensor saved_mean, Tensor saved_variance, Tensor reserve_space, Tensor grad_out, float momentum, float epsilon, str data_layout, bool is_test, bool use_global_stats, bool trainable_statistics, bool fuse_with_relu) -> Tensor(grad_x), Tensor(grad_scale), Tensor(grad_bias)
args : (Tensor x, Tensor scale, Tensor out_mean, Tensor out_variance, Tensor saved_mean, Tensor saved_variance, Tensor grad_out, Tensor grad_x_grad, Tensor grad_scale_grad, Tensor grad_bias_grad, float momentum, float epsilon, str data_layout, bool is_test, bool use_global_stats, bool trainable_statistics, bool fuse_with_relu)
output : Tensor(x_grad), Tensor(scale_grad), Tensor(grad_out_grad)
infer_meta :
func : GeneralTernaryGradInferMeta
param : [x, scale, x]
kernel :
func : batch_norm_grad_grad
data_type : x
optional : out_mean, out_variance
- backward_api : batch_norm_grad
forward : batch_norm (Tensor x, Tensor scale, Tensor bias, Tensor mean, Tensor variance, float momentum, float epsilon, str data_layout, bool is_test, bool use_global_stats, bool trainable_statistics, bool fuse_with_relu) -> Tensor(out), Tensor(mean_out), Tensor(variance_out), Tensor(saved_mean), Tensor(saved_variance), Tensor(reserve_space)
args : (Tensor x, Tensor scale, Tensor bias, Tensor mean_out, Tensor variance_out, Tensor saved_mean, Tensor saved_variance, Tensor reserve_space, Tensor out_grad, float momentum, float epsilon, str data_layout, bool is_test, bool use_global_stats, bool trainable_statistics, bool fuse_with_relu)
......@@ -163,6 +175,7 @@
func : batch_norm_grad
data_type : out_grad
optional : mean_out, variance_out, reserve_space
backward : batch_norm_double_grad
- backward_api : bce_loss_grad
forward : bce_loss (Tensor input, Tensor label) -> Tensor(out)
......@@ -362,6 +375,7 @@
func : DeformableConvGradInferMeta
kernel :
func : deformable_conv_grad
data_type : x
optional : mask
- backward_api : depthwise_conv2d_transpose_grad
......@@ -414,6 +428,18 @@
kernel :
func : dist_grad
- backward_api : divide_double_grad
forward : divide_grad (Tensor x, Tensor y, Tensor out, Tensor grad_out, int axis = -1) -> Tensor(grad_x), Tensor(grad_y)
args : (Tensor y, Tensor out, Tensor grad_x, Tensor grad_x_grad, Tensor grad_y_grad, int axis = -1)
output : Tensor(y_grad), Tensor(out_grad), Tensor(grad_out_grad)
infer_meta :
func : GeneralTernaryGradInferMeta
param : [y, grad_x, grad_x]
kernel :
func : divide_double_grad
data_type : out
optional : grad_x_grad, grad_y_grad
- backward_api : divide_grad
forward : divide (Tensor x, Tensor y) -> Tensor(out)
args : (Tensor x, Tensor y, Tensor out, Tensor out_grad, int axis = -1)
......@@ -423,6 +449,7 @@
param : [x, y]
kernel :
func : divide_grad
backward : divide_double_grad
- backward_api : dropout_grad
forward : dropout (Tensor x, Tensor seed_tensor, float p, bool is_test, str mode, int seed, bool fix_seed) -> Tensor(out), Tensor(mask)
......@@ -455,6 +482,16 @@
kernel :
func : elementwise_pow_grad
- backward_api : elu_double_grad
forward : elu_grad (Tensor x, Tensor out, Tensor grad_out, float alpha)-> Tensor(grad_x)
args : (Tensor x, Tensor grad_out, Tensor grad_x_grad, float alpha)
output : Tensor(x_grad), Tensor(grad_out_grad)
infer_meta :
func : GeneralBinaryGradInferMeta
param : [x, x]
kernel :
func : elu_double_grad
- backward_api : elu_grad
forward : elu (Tensor x, float alpha) -> Tensor(out)
args : (Tensor x, Tensor out, Tensor out_grad, float alpha)
......@@ -464,6 +501,7 @@
param : [x]
kernel :
func : elu_grad
backward : elu_double_grad
- backward_api : erf_grad
forward : erf (Tensor x) -> Tensor(out)
......@@ -633,6 +671,7 @@
param : [x]
kernel :
func : graph_send_recv_grad
data_type : out_grad
optional: out, dst_count
- backward_api : gumbel_softmax_grad
......@@ -1287,6 +1326,7 @@
param : [x]
kernel :
func : psroi_pool_grad
data_type : x
optional : boxes_num
# output is optional
......@@ -1381,6 +1421,7 @@
param : [x]
kernel :
func : roi_align_grad
data_type : boxes
optional : boxes_num
- backward_api : roi_pool_grad
......@@ -1392,6 +1433,7 @@
param : [x]
kernel :
func : roi_pool_grad
data_type : x
optional : boxes_num
- backward_api : roll_grad
......@@ -1498,7 +1540,7 @@
func : UnchangedInferMeta
param : [x]
kernel :
func : sigmoid_cross_entropy_with_logits_grad
func : sigmoid_cross_entropy_with_logits_grad
- backward_api : sigmoid_double_grad
forward : sigmoid_grad (Tensor out, Tensor fwd_grad_out) -> Tensor(grad_x)
......@@ -1654,6 +1696,18 @@
func : strided_slice_grad
no_need_buffer : x
- backward_api : subtract_double_grad
forward : subtract_grad (Tensor x, Tensor y, Tensor grad_out, int axis = -1) -> Tensor(grad_x), Tensor(grad_y)
args : (Tensor y, Tensor grad_out, Tensor grad_x_grad, Tensor grad_y_grad, int axis = -1)
output : Tensor(grad_out_grad)
infer_meta :
func : UnchangedInferMeta
param : [grad_out]
kernel :
func : subtract_double_grad
optional : grad_x_grad, grad_y_grad
no_need_buffer : y, grad_out
- backward_api : subtract_grad
forward : subtract (Tensor x, Tensor y) -> Tensor(out)
args : (Tensor x, Tensor y, Tensor out_grad, int axis = -1)
......@@ -1664,6 +1718,7 @@
kernel :
func : subtract_grad
no_need_buffer : x, y
backward : subtract_double_grad
- backward_api : sum_double_grad
forward : sum_grad (Tensor x, Tensor grad_out, int64_t[] dims, bool keep_dim, bool reduce_all=false) -> Tensor(grad_x)
......@@ -1720,6 +1775,17 @@
kernel :
func : tan_grad
- backward_api : tanh_double_grad
forward : tanh_grad (Tensor out, Tensor grad_out) -> Tensor(grad_x)
args : (Tensor out, Tensor grad_out, Tensor grad_x_grad)
output : Tensor(out_grad), Tensor(grad_out_grad)
infer_meta :
func : GeneralBinaryGradInferMeta
param : [out, out]
kernel :
func : tanh_double_grad
backward : tanh_triple_grad
- backward_api : tanh_grad
forward : tanh (Tensor x) -> Tensor(out)
args : (Tensor out, Tensor out_grad)
......@@ -1729,6 +1795,7 @@
param : [out]
kernel :
func : tanh_grad
backward : tanh_double_grad
- backward_api : tanh_shrink_grad
forward : tanh_shrink (Tensor x) -> Tensor(out)
......@@ -1740,6 +1807,16 @@
kernel :
func : tanh_shrink_grad
- backward_api : tanh_triple_grad
forward : tanh_double_grad (Tensor out, Tensor grad_out_forward, Tensor grad_x_grad_forward) -> Tensor(grad_out_new), Tensor(grad_out_grad)
args : (Tensor out, Tensor grad_out_forward, Tensor grad_x_grad_forward, Tensor grad_out_new_grad, Tensor grad_out_grad_grad)
output : Tensor(out_grad), Tensor(grad_out_forward_grad), Tensor(grad_x_grad_forward_grad)
infer_meta :
func : GeneralTernaryGradInferMeta
param : [out, out, grad_x_grad_forward]
kernel :
func : tanh_triple_grad
- backward_api : thresholded_relu_grad
forward : thresholded_relu (Tensor x, float threshold) -> Tensor(out)
args : (Tensor x, Tensor out_grad, float threshold)
......
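
As a hypothetical usage sketch of the tanh backward chain declared above (tanh_grad -> tanh_double_grad -> tanh_triple_grad), each gradient of `tanh` taken in eager mode can itself be differentiated one more time:

```python
# Hypothetical usage sketch: chained higher-order gradients of tanh in eager
# mode, exercising the tanh_grad -> tanh_double_grad -> tanh_triple_grad chain.
import paddle

x = paddle.uniform([2, 3], min=-1.0, max=1.0)
x.stop_gradient = False
y = paddle.tanh(x)

(dx,) = paddle.grad([y], [x], create_graph=True)    # uses tanh_grad
(ddx,) = paddle.grad([dx], [x], create_graph=True)  # uses tanh_double_grad
(dddx,) = paddle.grad([ddx], [x])                   # uses tanh_triple_grad
print(dddx.shape)  # [2, 3]
```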