未验证 提交 6891a4fe 编写于 作者: C Chen Weihang 提交者: GitHub

normalize yaml backward op label (#46028)

上级 6bd2762c
......@@ -83,10 +83,10 @@ def ReadBwdFile(filepath):
ret = {}
if contents is not None:
for content in contents:
assert 'backward_api' in content.keys(), AssertMessage(
'backward_api', content.keys())
if 'backward_api' in content.keys():
api_name = content['backward_api']
assert 'backward_op' in content.keys(), AssertMessage(
'backward_op', content.keys())
if 'backward_op' in content.keys():
api_name = content['backward_op']
ret[api_name] = content
f.close()
......
......@@ -1485,7 +1485,7 @@ class DygraphNodeGenerator(DygraphFunctionGeneratorBase):
if next_grad_api_contents:
# Fake forward_api_contents and backward_api_contents
forward_api_contents = grad_api_contents
forward_api_contents['op'] = forward_api_contents['backward_api']
forward_api_contents['op'] = forward_api_contents['backward_op']
backward_api_contents = next_grad_api_contents
next_node_generator = DygraphFunctionGeneratorBase(
......@@ -1959,8 +1959,7 @@ class DygraphForwardAndNodesGenerator(GeneratorBase):
forward_api_contents = backward_api_contents
# Fake forward_api_content
forward_api_contents['op'] = forward_api_contents[
'backward_api']
forward_api_contents['op'] = forward_api_contents['backward_op']
backward_api_contents = next_grad_api_contents
if len(namespace) > 0:
......
- backward_api : atan2_grad
- backward_op : atan2_grad
forward : atan2 (Tensor x, Tensor y) -> Tensor(out)
args : (Tensor x, Tensor y, Tensor out_grad)
output : Tensor(x_grad), Tensor(y_grad)
......@@ -8,7 +8,7 @@
kernel :
func : atan2_grad
- backward_api : cholesky_grad
- backward_op : cholesky_grad
forward : cholesky (Tensor x, bool upper) -> Tensor(out)
args : (Tensor out, Tensor out_grad, bool upper)
output : Tensor(x_grad)
......@@ -18,7 +18,7 @@
kernel :
func : cholesky_grad
- backward_api : cholesky_solve_grad
- backward_op : cholesky_solve_grad
forward : cholesky_solve (Tensor x, Tensor y, bool upper) -> Tensor(out)
args : (Tensor x, Tensor y, Tensor out, Tensor out_grad, bool upper)
output : Tensor(x_grad), Tensor(y_grad)
......@@ -28,7 +28,7 @@
kernel :
func : cholesky_solve_grad
- backward_api : cross_grad
- backward_op : cross_grad
forward : cross (Tensor x, Tensor y, int axis = 9) -> Tensor(out)
args : (Tensor x, Tensor y, Tensor out_grad, int axis)
output : Tensor(x_grad), Tensor(y_grad)
......@@ -39,7 +39,7 @@
func : cross_grad
data_type : out_grad
- backward_api : diag_grad
- backward_op : diag_grad
forward : diag (Tensor x, int offset, float padding_value) -> Tensor(out)
args : (Tensor x, Tensor out_grad, int offset)
output : Tensor(x_grad)
......@@ -51,7 +51,7 @@
data_type : out_grad
no_need_buffer : x
- backward_api : diagonal_grad
- backward_op : diagonal_grad
forward : diagonal (Tensor x, int offset, int axis1, int axis2) -> Tensor(out)
args : (Tensor x, Tensor out_grad, int offset = 0, int axis1 = 0, int axis2 = 1)
output : Tensor(x_grad)
......@@ -63,7 +63,7 @@
data_type : out_grad
no_need_buffer : x
- backward_api : digamma_grad
- backward_op : digamma_grad
forward : digamma (Tensor x) -> Tensor(out)
args : (Tensor x, Tensor out_grad)
output : Tensor(x_grad)
......@@ -73,7 +73,7 @@
kernel :
func : digamma_grad
- backward_api : dist_grad
- backward_op : dist_grad
forward : dist (Tensor x, Tensor y, float p) -> Tensor(out)
args : (Tensor x, Tensor y, Tensor out, Tensor out_grad, float p)
output : Tensor(x_grad), Tensor(y_grad)
......@@ -83,7 +83,7 @@
kernel :
func : dist_grad
- backward_api : dot_grad
- backward_op : dot_grad
forward : dot (Tensor x, Tensor y) -> Tensor(out)
args : (Tensor x, Tensor y, Tensor out_grad)
output : Tensor(x_grad), Tensor(y_grad)
......@@ -94,7 +94,7 @@
func : dot_grad
data_type : out_grad
- backward_api : erf_grad
- backward_op : erf_grad
forward : erf (Tensor x) -> Tensor(out)
args : (Tensor x, Tensor out_grad)
output : Tensor(x_grad)
......@@ -105,7 +105,7 @@
func : erf_grad
data_type : out_grad
- backward_api : erfinv_grad
- backward_op : erfinv_grad
forward : erfinv (Tensor x) -> Tensor(out)
args : (Tensor out, Tensor out_grad)
output : Tensor(x_grad)
......@@ -115,7 +115,7 @@
kernel :
func : erfinv_grad
- backward_api : fft_c2c_grad
- backward_op : fft_c2c_grad
forward: fft_c2c(Tensor x, int64_t[] axes, str normalization, bool forward) -> Tensor(out)
args : (Tensor out_grad, int64_t[] axes, str normalization, bool forward)
output: Tensor(x_grad)
......@@ -125,7 +125,7 @@
kernel :
func : fft_c2c_grad
- backward_api : fft_c2r_grad
- backward_op : fft_c2r_grad
forward: fft_c2r(Tensor x, int64_t[] axes, str normalization, bool forward, int64_t last_dim_size) -> Tensor(out)
args : (Tensor out_grad, int64_t[] axes, str normalization, bool forward, int64_t last_dim_size)
output: Tensor(x_grad)
......@@ -135,7 +135,7 @@
func : fft_c2r_grad
data_type: out_grad
- backward_api : fft_r2c_grad
- backward_op : fft_r2c_grad
forward: fft_r2c(Tensor x, int64_t[] axes, str normalization, bool forward, bool onesided) -> Tensor(out)
args : (Tensor x, Tensor out_grad, int64_t[] axes, str normalization, bool forward, bool onesided)
output: Tensor(x_grad)
......@@ -147,7 +147,7 @@
data_type: out_grad
no_need_buffer: x
- backward_api : graph_send_uv_grad
- backward_op : graph_send_uv_grad
forward : graph_send_uv (Tensor x, Tensor y, Tensor src_index, Tensor dst_index, str message_op = "ADD") -> Tensor(out)
args: (Tensor x, Tensor y, Tensor src_index, Tensor dst_index, Tensor out_grad, str message_op = "ADD")
output : Tensor(x_grad), Tensor(y_grad)
......@@ -158,7 +158,7 @@
func : graph_send_uv_grad
data_type : x
- backward_api : lgamma_grad
- backward_op : lgamma_grad
forward : lgamma(Tensor x) -> Tensor(out)
args : (Tensor x, Tensor out_grad)
output : Tensor(x_grad)
......@@ -168,7 +168,7 @@
kernel :
func : lgamma_grad
- backward_api : mv_grad
- backward_op : mv_grad
forward : mv (Tensor x, Tensor vec) -> Tensor(out)
args : (Tensor x, Tensor vec, Tensor out_grad)
output : Tensor(x_grad), Tensor(vec_grad)
......@@ -178,7 +178,7 @@
kernel :
func : mv_grad
- backward_api : poisson_grad
- backward_op : poisson_grad
forward : poisson (Tensor x) -> Tensor(out)
args : (Tensor out_grad)
output : Tensor(x_grad)
......@@ -188,7 +188,7 @@
kernel :
func : poisson_grad
- backward_api : solve_grad
- backward_op : solve_grad
forward : solve (Tensor x, Tensor y) -> Tensor(out)
args : (Tensor x, Tensor y, Tensor out, Tensor out_grad)
output : Tensor(x_grad), Tensor(y_grad)
......@@ -198,7 +198,7 @@
kernel :
func : solve_grad
- backward_api : trace_grad
- backward_op : trace_grad
forward : trace (Tensor x, int offset, int axis1, int axis2) -> Tensor(out)
args : (Tensor x, Tensor out_grad, int offset, int axis1, int axis2)
output : Tensor(x_grad)
......@@ -210,7 +210,7 @@
data_type : out_grad
no_need_buffer : x
- backward_api : trunc_grad
- backward_op : trunc_grad
forward : trunc (Tensor x) -> Tensor(out)
args : (Tensor out_grad)
output : Tensor(x_grad)
......
......@@ -28,7 +28,7 @@ class BackwardAPI(BaseAPI):
self.no_need_buffer = self.parse_no_need_buffer(backward_item_yaml)
def get_api_name(self, api_item_yaml):
return api_item_yaml['backward_api']
return api_item_yaml['backward_op']
def parse_forward_config(self, forward_config):
# api_name (const Tensor& input, ... , int attr, ...) -> Tensor(out)
......
......@@ -27,7 +27,7 @@ def main(api_yaml_path, output_path, backward):
apis = []
else:
apis = [
parse_api_entry(api, "backward_api" if backward else "op")
parse_api_entry(api, "backward_op" if backward else "op")
for api in apis
]
......
......@@ -334,7 +334,7 @@ def parse_api_entry(api_entry: Dict[str, Any], name_field="op"):
api["backward"] = backward
# forward for backward_apis
is_backward_api = name_field == "backward_api"
is_backward_api = name_field == "backward_op"
if is_backward_api:
if "forward" in api_entry:
forward = parse_forward(api_name, api_entry["forward"])
......
- backward_api : abs_grad
- backward_op : abs_grad
forward : tanh(Tensor x) -> Tensor(out)
args : (Tensor x, Tensor out_grad)
output : Tensor(x_grad)
......@@ -6,7 +6,7 @@
func : abs_coo_grad {sparse_coo, sparse_coo -> sparse_coo},
abs_csr_grad {sparse_csr, sparse_csr -> sparse_csr}
- backward_api : acos_grad
- backward_op : acos_grad
forward : acos(Tensor x) -> Tensor(out)
args : (Tensor x, Tensor out_grad)
output : Tensor(x_grad)
......@@ -14,7 +14,7 @@
func : acos_coo_grad {sparse_coo, sparse_coo -> sparse_coo},
acos_csr_grad {sparse_csr, sparse_csr -> sparse_csr}
- backward_api : acosh_grad
- backward_op : acosh_grad
forward : acosh(Tensor x) -> Tensor(out)
args : (Tensor x, Tensor out_grad)
output : Tensor(x_grad)
......@@ -22,7 +22,7 @@
func : acosh_coo_grad {sparse_coo, sparse_coo -> sparse_coo},
acosh_csr_grad {sparse_csr, sparse_csr -> sparse_csr}
- backward_api : add_grad
- backward_op : add_grad
forward : add(Tensor x, Tensor y) -> Tensor(out)
args : (Tensor x, Tensor y, Tensor out_grad)
output : Tensor(x_grad), Tensor(y_grad)
......@@ -30,7 +30,7 @@
func : add_coo_coo_grad{sparse_coo, sparse_coo, sparse_coo -> sparse_coo, sparse_coo},
add_csr_csr_grad{sparse_csr, sparse_csr, sparse_csr -> sparse_csr, sparse_csr}
- backward_api : addmm_grad
- backward_op : addmm_grad
forward : addmm(Tensor input, Tensor x, Tensor y, float alpha=1.0, float beta=1.0) -> Tensor(out)
args : (Tensor input, Tensor x, Tensor y, Tensor out_grad, float alpha=1.0, float beta=1.0)
output : Tensor(input_grad), Tensor(x_grad), Tensor(y_grad)
......@@ -40,7 +40,7 @@
addmm_coo_dense_grad {dense, sparse_coo, dense, dense -> dense, sparse_coo, dense},
addmm_coo_coo_grad {sparse_coo, sparse_coo, sparse_coo, sparse_coo -> sparse_coo, sparse_coo, sparse_coo}
- backward_api : asin_grad
- backward_op : asin_grad
forward : asin(Tensor x) -> Tensor(out)
args : (Tensor x, Tensor out_grad)
output : Tensor(x_grad)
......@@ -48,7 +48,7 @@
func : asin_coo_grad {sparse_coo, sparse_coo -> sparse_coo},
asin_csr_grad {sparse_csr, sparse_csr -> sparse_csr}
- backward_api : asinh_grad
- backward_op : asinh_grad
forward : asinh(Tensor x) -> Tensor(out)
args : (Tensor x, Tensor out_grad)
output : Tensor(x_grad)
......@@ -56,7 +56,7 @@
func : asinh_coo_grad {sparse_coo, sparse_coo -> sparse_coo},
asinh_csr_grad {sparse_csr, sparse_csr -> sparse_csr}
- backward_api : atan_grad
- backward_op : atan_grad
forward : atan(Tensor x) -> Tensor(out)
args : (Tensor x, Tensor out_grad)
output : Tensor(x_grad)
......@@ -64,7 +64,7 @@
func : atan_coo_grad {sparse_coo, sparse_coo -> sparse_coo},
atan_csr_grad {sparse_csr, sparse_csr -> sparse_csr}
- backward_api : atanh_grad
- backward_op : atanh_grad
forward : atanh(Tensor x) -> Tensor(out)
args : (Tensor x, Tensor out_grad)
output : Tensor(x_grad)
......@@ -72,7 +72,7 @@
func : atanh_coo_grad {sparse_coo, sparse_coo -> sparse_coo},
atanh_csr_grad {sparse_csr, sparse_csr -> sparse_csr}
- backward_api : cast_grad
- backward_op : cast_grad
forward : cast(Tensor x, DataType index_dtype, DataType value_dtype) -> Tensor(out)
args : (Tensor x, Tensor out_grad, DataType value_dtype)
output : Tensor(x_grad)
......@@ -81,14 +81,14 @@
cast_csr_grad {sparse_csr, sparse_csr -> sparse_csr}
data_type : out_grad
- backward_api : conv3d_coo_grad
- backward_op : conv3d_coo_grad
forward : conv3d_coo (Tensor x, Tensor kernel, int[] paddings, int[] dilations, int[] strides, int groups, bool subm, str key) -> Tensor(out), Tensor(rulebook), Tensor(counter)
args : (Tensor x, Tensor kernel, Tensor out, Tensor rulebook, Tensor counter, Tensor out_grad, int[] paddings, int[] dilations, int[] strides, int groups, bool subm, str key)
output : Tensor(x_grad), Tensor(kernel_grad)
kernel :
func : conv3d_coo_grad{sparse_coo, dense, sparse_coo, dense, dense, sparse_coo -> sparse_coo, dense}
- backward_api : divide_grad
- backward_op : divide_grad
forward : divide(Tensor x, Tensor y) -> Tensor(out)
args : (Tensor x, Tensor y, Tensor out, Tensor out_grad)
output : Tensor(x_grad), Tensor(y_grad)
......@@ -96,13 +96,13 @@
func : divide_coo_coo_grad{sparse_coo, sparse_coo, sparse_coo, sparse_coo -> sparse_coo, sparse_coo},
divide_csr_csr_grad{sparse_csr, sparse_csr, sparse_csr, sparse_csr -> sparse_csr, sparse_csr}
- backward_api : divide_scalar_grad
- backward_op : divide_scalar_grad
forward : divide_scalar (Tensor x, float scalar) -> Tensor(out)
args : (Tensor out_grad, float scalar)
output : Tensor(x_grad)
invoke : divide_scalar(out_grad, scalar)
- backward_api : expm1_grad
- backward_op : expm1_grad
forward : expm1(Tensor x) -> Tensor(out)
args : (Tensor out, Tensor out_grad)
output : Tensor(x_grad)
......@@ -110,7 +110,7 @@
func : expm1_coo_grad {sparse_coo, sparse_coo -> sparse_coo},
expm1_csr_grad {sparse_csr, sparse_csr -> sparse_csr}
- backward_api : leaky_relu_grad
- backward_op : leaky_relu_grad
forward : leaky_relu(Tensor x, float alpha) -> Tensor(out)
args : (Tensor x, Tensor out_grad, float alpha)
output : Tensor(x_grad)
......@@ -118,7 +118,7 @@
func : leaky_relu_coo_grad {sparse_coo, sparse_coo -> sparse_coo},
leaky_relu_csr_grad {sparse_csr, sparse_csr -> sparse_csr}
- backward_api : log1p_grad
- backward_op : log1p_grad
forward : log1p(Tensor x) -> Tensor(out)
args : (Tensor x, Tensor out_grad)
output : Tensor(x_grad)
......@@ -126,14 +126,14 @@
func : log1p_coo_grad {sparse_coo, sparse_coo -> sparse_coo},
log1p_csr_grad {sparse_csr, sparse_csr -> sparse_csr}
- backward_api : masked_matmul_grad
- backward_op : masked_matmul_grad
forward : masked_matmul(Tensor x, Tensor y, Tensor mask) -> Tensor(out)
args : (Tensor x, Tensor y, Tensor out_grad)
output : Tensor(x_grad), Tensor(y_grad)
kernel :
func : masked_matmul_csr_grad{dense, dense, sparse_csr -> dense, dense}
- backward_api : matmul_grad
- backward_op : matmul_grad
forward : matmul(Tensor x, Tensor y) -> Tensor(out)
args : (Tensor x, Tensor y, Tensor out_grad)
output : Tensor(x_grad), Tensor(y_grad)
......@@ -143,14 +143,14 @@
matmul_coo_dense_grad {sparse_coo, dense, dense -> sparse_coo, dense},
matmul_coo_coo_grad {sparse_coo, sparse_coo, sparse_coo -> sparse_coo, sparse_coo}
- backward_api : maxpool_grad
- backward_op : maxpool_grad
forward : maxpool(Tensor x, int[] kernel_sizes, int[] paddings, int[] dilations, int[] strides) -> Tensor(out), Tensor(rulebook), Tensor(counter)
args : (Tensor x, Tensor rulebook, Tensor counter, Tensor out, Tensor out_grad, int[] kernel_sizes)
output : Tensor(x_grad)
kernel :
func : maxpool_coo_grad {sparse_coo, dense, dense, sparse_coo, sparse_coo -> sparse_coo}
- backward_api : multiply_grad
- backward_op : multiply_grad
forward : multiply(Tensor x, Tensor y) -> Tensor(out)
args : (Tensor x, Tensor y, Tensor out_grad)
output : Tensor(x_grad), Tensor(y_grad)
......@@ -158,7 +158,7 @@
func : multiply_coo_coo_grad{sparse_coo, sparse_coo, sparse_coo -> sparse_coo, sparse_coo},
multiply_csr_csr_grad{sparse_csr, sparse_csr, sparse_csr -> sparse_csr, sparse_csr}
- backward_api : mv_grad
- backward_op : mv_grad
forward : mv(Tensor x, Tensor vec) -> Tensor(out)
args : (Tensor x, Tensor vec, Tensor out_grad)
output : Tensor(x_grad), Tensor(vec_grad)
......@@ -166,7 +166,7 @@
func : mv_coo_grad{sparse_coo, dense, dense -> sparse_coo, dense},
mv_csr_grad{sparse_csr, dense, dense -> sparse_csr, dense}
- backward_api : pow_grad
- backward_op : pow_grad
forward : pow(Tensor x, float factor) -> Tensor(out)
args : (Tensor x, Tensor out_grad, float factor)
output : Tensor(x_grad)
......@@ -174,7 +174,7 @@
func : pow_coo_grad {sparse_coo, sparse_coo -> sparse_coo},
pow_csr_grad {sparse_csr, sparse_csr -> sparse_csr}
- backward_api : relu6_grad
- backward_op : relu6_grad
forward : relu6(Tensor x, float threshold) -> Tensor(out)
args : (Tensor out, Tensor out_grad, float threshold)
output : Tensor(x_grad)
......@@ -182,7 +182,7 @@
func : relu6_coo_grad {sparse_coo, sparse_coo -> sparse_coo},
relu6_csr_grad {sparse_csr, sparse_csr -> sparse_csr}
- backward_api : relu_grad
- backward_op : relu_grad
forward : relu(Tensor x) -> Tensor(out)
args : (Tensor out, Tensor out_grad)
output : Tensor(x_grad)
......@@ -190,13 +190,13 @@
func : relu_coo_grad {sparse_coo, sparse_coo -> sparse_coo},
relu_csr_grad {sparse_csr, sparse_csr -> sparse_csr}
- backward_api : scale_grad
- backward_op : scale_grad
forward : scale(Tensor x, float scale, float bias, bool bias_after_scale) -> Tensor(out)
args : (Tensor out_grad, float scale)
output : Tensor(x_grad)
invoke : scale(out_grad, scale, 0.0, true)
- backward_api : sin_grad
- backward_op : sin_grad
forward : sin(Tensor x) -> Tensor(out)
args : (Tensor x, Tensor out_grad)
output : Tensor(x_grad)
......@@ -204,7 +204,7 @@
func : sin_coo_grad {sparse_coo, sparse_coo -> sparse_coo},
sin_csr_grad {sparse_csr, sparse_csr -> sparse_csr}
- backward_api : sinh_grad
- backward_op : sinh_grad
forward : sinh(Tensor x) -> Tensor(out)
args : (Tensor x, Tensor out_grad)
output : Tensor(x_grad)
......@@ -212,21 +212,21 @@
func : sinh_coo_grad {sparse_coo, sparse_coo -> sparse_coo},
sinh_csr_grad {sparse_csr, sparse_csr -> sparse_csr}
- backward_api : softmax_grad
- backward_op : softmax_grad
forward : softmax(Tensor x, int axis=-1) -> Tensor(out)
args : (Tensor out, Tensor out_grad, int axis)
output : Tensor(x_grad)
kernel :
func : softmax_csr_grad{sparse_csr, sparse_csr -> sparse_csr}
- backward_api : sparse_coo_tensor_grad
- backward_op : sparse_coo_tensor_grad
forward : sparse_coo_tensor(Tensor values, Tensor indices, IntArray dense_shape) -> Tensor(out)
args : (Tensor indices, Tensor out_grad)
output : Tensor(values_grad)
kernel :
func : sparse_coo_tensor_grad{dense, sparse_coo -> dense}
- backward_api : sqrt_grad
- backward_op : sqrt_grad
forward : sqrt(Tensor x) -> Tensor(out)
args : (Tensor out, Tensor out_grad)
output : Tensor(x_grad)
......@@ -234,7 +234,7 @@
func : sqrt_coo_grad {sparse_coo, sparse_coo -> sparse_coo},
sqrt_csr_grad {sparse_csr, sparse_csr -> sparse_csr}
- backward_api : square_grad
- backward_op : square_grad
forward : square(Tensor x) -> Tensor(out)
args : (Tensor x, Tensor out_grad)
output : Tensor(x_grad)
......@@ -242,7 +242,7 @@
func : square_coo_grad {sparse_coo, sparse_coo -> sparse_coo},
square_csr_grad {sparse_csr, sparse_csr -> sparse_csr}
- backward_api : subtract_grad
- backward_op : subtract_grad
forward : subtract(Tensor x, Tensor y) -> Tensor(out)
args : (Tensor x, Tensor y, Tensor out_grad)
output : Tensor(x_grad), Tensor(y_grad)
......@@ -250,7 +250,7 @@
func : subtract_coo_coo_grad{sparse_coo, sparse_coo, sparse_coo -> sparse_coo, sparse_coo},
subtract_csr_csr_grad{sparse_csr, sparse_csr, sparse_csr -> sparse_csr, sparse_csr}
- backward_api : tan_grad
- backward_op : tan_grad
forward : tan(Tensor x) -> Tensor(out)
args : (Tensor x, Tensor out_grad)
output : Tensor(x_grad)
......@@ -258,7 +258,7 @@
func : tan_coo_grad {sparse_coo, sparse_coo -> sparse_coo},
tan_csr_grad {sparse_csr, sparse_csr -> sparse_csr}
- backward_api : tanh_grad
- backward_op : tanh_grad
forward : tanh(Tensor x) -> Tensor(out)
args : (Tensor out, Tensor out_grad)
output : Tensor(x_grad)
......@@ -266,28 +266,28 @@
func : tanh_coo_grad {sparse_coo, sparse_coo -> sparse_coo},
tanh_csr_grad {sparse_csr, sparse_csr -> sparse_csr}
- backward_api : to_dense_grad
- backward_op : to_dense_grad
forward : to_dense(Tensor x) -> Tensor(out)
args : (Tensor x, Tensor out_grad)
output : Tensor(x_grad)
kernel :
func : coo_to_dense_grad{sparse_coo, dense -> sparse_coo}
- backward_api : to_sparse_coo_grad
- backward_op : to_sparse_coo_grad
forward : to_sparse_coo(Tensor x, int64_t sparse_dim) -> Tensor(out)
args : (Tensor out_grad)
output : Tensor(x_grad)
kernel :
func : coo_to_dense { sparse_coo -> dense }
- backward_api : values_grad
- backward_op : values_grad
forward : values_coo(Tensor x) -> Tensor(out)
args : (Tensor x, Tensor out_grad)
output : Tensor(x_grad)
kernel :
func : values_coo_grad{sparse_coo, dense-> sparse_coo}
- backward_api: fused_attention_grad
- backward_op: fused_attention_grad
forward : fused_attention_csr(Tensor query, Tensor key, Tensor value, Tensor sparse_mask, Tensor key_padding_mask, Tensor attn_mask) -> Tensor(out), Tensor(softmax)
args: (Tensor query, Tensor key, Tensor value, Tensor softmax, Tensor out_grad)
output : Tensor(query_grad), Tensor(key_grad), Tensor(value_grad)
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册