- backward_op : abs_grad forward : tanh(Tensor x) -> Tensor(out) args : (Tensor x, Tensor out_grad) output : Tensor(x_grad) infer_meta : func : UnchangedInferMeta param : [x] kernel : func : abs_coo_grad {sparse_coo, sparse_coo -> sparse_coo}, abs_csr_grad {sparse_csr, sparse_csr -> sparse_csr} - backward_op : acos_grad forward : acos(Tensor x) -> Tensor(out) args : (Tensor x, Tensor out_grad) output : Tensor(x_grad) infer_meta : func : UnchangedInferMeta param : [x] kernel : func : acos_coo_grad {sparse_coo, sparse_coo -> sparse_coo}, acos_csr_grad {sparse_csr, sparse_csr -> sparse_csr} - backward_op : acosh_grad forward : acosh(Tensor x) -> Tensor(out) args : (Tensor x, Tensor out_grad) output : Tensor(x_grad) infer_meta : func : UnchangedInferMeta param : [x] kernel : func : acosh_coo_grad {sparse_coo, sparse_coo -> sparse_coo}, acosh_csr_grad {sparse_csr, sparse_csr -> sparse_csr} - backward_op : add_grad forward : add(Tensor x, Tensor y) -> Tensor(out) args : (Tensor x, Tensor y, Tensor out_grad) output : Tensor(x_grad), Tensor(y_grad) infer_meta : func : GeneralBinaryGradInferMeta param : [x, y] kernel : func : add_coo_coo_grad{sparse_coo, sparse_coo, sparse_coo -> sparse_coo, sparse_coo}, add_csr_csr_grad{sparse_csr, sparse_csr, sparse_csr -> sparse_csr, sparse_csr}, add_coo_dense_grad{sparse_coo, dense, sparse_coo -> sparse_coo, dense} - backward_op : addmm_grad forward : addmm(Tensor input, Tensor x, Tensor y, float alpha=1.0, float beta=1.0) -> Tensor(out) args : (Tensor input, Tensor x, Tensor y, Tensor out_grad, float alpha=1.0, float beta=1.0) output : Tensor(input_grad), Tensor(x_grad), Tensor(y_grad) infer_meta : func : GeneralTernaryGradInferMeta param : [input, x, y] kernel : func : addmm_csr_dense_grad {dense, sparse_csr, dense, dense -> dense, sparse_csr, dense}, addmm_csr_csr_grad {sparse_csr, sparse_csr, sparse_csr, sparse_csr -> sparse_csr, sparse_csr, sparse_csr}, addmm_coo_dense_grad {dense, sparse_coo, dense, dense -> dense, sparse_coo, dense}, addmm_coo_coo_grad {sparse_coo, sparse_coo, sparse_coo, sparse_coo -> sparse_coo, sparse_coo, sparse_coo} - backward_op : asin_grad forward : asin(Tensor x) -> Tensor(out) args : (Tensor x, Tensor out_grad) output : Tensor(x_grad) infer_meta : func : UnchangedInferMeta param : [x] kernel : func : asin_coo_grad {sparse_coo, sparse_coo -> sparse_coo}, asin_csr_grad {sparse_csr, sparse_csr -> sparse_csr} - backward_op : asinh_grad forward : asinh(Tensor x) -> Tensor(out) args : (Tensor x, Tensor out_grad) output : Tensor(x_grad) infer_meta : func : UnchangedInferMeta param : [x] kernel : func : asinh_coo_grad {sparse_coo, sparse_coo -> sparse_coo}, asinh_csr_grad {sparse_csr, sparse_csr -> sparse_csr} - backward_op : atan_grad forward : atan(Tensor x) -> Tensor(out) args : (Tensor x, Tensor out_grad) output : Tensor(x_grad) infer_meta : func : UnchangedInferMeta param : [x] kernel : func : atan_coo_grad {sparse_coo, sparse_coo -> sparse_coo}, atan_csr_grad {sparse_csr, sparse_csr -> sparse_csr} - backward_op : atanh_grad forward : atanh(Tensor x) -> Tensor(out) args : (Tensor x, Tensor out_grad) output : Tensor(x_grad) infer_meta : func : UnchangedInferMeta param : [x] kernel : func : atanh_coo_grad {sparse_coo, sparse_coo -> sparse_coo}, atanh_csr_grad {sparse_csr, sparse_csr -> sparse_csr} - backward_op : batch_norm_grad forward : batch_norm (Tensor x, Tensor scale, Tensor bias, Tensor mean, Tensor variance, float momentum, float epsilon, str data_layout, bool is_test, bool use_global_stats, bool trainable_statistics, bool fuse_with_relu) -> Tensor(out), Tensor(mean_out), Tensor(variance_out), Tensor(saved_mean), Tensor(saved_variance), Tensor(reserve_space) args : (Tensor x, Tensor scale, Tensor bias, Tensor mean_out, Tensor variance_out, Tensor saved_mean, Tensor saved_variance, Tensor reserve_space, Tensor out_grad, float momentum, float epsilon, str data_layout, bool is_test, bool use_global_stats, bool trainable_statistics, bool fuse_with_relu) output : Tensor(x_grad), Tensor(scale_grad), Tensor(bias_grad) infer_meta : func : GeneralTernaryGradInferMeta param : [x, scale, bias] kernel : func : batch_norm_coo_grad {sparse_coo, dense, dense, dense, dense, dense, dense, dense, sparse_coo -> sparse_coo, dense, dense} data_type : out_grad optional : mean_out, variance_out, reserve_space - backward_op : cast_grad forward : cast(Tensor x, DataType index_dtype, DataType value_dtype) -> Tensor(out) args : (Tensor x, Tensor out_grad, DataType value_dtype) output : Tensor(x_grad) infer_meta : func : UnchangedInferMeta param: [x] kernel : func : cast_coo_grad {sparse_coo, sparse_coo -> sparse_coo}, cast_csr_grad {sparse_csr, sparse_csr -> sparse_csr} data_type : out_grad - backward_op : conv3d_grad forward : conv3d (Tensor x, Tensor kernel, int[] paddings, int[] dilations, int[] strides, int groups, bool subm, str key) -> Tensor(out), Tensor(rulebook), Tensor(counter) args : (Tensor x, Tensor kernel, Tensor out, Tensor rulebook, Tensor counter, Tensor out_grad, int[] paddings, int[] dilations, int[] strides, int groups, bool subm, str key) output : Tensor(x_grad), Tensor(kernel_grad) infer_meta : func : GeneralBinaryGradInferMeta param : [x, kernel] kernel : func : conv3d_coo_grad{sparse_coo, dense, sparse_coo, dense, dense, sparse_coo -> sparse_coo, dense} - backward_op : divide_grad forward : divide(Tensor x, Tensor y) -> Tensor(out) args : (Tensor x, Tensor y, Tensor out, Tensor out_grad) output : Tensor(x_grad), Tensor(y_grad) infer_meta : func : GeneralBinaryGradInferMeta param : [x, y] kernel : func : divide_coo_coo_grad{sparse_coo, sparse_coo, sparse_coo, sparse_coo -> sparse_coo, sparse_coo}, divide_csr_csr_grad{sparse_csr, sparse_csr, sparse_csr, sparse_csr -> sparse_csr, sparse_csr} - backward_op : divide_scalar_grad forward : divide_scalar (Tensor x, float scalar) -> Tensor(out) args : (Tensor out_grad, float scalar) output : Tensor(x_grad) invoke : divide_scalar(out_grad, scalar) - backward_op : expm1_grad forward : expm1(Tensor x) -> Tensor(out) args : (Tensor out, Tensor out_grad) output : Tensor(x_grad) infer_meta : func : UnchangedInferMeta param : [out] kernel : func : expm1_coo_grad {sparse_coo, sparse_coo -> sparse_coo}, expm1_csr_grad {sparse_csr, sparse_csr -> sparse_csr} - backward_op : leaky_relu_grad forward : leaky_relu(Tensor x, float alpha) -> Tensor(out) args : (Tensor x, Tensor out_grad, float alpha) output : Tensor(x_grad) infer_meta : func : UnchangedInferMeta param : [x] kernel : func : leaky_relu_coo_grad {sparse_coo, sparse_coo -> sparse_coo}, leaky_relu_csr_grad {sparse_csr, sparse_csr -> sparse_csr} - backward_op : log1p_grad forward : log1p(Tensor x) -> Tensor(out) args : (Tensor x, Tensor out_grad) output : Tensor(x_grad) infer_meta : func : UnchangedInferMeta param : [x] kernel : func : log1p_coo_grad {sparse_coo, sparse_coo -> sparse_coo}, log1p_csr_grad {sparse_csr, sparse_csr -> sparse_csr} - backward_op : masked_matmul_grad forward : masked_matmul(Tensor x, Tensor y, Tensor mask) -> Tensor(out) args : (Tensor x, Tensor y, Tensor out_grad) output : Tensor(x_grad), Tensor(y_grad) infer_meta : func : GeneralBinaryGradInferMeta param : [x, y] kernel : func : masked_matmul_csr_grad{dense, dense, sparse_csr -> dense, dense} - backward_op : matmul_grad forward : matmul(Tensor x, Tensor y) -> Tensor(out) args : (Tensor x, Tensor y, Tensor out_grad) output : Tensor(x_grad), Tensor(y_grad) infer_meta : func : GeneralBinaryGradInferMeta param : [x, y] kernel : func : matmul_csr_dense_grad {sparse_csr, dense, dense -> sparse_csr, dense}, matmul_csr_csr_grad {sparse_csr, sparse_csr, sparse_csr -> sparse_csr, sparse_csr}, matmul_coo_dense_grad {sparse_coo, dense, dense -> sparse_coo, dense}, matmul_coo_coo_grad {sparse_coo, sparse_coo, sparse_coo -> sparse_coo, sparse_coo} - backward_op : maxpool_grad forward : maxpool(Tensor x, int[] kernel_sizes, int[] paddings, int[] dilations, int[] strides) -> Tensor(out), Tensor(rulebook), Tensor(counter) args : (Tensor x, Tensor rulebook, Tensor counter, Tensor out, Tensor out_grad, int[] kernel_sizes) output : Tensor(x_grad) infer_meta : func : UnchangedInferMeta param: [x] kernel : func : maxpool_coo_grad {sparse_coo, dense, dense, sparse_coo, sparse_coo -> sparse_coo} - backward_op : multiply_grad forward : multiply(Tensor x, Tensor y) -> Tensor(out) args : (Tensor x, Tensor y, Tensor out_grad) output : Tensor(x_grad), Tensor(y_grad) infer_meta : func : GeneralBinaryGradInferMeta param : [x, y] kernel : func : multiply_coo_coo_grad{sparse_coo, sparse_coo, sparse_coo -> sparse_coo, sparse_coo}, multiply_csr_csr_grad{sparse_csr, sparse_csr, sparse_csr -> sparse_csr, sparse_csr} - backward_op : mv_grad forward : mv(Tensor x, Tensor vec) -> Tensor(out) args : (Tensor x, Tensor vec, Tensor out_grad) output : Tensor(x_grad), Tensor(vec_grad) infer_meta : func : GeneralBinaryGradInferMeta param : [x, vec] kernel : func : mv_coo_grad{sparse_coo, dense, dense -> sparse_coo, dense}, mv_csr_grad{sparse_csr, dense, dense -> sparse_csr, dense} - backward_op : pow_grad forward : pow(Tensor x, float factor) -> Tensor(out) args : (Tensor x, Tensor out_grad, float factor) output : Tensor(x_grad) infer_meta : func : UnchangedInferMeta param : [x] kernel : func : pow_coo_grad {sparse_coo, sparse_coo -> sparse_coo}, pow_csr_grad {sparse_csr, sparse_csr -> sparse_csr} - backward_op : relu6_grad forward : relu6(Tensor x, float threshold) -> Tensor(out) args : (Tensor out, Tensor out_grad, float threshold) output : Tensor(x_grad) infer_meta : func : UnchangedInferMeta param : [out] kernel : func : relu6_coo_grad {sparse_coo, sparse_coo -> sparse_coo}, relu6_csr_grad {sparse_csr, sparse_csr -> sparse_csr} - backward_op : relu_grad forward : relu(Tensor x) -> Tensor(out) args : (Tensor out, Tensor out_grad) output : Tensor(x_grad) infer_meta : func : UnchangedInferMeta param : [out] kernel : func : relu_coo_grad {sparse_coo, sparse_coo -> sparse_coo}, relu_csr_grad {sparse_csr, sparse_csr -> sparse_csr} - backward_op : reshape_grad forward : reshape(Tensor x, IntArray shape) -> Tensor(out) args : (Tensor x, Tensor out_grad) output : Tensor(x_grad) infer_meta : func : UnchangedInferMeta param : [x] kernel : func : reshape_coo_grad {sparse_coo, sparse_coo -> sparse_coo}, reshape_csr_grad {sparse_csr, sparse_csr -> sparse_csr} - backward_op : scale_grad forward : scale(Tensor x, float scale, float bias, bool bias_after_scale) -> Tensor(out) args : (Tensor out_grad, float scale) output : Tensor(x_grad) infer_meta : func : UnchangedInferMeta param : [out_grad] invoke : scale(out_grad, scale, 0.0, true) - backward_op : sin_grad forward : sin(Tensor x) -> Tensor(out) args : (Tensor x, Tensor out_grad) output : Tensor(x_grad) infer_meta : func : UnchangedInferMeta param : [x] kernel : func : sin_coo_grad {sparse_coo, sparse_coo -> sparse_coo}, sin_csr_grad {sparse_csr, sparse_csr -> sparse_csr} - backward_op : sinh_grad forward : sinh(Tensor x) -> Tensor(out) args : (Tensor x, Tensor out_grad) output : Tensor(x_grad) infer_meta : func : UnchangedInferMeta param : [x] kernel : func : sinh_coo_grad {sparse_coo, sparse_coo -> sparse_coo}, sinh_csr_grad {sparse_csr, sparse_csr -> sparse_csr} - backward_op : softmax_grad forward : softmax(Tensor x, int axis=-1) -> Tensor(out) args : (Tensor out, Tensor out_grad, int axis) output : Tensor(x_grad) infer_meta : func : UnchangedInferMeta param : [out] kernel : func : softmax_csr_grad{sparse_csr, sparse_csr -> sparse_csr} - backward_op : sparse_coo_tensor_grad forward : sparse_coo_tensor(Tensor values, Tensor indices, IntArray dense_shape) -> Tensor(out) args : (Tensor indices, Tensor out_grad) output : Tensor(values_grad) infer_meta : func : UnchangedInferMeta param: [out_grad] kernel : func : sparse_coo_tensor_grad{dense, sparse_coo -> dense} - backward_op : sqrt_grad forward : sqrt(Tensor x) -> Tensor(out) args : (Tensor out, Tensor out_grad) output : Tensor(x_grad) infer_meta : func : UnchangedInferMeta param : [out] kernel : func : sqrt_coo_grad {sparse_coo, sparse_coo -> sparse_coo}, sqrt_csr_grad {sparse_csr, sparse_csr -> sparse_csr} - backward_op : square_grad forward : square(Tensor x) -> Tensor(out) args : (Tensor x, Tensor out_grad) output : Tensor(x_grad) infer_meta : func : UnchangedInferMeta param : [x] kernel : func : square_coo_grad {sparse_coo, sparse_coo -> sparse_coo}, square_csr_grad {sparse_csr, sparse_csr -> sparse_csr} - backward_op : subtract_grad forward : subtract(Tensor x, Tensor y) -> Tensor(out) args : (Tensor x, Tensor y, Tensor out_grad) output : Tensor(x_grad), Tensor(y_grad) infer_meta : func : GeneralBinaryGradInferMeta param : [x, y] kernel : func : subtract_coo_coo_grad{sparse_coo, sparse_coo, sparse_coo -> sparse_coo, sparse_coo}, subtract_csr_csr_grad{sparse_csr, sparse_csr, sparse_csr -> sparse_csr, sparse_csr} - backward_op : sync_batch_norm_grad forward : sync_batch_norm(Tensor x, Tensor scale, Tensor bias, Tensor mean, Tensor variance, float momentum, float epsilon, str data_layout, bool is_test, bool use_global_stats, bool trainable_statistics, bool fuse_with_relu) -> Tensor(out), Tensor(mean_out), Tensor(variance_out), Tensor(saved_mean), Tensor(saved_variance), Tensor(reserve_space) args : (Tensor x, Tensor scale, Tensor bias, Tensor saved_mean, Tensor saved_variance, Tensor reserve_space, Tensor out_grad, float momentum, float epsilon, str data_layout, bool is_test, bool use_global_stats, bool trainable_statistics, bool fuse_with_relu) output : Tensor(x_grad), Tensor(scale_grad), Tensor(bias_grad) infer_meta : func : GeneralTernaryGradInferMeta param : [x, scale, bias] kernel : func : sync_batch_norm_coo_grad{sparse_coo, dense, dense, dense, dense, dense, sparse_coo -> sparse_coo, dense, dense} data_type : out_grad optional : reserve_space - backward_op : tan_grad forward : tan(Tensor x) -> Tensor(out) args : (Tensor x, Tensor out_grad) output : Tensor(x_grad) infer_meta : func : UnchangedInferMeta param : [x] kernel : func : tan_coo_grad {sparse_coo, sparse_coo -> sparse_coo}, tan_csr_grad {sparse_csr, sparse_csr -> sparse_csr} - backward_op : tanh_grad forward : tanh(Tensor x) -> Tensor(out) args : (Tensor out, Tensor out_grad) output : Tensor(x_grad) infer_meta : func : UnchangedInferMeta param : [out] kernel : func : tanh_coo_grad {sparse_coo, sparse_coo -> sparse_coo}, tanh_csr_grad {sparse_csr, sparse_csr -> sparse_csr} - backward_op : to_dense_grad forward : to_dense(Tensor x) -> Tensor(out) args : (Tensor x, Tensor out_grad) output : Tensor(x_grad) infer_meta : func : UnchangedInferMeta param : [x] kernel : func : coo_to_dense_grad{sparse_coo, dense -> sparse_coo} - backward_op : to_sparse_coo_grad forward : to_sparse_coo(Tensor x, int64_t sparse_dim) -> Tensor(out) args : (Tensor out_grad) output : Tensor(x_grad) infer_meta : func : UnchangedInferMeta kernel : func : coo_to_dense { sparse_coo -> dense } - backward_op : transpose_grad forward : transpose(Tensor x, int[] perm) -> Tensor(out) args : (Tensor out_grad, int[] perm) output : Tensor(x_grad) infer_meta : func : TransposeGradInferMeta param : [out_grad, perm] kernel : func : transpose_coo_grad {sparse_coo -> sparse_coo}, transpose_csr_grad {sparse_csr -> sparse_csr} - backward_op : values_grad forward : values_coo(Tensor x) -> Tensor(out) args : (Tensor x, Tensor out_grad) output : Tensor(x_grad) infer_meta : func : UnchangedInferMeta param : [x] kernel : func : values_coo_grad{sparse_coo, dense-> sparse_coo} - backward_op: fused_attention_grad forward : fused_attention_csr(Tensor query, Tensor key, Tensor value, Tensor sparse_mask, Tensor key_padding_mask, Tensor attn_mask) -> Tensor(out), Tensor(softmax) args: (Tensor query, Tensor key, Tensor value, Tensor softmax, Tensor out_grad) output : Tensor(query_grad), Tensor(key_grad), Tensor(value_grad) infer_meta : func : sparse::FusedAttentionGradInferMeta kernel : func : fused_attention_csr_grad{dense, dense, dense, sparse_csr, dense -> dense, dense, dense} layout : softmax data_type: query