- backward_api : add_grad
  forward : add(Tensor x, Tensor y) -> Tensor(out)
  args : (Tensor x, Tensor y, Tensor out_grad)
  output : Tensor(x_grad), Tensor(y_grad)
  kernel :
    func : add_coo_coo_grad{sparse_coo, sparse_coo, sparse_coo -> sparse_coo, sparse_coo},
           add_csr_csr_grad{sparse_csr, sparse_csr, sparse_csr -> sparse_csr, sparse_csr}

- backward_api : conv3d_grad
  forward : conv3d(Tensor x, Tensor kernel, int[] paddings, int[] dilations, int[] strides, int groups, bool subm) -> Tensor(out@SparseCooTensor), Tensor(rulebook@DenseTensor)
  args : (Tensor x, Tensor kernel, Tensor rulebook, Tensor out_grad, int[] paddings, int[] dilations, int[] strides, int groups, bool subm)
  output : Tensor(x_grad), Tensor(kernel_grad)
  kernel :
    func : sparse_conv3d_grad{sparse_coo, dense, dense, sparse_coo -> sparse_coo, dense}

- backward_api : coo_to_dense_grad
  forward : coo_to_dense(Tensor x) -> Tensor(out)
  args : (Tensor x, Tensor out_grad)
  output : Tensor(x_grad)
  kernel :
    func : sparse_coo_to_dense_grad{sparse_coo, dense -> sparse_coo}

- backward_api : create_sparse_coo_tensor_grad
  forward : create_sparse_coo_tensor(Tensor values, Tensor indices, IntArray dense_shape) -> Tensor(out)
  args : (Tensor indices, Tensor out_grad)
  output : Tensor(values_grad)
  kernel :
    func : sparse_coo_tensor_grad{dense, sparse_coo -> dense}

- backward_api : dense_to_coo_grad
  forward : dense_to_coo(Tensor x, int64_t sparse_dim) -> Tensor(out)
  args : (Tensor out_grad)
  output : Tensor(x_grad)
  invoke : to_dense_impl(out_grad)

- backward_api : divide_grad
  forward : divide(Tensor x, Tensor y) -> Tensor(out)
  args : (Tensor x, Tensor y, Tensor out, Tensor out_grad)
  output : Tensor(x_grad), Tensor(y_grad)
  kernel :
    func : divide_coo_coo_grad{sparse_coo, sparse_coo, sparse_coo, sparse_coo -> sparse_coo, sparse_coo},
           divide_csr_csr_grad{sparse_csr, sparse_csr, sparse_csr, sparse_csr -> sparse_csr, sparse_csr}

- backward_api : masked_matmul_grad
  forward : masked_matmul(Tensor x, Tensor y, Tensor mask) -> Tensor(out)
  args : (Tensor x, Tensor y, Tensor out_grad)
  output : Tensor(x_grad), Tensor(y_grad)
  kernel :
    func : csr_masked_matmul_grad{dense, dense, sparse_csr -> dense, dense}

- backward_api : matmul_grad
  forward : matmul(Tensor x, Tensor y) -> Tensor(out)
  args : (Tensor x, Tensor y, Tensor out_grad)
  output : Tensor(x_grad), Tensor(y_grad)
  kernel :
    func : csr_dense_matmul_grad{sparse_csr, dense, dense -> sparse_csr, dense}

- backward_api : multiply_grad
  forward : multiply(Tensor x, Tensor y) -> Tensor(out)
  args : (Tensor x, Tensor y, Tensor out_grad)
  output : Tensor(x_grad), Tensor(y_grad)
  kernel :
    func : multiply_coo_coo_grad{sparse_coo, sparse_coo, sparse_coo -> sparse_coo, sparse_coo},
           multiply_csr_csr_grad{sparse_csr, sparse_csr, sparse_csr -> sparse_csr, sparse_csr}

- backward_api : mv_grad
  forward : mv(Tensor x, Tensor vec) -> Tensor(out)
  args : (Tensor x, Tensor vec, Tensor out_grad)
  output : Tensor(x_grad), Tensor(vec_grad)
  kernel :
    func : mv_coo_grad{sparse_coo, dense, dense -> sparse_coo, dense},
           mv_csr_grad{sparse_csr, dense, dense -> sparse_csr, dense}

- backward_api : relu_grad
  forward : relu(Tensor x) -> Tensor(out)
  args : (Tensor out, Tensor out_grad)
  output : Tensor(x_grad)
  kernel :
    func : sparse_coo_relu_grad{sparse_coo, sparse_coo -> sparse_coo}

- backward_api : sin_grad
  forward : sin(Tensor x) -> Tensor(out)
  args : (Tensor x, Tensor out_grad)
  output : Tensor(x_grad)
  kernel :
    func : sparse_coo_sin_grad{sparse_coo, sparse_coo -> sparse_coo}

- backward_api : softmax_grad
  forward : softmax(Tensor x, int axis=-1) -> Tensor(out)
  args : (Tensor out, Tensor out_grad, int axis)
  output : Tensor(x_grad)
  kernel :
    func : softmax_csr_grad{sparse_csr, sparse_csr -> sparse_csr}

- backward_api : sparse_maxpool_grad
  forward : sparse_maxpool(Tensor x, int[] kernel_sizes, int[] paddings, int[] dilations, int[] strides) -> Tensor(out), Tensor(rulebook)
  args : (Tensor x, Tensor rulebook, Tensor out, Tensor out_grad, int[] kernel_sizes)
  output : Tensor(x_grad)
  kernel :
    func : sparse_maxpool_grad{sparse_coo, dense, sparse_coo, sparse_coo -> sparse_coo}

- backward_api : sqrt_grad
  forward : sqrt(Tensor x) -> Tensor(out)
  args : (Tensor out, Tensor out_grad)
  output : Tensor(x_grad)
  kernel :
    func : sparse_coo_sqrt_grad{sparse_coo, sparse_coo -> sparse_coo}

- backward_api : subtract_grad
  forward : subtract(Tensor x, Tensor y) -> Tensor(out)
  args : (Tensor x, Tensor y, Tensor out_grad)
  output : Tensor(x_grad), Tensor(y_grad)
  kernel :
    func : subtract_coo_coo_grad{sparse_coo, sparse_coo, sparse_coo -> sparse_coo, sparse_coo},
           subtract_csr_csr_grad{sparse_csr, sparse_csr, sparse_csr -> sparse_csr, sparse_csr}

- backward_api : tanh_grad
  forward : tanh(Tensor x) -> Tensor(out)
  args : (Tensor out, Tensor out_grad)
  output : Tensor(x_grad)
  kernel :
    func : sparse_coo_tanh_grad{sparse_coo, sparse_coo -> sparse_coo}

- backward_api : values_grad
  forward : coo_values(Tensor x) -> Tensor(out)
  args : (Tensor x, Tensor out_grad)
  output : Tensor(x_grad)
  kernel :
    func : coo_values_grad{sparse_coo, dense -> sparse_coo}

- backward_api : fused_attention_grad
  forward : fused_attention_csr(Tensor query, Tensor key, Tensor value, Tensor sparse_mask, Tensor key_padding_mask, Tensor attn_mask) -> Tensor(out), Tensor(softmax)
  args : (Tensor query, Tensor key, Tensor value, Tensor softmax, Tensor out_grad)
  output : Tensor(query_grad), Tensor(key_grad), Tensor(value_grad)
  kernel :
    func : fused_attention_csr_grad{dense, dense, dense, sparse_csr, dense -> dense, dense, dense}
    layout : softmax
    data_type : query
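# Note on the entry schema (a descriptive summary inferred from the entries above,
# not generated output):
#   backward_api : name of the backward (gradient) API
#   forward      : signature of the matching forward API
#   args         : tensors and attributes made available to the backward kernel
#   output       : gradient tensors produced
#   kernel.func  : candidate kernel(s), with {input layouts -> output layouts}
#                  selecting between sparse_coo / sparse_csr / dense variants
#   invoke       : reuse an existing API (e.g. to_dense_impl) instead of binding
#                  a dedicated kernel
#   kernel.layout / kernel.data_type : the argument from which the kernel's
#                  layout and data type are inferred (used by fused_attention_grad)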