# Backward (gradient) API declarations.
#
# Each list entry describes one backward API:
#   backward_api : name of the backward API
#   forward      : signature of the forward op this entry differentiates
#   args         : inputs of the backward API (forward inputs/outputs, output grads, attributes)
#   output       : gradients produced
#   infer_meta   : function (and the subset of args passed to it) used to infer output meta
#   kernel       : kernel function that implements the backward computation
#   invoke       : alternative to infer_meta/kernel -- express the gradient by calling another API
#   optional     : args that may be absent
#
# Commented-out entries are placeholders that are not enabled yet.

- backward_api : matmul_grad
  forward : matmul (Tensor x, Tensor y, bool transpose_x=false, bool transpose_y=false) -> Tensor(out)
  args : (Tensor x, Tensor y, Tensor out_grad, bool transpose_x=false, bool transpose_y=false)
  output : Tensor(x_grad), Tensor(y_grad)
  infer_meta :
    func : GeneralBinaryGradInferMeta
    param : [x, y]
  kernel :
    func : matmul_grad

- backward_api : matmul_double_grad
  forward : matmul_grad (Tensor x, Tensor y, Tensor out_grad, bool transpose_x, bool transpose_y) -> Tensor(dx), Tensor(dy)
  args : (Tensor x, Tensor y, Tensor out_grad, Tensor dx_grad, Tensor dy_grad, bool transpose_x, bool transpose_y)
  output : Tensor(d2x), Tensor(d2y), Tensor(dout_grad)
  infer_meta :
    func : GeneralTernaryGradInferMeta
    param : [x, y, out_grad]
  kernel :
    func : matmul_double_grad
  optional : dx_grad, dy_grad

# The derivative of both `scale * x + bias` and `scale * (x + bias)` with
# respect to x is `scale`, so the gradient is `scale * out_grad` and the bias
# must NOT be re-applied: the literal 0.0 is passed instead of `bias`.
- backward_api : scale_grad
  forward : scale (Tensor x, Scalar scale, float bias, bool bias_after_scale) -> Tensor(out)
  args : (Tensor out_grad, Scalar scale, float bias=0.0, bool bias_after_scale=true)
  output : Tensor(x_grad)
  invoke : scale(out_grad, scale, 0.0, bias_after_scale)

- backward_api : digamma_grad
  forward : digamma (Tensor x) -> Tensor(out)
  args : (Tensor x, Tensor out_grad)
  output : Tensor(x_grad)
  infer_meta :
    func : UnchangedInferMeta
    param : [x]
  kernel :
    func : digamma_grad

- backward_api : abs_grad
  forward : abs (Tensor x) -> Tensor(out)
  args : (Tensor x, Tensor out_grad)
  output : Tensor(x_grad)
  infer_meta :
    func : UnchangedInferMeta
    param : [x]
  kernel :
    func : abs_grad

- backward_api : trunc_grad
  forward : trunc (Tensor x) -> Tensor(out)
  args : (Tensor out_grad)
  output : Tensor(x_grad)
  infer_meta :
    func : UnchangedInferMeta
    param : [out_grad]
  kernel :
    func : trunc_grad

# - backward_api : norm_grad
#   forward : norm (Tensor x, int axis, float epsilon, bool is_test) -> Tensor(out), Tensor(norm)
#   args : (Tensor x, Tensor norm, Tensor out_grad, int axis, float epsilon, bool is_test)
#   output : Tensor(x_grad)
#   infer_meta :
#     func : UnchangedInferMeta
#     param : [x]
#   kernel :
#     func : norm_grad

- backward_api : diagonal_grad
  forward : diagonal (Tensor x, int offset, int axis1, int axis2) -> Tensor(out)
  args : (Tensor x, Tensor out_grad, int offset = 0, int axis1 = 0, int axis2 = 1)
  output : Tensor(x_grad)
  infer_meta :
    func : UnchangedInferMeta
    param : [x]
  kernel :
    func : diagonal_grad

# - backward_api : split_grad
#   forward : split (Tensor x, ScalarArray num_or_sections, Scalar axis) -> Tensor[](out)
#   args : (Tensor[] out_grad, Scalar axis)
#   output : Tensor(x_grad)
#   invoke : concat( out_grad, axis)

# TODO(zhangyunfei) The config of double grad and triple grad will be supported in the future.
# - backward_api : matmul_triple_grad
#   forward : matmul_double_grad (Tensor x, Tensor y, Tensor out_grad, Tensor dx_grad, Tensor dy_grad, bool transpose_x, bool transpose_y) -> Tensor(d2x), Tensor(d2y), Tensor(dout_grad)
#   args : (Tensor x, Tensor y, Tensor out_grad, Tensor dx_grad, Tensor dy_grad, Tensor d2x_grad, Tensor d2y_grad, Tensor dout_grad_grad, bool transpose_x, bool transpose_y)
#   output : Tensor(d3x), Tensor(d3y), Tensor(d2out_grad), Tensor(ddx_grad), Tensor(ddy_grad)
#   infer_meta :
#     func : MatmulTripleGradInferMeta
#   kernel :
#     func : matmul_triple_grad

- backward_api : softmax_grad
  forward : softmax (Tensor x, int axis) -> Tensor(out)
  args : (Tensor out, Tensor out_grad, int axis)
  output : Tensor(x_grad)
  infer_meta :
    func : UnchangedInferMeta
    param : [out]
  kernel :
    func : softmax_grad

# - backward_api : maxout_grad
#   forward : maxout (Tensor x, int groups, int axis) -> Tensor(out)
#   args : (Tensor x, Tensor out, Tensor out_grad, int groups, int axis)
#   output : Tensor(x_grad)
#   infer_meta :
#     func : UnchangedInferMeta
#     param : [x]
#   kernel :
#     func : maxout_grad

# TODO: an output here is optional -- how should that be handled?
- backward_api : put_along_axis_grad
  forward : put_along_axis (Tensor x, Tensor index, Tensor value, int axis, str reduce) -> Tensor(out)
  args : (Tensor x, Tensor index, Tensor out_grad, int axis, str reduce)
  output : Tensor(x_grad), Tensor(value_grad)
  infer_meta :
    func : GeneralBinaryGradInferMeta
    param : [x, index]
  kernel :
    func : put_along_axis_grad

- backward_api : take_along_axis_grad
  forward : take_along_axis (Tensor x, Tensor index, int axis) -> Tensor(out)
  args : (Tensor x, Tensor index, Tensor out_grad, int axis)
  output : Tensor(x_grad)
  infer_meta :
    func : UnchangedInferMeta
    param : [x]
  kernel :
    func : take_along_axis_grad

- backward_api : matrix_power_grad
  forward : matrix_power (Tensor x, int n) -> Tensor(out)
  args : (Tensor x, Tensor out, Tensor out_grad, int n)
  output : Tensor(x_grad)
  infer_meta :
    func : UnchangedInferMeta
    param : [x]
  kernel :
    func : matrix_power_grad

- backward_api : eigh_grad
  forward : eigh (Tensor x, str uplo) -> Tensor(out_w), Tensor(out_v)
  args : (Tensor out_w, Tensor out_v, Tensor out_w_grad, Tensor out_v_grad)
  output : Tensor(x_grad)
  infer_meta :
    func : UnchangedInferMeta
    param : [out_v]
  kernel :
    func : eigh_grad

- backward_api : segment_pool_grad
  forward : segment_pool (Tensor x, Tensor segment_ids, str pooltype) -> Tensor(out), Tensor(summed_ids)
  args : (Tensor x, Tensor segment_ids, Tensor out, Tensor summed_ids, Tensor out_grad, str pooltype)
  output : Tensor(x_grad)
  infer_meta :
    func : UnchangedInferMeta
    param : [x]
  kernel :
    func : segment_pool_grad

# Element-wise activation gradients.
# The trigonometric / hyperbolic entries below take the forward INPUT `x`:
# e.g. d cos(x)/dx = -sin(x) cannot be recovered from out = cos(x) alone.
# tanh / relu / sigmoid take the forward OUTPUT `out`, from which their
# derivative can be computed directly.
# NOTE(review): this must match the gradient kernel signatures -- confirm.
- backward_api : cos_grad
  forward : cos (Tensor x) -> Tensor(out)
  args : (Tensor x, Tensor out_grad)
  output : Tensor(x_grad)
  infer_meta :
    func : UnchangedInferMeta
    param : [x]
  kernel :
    func : cos_grad

- backward_api : tanh_grad
  forward : tanh (Tensor x) -> Tensor(out)
  args : (Tensor out, Tensor out_grad)
  output : Tensor(x_grad)
  infer_meta :
    func : UnchangedInferMeta
    param : [out]
  kernel :
    func : tanh_grad

- backward_api : acos_grad
  forward : acos (Tensor x) -> Tensor(out)
  args : (Tensor x, Tensor out_grad)
  output : Tensor(x_grad)
  infer_meta :
    func : UnchangedInferMeta
    param : [x]
  kernel :
    func : acos_grad

- backward_api : sin_grad
  forward : sin (Tensor x) -> Tensor(out)
  args : (Tensor x, Tensor out_grad)
  output : Tensor(x_grad)
  infer_meta :
    func : UnchangedInferMeta
    param : [x]
  kernel :
    func : sin_grad

- backward_api : asin_grad
  forward : asin (Tensor x) -> Tensor(out)
  args : (Tensor x, Tensor out_grad)
  output : Tensor(x_grad)
  infer_meta :
    func : UnchangedInferMeta
    param : [x]
  kernel :
    func : asin_grad

- backward_api : atan_grad
  forward : atan (Tensor x) -> Tensor(out)
  args : (Tensor x, Tensor out_grad)
  output : Tensor(x_grad)
  infer_meta :
    func : UnchangedInferMeta
    param : [x]
  kernel :
    func : atan_grad

- backward_api : sinh_grad
  forward : sinh (Tensor x) -> Tensor(out)
  args : (Tensor x, Tensor out_grad)
  output : Tensor(x_grad)
  infer_meta :
    func : UnchangedInferMeta
    param : [x]
  kernel :
    func : sinh_grad

- backward_api : cosh_grad
  forward : cosh (Tensor x) -> Tensor(out)
  args : (Tensor x, Tensor out_grad)
  output : Tensor(x_grad)
  infer_meta :
    func : UnchangedInferMeta
    param : [x]
  kernel :
    func : cosh_grad

- backward_api : asinh_grad
  forward : asinh (Tensor x) -> Tensor(out)
  args : (Tensor x, Tensor out_grad)
  output : Tensor(x_grad)
  infer_meta :
    func : UnchangedInferMeta
    param : [x]
  kernel :
    func : asinh_grad

- backward_api : acosh_grad
  forward : acosh (Tensor x) -> Tensor(out)
  args : (Tensor x, Tensor out_grad)
  output : Tensor(x_grad)
  infer_meta :
    func : UnchangedInferMeta
    param : [x]
  kernel :
    func : acosh_grad

- backward_api : atanh_grad
  forward : atanh (Tensor x) -> Tensor(out)
  args : (Tensor x, Tensor out_grad)
  output : Tensor(x_grad)
  infer_meta :
    func : UnchangedInferMeta
    param : [x]
  kernel :
    func : atanh_grad

- backward_api : relu_grad
  forward : relu (Tensor x) -> Tensor(out)
  args : (Tensor out, Tensor out_grad)
  output : Tensor(x_grad)
  infer_meta :
    func : UnchangedInferMeta
    param : [out]
  kernel :
    func : relu_grad

- backward_api : sigmoid_grad
  forward : sigmoid (Tensor x) -> Tensor(out)
  args : (Tensor out, Tensor out_grad)
  output : Tensor(x_grad)
  infer_meta :
    func : UnchangedInferMeta
    param : [out]
  kernel :
    func : sigmoid_grad

- backward_api : argsort_grad
  forward : argsort (Tensor x, int axis, bool descending) -> Tensor(out), Tensor(indices)
  args : (Tensor indices, Tensor x, Tensor out_grad, int axis, bool descending)
  output : Tensor(x_grad)
  infer_meta :
    func : UnchangedInferMeta
    param : [x]
  kernel :
    func : argsort_grad

# - backward_api : batch_norm_grad
#   forward : batch_norm (Tensor x, Tensor scale, Tensor bias, Tensor mean, Tensor variance, float momentum, float epsilon, str data_layout, bool is_test, bool use_global_stats, bool trainable_statistics, bool fuse_with_relu) -> Tensor(out), Tensor(mean_out), Tensor(variance_out), Tensor(saved_mean), Tensor(saved_variance), Tensor(reserve_space)
#   args : (Tensor indices, Tensor x, Tensor out_grad, int axis, bool descending)
#   output : Tensor(x_grad), Tensor(scale_grad), Tensor(bias_grad)
#   infer_meta :
#     func : GeneralTernaryGradInferMeta
#     param : [x, scale, bias]
#   kernel :
#     func : batch_norm_grad

# - backward_api : bilinear_tensor_product_grad
#   forward : bilinear_tensor_product (Tensor x, Tensor y, Tensor weight, Tensor bias) -> Tensor(out)
#   args : (Tensor x, Tensor y, Tensor weight, Tensor out_grad)
#   output : Tensor(x_grad), Tensor(y_grad), Tensor(weight_grad), Tensor(bias_grad)
#   infer_meta :
#     func : FourXXXXInferMeta
#     param : [x, y, weight, bias]
#   kernel :
#     func : bilinear_tensor_product_grad
#   optional : bias

# - backward_api : broadcast_tensor_grad
#   forward : broadcast_tensors (Tensor[] x) -> Tensor [] (out)
#   args : (Tensor [] out_grad)
#   output : Tensor [] (x_grad)
#   infer_meta :
#     func : XXXXInferMeta
#     param : [out_grad]
#   kernel :
#     func : broadcast_tensor_grad

- backward_api : cholesky_grad
  forward : cholesky (Tensor x, bool upper) -> Tensor(out)
  args : (Tensor out, Tensor out_grad, bool upper)
  output : Tensor(x_grad)
  infer_meta :
    func : UnchangedInferMeta
    param : [out]
  kernel :
    func : cholesky_grad

- backward_api : cholesky_solve_grad
  forward : cholesky_solve (Tensor x, Tensor y, bool upper) -> Tensor(out)
  args : (Tensor x, Tensor y, Tensor out, Tensor out_grad, bool upper)
  output : Tensor(x_grad), Tensor(y_grad)
  infer_meta :
    func : GeneralBinaryGradInferMeta
    param : [x, y]
  kernel :
    func : cholesky_solve_grad

# - backward_api : dropout_grad
#   forward : dropout (Tensor x, Tensor seed_tensor, float p, bool is_test, str mode, int seed, bool fix_seed) -> Tensor(out), Tensor(mask)
#   args : (Tensor mask, Tensor out_grad, float p, bool is_test, str mode)
#   output : Tensor(x_grad)
#   infer_meta :
#     func : UnchangedInferMeta
#     param : [out_grad]
#   kernel :
#     func : dropout_grad

- backward_api : erf_grad
  forward : erf (Tensor x) -> Tensor(out)
  args : (Tensor x, Tensor out, Tensor out_grad)
  output : Tensor(x_grad)
  infer_meta :
    func : UnchangedInferMeta
    param : [x]
  kernel :
    func : erf_grad

- backward_api : erfinv_grad
  forward : erfinv (Tensor x) -> Tensor(out)
  args : (Tensor out, Tensor out_grad)
  output : Tensor(x_grad)
  infer_meta :
    func : UnchangedInferMeta
    param : [out]
  kernel :
    func : erfinv_grad

# - backward_api : expand_as_grad
#   forward : expand_as (Tensor x, Tensor y, int[] target_shape) -> Tensor(out)
#   args : (Tensor x, Tensor out_grad, int[] target_shape)
#   output : Tensor(x_grad)
#   infer_meta :
#     func : UnchangedGradInferMeta
#     param : [x]
#   kernel :
#     func : expand_as_grad

# - backward_api : expand_grad
#   forward : expand (Tensor x, ScalarArray shape) -> Tensor(out)
#   args : (Tensor x, Tensor out_grad, ScalarArray shape)
#   output : Tensor(x_grad)
#   infer_meta :
#     func : UnchangedGradInferMeta
#     param : [x]
#   kernel :
#     func : expand_grad

# - backward_api : graph_send_recv_grad
#   forward : graph_send_recv (Tensor x, Tensor src_index, Tensor dst_index, str pool_type) -> Tensor(out), Tensor(dst_count)
#   args : (Tensor out_grad, Tensor x, Tensor out, Tensor src_index, Tensor dst_index, Tensor dst_count, str pool_type)
#   output : Tensor(x_grad)
#   infer_meta :
#     func : UnchangedInferMeta
#     param : [x]
#   kernel :
#     func : graph_send_recv_grad

# - backward_api : label_smooth_grad
#   forward : label_smooth (Tensor label, Tensor prior_dist, float epsilon) -> Tensor(out)
#   args : (Tensor out_grad, float epsilon)
#   output : Tensor(label_grad)
#   infer_meta :
#     func : UnchangedInferMeta
#     param : [out_grad]
#   kernel :
#     func : label_smooth_grad

- backward_api : log_loss_grad
  forward : log_loss (Tensor input, Tensor label, float epsilon) -> Tensor(out)
  args : (Tensor input, Tensor label, Tensor out_grad, float epsilon)
  output : Tensor(input_grad)
  infer_meta :
    func : UnchangedInferMeta
    param : [input]
  kernel :
    func : log_loss_grad

# - backward_api : masked_select_grad
#   forward : masked_select (Tensor x, Tensor mask) -> Tensor(out)
#   args : (Tensor out_grad, Tensor x, Tensor mask)
#   output : Tensor(x_grad)
#   infer_meta :
#     func : UnchangedInferMeta
#     param : [x]
#   kernel :
#     func : masked_select_grad

# - backward_api : multi_dot_grad
#   forward : multi_dot (Tensor[] x) -> Tensor(out)
#   args : (Tensor out_grad, Tensor[] x)
#   output : Tensor[] (x_grad)
#   infer_meta :
#     func : XXXXInferMeta
#     param : [x]
#   kernel :
#     func : multi_dot_grad

# - backward_api : nll_loss_grad
#   forward : nll_loss (Tensor x, Tensor label, Tensor weight, int64 ignore_index, str reduction) -> Tensor(out), Tensor(total_weight)
#   args : (Tensor x, Tensor label, Tensor total_weight, Tensor weight, Tensor out_grad, int64 ignore_index, str reduction)
#   output : Tensor[] (x_grad)
#   infer_meta :
#     func : UnchangedInferMeta
#     param : [x]
#   kernel :
#     func : nll_loss_grad

# - backward_api : pad_grad
#   forward : pad (Tensor x, int[] paddings, float pad_value) -> Tensor(out)
#   args : (Tensor out_grad, int[] paddings, float pad_value)
#   output : Tensor(x_grad)
#   infer_meta :
#     func : XXXXXInferMeta
#     param : [x]
#   kernel :
#     func : pad_grad

# - backward_api : pixel_shuffle_grad
#   forward : pixel_shuffle (Tensor x, int upscale_factor, str data_format) -> Tensor(out)
#   args : (Tensor out_grad, int upscale_factor, str data_format)
#   output : Tensor(x_grad)
#   infer_meta :
#     func : XXXXXInferMeta
#     param : [x]
#   kernel :
#     func : pixel_shuffle_grad

# - backward_api : poisson_grad
#   forward : poisson (Tensor x) -> Tensor(out)
#   args : ()
#   output : Tensor(x_grad)
#   infer_meta :
#     func : XXXXXInferMeta
#     param : [x]
#   kernel :
#     func : poisson_grad

# - backward_api : psroi_pool_grad
#   forward : psroi_pool (Tensor x, Tensor rois, Tensor rois_num, int pooled_weight, int pooled_width, int output_channels, float spatial_scale ) -> Tensor(out)
#   args : (Tensor x, Tensor rois, Tensor rois_num, Tensor out_grad, int pooled_weight, int pooled_width, int output_channels, float spatial_scale)
#   output : Tensor(x_grad)
#   infer_meta :
#     func : UnchangedInferMeta
#     param : [x]
#   kernel :
#     func : psroi_pool_grad
#   optional : rois_num

- backward_api : selu_grad
  forward : selu (Tensor x, float scale, float alpha) -> Tensor(out)
  args : (Tensor out, Tensor out_grad, float scale, float alpha)
  output : Tensor(x_grad)
  infer_meta :
    func : UnchangedInferMeta
    param : [out]
  kernel :
    func : selu_grad

- backward_api : sigmoid_cross_entropy_with_logits_grad
  forward : sigmoid_cross_entropy_with_logits (Tensor x, Tensor label, bool normalize, int ignore_index) -> Tensor(out)
  args : (Tensor x, Tensor label, Tensor out_grad, bool normalize, int ignore_index)
  output : Tensor(x_grad)
  infer_meta :
    func : UnchangedInferMeta
    param : [x]
  kernel :
    func : sigmoid_cross_entropy_with_logits_grad

- backward_api : tile_grad
  forward : tile (Tensor x, ScalarArray repeat_times) -> Tensor(out)
  args : (Tensor x, Tensor out_grad, ScalarArray repeat_times)
  output : Tensor(x_grad)
  infer_meta :
    func : UnchangedInferMeta
    param : [x]
  kernel :
    func : tile_grad

# # The forward and backward argument types do not match.
# - backward_api : top_k_grad
#   forward : top_k (Tensor x, Scalar k, int axis, bool largest, bool sorted) -> Tensor(out), Tensor(indices)
#   args : (Tensor out_grad, Tensor x, Tensor indices, int k, int axis, bool largest, bool sorted)
#   output : Tensor(x_grad)
#   infer_meta :
#     func : UnchangedInferMeta
#     param : [x]
#   kernel :
#     func : top_k_grad

- backward_api : trace_grad
  forward : trace (Tensor x, int offset, int axis1, int axis2) -> Tensor(out)
  args : (Tensor x, Tensor out_grad, int offset, int axis1, int axis2)
  output : Tensor(x_grad)
  infer_meta :
    func : UnchangedInferMeta
    param : [x]
  kernel :
    func : trace_grad

- backward_api : unfold_grad
  forward : unfold (Tensor x, int[] kernel_sizes, int[] strides, int[] paddings, int[] dilations) -> Tensor(out)
  args : (Tensor x, Tensor out_grad, int[] kernel_sizes, int[] strides, int[] paddings, int[] dilations)
  output : Tensor(x_grad)
  infer_meta :
    func : UnchangedInferMeta
    param : [x]
  kernel :
    func : unfold_grad

# - backward_api : where_index_grad
#   forward : where_index (Tensor condition) -> Tensor(out)
#   args : (Tensor out_grad, Tensor x, int offset, int axis1, int axis2)
#   output : Tensor(x_grad)
#   infer_meta :
#     func : UnchangedInferMeta
#     param : [x]
#   kernel :
#     func : where_index_grad