# Declarations of fused C++ forward operators. The entries in this file drive
# the generated code for static mode and, when `support_dygraph_mode` is true,
# for dynamic (dygraph) mode as well.
#
# `support_dygraph_mode` is an extra configuration item specific to this file:
# an operator declared with `support_dygraph_mode : true` supports dygraph mode;
# an operator without it can only be used in static mode.
#
# Entries below are listed in alphabetical order by op name.

- op : add_act_xpu
  args : (Tensor x, Tensor x_max, Tensor y, Tensor y_max, int act_type)
  output : Tensor(out), Tensor(out_max)
  infer_meta :
    func : AddActXPUInferMeta
  kernel :
    func : add_act_xpu
    data_type : x
  optional : x_max, y_max

- op : add_layernorm_xpu
  args : (Tensor x, Tensor y, Tensor scale, Tensor bias, int begin_norm_axis, float epsilon)
  output : Tensor(out), Tensor(mean), Tensor(variance), Tensor(z_add)
  infer_meta :
    func : AddLayernormXPUInferMeta
  kernel :
    func : add_layernorm_xpu
    data_type : x

- op : conv1d_xpu
  args : (Tensor x, Tensor x_max, Tensor filter, Tensor filter_max, Tensor bias, Tensor branch, Tensor branch_max, int[] paddings, str padding_algorithm, int dilations, int strides, int groups, int act_type, float act_param)
  output : Tensor(out), Tensor(out_max)
  infer_meta :
    func : Conv1dXPUInferMeta
  kernel :
    func : conv1d_xpu
    data_type : x
  optional : bias, branch, branch_max, x_max

- op : conv2d_transpose_xpu
  args : (Tensor x, Tensor x_max, Tensor filter, Tensor filter_max, Tensor bias, int[] strides, int[] paddings, int[] output_padding, IntArray output_size, str padding_algorithm, int groups, int[] dilations, str data_format, bool has_bias, bool with_act, str act_type)
  output : Tensor(out), Tensor(out_max)
  infer_meta :
    func : Conv2dTransposeXPUInferMeta
  kernel :
    func : conv2d_transpose_xpu
    data_type : x
  optional : bias, x_max

- op : conv2d_xpu
  args : (Tensor x, Tensor x_max, Tensor filter, Tensor filter_max, Tensor bias, Tensor branch, Tensor branch_max, int[] paddings, int[] dilations, int[] strides, str padding_algorithm, int groups, int act_type, float act_param, DataType out_dtype)
  output : Tensor(out), Tensor(out_max)
  infer_meta :
    func : Conv2dXPUInferMeta
  kernel :
    func : conv2d_xpu
    data_type : x
  optional : bias, branch, branch_max ,x_max

- op : embedding_with_eltwise_add_xpu
  args : (Tensor[] ids, Tensor[] tables, Tensor mask, int64_t padding_idx)
  output : Tensor(out), Tensor(seq_lod), Tensor(max_seq_len)
  infer_meta :
    func : EmbeddingWithEltwiseAddXPUInferMeta
  kernel :
    func : embedding_with_eltwise_add_xpu
    data_type : tables
  optional : mask, seq_lod, max_seq_len

- op : fast_layernorm_xpu
  args : (Tensor x, Tensor scale, Tensor bias, int begin_norm_axis, float epsilon)
  output : Tensor(out)
  infer_meta :
    func : FastLayernormXPUInferMeta
  kernel :
    func : fast_layernorm_xpu
    data_type : x

- op : fast_where_xpu
  args : (Tensor condition, Tensor x, Tensor y)
  output : Tensor(out)
  infer_meta :
    func : FastWhereXPUInferMeta
  kernel :
    func : fast_where_xpu
    data_type : x

- op : fc_xpu
  args : (Tensor x, Tensor x_max, Tensor w, Tensor w_max, Tensor bias, int in_num_col_dims, bool transpose_x, float alpha, float beta, int act_type, float act_alpha, DataType out_dtype)
  output : Tensor(out), Tensor(out_max)
  infer_meta :
    func : FcXPUInferMeta
  kernel :
    func : fc_xpu
    data_type : x
  optional : bias, x_max

- op : fused_bias_act
  args : (Tensor x, Tensor bias, Tensor dequant_scales, Tensor shift, Tensor smooth, str act_method = "gelu", str compute_dtype = "default", float quant_scale = -1, int quant_round_type = 1, float quant_max_bound = 127.0, float quant_min_bound = -127.0)
  output : Tensor(out)
  infer_meta :
    func : FusedBiasActInferMeta
  kernel :
    func : fused_bias_act
    data_type : x
  optional : bias, dequant_scales, shift, smooth
  support_dygraph_mode : true

- op : fused_bias_residual_layernorm
  args : (Tensor x, Tensor bias, Tensor residual, Tensor norm_weight, Tensor norm_bias, float epsilon, float residual_alpha, int begin_norm_axis, float quant_scale, int quant_round_type, float quant_max_bound, float quant_min_bound)
  output : Tensor(out), Tensor(residual_out), Tensor(mean), Tensor(variance)
  infer_meta :
    func : FusedLayerNormInferMeta
  kernel :
    func : fused_bias_residual_layernorm
    data_type : x
  optional : bias, residual, norm_weight, norm_bias, residual_out
  support_dygraph_mode : true

- op : fused_dropout_add
  args : (Tensor x, Tensor y, Tensor seed_tensor, Scalar p, bool is_test, str mode, int seed = 0, bool fix_seed = false)
  optional : seed_tensor
  output : Tensor(out), Tensor(seed_offset)
  infer_meta :
    func : FusedDropoutAddInferMeta
    param : [x, y]
  kernel :
    func : fused_dropout_add
    data_type : x
  backward : fused_dropout_add_grad
  support_dygraph_mode : true

- op : fused_linear_param_grad_add
  args : (Tensor x, Tensor dout, Tensor dweight, Tensor dbias, bool multi_precision = true, bool has_bias = true)
  output : Tensor(dweight_out), Tensor(dbias_out)
  infer_meta :
    func : FusedLinearParamGradAddInferMeta
  optional : dweight, dbias
  kernel :
    func : fused_linear_param_grad_add
    data_type : dout
  support_dygraph_mode : true

- op : fused_multi_transformer_xpu
  args : (Tensor x, Tensor[] ln_scale, Tensor[] ln_bias, Tensor[] qkvw, Tensor[] qkvw_max, Tensor[] qkv_bias, Tensor[] out_linear_w, Tensor[] out_linear_wmax, Tensor[] out_linear_bias, Tensor[] ffn_ln_scale, Tensor[] ffn_ln_bias, Tensor[] ffn1_weight, Tensor[] ffn1_weight_max, Tensor[] ffn1_bias, Tensor[] ffn2_weight, Tensor[] ffn2_weight_max, Tensor[] ffn2_bias, Tensor[] cache_kv, Tensor[] pre_caches, Tensor rotary_pos_emb, Tensor time_step, Tensor seq_lengths, Tensor src_mask, Tensor gather_index, bool pre_layer_norm, int rotary_emb_dims, float epsilon, float dropout_rate, bool is_test, str dropout_implementation, str act_method, bool trans_qkvw, int ring_id, int gather_axis)
  output : Tensor(out), Tensor[](cache_kv_out){out_linear_w.size()}
  infer_meta :
    func : FusedMultiTransformerXpuInferMeta
  kernel :
    func : fused_multi_transformer_xpu
    data_type : x
  optional : cache_kv, pre_caches, rotary_pos_emb, time_step, seq_lengths, src_mask, gather_index

- op : fused_rotary_position_embedding
  args : (Tensor q, Tensor k, Tensor v, Tensor sin, Tensor cos)
  output : Tensor(out_q), Tensor(out_k), Tensor(out_v)
  infer_meta :
    func : FusedRopeInferMeta
  optional : k,v,sin,cos, out_k, out_v
  kernel :
    func : fused_rotary_position_embedding
    data_type : q
  backward : fused_rotary_position_embedding_grad
  support_dygraph_mode : true

- op : generate_sequence_xpu
  args : (Tensor x, DataType dtype)
  output : Tensor
  infer_meta :
    func : GenerateSequenceXPUInferMeta
  kernel :
    func : generate_sequence_xpu
    data_type : dtype

- op : multi_encoder_xpu
  args : (Tensor x, Tensor[] fc_weight, Tensor[] fc_weight_max, Tensor[] fc_bias, Tensor[] ln_scale, Tensor[] ln_bias, Tensor mask, Tensor seq_lod, Tensor max_seq_len, int layer_num, bool norm_before, int hidden_dim, int head_num, int size_per_head, int ffn_hidden_dim_scale, int act_type, int relative_type, int slice_idx)
  output : Tensor(out), Tensor(x_fp16), Tensor(out_fp16)
  infer_meta :
    func : MultiEncoderXPUInferMeta
  kernel :
    func : multi_encoder_xpu
    data_type : x
  optional : mask, seq_lod, max_seq_len, x_fp16, out_fp16

- op : yolo_box_xpu
  args : (Tensor x, Tensor x_max, Tensor grid, Tensor stride, Tensor anchor_grid, float offset)
  output : Tensor(out), Tensor(out_max)
  infer_meta :
    func : YoloBoxXPUInferMeta
  kernel :
    func : yolo_box_xpu
    data_type : x
  optional : x_max