fused_ops.yaml 7.5 KB
Newer Older
1
# This file is designed for fusion C++ forward operators, and manages the
# generated code for static mode and dynamic mode (when `support_dygraph_mode` is true).
# "support_dygraph_mode" is an extra configuration item in this file:
# if an operator has "support_dygraph_mode : true", it supports dygraph mode;
# otherwise the operator can only be used in static mode.
6

W
wz1qqx 已提交
7 8 9 10 11 12 13 14 15 16
# add_act_xpu: takes tensors x and y, each with a companion *_max tensor, plus
# an integer act_type; produces out and out_max.
# There is no `support_dygraph_mode` key, so per this file's header comment the
# op is usable in static mode only.
- op : add_act_xpu
  args : (Tensor x, Tensor x_max, Tensor y, Tensor y_max, int act_type)
  output : Tensor(out), Tensor(out_max)
  infer_meta :
    func : AddActXPUInferMeta
  kernel :
    func : add_act_xpu
    # Kernel data type is keyed on input `x`.
    data_type : x
  # Both *_max inputs are declared optional; x and y are not.
  optional : x_max, y_max

W
wz1qqx 已提交
17
# add_layernorm_xpu: takes tensors x, y, scale and bias plus the attributes
# begin_norm_axis (int) and epsilon (float); produces a single tensor `out`.
# No `optional` key is present, so none of the tensor inputs is declared optional.
# There is no `support_dygraph_mode` key, so per this file's header comment the
# op is usable in static mode only.
- op : add_layernorm_xpu
  args : (Tensor x, Tensor y, Tensor scale, Tensor bias, int begin_norm_axis, float epsilon)
  output : Tensor(out)
  infer_meta :
    func : AddLayernormXPUInferMeta
  kernel :
    func : add_layernorm_xpu
    # Kernel data type is keyed on input `x`.
    data_type : x

W
wz1qqx 已提交
26 27 28 29 30 31 32 33 34 35
# conv1d_xpu: takes x and filter (each with a *_max companion), bias, and a
# branch tensor with its own branch_max; produces out and out_max.
# Note that `dilations` and `strides` are plain `int` here while `paddings` is
# `int[]`.
# There is no `support_dygraph_mode` key, so per this file's header comment the
# op is usable in static mode only.
- op : conv1d_xpu
  args : (Tensor x, Tensor x_max, Tensor filter, Tensor filter_max, Tensor bias, Tensor branch, Tensor branch_max, int[] paddings, str padding_algorithm, int dilations, int strides, int groups, int act_type, float act_param)
  output : Tensor(out), Tensor(out_max)
  infer_meta :
    func : Conv1dXPUInferMeta
  kernel :
    func : conv1d_xpu
    # Kernel data type is keyed on input `x`.
    data_type : x
  # filter_max is NOT in this list, so it is a required input (unlike x_max).
  optional : bias, branch, branch_max, x_max

36 37 38 39 40 41 42 43 44 45
# conv2d_transpose_xpu: takes x and filter (each with a *_max companion) and
# bias; produces out and out_max.
# Unlike the other *_xpu entries visible in this file, `act_type` is a `str`
# here (not `int`), and activation/bias use are additionally signalled by the
# `with_act` / `has_bias` booleans.
# There is no `support_dygraph_mode` key, so per this file's header comment the
# op is usable in static mode only.
- op : conv2d_transpose_xpu
  args : (Tensor x, Tensor x_max, Tensor filter, Tensor filter_max, Tensor bias, int[] strides, int[] paddings, int[] output_padding, IntArray output_size, str padding_algorithm, int groups, int[] dilations, str data_format, bool has_bias, bool with_act, str act_type)
  output : Tensor(out), Tensor(out_max)
  infer_meta :
    func : Conv2dTransposeXPUInferMeta
  kernel :
    func : conv2d_transpose_xpu
    # Kernel data type is keyed on input `x`.
    data_type : x
  # filter_max is NOT in this list, so it is a required input (unlike x_max).
  optional : bias, x_max

46
# conv2d_xpu: takes x and filter (each with a *_max companion), bias, and a
# branch tensor with its own branch_max; produces out and out_max.
# `paddings`, `dilations` and `strides` are all `int[]`, and the output dtype is
# selectable through the `out_dtype` attribute.
# There is no `support_dygraph_mode` key, so per this file's header comment the
# op is usable in static mode only.
- op : conv2d_xpu
  args : (Tensor x, Tensor x_max, Tensor filter, Tensor filter_max, Tensor bias, Tensor branch, Tensor branch_max, int[] paddings, int[] dilations, int[] strides, str padding_algorithm, int groups, int act_type, float act_param, DataType out_dtype)
  output : Tensor(out), Tensor(out_max)
  infer_meta :
    func : Conv2dXPUInferMeta
  kernel :
    func : conv2d_xpu
    # Kernel data type is keyed on input `x`.
    data_type : x
  # filter_max is NOT in this list, so it is a required input (unlike x_max).
  optional : bias, branch, branch_max, x_max
55

56
# embedding_with_eltwise_add_xpu: takes tensor lists `ids` and `tables`, a
# `mask` tensor and an int64 `padding_idx`; produces out, seq_lod and
# max_seq_len.
# There is no `support_dygraph_mode` key, so per this file's header comment the
# op is usable in static mode only.
- op : embedding_with_eltwise_add_xpu
  args : (Tensor[] ids, Tensor[] tables, Tensor mask, int64_t padding_idx)
  output : Tensor(out), Tensor(seq_lod), Tensor(max_seq_len)
  infer_meta :
    func : EmbeddingWithEltwiseAddXPUInferMeta
    # Only the tensor inputs are listed; `padding_idx` is not passed to infer_meta.
    param : [ids, tables, mask]
  kernel :
    func : embedding_with_eltwise_add_xpu
    # Kernel data type is keyed on `tables`, not on `ids`.
    data_type : tables
  # The list mixes one input (mask) with two outputs (seq_lod, max_seq_len).
  optional : mask, seq_lod, max_seq_len
66

67 68 69 70 71 72 73 74 75
# fast_layernorm_xpu: takes tensors x, scale and bias plus the attributes
# begin_norm_axis (int) and epsilon (float); produces a single tensor `out`.
# No `optional` key is present, so none of the tensor inputs is declared optional.
# There is no `support_dygraph_mode` key, so per this file's header comment the
# op is usable in static mode only.
- op : fast_layernorm_xpu
  args : (Tensor x, Tensor scale, Tensor bias, int begin_norm_axis, float epsilon)
  output : Tensor(out)
  infer_meta :
    func : FastLayernormXPUInferMeta
  kernel :
    func : fast_layernorm_xpu
    # Kernel data type is keyed on input `x`.
    data_type : x

76 77 78 79 80 81 82 83 84
# fast_where_xpu: takes tensors condition, x and y (no attributes); produces a
# single tensor `out`.
# No `optional` key is present, so none of the tensor inputs is declared optional.
# There is no `support_dygraph_mode` key, so per this file's header comment the
# op is usable in static mode only.
- op : fast_where_xpu
  args : (Tensor condition, Tensor x, Tensor y)
  output : Tensor(out)
  infer_meta :
    func : FastWhereXPUInferMeta
  kernel :
    func : fast_where_xpu
    # Kernel data type is keyed on `x`, not on `condition`.
    data_type : x

85
# fc_xpu: takes x and w (each with a *_max companion) and bias, plus scalar
# attributes including act_type and a selectable `out_dtype`; produces out and
# out_max.
# There is no `support_dygraph_mode` key, so per this file's header comment the
# op is usable in static mode only.
- op : fc_xpu
  args : (Tensor x, Tensor x_max, Tensor w, Tensor w_max, Tensor bias, int in_num_col_dims, bool transpose_x, float alpha, float beta, int act_type, float act_alpha, DataType out_dtype)
  output : Tensor(out), Tensor(out_max)
  infer_meta :
    func : FcXPUInferMeta
  kernel :
    func : fc_xpu
    # Kernel data type is keyed on input `x`.
    data_type : x
  # w_max is NOT in this list, so it is a required input (unlike x_max).
  optional : bias, x_max

95
# fused_bias_act: takes x plus the tensors bias, dequant_scales, shift and
# smooth; produces a single tensor `out`.
# Every non-tensor attribute carries a default value in `args` (e.g.
# act_method = "gelu", quant_scale = -1).
- op : fused_bias_act
  args : (Tensor x, Tensor bias, Tensor dequant_scales, Tensor shift, Tensor smooth, str act_method = "gelu", str compute_dtype = "default", float quant_scale = -1, int quant_round_type = 1, float quant_max_bound = 127.0, float quant_min_bound = -127.0)
  output : Tensor(out)
  infer_meta :
    func : FusedBiasActInferMeta
  kernel :
    func : fused_bias_act
    # Kernel data type is keyed on input `x`.
    data_type : x
  # Every tensor input except x is declared optional.
  optional : bias, dequant_scales, shift, smooth
  # Per this file's header comment, this enables dygraph (dynamic) mode too.
  support_dygraph_mode : true

MarDino's avatar
MarDino 已提交
106 107 108 109 110 111 112 113 114 115 116
# fused_bias_residual_layernorm: takes x plus the tensors bias, residual,
# norm_weight and norm_bias, with normalization and quantization attributes;
# produces out, residual_out, mean and variance.
- op : fused_bias_residual_layernorm
  args : (Tensor x, Tensor bias, Tensor residual, Tensor norm_weight, Tensor norm_bias, float epsilon, float residual_alpha, int begin_norm_axis, float quant_scale, int quant_round_type, float quant_max_bound, float quant_min_bound)
  output : Tensor(out), Tensor(residual_out), Tensor(mean), Tensor(variance)
  infer_meta :
    # Note: the infer-meta function name does not mirror the op name.
    func : FusedLayerNormInferMeta
  kernel :
    func : fused_bias_residual_layernorm
    # Kernel data type is keyed on input `x`.
    data_type : x
  # Every tensor input except x is optional, as is the output residual_out.
  optional : bias, residual, norm_weight, norm_bias, residual_out
  # Per this file's header comment, this enables dygraph (dynamic) mode too.
  support_dygraph_mode : true

117
# fused_dropout_add: takes tensors x, y and seed_tensor plus dropout
# attributes (`seed` and `fix_seed` have defaults); produces out and
# seed_offset.
- op : fused_dropout_add
  args : (Tensor x, Tensor y, Tensor seed_tensor, Scalar p, bool is_test, str mode, int seed = 0, bool fix_seed = false)
  # Only seed_tensor is declared optional; x and y are required.
  optional : seed_tensor
  output : Tensor(out), Tensor(seed_offset)
  infer_meta :
    func : FusedDropoutAddInferMeta
    # Only x and y are passed to infer_meta.
    param : [x, y]
  kernel :
    func : fused_dropout_add
    # Kernel data type is keyed on input `x`.
    data_type : x
  # Name of the associated backward op entry (not defined in this file's visible part).
  backward : fused_dropout_add_grad
  # Per this file's header comment, this enables dygraph (dynamic) mode too.
  support_dygraph_mode : true

# fused_linear_param_grad_add: takes tensors x, dout, dweight and dbias plus
# the booleans multi_precision and has_bias (both default to true); produces
# dweight_out and dbias_out.
- op : fused_linear_param_grad_add
  args : (Tensor x, Tensor dout, Tensor dweight, Tensor dbias, bool multi_precision = true, bool has_bias = true)
  output : Tensor(dweight_out), Tensor(dbias_out)
  infer_meta :
    func : FusedLinearParamGradAddInferMeta
  # dweight and dbias are declared optional; x and dout are required.
  optional : dweight, dbias
  kernel :
    func : fused_linear_param_grad_add
    # Kernel data type is keyed on `dout`, not on `x`.
    data_type : dout
  # Per this file's header comment, this enables dygraph (dynamic) mode too.
  support_dygraph_mode : true

# fused_multi_transformer_xpu: takes x, a set of per-layer tensor lists
# (layer-norm, qkv, out-linear and ffn1/ffn2 parameters with their *_max
# companions), cache tensors and several auxiliary tensors, plus scalar
# attributes; produces `out` and the tensor list `cache_kv_out`.
# There is no `support_dygraph_mode` key, so per this file's header comment the
# op is usable in static mode only.
- op : fused_multi_transformer_xpu
  args : (Tensor x, Tensor[] ln_scale, Tensor[] ln_bias, Tensor[] qkvw, Tensor[] qkvw_max, Tensor[] qkv_bias, Tensor[] out_linear_w, Tensor[] out_linear_wmax, Tensor[] out_linear_bias, Tensor[] ffn_ln_scale, Tensor[] ffn_ln_bias, Tensor[] ffn1_weight, Tensor[] ffn1_weight_max, Tensor[] ffn1_bias, Tensor[] ffn2_weight, Tensor[] ffn2_weight_max, Tensor[] ffn2_bias, Tensor[] cache_kv, Tensor[] pre_caches, Tensor rotary_pos_emb, Tensor time_step, Tensor seq_lengths, Tensor src_mask, Tensor gather_index, bool pre_layer_norm, int rotary_emb_dims, float epsilon, float dropout_rate, bool is_test, str dropout_implementation, str act_method, bool trans_qkvw, int ring_id, int gather_axis)
  # The `{...}` suffix sizes the cache_kv_out list from out_linear_w.size().
  output : Tensor(out), Tensor[](cache_kv_out){out_linear_w.size()}
  infer_meta :
    func : FusedMultiTransformerXpuInferMeta
  kernel :
    func : fused_multi_transformer_xpu
    # Kernel data type is keyed on input `x`.
    data_type : x
  # The cache lists and all five trailing single-tensor inputs are optional.
  optional : cache_kv, pre_caches, rotary_pos_emb, time_step, seq_lengths, src_mask, gather_index
150

151
# fused_rotary_position_embedding: takes tensors q, k, v, sin and cos (no
# attributes); produces out_q, out_k and out_v.
- op : fused_rotary_position_embedding
  args : (Tensor q, Tensor k, Tensor v, Tensor sin, Tensor cos)
  output : Tensor(out_q), Tensor(out_k), Tensor(out_v)
  infer_meta :
    func : FusedRopeInferMeta
  # Only q and out_q are mandatory; every other input and output is optional.
  optional : k, v, sin, cos, out_k, out_v
  kernel :
    func : fused_rotary_position_embedding
    # Kernel data type is keyed on input `q`.
    data_type : q
  # Name of the associated backward op entry (not defined in this file's visible part).
  backward : fused_rotary_position_embedding_grad
  # Per this file's header comment, this enables dygraph (dynamic) mode too.
  support_dygraph_mode : true

163 164 165 166 167 168 169 170 171 172
# generate_sequence_xpu: takes tensor x and a `DataType dtype` attribute;
# produces one tensor.
# There is no `support_dygraph_mode` key, so per this file's header comment the
# op is usable in static mode only.
- op : generate_sequence_xpu
  args : (Tensor x, DataType dtype)
  # NOTE(review): this is the only entry visible here whose output has no
  # explicit name (others use e.g. `Tensor(out)`); presumably the generator
  # supplies a default name - confirm before relying on it.
  output : Tensor
  infer_meta :
    func : GenerateSequenceXPUInferMeta
  kernel :
    func : generate_sequence_xpu
    # Kernel data type is keyed on the `dtype` attribute rather than on a tensor input.
    data_type : dtype

# multi_encoder_xpu: takes x, per-layer tensor lists (fc_weight with its
# fc_weight_max companion, fc_bias, ln_scale, ln_bias), the tensors mask,
# seq_lod and max_seq_len, plus integer/boolean configuration attributes;
# produces out, x_fp16 and out_fp16.
# There is no `support_dygraph_mode` key, so per this file's header comment the
# op is usable in static mode only.
- op : multi_encoder_xpu
  args : (Tensor x, Tensor[] fc_weight, Tensor[] fc_weight_max, Tensor[] fc_bias, Tensor[] ln_scale, Tensor[] ln_bias, Tensor mask, Tensor seq_lod, Tensor max_seq_len, int layer_num, bool norm_before, int hidden_dim, int head_num, int size_per_head, int ffn_hidden_dim_scale, int act_type, int relative_type, int slice_idx)
  output : Tensor(out), Tensor(x_fp16), Tensor(out_fp16)
  infer_meta :
    func : MultiEncoderXPUInferMeta
  kernel :
    func : multi_encoder_xpu
    # Kernel data type is keyed on input `x`.
    data_type : x
  # Three inputs (mask, seq_lod, max_seq_len) and two outputs (x_fp16, out_fp16) are optional.
  optional : mask, seq_lod, max_seq_len, x_fp16, out_fp16
181 182 183 184 185 186 187 188 189 190

# yolo_box_xpu: takes tensors x (with its x_max companion), grid, stride and
# anchor_grid plus a float `offset`; produces out and out_max.
# There is no `support_dygraph_mode` key, so per this file's header comment the
# op is usable in static mode only.
- op : yolo_box_xpu
  args : (Tensor x, Tensor x_max, Tensor grid, Tensor stride, Tensor anchor_grid, float offset)
  output : Tensor(out), Tensor(out_max)
  infer_meta :
    func : YoloBoxXPUInferMeta
  kernel :
    func : yolo_box_xpu
    # Kernel data type is keyed on input `x`.
    data_type : x
  # Only x_max is declared optional; grid, stride and anchor_grid are required.
  optional : x_max