fused_ops.yaml 3.7 KB
Newer Older
1
# This file is designed for fusion C++ farward operators, which manages the
Z
zyfncg 已提交
2
# generated code for static mode and dynamic mode (when `support_dygraph_mode` is true).
Z
zyfncg 已提交
3
# "support_dygraph_mode" is an extra configuration item in this file,
Z
zyfncg 已提交
4 5
# if one operator have "support_dygraph_mode : true", it supports dygraph mode,
# otherwise the operator only could be used in static mode.
6

7
- op : conv2d_xpu
8 9
  args : (Tensor x, Tensor x_max, Tensor filter, Tensor filter_max, Tensor bias, Tensor branch, int[] paddings, int[] dilations, int[] strides, str padding_algorithm, int groups, bool has_bias, bool has_branch, int act_type, float act_param)
  output : Tensor(out), Tensor(out_max)
10 11 12 13
  infer_meta :
    func : Conv2dXPUInferMeta
  kernel :
    func : conv2d_xpu
14 15
    data_type : x
  optional : bias, branch, x_max
16

17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85
- op : embedding_with_eltwise_add_xpu
  args : (Tensor[] ids, Tensor[] tables, int64_t padding_idx)
  output: Tensor
  infer_meta :
    func: EmbeddingWithEltwiseAddXPUInferMeta
  kernel:
    func: embedding_with_eltwise_add_xpu
    data_type: tables

- op : fc_xpu
  args : (Tensor x, Tensor x_max, Tensor w, Tensor w_max, Tensor bias, int in_num_col_dims, bool transpose_x, float alpha, float beta, int act_type, float act_alpha)
  output : Tensor(out), Tensor(out_max)
  infer_meta :
    func : FcXPUInferMeta
  kernel :
    func : fc_xpu
    data_type : x
  optional : bias, x_max

- op : fused_dropout_add
  args : (Tensor x, Tensor y, Scalar p, bool is_test, str mode, int seed, bool fix_seed)
  output : Tensor(out), Tensor(seed_offset)
  infer_meta :
    func : FusedDropoutAddInferMeta
  kernel :
    func : fused_dropout_add
    data_type : x
  backward : fused_dropout_add_grad
  support_dygraph_mode : true

- op : fused_linear_param_grad_add
  args : (Tensor x, Tensor dout, Tensor dweight, Tensor dbias, bool multi_precision = true)
  output : Tensor(dweight_out), Tensor(dbias_out)
  infer_meta:
    func : FusedLinearParamGradAddInferMeta
  optional : dweight, dbias
  kernel:
    func : fused_linear_param_grad_add
    data_type : dout
  support_dygraph_mode : true

- op : fused_multi_transformer_xpu
  args : (Tensor x, Tensor[] ln_scale, Tensor[] ln_bias, Tensor[] qkvw, Tensor[] qkvw_max, Tensor[] qkv_bias, Tensor[] out_linear_w, Tensor[] out_linear_wmax, Tensor[] out_linear_bias, Tensor[] ffn_ln_scale, Tensor[] ffn_ln_bias, Tensor[] ffn1_weight, Tensor[] ffn1_weight_max, Tensor[] ffn1_bias, Tensor[] ffn2_weight, Tensor[] ffn2_weight_max, Tensor[] ffn2_bias, Tensor[] cache_kv, Tensor[] pre_caches, Tensor rotary_pos_emb, Tensor time_step, Tensor seq_lengths, Tensor src_mask, bool pre_layer_norm, int rotary_emb_dims, float epsilon, float dropout_rate, bool is_test, str dropout_implementation, str act_method, bool trans_qkvw, int ring_id)
  output : Tensor(out), Tensor[](cache_kv_out){out_linear_w.size()}
  infer_meta :
    func : FusedMultiTransformerXpuInferMeta
  kernel :
    func : fused_multi_transformer_xpu
    data_type : x
  optional : cache_kv, pre_caches, rotary_pos_emb, time_step, seq_lengths, src_mask

- op : generate_sequence_xpu
  args : (Tensor x, DataType dtype)
  output : Tensor
  infer_meta :
    func : GenerateSequenceXPUInferMeta
  kernel :
    func : generate_sequence_xpu
    data_type : dtype

- op : multi_encoder_xpu
  args : (Tensor x, Tensor[] fc_weight, Tensor[] fc_weight_max, Tensor[] fc_bias, Tensor[] ln_scale, Tensor[] ln_bias, Tensor mask, int layer_num, bool norm_before, int hidden_dim, int head_num, int size_per_head, int ffn_hidden_dim_scale, int act_type, int relative_type, int slice_idx)
  output : Tensor(out), Tensor(x_fp16), Tensor(out_fp16)
  infer_meta :
    func : MultiEncoderXPUInferMeta
  kernel :
    func : multi_encoder_xpu
    data_type : x
  optional : mask, x_fp16, out_fp16