Unverified Commit 016cc56d authored by Feng Xing, committed by GitHub

transformer c files (#34706)

This PR adds the fused-transformer-related files that define the C++ interface: classes, functions, and supporting declarations.
Parent 0e28c8bb
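For context, a minimal usage sketch of the interface this PR introduces. Every dimension and buffer below is hypothetical, and the ComputeForward bodies are still empty stubs in this commit:

#include <vector>
#include "paddle/fluid/operators/fused/fused_transformer_op.h"

int main() {
  // Hypothetical configuration; every value here is illustrative only.
  paddle::operators::FusedTransformerEncoderLayer<float> layer(
      /*batch_size=*/8, /*max_seq_len=*/128, /*dim_embed=*/512,
      /*dim_feedforward=*/2048, /*num_head=*/8, /*dropout=*/0.1f,
      /*act_dropout=*/0.1f, /*attn_dropout=*/0.1f,
      /*act_method=*/"gelu", /*normalize_pre_or_post=*/true);
  std::vector<float> src(8 * 128 * 512), out(src.size());
  layer.ComputeForward(src.data(), out.data());  // no-op until kernels land
  return 0;
}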
@@ -15,7 +15,8 @@ register_operators(EXCLUDES
fusion_group_op
fusion_gru_op
fusion_lstm_op
fused_bn_add_activation_op)
fused_bn_add_activation_op
fused_transformer_op)
# fusion_gru_op does not have CUDA kernel
op_library(fusion_gru_op)
/* Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/fluid/operators/fused/fused_transformer_op.h"
#include <string>
namespace paddle {
namespace operators {
// constructor and init
template <typename T>
FusedTransformerEncoderLayer<T>::FusedTransformerEncoderLayer(
int batch_size_, int max_seq_len_, int dim_embed_, int dim_feedforward_,
int num_head_, float dropout_, float act_dropout_, float attn_dropout_,
std::string act_method_, bool normalize_pre_or_post_) {
// configurations
batch_size = batch_size_;
max_seq_len = max_seq_len_;
dim_embed = dim_embed_;
dim_feedforward = dim_feedforward_;
num_head = num_head_;
head_size = dim_embed_ / num_head;
dropout = dropout_;
act_dropout = act_dropout_;
attn_dropout = attn_dropout_;
act_method = act_method_;
normalize_pre_or_post = normalize_pre_or_post_;
// init attn
fused_attn =
new FusedAttention<T>(batch_size, max_seq_len, dim_embed, num_head,
dropout, attn_dropout, normalize_pre_or_post);
// init ffn
fused_ffn =
new FusedFFN<T>(batch_size, max_seq_len, dim_embed, dim_feedforward_,
act_dropout, act_method, normalize_pre_or_post);
}
// destructor
template <typename T>
FusedTransformerEncoderLayer<T>::~FusedTransformerEncoderLayer() {
delete fused_attn;
delete fused_ffn;
}
// compute forward
template <typename T>
void FusedTransformerEncoderLayer<T>::ComputeForward(T* src, T* output) {
T* output_attn = nullptr;  // TODO: allocate a [batch_size, max_seq_len, dim_embed] buffer for the attention output
fused_attn->ComputeForward(src, output_attn);
fused_ffn->ComputeForward(output_attn, output);
}
// compute backward
template <typename T>
void FusedTransformerEncoderLayer<T>::ComputeBackward() {}
// constructor and init
template <typename T>
FusedAttention<T>::FusedAttention(int batch_size_, int max_seq_len_,
int dim_embed_, int num_head_, float dropout_,
float attn_dropout_,
bool normalize_pre_or_post_) {
// configurations
batch_size = batch_size_;
max_seq_len = max_seq_len_;
dim_embed = dim_embed_;
num_head = num_head_;
head_size = dim_embed_ / num_head;
dropout = dropout_;
attn_dropout = attn_dropout_;
normalize_pre_or_post = normalize_pre_or_post_;
// init fmha (is_test/seed/seqlen/cu_seqlen are placeholders until wired up)
fmha = new FusedMHA<T>(batch_size, max_seq_len, dim_embed, num_head, dropout,
                       /*is_test=*/false, /*seed=*/0,
                       /*seqlen=*/nullptr, /*cu_seqlen=*/nullptr);
}
// compute forward
template <typename T>
void FusedAttention<T>::ComputeForward(T* src, T* output) {}
template <typename T>
FusedAttention<T>::~FusedAttention() {
delete fmha;
}
// compute backward
template <typename T>
void FusedAttention<T>::ComputeBackward() {}
// constructor and init
template <typename T>
FusedFFN<T>::FusedFFN(int batch_size_, int max_seq_len_, int dim_embed_,
int dim_feedforward_, float act_dropout_,
std::string act_method_, bool normalize_pre_or_post_) {
batch_size = batch_size_;
max_seq_len = max_seq_len_;
dim_embed = dim_embed_;
dim_feedforward = dim_feedforward_;
act_dropout = act_dropout_;
act_method = act_method_;
normalize_pre_or_post = normalize_pre_or_post_;
}
template <typename T>
FusedFFN<T>::~FusedFFN() {}
// compute forward
template <typename T>
void FusedFFN<T>::ComputeForward(T* src, T* output) {}
// compute backward
template <typename T>
void FusedFFN<T>::ComputeBackward() {}
// init
template <typename T>
FusedMHA<T>::FusedMHA(int batch_size_, int max_seq_len_, int dim_embed_,
int num_head_, float dropout_, bool is_test_,
uint64_t seed_, uint64_t* seqlen_, uint64_t* cu_seqlen_) {
batch_size = batch_size_;
max_seq_len = max_seq_len_;
dim_embed = dim_embed_;
num_head = num_head_;
head_size = dim_embed_ / num_head;
dropout = dropout_;
is_test = is_test_;
seed = seed_;
seqlen = seqlen_;
cu_seqlen = cu_seqlen_;
}
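// Destructor definition (added sketch): ~FusedMHA() is declared in the header
// but was missing here, which would fail at link time once ~FusedAttention()
// deletes fmha.
template <typename T>
FusedMHA<T>::~FusedMHA() {}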
// compute forward
template <typename T>
void FusedMHA<T>::ComputeForward(T* output, T* softmax_mask) {}
// compute backward
template <typename T>
void FusedMHA<T>::ComputeBackward(const T* grad_output, T* softmax_mask,
T* grad_x) {}
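// Sketch (not in the original commit): because these templates are defined in
// a .cc file, explicit instantiations along these lines would be needed for
// the symbols to resolve at link time; the chosen types are an assumption.
// template class FusedMHA<float>;
// template class FusedAttention<float>;
// template class FusedFFN<float>;
// template class FusedTransformerEncoderLayer<float>;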
}  // namespace operators
}  // namespace paddle
\ No newline at end of file
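The FusedMHA constructor above takes seqlen and cu_seqlen; in variable-length attention interfaces, cu_seqlen conventionally holds the running prefix sums of the per-sequence lengths. A minimal sketch of building it (the helper name is hypothetical, not part of this PR):

#include <cstdint>
#include <vector>

// cu_seqlen has batch_size + 1 entries; cu_seqlen[i] is the number of tokens
// in sequences 0..i-1, so cu_seqlen[batch_size] is the total token count.
std::vector<uint64_t> BuildCuSeqlen(const std::vector<uint64_t>& seqlen) {
  std::vector<uint64_t> cu(seqlen.size() + 1, 0);
  for (size_t i = 0; i < seqlen.size(); ++i) cu[i + 1] = cu[i] + seqlen[i];
  return cu;
}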
/* Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
\ No newline at end of file
/* Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include <cstdint>
#include <string>
namespace paddle {
namespace operators {
template <typename T>
class FusedMHA {
public:
FusedMHA(int, int, int, int, float, bool, uint64_t, uint64_t*, uint64_t*);
~FusedMHA();
void ComputeForward(T*, T*);
void ComputeBackward(const T*, T*, T*);
private:
int batch_size;
int max_seq_len;
int dim_embed;
int num_head;
int head_size;
float dropout;
bool is_test;
uint64_t seed;
uint64_t* seqlen;
uint64_t* cu_seqlen;
};
template <typename T>
class FusedAttention {
public:
FusedAttention(int, int, int, int, float, float, bool);
~FusedAttention();
void ComputeForward(T*, T*);
void ComputeBackward();
private:
FusedMHA<T>* fmha; // fused multihead attention
int batch_size;
int max_seq_len;
int dim_embed;
int num_head;
int head_size;
float dropout;
float attn_dropout;
bool normalize_pre_or_post;
// weights and bias used in attention
T* fattn_qkv_w;
T* fattn_qkv_b;
T* fattn_o_w;
T* fattn_o_b;
T* fattn_n_w;
T* fattn_n_b;
T* fattn_norm_w;
T* fattn_norm_b;
T* fattn_grad_qkv_w;
T* fattn_grad_qkv_b;
T* fattn_grad_o_w;
T* fattn_grad_o_b;
T* fattn_grad_n_w;
T* fattn_grad_n_b;
T* fattn_grad_norm_w;
T* fattn_grad_norm_b;
};
template <typename T>
class FusedFFN {
public:
FusedFFN(int, int, int, int, float, std::string, bool);
~FusedFFN();
void ComputeForward(T*, T*);
void ComputeBackward();
private:
int batch_size;
int max_seq_len;
int dim_embed;
int dim_feedforward;
float attn_dropout;
float act_dropout;
bool normalize_pre_or_post;
std::string act_method;
// weights and bias used in ffn
T* fffn_inter_w;
T* fffn_inter_b;
T* fffn_output_w;
T* fffn_output_b;
T* fffn_grad_inter_w;
T* fffn_grad_inter_b;
T* fffn_grad_output_w;
T* fffn_grad_output_b;
};
template <typename T>
class FusedTransformerEncoderLayer {
public:
FusedTransformerEncoderLayer(int, int, int, int, int, float, float, float,
std::string, bool);
~FusedTransformerEncoderLayer();
void ComputeForward(T* src, T* output);
void ComputeBackward();
private:
FusedAttention<T>* fused_attn;
FusedFFN<T>* fused_ffn;
int batch_size;
int max_seq_len;
int dim_embed;
int dim_feedforward;
int num_head;
int head_size;
float dropout;
float attn_dropout;
float act_dropout;
bool normalize_pre_or_post;
std::string act_method;
};
}  // namespace operators
}  // namespace paddle
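The normalize_pre_or_post flag threaded through these classes conventionally selects where layer normalization sits relative to each sublayer. A schematic sketch under that assumption (Add and ApplySublayer are illustrative helpers, not part of this PR):

#include <vector>

using Vec = std::vector<float>;

static Vec Add(const Vec& a, const Vec& b) {
  Vec out(a.size());
  for (size_t i = 0; i < a.size(); ++i) out[i] = a[i] + b[i];
  return out;
}

// pre-norm:  y = x + Sublayer(LayerNorm(x))
// post-norm: y = LayerNorm(x + Sublayer(x))
template <typename F, typename Norm>
Vec ApplySublayer(const Vec& x, F sublayer, Norm norm, bool pre_norm) {
  return pre_norm ? Add(x, sublayer(norm(x))) : norm(Add(x, sublayer(x)));
}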