Unverified commit 6d31dc93, authored by Tomasz Socha, committed by GitHub

Enable Bert on bfloat16 datatype (#43455)

* Remove output arguments from functions.
Replace pointers with references

* Name used bool flags

* Reorder functions

* Enable bfloat16 data type

* Give declarations some space

* Style

* Style
Parent 99ea0a9c
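The headline change is user-visible through one switch: `AnalysisConfig::EnableMkldnnBfloat16()`, stacked on the usual MKLDNN setup. A minimal usage sketch (the include path and model directory are placeholders, not taken from this commit):

    #include "paddle_inference_api.h"  // include path varies by install layout

    int main() {
      paddle::AnalysisConfig config;
      config.SetModel("/path/to/bert_model");  // placeholder model directory
      config.EnableMKLDNN();          // bf16 inference rides on the MKLDNN path
      config.EnableMkldnnBfloat16();  // the switch this commit enables for BERT
      auto predictor =
          paddle::CreatePaddlePredictor<paddle::AnalysisConfig>(config);
      return 0;
    }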
@@ -145,7 +145,7 @@ struct PD_INFER_DECL AnalysisConfig {
   ///
   /// \param[in] other another AnalysisConfig
   ///
-  explicit AnalysisConfig(const AnalysisConfig& other);
+  AnalysisConfig(const AnalysisConfig& other);
   ///
   /// \brief Construct a new AnalysisConfig from a no-combined model.
   ///
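Note on the `explicit` removal above: an explicit copy constructor is excluded from copy-initialization, which also makes returning an `AnalysisConfig` by value ill-formed (the user-declared copy constructor suppresses the implicit move constructor). Dropping it is likely what lets the reworked test helpers below return configs instead of filling output pointers. A small sketch of what now compiles (hypothetical snippet, assuming the usual headers):

    AnalysisConfig MakeConfig() {  // hypothetical helper
      AnalysisConfig config;
      return config;  // needs a non-explicit copy ctor; no move ctor is declared
    }

    AnalysisConfig a;
    AnalysisConfig b = a;  // copy-initialization, previously ill-formed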
@@ -20,165 +20,44 @@ namespace inference {
 
 using paddle::PaddleTensor;
 
-template <typename T>
-void GetValueFromStream(std::stringstream *ss, T *t) {
-  (*ss) >> (*t);
-}
-
-template <>
-void GetValueFromStream<std::string>(std::stringstream *ss, std::string *t) {
-  *t = ss->str();
-}
-
-// Split string to vector
-template <typename T>
-void Split(const std::string &line, char sep, std::vector<T> *v) {
-  std::stringstream ss;
-  T t;
-  for (auto c : line) {
-    if (c != sep) {
-      ss << c;
-    } else {
-      GetValueFromStream<T>(&ss, &t);
-      v->push_back(std::move(t));
-      ss.str({});
-      ss.clear();
-    }
-  }
-  if (!ss.str().empty()) {
-    GetValueFromStream<T>(&ss, &t);
-    v->push_back(std::move(t));
-    ss.str({});
-    ss.clear();
-  }
-}
-
-// Parse tensor from string
-template <typename T>
-bool ParseTensor(const std::string &field, paddle::PaddleTensor *tensor) {
-  std::vector<std::string> data;
-  Split(field, ':', &data);
-  if (data.size() < 2) return false;
-
-  std::string shape_str = data[0];
-  std::vector<int> shape;
-  Split(shape_str, ' ', &shape);
-
-  std::string mat_str = data[1];
-  std::vector<T> mat;
-  Split(mat_str, ' ', &mat);
-
-  tensor->shape = shape;
-  auto size =
-      std::accumulate(shape.begin(), shape.end(), 1, std::multiplies<int>()) *
-      sizeof(T);
-  tensor->data.Resize(size);
-  std::copy(mat.begin(), mat.end(), static_cast<T *>(tensor->data.data()));
-  tensor->dtype = GetPaddleDType<T>();
-
-  return true;
-}
-
-// Parse input tensors from string
-bool ParseLine(const std::string &line,
-               std::vector<paddle::PaddleTensor> *tensors) {
-  std::vector<std::string> fields;
-  Split(line, ';', &fields);
-  if (fields.size() < 5) return false;
-
-  tensors->clear();
-  tensors->reserve(5);
-
-  int i = 0;
-  // src_id
-  paddle::PaddleTensor src_id;
-  ParseTensor<int64_t>(fields[i++], &src_id);
-  tensors->push_back(src_id);
-
-  // pos_id
-  paddle::PaddleTensor pos_id;
-  ParseTensor<int64_t>(fields[i++], &pos_id);
-  tensors->push_back(pos_id);
-
-  // segment_id
-  paddle::PaddleTensor segment_id;
-  ParseTensor<int64_t>(fields[i++], &segment_id);
-  tensors->push_back(segment_id);
-
-  // self_attention_bias
-  paddle::PaddleTensor self_attention_bias;
-  ParseTensor<float>(fields[i++], &self_attention_bias);
-  tensors->push_back(self_attention_bias);
-
-  // next_segment_index
-  paddle::PaddleTensor next_segment_index;
-  ParseTensor<int64_t>(fields[i++], &next_segment_index);
-  tensors->push_back(next_segment_index);
-
-  return true;
-}
-
-bool LoadInputData(std::vector<std::vector<paddle::PaddleTensor>> *inputs) {
-  if (FLAGS_infer_data.empty()) {
-    LOG(ERROR) << "please set input data path";
-    return false;
-  }
-
-  std::ifstream fin(FLAGS_infer_data);
-  std::string line;
-  int sample = 0;
-
-  // The unit-test dataset only have 10 samples, each sample have 5 feeds.
-  while (std::getline(fin, line)) {
-    std::vector<paddle::PaddleTensor> feed_data;
-    ParseLine(line, &feed_data);
-    inputs->push_back(std::move(feed_data));
-    sample++;
-    if (!FLAGS_test_all_data && sample == FLAGS_batch_size) break;
-  }
-  LOG(INFO) << "number of samples: " << sample;
-
-  return true;
-}
-
-void SetConfig(AnalysisConfig *config) {
-  config->SetModel(FLAGS_infer_model);
-  config->DisableFCPadding();
-}
-
-void profile(bool use_mkldnn = false) {
-  AnalysisConfig config;
-  SetConfig(&config);
-
-  if (use_mkldnn) {
-    config.EnableMKLDNN();
-    config.pass_builder()->AppendPass("fc_mkldnn_pass");
-    config.pass_builder()->AppendPass("fc_act_mkldnn_fuse_pass");
-    config.pass_builder()->AppendPass("fc_elementwise_add_mkldnn_fuse_pass");
-  }
-
-  std::vector<std::vector<PaddleTensor>> outputs;
-  std::vector<std::vector<PaddleTensor>> inputs;
-  LoadInputData(&inputs);
-  TestPrediction(reinterpret_cast<const PaddlePredictor::Config *>(&config),
-                 inputs,
-                 &outputs,
-                 FLAGS_num_threads);
-}
-
-TEST(Analyzer_bert, profile) { profile(); }
-#ifdef PADDLE_WITH_MKLDNN
-TEST(Analyzer_bert, profile_mkldnn) { profile(true); }
+void profile(bool use_mkldnn = false, bool use_bfloat16 = false);
+std::vector<std::vector<paddle::PaddleTensor>> LoadInputData();
+void CompareNativeAndAnalysisWrapper(bool use_mkldnn = false);
+std::vector<paddle::PaddleTensor> ParseInputStreamToVector(
+    const std::string &line);
+AnalysisConfig SetConfig(bool use_mkldnn = false, bool use_bfloat16 = false);
+
+template <typename T>
+paddle::PaddleTensor ParseTensor(const std::string &field);
+
+template <typename T>
+std::vector<T> Split(const std::string &line, char separator);
+
+template <typename T>
+T GetValueFromStream(std::stringstream &ss);
+
+template <>
+std::string GetValueFromStream<std::string>(std::stringstream &ss);
+
+TEST(Analyzer_bert, profile) { profile(); }
+
+#ifdef PADDLE_WITH_MKLDNN
+TEST(Analyzer_bert, profile_mkldnn) {
+  auto use_mkldnn = true;
+  profile(use_mkldnn);
+}
+
+TEST(Analyzer_bert, profile_mkldnn_bf16) {
+  auto use_mkldnn = true;
+  auto use_bfloat16 = true;
+
+  profile(use_mkldnn, use_bfloat16);
+}
 #endif
 
 // Check the fuse status
 TEST(Analyzer_bert, fuse_statis) {
-  AnalysisConfig cfg;
-  SetConfig(&cfg);
+  auto cfg(SetConfig());
+
   int num_ops;
   auto predictor = CreatePaddlePredictor<AnalysisConfig>(cfg);
   auto fuse_statis = GetFuseStatis(
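Two conventions from the commit message are visible in this hunk: helpers that used to fill output pointers now return by value, and bare boolean literals at call sites are replaced with named flags. The before/after in miniature, taken from the lines above:

    // Before: output parameter and an unlabeled boolean literal.
    AnalysisConfig config;
    SetConfig(&config);
    profile(true);

    // After: value-returning helper and a named flag.
    auto use_mkldnn = true;
    auto config(SetConfig(use_mkldnn));
    profile(use_mkldnn);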
@@ -186,39 +65,25 @@ TEST(Analyzer_bert, fuse_statis) {
   LOG(INFO) << "num_ops: " << num_ops;
 }
 
-// Compare result of NativeConfig and AnalysisConfig
-void compare(bool use_mkldnn = false) {
-  AnalysisConfig cfg;
-  SetConfig(&cfg);
-  if (use_mkldnn) {
-    cfg.EnableMKLDNN();
-  }
-
-  std::vector<std::vector<PaddleTensor>> inputs;
-  LoadInputData(&inputs);
-  CompareNativeAndAnalysis(
-      reinterpret_cast<const PaddlePredictor::Config *>(&cfg), inputs);
-}
-
-TEST(Analyzer_bert, compare) { compare(); }
+TEST(Analyzer_bert, compare) { CompareNativeAndAnalysisWrapper(); }
+
 #ifdef PADDLE_WITH_MKLDNN
-TEST(Analyzer_bert, compare_mkldnn) { compare(true /* use_mkldnn */); }
+TEST(Analyzer_bert, compare_mkldnn) {
+  auto use_mkldnn = true;
+  CompareNativeAndAnalysisWrapper(use_mkldnn);
+}
 #endif
 
 // Compare Deterministic result
 TEST(Analyzer_bert, compare_determine) {
-  AnalysisConfig cfg;
-  SetConfig(&cfg);
+  auto cfg(SetConfig());
 
-  std::vector<std::vector<PaddleTensor>> inputs;
-  LoadInputData(&inputs);
+  auto inputs = LoadInputData();
   CompareDeterministic(reinterpret_cast<const PaddlePredictor::Config *>(&cfg),
                        inputs);
 }
 
 TEST(Analyzer_bert, transfer_scope_cache) {
-  AnalysisConfig config;
-  SetConfig(&config);
+  auto config(SetConfig());
 
   std::vector<PaddleTensor> input, output;
   auto predictor = CreatePaddlePredictor<AnalysisConfig>(config);
@@ -236,7 +101,7 @@ TEST(Analyzer_bert, transfer_scope_cache) {
   for (int i = 0; i < threads_num; i++) {
     threads.emplace_back([&, i]() {
       std::getline(fin, line);
-      ParseLine(line, &input);
+      input = ParseInputStreamToVector(line);
       predictor->Run(input, &output, FLAGS_batch_size);
       global_transfer_scope_cache.insert(
           &paddle::framework::global_transfer_scope_cache());
@@ -262,5 +127,141 @@ TEST(Analyzer_bert, transfer_scope_cache) {
           "The size of data cache is not equal to thread number."));
 }
 
+void profile(bool use_mkldnn, bool use_bfloat16) {
+  auto config(SetConfig(use_mkldnn, use_bfloat16));
+
+  std::vector<std::vector<PaddleTensor>> outputs;
+  auto inputs = LoadInputData();
+  TestPrediction(reinterpret_cast<const PaddlePredictor::Config *>(&config),
+                 inputs,
+                 &outputs,
+                 FLAGS_num_threads);
+}
+
+std::vector<std::vector<paddle::PaddleTensor>> LoadInputData() {
+  if (FLAGS_infer_data.empty()) {
+    LOG(ERROR) << "please set input data path";
+    throw "missing input data path";
+  }
+
+  std::ifstream fin(FLAGS_infer_data);
+  std::string line;
+  int sample = 0;
+
+  std::vector<std::vector<paddle::PaddleTensor>> inputs;
+
+  // The unit-test dataset only have 10 samples, each sample have 5 feeds.
+  while (std::getline(fin, line)) {
+    inputs.push_back(ParseInputStreamToVector(line));
+    sample++;
+    if (!FLAGS_test_all_data && sample == FLAGS_batch_size) break;
+  }
+  LOG(INFO) << "number of samples: " << sample;
+
+  return inputs;
+}
+
+void CompareNativeAndAnalysisWrapper(bool use_mkldnn) {
+  auto cfg(SetConfig(use_mkldnn));
+  auto inputs = LoadInputData();
+  CompareNativeAndAnalysis(
+      reinterpret_cast<const PaddlePredictor::Config *>(&cfg), inputs);
+}
+
+std::vector<paddle::PaddleTensor> ParseInputStreamToVector(
+    const std::string &line) {
+  const auto fields = Split<std::string>(line, ';');
+
+  if (fields.size() < 5) throw "invalid input line";
+
+  std::vector<paddle::PaddleTensor> tensors;
+  tensors.reserve(5);
+
+  const std::size_t src_id = 0;
+  const std::size_t pos_id = 1;
+  const std::size_t segment_id = 2;
+  const std::size_t self_attention_bias = 3;
+  const std::size_t next_segment_index = 4;
+
+  tensors.push_back(ParseTensor<int64_t>(fields[src_id]));
+  tensors.push_back(ParseTensor<int64_t>(fields[pos_id]));
+  tensors.push_back(ParseTensor<int64_t>(fields[segment_id]));
+  tensors.push_back(ParseTensor<float>(fields[self_attention_bias]));
+  tensors.push_back(ParseTensor<int64_t>(fields[next_segment_index]));
+
+  return tensors;
+}
+
+AnalysisConfig SetConfig(bool use_mkldnn, bool use_bfloat16) {
+  AnalysisConfig config;
+  config.SetModel(FLAGS_infer_model);
+  config.DisableFCPadding();
+
+  if (use_mkldnn) {
+    config.EnableMKLDNN();
+    config.pass_builder()->AppendPass("fc_mkldnn_pass");
+    config.pass_builder()->AppendPass("fc_act_mkldnn_fuse_pass");
+    config.pass_builder()->AppendPass("fc_elementwise_add_mkldnn_fuse_pass");
+  }
+
+  if (use_bfloat16) config.EnableMkldnnBfloat16();
+
+  return config;
+}
+
+template <typename T>
+paddle::PaddleTensor ParseTensor(const std::string &field) {
+  const auto data = Split<std::string>(field, ':');
+  if (data.size() < 2) throw "invalid data field";
+
+  std::string shape_str = data[0];
+  const auto shape = Split<int>(shape_str, ' ');
+  paddle::PaddleTensor tensor;
+  tensor.shape = shape;
+
+  auto size =
+      std::accumulate(shape.begin(), shape.end(), 1, std::multiplies<int>()) *
+      sizeof(T);
+  tensor.data.Resize(size);
+
+  std::string mat_str = data[1];
+  const auto mat = Split<T>(mat_str, ' ');
+  std::copy(mat.cbegin(), mat.cend(), static_cast<T *>(tensor.data.data()));
+
+  tensor.dtype = GetPaddleDType<T>();
+
+  return tensor;
+}
+
+template <typename T>
+std::vector<T> Split(const std::string &line, char separator) {
+  std::vector<T> result;
+
+  std::stringstream ss;
+  for (auto c : line) {
+    if (c != separator) {
+      ss << c;
+    } else {
+      result.emplace_back(GetValueFromStream<T>(ss));
+      ss.str({});
+      ss.clear();
+    }
+  }
+
+  auto ss_is_not_empty = !ss.str().empty();
+  if (ss_is_not_empty) result.emplace_back(GetValueFromStream<T>(ss));
+
+  return result;
+}
+
+template <typename T>
+T GetValueFromStream(std::stringstream &ss) {
+  T result;
+  ss >> result;
+  return result;
+}
+
+template <>
+std::string GetValueFromStream<std::string>(std::stringstream &ss) {
+  return ss.str();
+}
+
 }  // namespace inference
 }  // namespace paddle
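For readers reconstructing the test data layout from `ParseInputStreamToVector` and `ParseTensor`: each input line holds five `;`-separated fields, and each field is `shape:values` with space-separated entries. A self-contained sketch of the parsing helpers (reproduced from the patch so the snippet compiles on its own; the sample field is illustrative, not from the real dataset):

    #include <cstdint>
    #include <iostream>
    #include <sstream>
    #include <string>
    #include <vector>

    template <typename T>
    T GetValueFromStream(std::stringstream &ss) {
      T result;
      ss >> result;
      return result;
    }

    // Whole-buffer specialization, so "2 3" stays one token when split on ':'.
    template <>
    std::string GetValueFromStream<std::string>(std::stringstream &ss) {
      return ss.str();
    }

    template <typename T>
    std::vector<T> Split(const std::string &line, char separator) {
      std::vector<T> result;
      std::stringstream ss;
      for (auto c : line) {
        if (c != separator) {
          ss << c;
        } else {
          result.emplace_back(GetValueFromStream<T>(ss));
          ss.str({});
          ss.clear();
        }
      }
      if (!ss.str().empty()) result.emplace_back(GetValueFromStream<T>(ss));
      return result;
    }

    int main() {
      // One feed field describing a 2x3 int64 tensor.
      const std::string field = "2 3:1 2 3 4 5 6";
      const auto parts = Split<std::string>(field, ':');       // {"2 3", "1 2 3 4 5 6"}
      const auto shape = Split<int>(parts[0], ' ');            // {2, 3}
      const auto values = Split<std::int64_t>(parts[1], ' ');  // {1, 2, 3, 4, 5, 6}
      std::cout << shape.size() << " dims, " << values.size() << " values\n";
      return 0;
    }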