From 6d31dc937704380efe2dee97716c3da47b7060f1 Mon Sep 17 00:00:00 2001
From: Tomasz Socha
Date: Tue, 28 Jun 2022 11:01:57 +0200
Subject: [PATCH] Enable Bert on bfloat16 datatype (#43455)

* Remove output arguments from functions. Replace pointers with references

* Name used bool flags

* Reorder functions

* Enable bfloat16 data type

* Give declarations some space

* Style

* Style
---
 .../inference/api/paddle_analysis_config.h    |   2 +-
 .../tests/api/analyzer_bert_tester.cc         | 333 +++++++++---------
 2 files changed, 168 insertions(+), 167 deletions(-)

diff --git a/paddle/fluid/inference/api/paddle_analysis_config.h b/paddle/fluid/inference/api/paddle_analysis_config.h
index 74a57cbc26..34e18a407e 100644
--- a/paddle/fluid/inference/api/paddle_analysis_config.h
+++ b/paddle/fluid/inference/api/paddle_analysis_config.h
@@ -145,7 +145,7 @@ struct PD_INFER_DECL AnalysisConfig {
   ///
   /// \param[in] other another AnalysisConfig
   ///
-  explicit AnalysisConfig(const AnalysisConfig& other);
+  AnalysisConfig(const AnalysisConfig& other);
   ///
   /// \brief Construct a new AnalysisConfig from a no-combined model.
   ///
diff --git a/paddle/fluid/inference/tests/api/analyzer_bert_tester.cc b/paddle/fluid/inference/tests/api/analyzer_bert_tester.cc
index 5b33784997..727d2576e5 100644
--- a/paddle/fluid/inference/tests/api/analyzer_bert_tester.cc
+++ b/paddle/fluid/inference/tests/api/analyzer_bert_tester.cc
@@ -20,165 +20,44 @@ namespace inference {
 
 using paddle::PaddleTensor;
 
-template <typename T>
-void GetValueFromStream(std::stringstream *ss, T *t) {
-  (*ss) >> (*t);
-}
+void profile(bool use_mkldnn = false, bool use_bfloat16 = false);
+std::vector<std::vector<paddle::PaddleTensor>> LoadInputData();
+void CompareNativeAndAnalysisWrapper(bool use_mkldnn = false);
+std::vector<paddle::PaddleTensor> ParseInputStreamToVector(
+    const std::string &line);
 
-template <>
-void GetValueFromStream<std::string>(std::stringstream *ss, std::string *t) {
-  *t = ss->str();
-}
+AnalysisConfig SetConfig(bool use_mkldnn = false, bool use_bfloat16 = false);
 
-// Split string to vector
 template <typename T>
-void Split(const std::string &line, char sep, std::vector<T> *v) {
-  std::stringstream ss;
-  T t;
-  for (auto c : line) {
-    if (c != sep) {
-      ss << c;
-    } else {
-      GetValueFromStream<T>(&ss, &t);
-      v->push_back(std::move(t));
-      ss.str({});
-      ss.clear();
-    }
-  }
-
-  if (!ss.str().empty()) {
-    GetValueFromStream<T>(&ss, &t);
-    v->push_back(std::move(t));
-    ss.str({});
-    ss.clear();
-  }
-}
+paddle::PaddleTensor ParseTensor(const std::string &field);
 
-// Parse tensor from string
 template <typename T>
-bool ParseTensor(const std::string &field, paddle::PaddleTensor *tensor) {
-  std::vector<std::string> data;
-  Split(field, ':', &data);
-  if (data.size() < 2) return false;
-
-  std::string shape_str = data[0];
-
-  std::vector<int> shape;
-  Split(shape_str, ' ', &shape);
-
-  std::string mat_str = data[1];
-
-  std::vector<T> mat;
-  Split(mat_str, ' ', &mat);
-
-  tensor->shape = shape;
-  auto size =
-      std::accumulate(shape.begin(), shape.end(), 1, std::multiplies<int>()) *
-      sizeof(T);
-  tensor->data.Resize(size);
-  std::copy(mat.begin(), mat.end(), static_cast<T *>(tensor->data.data()));
-  tensor->dtype = GetPaddleDType<T>();
-
-  return true;
-}
-
-// Parse input tensors from string
-bool ParseLine(const std::string &line,
-               std::vector<paddle::PaddleTensor> *tensors) {
-  std::vector<std::string> fields;
-  Split(line, ';', &fields);
-
-  if (fields.size() < 5) return false;
-
-  tensors->clear();
-  tensors->reserve(5);
-
-  int i = 0;
-  // src_id
-  paddle::PaddleTensor src_id;
-  ParseTensor<int64_t>(fields[i++], &src_id);
-  tensors->push_back(src_id);
-
-  // pos_id
-  paddle::PaddleTensor pos_id;
-  ParseTensor<int64_t>(fields[i++], &pos_id);
-  tensors->push_back(pos_id);
-
-  // segment_id
-  paddle::PaddleTensor segment_id;
-  ParseTensor<int64_t>(fields[i++], &segment_id);
-  tensors->push_back(segment_id);
-
-  // self_attention_bias
-  paddle::PaddleTensor self_attention_bias;
-  ParseTensor<float>(fields[i++], &self_attention_bias);
-  tensors->push_back(self_attention_bias);
-
-  // next_segment_index
-  paddle::PaddleTensor next_segment_index;
-  ParseTensor<int64_t>(fields[i++], &next_segment_index);
-  tensors->push_back(next_segment_index);
-
-  return true;
-}
-
-bool LoadInputData(std::vector<std::vector<paddle::PaddleTensor>> *inputs) {
-  if (FLAGS_infer_data.empty()) {
-    LOG(ERROR) << "please set input data path";
-    return false;
-  }
+std::vector<T> Split(const std::string &line, char separator);
 
-  std::ifstream fin(FLAGS_infer_data);
-  std::string line;
-  int sample = 0;
+template <typename T>
+T GetValueFromStream(std::stringstream &ss);
 
-  // The unit-test dataset only have 10 samples, each sample have 5 feeds.
-  while (std::getline(fin, line)) {
-    std::vector<paddle::PaddleTensor> feed_data;
-    ParseLine(line, &feed_data);
-    inputs->push_back(std::move(feed_data));
-    sample++;
-    if (!FLAGS_test_all_data && sample == FLAGS_batch_size) break;
-  }
-  LOG(INFO) << "number of samples: " << sample;
+template <>
+std::string GetValueFromStream<std::string>(std::stringstream &ss);
 
-  return true;
-}
+TEST(Analyzer_bert, profile) { profile(); }
 
-void SetConfig(AnalysisConfig *config) {
-  config->SetModel(FLAGS_infer_model);
-  config->DisableFCPadding();
+#ifdef PADDLE_WITH_MKLDNN
+TEST(Analyzer_bert, profile_mkldnn) {
+  auto use_mkldnn = true;
+  profile(use_mkldnn);
 }
 
-void profile(bool use_mkldnn = false) {
-  AnalysisConfig config;
-  SetConfig(&config);
-
-  if (use_mkldnn) {
-    config.EnableMKLDNN();
-    config.pass_builder()->AppendPass("fc_mkldnn_pass");
-    config.pass_builder()->AppendPass("fc_act_mkldnn_fuse_pass");
-    config.pass_builder()->AppendPass("fc_elementwise_add_mkldnn_fuse_pass");
-  }
-
-  std::vector<std::vector<PaddleTensor>> outputs;
-  std::vector<std::vector<PaddleTensor>> inputs;
-  LoadInputData(&inputs);
-  TestPrediction(reinterpret_cast<const PaddlePredictor::Config *>(&config),
-                 inputs,
-                 &outputs,
-                 FLAGS_num_threads);
+TEST(Analyzer_bert, profile_mkldnn_bf16) {
+  auto use_mkldnn = true;
+  auto use_bfloat16 = true;
+  profile(use_mkldnn, use_bfloat16);
 }
-
-TEST(Analyzer_bert, profile) { profile(); }
-#ifdef PADDLE_WITH_MKLDNN
-TEST(Analyzer_bert, profile_mkldnn) { profile(true); }
 #endif
 
 // Check the fuse status
 TEST(Analyzer_bert, fuse_statis) {
-  AnalysisConfig cfg;
-  SetConfig(&cfg);
+  auto cfg(SetConfig());
   int num_ops;
   auto predictor = CreatePaddlePredictor<AnalysisConfig>(cfg);
   auto fuse_statis = GetFuseStatis(
@@ -186,39 +65,25 @@ TEST(Analyzer_bert, fuse_statis) {
   LOG(INFO) << "num_ops: " << num_ops;
 }
 
-// Compare result of NativeConfig and AnalysisConfig
-void compare(bool use_mkldnn = false) {
-  AnalysisConfig cfg;
-  SetConfig(&cfg);
-  if (use_mkldnn) {
-    cfg.EnableMKLDNN();
-  }
-
-  std::vector<std::vector<PaddleTensor>> inputs;
-  LoadInputData(&inputs);
-  CompareNativeAndAnalysis(
-      reinterpret_cast<const PaddlePredictor::Config *>(&cfg), inputs);
-}
-
-TEST(Analyzer_bert, compare) { compare(); }
+TEST(Analyzer_bert, compare) { CompareNativeAndAnalysisWrapper(); }
 
 #ifdef PADDLE_WITH_MKLDNN
-TEST(Analyzer_bert, compare_mkldnn) { compare(true /* use_mkldnn */); }
+TEST(Analyzer_bert, compare_mkldnn) {
+  auto use_mkldnn = true;
+  CompareNativeAndAnalysisWrapper(use_mkldnn);
+}
 #endif
 
 // Compare Deterministic result
 TEST(Analyzer_bert, compare_determine) {
-  AnalysisConfig cfg;
-  SetConfig(&cfg);
+  auto cfg(SetConfig());
 
-  std::vector<std::vector<PaddleTensor>> inputs;
-  LoadInputData(&inputs);
+  auto inputs = LoadInputData();
   CompareDeterministic(reinterpret_cast<const PaddlePredictor::Config *>(&cfg),
                        inputs);
 }
 
 TEST(Analyzer_bert, transfer_scope_cache) {
-  AnalysisConfig config;
-  SetConfig(&config);
+  auto config(SetConfig());
 
   std::vector<PaddleTensor> input, output;
   auto predictor = CreatePaddlePredictor<AnalysisConfig>(config);
@@ -236,7 +101,7 @@ TEST(Analyzer_bert, transfer_scope_cache) {
   for (int i = 0; i < threads_num; i++) {
     threads.emplace_back([&, i]() {
       std::getline(fin, line);
-      ParseLine(line, &input);
+      input = ParseInputStreamToVector(line);
       predictor->Run(input, &output, FLAGS_batch_size);
       global_transfer_scope_cache.insert(
           &paddle::framework::global_transfer_scope_cache());
@@ -262,5 +127,141 @@ TEST(Analyzer_bert, transfer_scope_cache) {
                         "The size of data cache is not equal to thread number."));
 }
 
+void profile(bool use_mkldnn, bool use_bfloat16) {
+  auto config(SetConfig(use_mkldnn, use_bfloat16));
+  std::vector<std::vector<PaddleTensor>> outputs;
+  auto inputs = LoadInputData();
+  TestPrediction(reinterpret_cast<const PaddlePredictor::Config *>(&config),
+                 inputs,
+                 &outputs,
+                 FLAGS_num_threads);
+}
+
+std::vector<std::vector<paddle::PaddleTensor>> LoadInputData() {
+  if (FLAGS_infer_data.empty()) {
+    LOG(ERROR) << "please set input data path";
+    throw "missing input data path";
+  }
+
+  std::ifstream fin(FLAGS_infer_data);
+  std::string line;
+  int sample = 0;
+
+  std::vector<std::vector<paddle::PaddleTensor>> inputs;
+
+  // The unit-test dataset only have 10 samples, each sample have 5 feeds.
+  while (std::getline(fin, line)) {
+    inputs.push_back(ParseInputStreamToVector(line));
+    sample++;
+    if (!FLAGS_test_all_data && sample == FLAGS_batch_size) break;
+  }
+  LOG(INFO) << "number of samples: " << sample;
+
+  return inputs;
+}
+
+void CompareNativeAndAnalysisWrapper(bool use_mkldnn) {
+  auto cfg(SetConfig(use_mkldnn));
+  auto inputs = LoadInputData();
+  CompareNativeAndAnalysis(
+      reinterpret_cast<const PaddlePredictor::Config *>(&cfg), inputs);
+}
+
+std::vector<paddle::PaddleTensor> ParseInputStreamToVector(
+    const std::string &line) {
+  const auto fields = Split<std::string>(line, ';');
+
+  if (fields.size() < 5) throw "invalid input line";
+
+  std::vector<paddle::PaddleTensor> tensors;
+
+  tensors.reserve(5);
+
+  const std::size_t src_id = 0;
+  const std::size_t pos_id = 1;
+  const std::size_t segment_id = 2;
+  const std::size_t self_attention_bias = 3;
+  const std::size_t next_segment_index = 4;
+
+  tensors.push_back(ParseTensor<int64_t>(fields[src_id]));
+  tensors.push_back(ParseTensor<int64_t>(fields[pos_id]));
+  tensors.push_back(ParseTensor<int64_t>(fields[segment_id]));
+  tensors.push_back(ParseTensor<float>(fields[self_attention_bias]));
+  tensors.push_back(ParseTensor<int64_t>(fields[next_segment_index]));
+
+  return tensors;
+}
+
+AnalysisConfig SetConfig(bool use_mkldnn, bool use_bfloat16) {
+  AnalysisConfig config;
+  config.SetModel(FLAGS_infer_model);
+  config.DisableFCPadding();
+
+  if (use_mkldnn) {
+    config.EnableMKLDNN();
+    config.pass_builder()->AppendPass("fc_mkldnn_pass");
+    config.pass_builder()->AppendPass("fc_act_mkldnn_fuse_pass");
+    config.pass_builder()->AppendPass("fc_elementwise_add_mkldnn_fuse_pass");
+  }
+
+  if (use_bfloat16) config.EnableMkldnnBfloat16();
+
+  return config;
+}
+
+template <typename T>
+paddle::PaddleTensor ParseTensor(const std::string &field) {
+  const auto data = Split<std::string>(field, ':');
+  if (data.size() < 2) throw "invalid data field";
+
+  std::string shape_str = data[0];
+  const auto shape = Split<int>(shape_str, ' ');
+  paddle::PaddleTensor tensor;
+  tensor.shape = shape;
+  auto size =
+      std::accumulate(shape.begin(), shape.end(), 1, std::multiplies<int>()) *
+      sizeof(T);
+  tensor.data.Resize(size);
+
+  std::string mat_str = data[1];
+  const auto mat = Split<T>(mat_str, ' ');
+  std::copy(mat.cbegin(), mat.cend(), static_cast<T *>(tensor.data.data()));
+  tensor.dtype = GetPaddleDType<T>();
+
+  return tensor;
+}
+
+template <typename T>
+std::vector<T> Split(const std::string &line, char separator) {
+  std::vector<T> result;
+  std::stringstream ss;
+  for (auto c : line) {
+    if (c != separator) {
+      ss << c;
+    } else {
+      result.emplace_back(GetValueFromStream<T>(ss));
+      ss.str({});
+      ss.clear();
+    }
+  }
+
+  auto ss_is_not_empty = !ss.str().empty();
+  if (ss_is_not_empty) result.emplace_back(GetValueFromStream<T>(ss));
+
+  return result;
+}
+
+template <typename T>
+T GetValueFromStream(std::stringstream &ss) {
+  T result;
+  ss >> result;
+  return result;
+}
+
+template <>
+std::string GetValueFromStream<std::string>(std::stringstream &ss) {
+  return ss.str();
+}
+
 }  // namespace inference
 }  // namespace paddle
-- 
GitLab
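
Note on usage: the test above drives the new bfloat16 path through SetConfig(use_mkldnn, use_bfloat16). For readers who want the same configuration in their own application, the snippet below is a minimal sketch built only from calls that appear in this patch (SetModel, DisableFCPadding, EnableMKLDNN, the three fc_* passes, EnableMkldnnBfloat16, CreatePaddlePredictor, PaddlePredictor::Run); the include path, model directory, and input preparation are assumptions, not part of the commit.

#include <vector>

#include "paddle_inference_api.h"  // assumed include path; varies by install layout

int main() {
  // Mirror the test's SetConfig(/*use_mkldnn=*/true, /*use_bfloat16=*/true).
  paddle::AnalysisConfig config;
  config.SetModel("/path/to/bert_model");  // placeholder; the test uses FLAGS_infer_model
  config.DisableFCPadding();

  config.EnableMKLDNN();
  config.pass_builder()->AppendPass("fc_mkldnn_pass");
  config.pass_builder()->AppendPass("fc_act_mkldnn_fuse_pass");
  config.pass_builder()->AppendPass("fc_elementwise_add_mkldnn_fuse_pass");
  config.EnableMkldnnBfloat16();  // the switch this patch enables for Bert

  auto predictor = paddle::CreatePaddlePredictor<paddle::AnalysisConfig>(config);

  // Feed order used by the test: src_id, pos_id, segment_id,
  // self_attention_bias, next_segment_index.
  std::vector<paddle::PaddleTensor> inputs;  // fill shapes and data before running
  std::vector<paddle::PaddleTensor> outputs;
  predictor->Run(inputs, &outputs);
  return 0;
}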