// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "paddle/fluid/framework/transfer_scope_cache.h"
#include "paddle/fluid/inference/tests/api/tester_helper.h"

namespace paddle {
namespace inference {

using paddle::PaddleTensor;

void profile(bool use_mkldnn = false, bool use_bfloat16 = false);
std::vector<std::vector<paddle::PaddleTensor>> LoadInputData();
void CompareNativeAndAnalysisWrapper(bool use_mkldnn = false);
std::vector<paddle::PaddleTensor> ParseInputStreamToVector(
    const std::string &line);

AnalysisConfig SetConfig(bool use_mkldnn = false, bool use_bfloat16 = false);

template <typename T>
paddle::PaddleTensor ParseTensor(const std::string &field);

template <typename T>
std::vector<T> Split(const std::string &line, char separator);

template <typename T>
T GetValueFromStream(std::stringstream &ss);

template <>
std::string GetValueFromStream<std::string>(std::stringstream &ss);

TEST(Analyzer_bert, profile) {
#if !defined(_WIN32)
  setenv("NVIDIA_TF32_OVERRIDE", "0", 1);
#endif
  profile();
}

#ifdef PADDLE_WITH_MKLDNN
TEST(Analyzer_bert, profile_mkldnn) {
  auto use_mkldnn = true;
  profile(use_mkldnn);
}

TEST(Analyzer_bert, profile_mkldnn_bf16) {
  auto use_mkldnn = true;
  auto use_bfloat16 = true;
  profile(use_mkldnn, use_bfloat16);
}
#endif

// Check the fuse status
TEST(Analyzer_bert, fuse_statis) {
#if !defined(_WIN32)
  setenv("NVIDIA_TF32_OVERRIDE", "0", 1);
#endif
  auto cfg(SetConfig());
  int num_ops;
  auto predictor = CreatePaddlePredictor<AnalysisConfig>(cfg);
  auto fuse_statis = GetFuseStatis(
      static_cast<AnalysisPredictor *>(predictor.get()), &num_ops);
  LOG(INFO) << "num_ops: " << num_ops;
}

TEST(Analyzer_bert, compare) {
#if !defined(_WIN32)
  setenv("NVIDIA_TF32_OVERRIDE", "0", 1);
#endif
  CompareNativeAndAnalysisWrapper();
}

#ifdef PADDLE_WITH_MKLDNN
TEST(Analyzer_bert, compare_mkldnn) {
  auto use_mkldnn = true;
  CompareNativeAndAnalysisWrapper(use_mkldnn);
}
#endif

// Compare Deterministic result
TEST(Analyzer_bert, compare_determine) {
#if !defined(_WIN32)
  setenv("NVIDIA_TF32_OVERRIDE", "0", 1);
#endif
  auto cfg(SetConfig());
  auto inputs = LoadInputData();
  CompareDeterministic(reinterpret_cast<const PaddlePredictor::Config *>(&cfg),
                       inputs);
}

TEST(Analyzer_bert, transfer_scope_cache) {
#if !defined(_WIN32)
  setenv("NVIDIA_TF32_OVERRIDE", "0", 1);
#endif
  auto config(SetConfig());

  std::vector<PaddleTensor> input, output;
  auto predictor = CreatePaddlePredictor<AnalysisConfig>(config);

  int threads_num = 10;
  std::vector<std::thread> threads;
  std::unordered_set<std::unordered_set<paddle::framework::Scope *> *>
      global_transfer_scope_cache;
  std::unordered_set<std::unordered_map<size_t, paddle::framework::Scope *> *>
      global_transfer_data_cache;

  std::ifstream fin(FLAGS_infer_data);
  std::string line;

  for (int i = 0; i < threads_num; i++) {
    threads.emplace_back([&, i]() {
      std::getline(fin, line);
      input = ParseInputStreamToVector(line);
      predictor->Run(input, &output, FLAGS_batch_size);
      global_transfer_scope_cache.insert(
          &paddle::framework::global_transfer_scope_cache());
      global_transfer_data_cache.insert(
          &paddle::framework::global_transfer_data_cache());
    });
    threads[0].join();
    threads.clear();
    std::vector<PaddleTensor>().swap(input);
  }
  // Since paddle::framework::global_transfer_scope_cache() and
  // paddle::framework::global_transfer_data_cache() are thread_local,
  // their pointers should be different among different threads.
  PADDLE_ENFORCE_EQ(
      global_transfer_scope_cache.size(),
      threads_num,
      paddle::platform::errors::Fatal(
          "The size of scope cache is not equal to thread number."));
  PADDLE_ENFORCE_EQ(
      global_transfer_data_cache.size(),
      threads_num,
      paddle::platform::errors::Fatal(
          "The size of data cache is not equal to thread number."));
}
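
// The helpers below are driven by the gflags used throughout this file
// (FLAGS_infer_model, FLAGS_infer_data, FLAGS_batch_size, FLAGS_num_threads,
// FLAGS_test_all_data), which are assumed to be supplied by tester_helper.h.
// A purely illustrative invocation of the resulting test binary could be:
//   ./test_analyzer_bert --infer_model=<model_dir> --infer_data=<data_file>
// (the binary name and paths here are hypothetical, not prescribed by this
// file).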

void profile(bool use_mkldnn, bool use_bfloat16) {
  auto config(SetConfig(use_mkldnn, use_bfloat16));
  std::vector<std::vector<PaddleTensor>> outputs;
  auto inputs = LoadInputData();
  TestPrediction(reinterpret_cast<const PaddlePredictor::Config *>(&config),
                 inputs,
                 &outputs,
                 FLAGS_num_threads);
}

std::vector<std::vector<paddle::PaddleTensor>> LoadInputData() {
  if (FLAGS_infer_data.empty()) {
    LOG(ERROR) << "please set input data path";
    throw "missing input data path";
  }

  std::ifstream fin(FLAGS_infer_data);
  std::string line;
  int sample = 0;

  std::vector<std::vector<paddle::PaddleTensor>> inputs;

  // The unit-test dataset only has 10 samples; each sample has 5 feeds.
  while (std::getline(fin, line)) {
    inputs.push_back(ParseInputStreamToVector(line));
    sample++;
    if (!FLAGS_test_all_data && sample == FLAGS_batch_size) break;
  }
  LOG(INFO) << "number of samples: " << sample;

  return inputs;
}

void CompareNativeAndAnalysisWrapper(bool use_mkldnn) {
  auto cfg(SetConfig(use_mkldnn));
  auto inputs = LoadInputData();
  CompareNativeAndAnalysis(
      reinterpret_cast<const PaddlePredictor::Config *>(&cfg), inputs);
}

std::vector<paddle::PaddleTensor> ParseInputStreamToVector(
    const std::string &line) {
  const auto fields = Split<std::string>(line, ';');

  if (fields.size() < 5) throw "invalid input line";

  std::vector<paddle::PaddleTensor> tensors;

  tensors.reserve(5);

  const std::size_t src_id = 0;
  const std::size_t pos_id = 1;
  const std::size_t segment_id = 2;
  const std::size_t self_attention_bias = 3;
  const std::size_t next_segment_index = 4;

  tensors.push_back(ParseTensor<int64_t>(fields[src_id]));
  tensors.push_back(ParseTensor<int64_t>(fields[pos_id]));
  tensors.push_back(ParseTensor<int64_t>(fields[segment_id]));
  tensors.push_back(ParseTensor<float>(fields[self_attention_bias]));
  tensors.push_back(ParseTensor<int64_t>(fields[next_segment_index]));

  return tensors;
}

AnalysisConfig SetConfig(bool use_mkldnn, bool use_bfloat16) {
  AnalysisConfig config;
  config.SetModel(FLAGS_infer_model);
  config.DisableFCPadding();

  if (use_mkldnn) {
    config.EnableMKLDNN();
    config.pass_builder()->AppendPass("fc_mkldnn_pass");
    config.pass_builder()->AppendPass("fc_act_mkldnn_fuse_pass");
    config.pass_builder()->AppendPass("fc_elementwise_add_mkldnn_fuse_pass");
  }

  if (use_bfloat16) config.EnableMkldnnBfloat16();

  return config;
}
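
// A sketch of the input text format assumed by the parsing helpers below
// (inferred from ParseInputStreamToVector and ParseTensor, not from the
// dataset itself): one sample per line, five ';'-separated fields in the
// order src_id, pos_id, segment_id, self_attention_bias, next_segment_index;
// each field is "<space-separated shape>:<space-separated values>". A purely
// illustrative field holding a 1x4 int64 tensor could look like:
//   1 4:10 20 30 40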

template <typename T>
paddle::PaddleTensor ParseTensor(const std::string &field) {
  const auto data = Split<std::string>(field, ':');

  if (data.size() < 2) throw "invalid data field";

  std::string shape_str = data[0];
  const auto shape = Split<int>(shape_str, ' ');
  paddle::PaddleTensor tensor;
  tensor.shape = shape;
  auto size =
      std::accumulate(shape.begin(), shape.end(), 1, std::multiplies<int>()) *
      sizeof(T);
  tensor.data.Resize(size);

  std::string mat_str = data[1];
  const auto mat = Split<T>(mat_str, ' ');
  std::copy(mat.cbegin(), mat.cend(), static_cast<T *>(tensor.data.data()));
  tensor.dtype = GetPaddleDType<T>();

  return tensor;
}

template <typename T>
std::vector<T> Split(const std::string &line, char separator) {
  std::vector<T> result;

  std::stringstream ss;
  for (auto c : line) {
    if (c != separator) {
      ss << c;
    } else {
      result.emplace_back(GetValueFromStream<T>(ss));
      ss.str({});
      ss.clear();
    }
  }

  auto ss_is_not_empty = !ss.str().empty();
  if (ss_is_not_empty) result.emplace_back(GetValueFromStream<T>(ss));

  return result;
}

template <typename T>
T GetValueFromStream(std::stringstream &ss) {
  T result;
  ss >> result;
  return result;
}

template <>
std::string GetValueFromStream<std::string>(std::stringstream &ss) {
  return ss.str();
}

}  // namespace inference
}  // namespace paddle