// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "paddle/fluid/framework/transfer_scope_cache.h"
#include "paddle/fluid/inference/tests/api/tester_helper.h"

namespace paddle {
namespace inference {

using paddle::PaddleTensor;

template <typename T>
void GetValueFromStream(std::stringstream *ss, T *t) {
  (*ss) >> (*t);
}

template <>
void GetValueFromStream<std::string>(std::stringstream *ss, std::string *t) {
  *t = ss->str();
}

// Split a string into a vector of T on `sep`, e.g.
// Split("1 2 3", ' ', &v) yields v == {1, 2, 3}.
template <typename T>
void Split(const std::string &line, char sep, std::vector<T> *v) {
  std::stringstream ss;
  T t;
  for (auto c : line) {
    if (c != sep) {
      ss << c;
    } else {
      GetValueFromStream<T>(&ss, &t);
      v->push_back(std::move(t));
      ss.str({});
      ss.clear();
    }
  }

  if (!ss.str().empty()) {
    GetValueFromStream<T>(&ss, &t);
    v->push_back(std::move(t));
    ss.str({});
    ss.clear();
  }
}

// Parse a tensor from a "shape:data" string, where both the shape and the
// flattened data are space-separated, e.g. "1 128:5 8 9 ...".
template <typename T>
bool ParseTensor(const std::string &field, paddle::PaddleTensor *tensor) {
  std::vector<std::string> data;
  Split(field, ':', &data);
  if (data.size() < 2) return false;

  std::string shape_str = data[0];

  std::vector<int> shape;
  Split(shape_str, ' ', &shape);

  std::string mat_str = data[1];

  std::vector<T> mat;
  Split(mat_str, ' ', &mat);

  tensor->shape = shape;
  auto size =
      std::accumulate(shape.begin(), shape.end(), 1, std::multiplies<int>()) *
      sizeof(T);
  tensor->data.Resize(size);
  std::copy(mat.begin(), mat.end(), static_cast<T *>(tensor->data.data()));
  tensor->dtype = GetPaddleDType<T>();

  return true;
}

// Parse the input tensors of one sample from a single line of text.
bool ParseLine(const std::string &line,
               std::vector<paddle::PaddleTensor> *tensors) {
  std::vector<std::string> fields;
  Split(line, ';', &fields);

  if (fields.size() < 5) return false;

  tensors->clear();
  tensors->reserve(5);

  int i = 0;
  // src_id
  paddle::PaddleTensor src_id;
  ParseTensor<int64_t>(fields[i++], &src_id);
  tensors->push_back(src_id);

  // pos_id
  paddle::PaddleTensor pos_id;
  ParseTensor<int64_t>(fields[i++], &pos_id);
  tensors->push_back(pos_id);

  // segment_id
  paddle::PaddleTensor segment_id;
  ParseTensor<int64_t>(fields[i++], &segment_id);
  tensors->push_back(segment_id);

  // self_attention_bias
  paddle::PaddleTensor self_attention_bias;
  ParseTensor<float>(fields[i++], &self_attention_bias);
  tensors->push_back(self_attention_bias);

  // next_segment_index
  paddle::PaddleTensor next_segment_index;
  ParseTensor<int64_t>(fields[i++], &next_segment_index);
  tensors->push_back(next_segment_index);

  return true;
}

bool LoadInputData(std::vector<std::vector<paddle::PaddleTensor>> *inputs) {
  if (FLAGS_infer_data.empty()) {
    LOG(ERROR) << "please set input data path";
    return false;
  }

  std::ifstream fin(FLAGS_infer_data);
  std::string line;
  int sample = 0;

  // The unit-test dataset only has 10 samples; each sample has 5 feeds.
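  // Each line holds one sample: five ';'-separated "shape:data" fields in the
  // order src_id;pos_id;segment_id;self_attention_bias;next_segment_index
  // (see ParseLine/ParseTensor above). A sketch of one hypothetical line:
  //   1 128:5 8 ...;1 128:0 1 ...;1 128:0 0 ...;1 128 1:0.0 0.0 ...;1 1:0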
  while (std::getline(fin, line)) {
    std::vector<paddle::PaddleTensor> feed_data;
    ParseLine(line, &feed_data);
    inputs->push_back(std::move(feed_data));
    sample++;
    if (!FLAGS_test_all_data && sample == FLAGS_batch_size) break;
  }
  LOG(INFO) << "number of samples: " << sample;

  return true;
}

void SetConfig(AnalysisConfig *config) { config->SetModel(FLAGS_infer_model); }

void profile(bool use_mkldnn = false, bool use_ngraph = false) {
  AnalysisConfig config;
  SetConfig(&config);

  if (use_mkldnn) {
    config.EnableMKLDNN();
    config.pass_builder()->AppendPass("fc_mkldnn_pass");
  }

  if (use_ngraph) {
    config.EnableNgraph();
  }

  std::vector<std::vector<PaddleTensor>> outputs;
  std::vector<std::vector<PaddleTensor>> inputs;
  LoadInputData(&inputs);
  TestPrediction(reinterpret_cast<const PaddlePredictor::Config *>(&config),
                 inputs, &outputs, FLAGS_num_threads);
}

TEST(Analyzer_bert, profile) { profile(); }

#ifdef PADDLE_WITH_MKLDNN
TEST(Analyzer_bert, profile_mkldnn) { profile(true, false); }
#endif

#ifdef PADDLE_WITH_NGRAPH
TEST(Analyzer_bert, profile_ngraph) { profile(false, true); }
#endif

// Check the fuse status
TEST(Analyzer_bert, fuse_statis) {
  AnalysisConfig cfg;
  SetConfig(&cfg);

  int num_ops;
  auto predictor = CreatePaddlePredictor<AnalysisConfig>(cfg);
  auto fuse_statis = GetFuseStatis(
      static_cast<AnalysisPredictor *>(predictor.get()), &num_ops);
  LOG(INFO) << "num_ops: " << num_ops;
}

// Compare the results of NativeConfig and AnalysisConfig
void compare(bool use_mkldnn = false, bool use_ngraph = false) {
  AnalysisConfig cfg;
  SetConfig(&cfg);
  if (use_mkldnn) {
    cfg.EnableMKLDNN();
    cfg.pass_builder()->AppendPass("fc_mkldnn_pass");
  }

  if (use_ngraph) {
    cfg.EnableNgraph();
  }

  std::vector<std::vector<PaddleTensor>> inputs;
  LoadInputData(&inputs);
  CompareNativeAndAnalysis(
      reinterpret_cast<const PaddlePredictor::Config *>(&cfg), inputs);
}

TEST(Analyzer_bert, compare) { compare(); }

#ifdef PADDLE_WITH_MKLDNN
TEST(Analyzer_bert, compare_mkldnn) {
  compare(true, false /* use_mkldnn, no use_ngraph */);
}
#endif

#ifdef PADDLE_WITH_NGRAPH
TEST(Analyzer_bert, compare_ngraph) {
  compare(false, true /* no use_mkldnn, use_ngraph */);
}
#endif

// Compare deterministic results
TEST(Analyzer_bert, compare_determine) {
  AnalysisConfig cfg;
  SetConfig(&cfg);

  std::vector<std::vector<PaddleTensor>> inputs;
  LoadInputData(&inputs);
  CompareDeterministic(reinterpret_cast<const PaddlePredictor::Config *>(&cfg),
                       inputs);
}

void verify_transfer_scope_cache(bool is_static = false) {
  AnalysisConfig config;
  SetConfig(&config);

  std::vector<PaddleTensor> input, output;

  auto predictor = CreatePaddlePredictor<AnalysisConfig>(config);

  int threads_num = 10;
  std::vector<std::thread> threads;
  // Record the distinct cache instances observed by each worker thread.
  std::unordered_set<std::unordered_set<paddle::framework::Scope *> *>
      global_transfer_scope_cache;
  std::unordered_set<std::unordered_map<size_t, paddle::framework::Scope *> *>
      global_transfer_data_cache;

  std::ifstream fin(FLAGS_infer_data);
  std::string line;

  for (int i = 0; i < threads_num; i++) {
    threads.emplace_back([&, i]() {
      std::getline(fin, line);
      ParseLine(line, &input);
#ifdef PADDLE_WITH_MKLDNN
      // Use the static method to handle transfer_scope_cache().
      // TODO(intel): explicit session id setting will be deprecated.
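      // Pinning every thread to one mkldnn session id should make them share
      // the same static transfer scope/data caches instead of thread_local
      // ones; the size checks after the loop rely on this.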
      if (is_static) platform::set_cur_mkldnn_session_id(1);
#endif
      predictor->Run(input, &output, FLAGS_batch_size);

      global_transfer_scope_cache.insert(
          &paddle::framework::global_transfer_scope_cache());
      global_transfer_data_cache.insert(
          &paddle::framework::global_transfer_data_cache());
    });
    // Join the freshly spawned worker before the next iteration, so each
    // sample runs in its own thread (and thus with its own thread_local
    // caches in the non-static case).
    threads[0].join();
    threads.clear();
    std::vector<PaddleTensor>().swap(input);
  }
#ifdef PADDLE_WITH_MKLDNN
  if (is_static) {
    // The static transfer_scope_cache() is used instead of the thread_local
    // one, so all threads should have observed a single shared cache.
    PADDLE_ENFORCE_EQ(global_transfer_scope_cache.size(), 1UL);
    PADDLE_ENFORCE_EQ(global_transfer_data_cache.size(), 1UL);
  } else {
#endif
    // Since paddle::framework::global_transfer_scope_cache() and
    // paddle::framework::global_transfer_data_cache() are thread_local,
    // their pointers should differ across different thread ids.
    PADDLE_ENFORCE_EQ(global_transfer_scope_cache.size(),
                      static_cast<size_t>(threads_num));
    PADDLE_ENFORCE_EQ(global_transfer_data_cache.size(),
                      static_cast<size_t>(threads_num));
#ifdef PADDLE_WITH_MKLDNN
  }
#endif
}

TEST(Analyzer_bert, threadlocal_transfer_scope_cache) {
  verify_transfer_scope_cache();
}
#ifdef PADDLE_WITH_MKLDNN
TEST(Analyzer_bert, static_transfer_scope_cache) {
  verify_transfer_scope_cache(true);
}
#endif

}  // namespace inference
}  // namespace paddle
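
// How to run (a sketch: the flag names come from tester_helper.h, but the
// binary name depends on the build target and is assumed here):
//   ./test_analyzer_bert --infer_model=<model_dir> --infer_data=<data_file> \
//     --batch_size=1 --num_threads=1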