Unverified commit 503b5eb4, authored by Tao Luo, committed by GitHub

Merge pull request #16678 from luotao1/int8_cache

test_analyzer_int8 tests use default pass order
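In short, the INT8 analyzer tests no longer pin their own pass list in SetConfig; they rely on the default CpuPassStrategy, which now runs runtime_context_cache_pass right after infer_clean_graph_pass. A minimal sketch of the resulting test configuration, using only the AnalysisConfig calls that appear in the diff below (FLAGS_* are the test's existing gflags):

// Sketch: INT8 test config after this change. No pass_builder()->SetPasses()
// call, so the default CPU pass order from the pass builder applies.
void SetConfig(AnalysisConfig *cfg) {
  cfg->SetModel(FLAGS_infer_model);                            // model directory
  cfg->DisableGpu();                                           // CPU-only INT8 path
  cfg->SwitchIrOptim();                                        // enable the default IR passes
  cfg->SwitchSpecifyInputNames();                              // feed inputs by tensor name
  cfg->SetCpuMathLibraryNumThreads(FLAGS_paddle_num_threads);  // math library threads
  cfg->EnableMKLDNN();                                         // switch on MKL-DNN kernels
}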
@@ -87,6 +87,7 @@ const std::vector<std::string> kAnakinSubgraphPasses({
 GpuPassStrategy::GpuPassStrategy() : PassStrategy({}) {
   passes_.assign({
       "infer_clean_graph_pass",                    //
+      "runtime_context_cache_pass",                //
       // "identity_scale_op_clean_pass",           //
       "conv_affine_channel_fuse_pass",             //
       "conv_eltwiseadd_affine_channel_fuse_pass",  //
@@ -96,7 +97,6 @@ GpuPassStrategy::GpuPassStrategy() : PassStrategy({}) {
       "conv_elementwise_add_act_fuse_pass",   //
       "conv_elementwise_add2_act_fuse_pass",  //
       "conv_elementwise_add_fuse_pass",       //
-      "runtime_context_cache_pass",           //
 #endif                                        //
       "transpose_flatten_concat_fuse_pass",
   });
@@ -117,6 +117,10 @@ CpuPassStrategy::CpuPassStrategy() : PassStrategy({}) {
   // not be damaged by smaller ones.
   passes_.assign({
       "infer_clean_graph_pass",         //
+      // TODO(luotao): runtime_context_cache_pass should be located in the
+      // front, see https://github.com/PaddlePaddle/Paddle/issues/16609,
+      // will enhance this pass later.
+      "runtime_context_cache_pass",     //
       "attention_lstm_fuse_pass",       //
       "seqpool_concat_fuse_pass",       //
       "seqconv_eltadd_relu_fuse_pass",  //
@@ -132,7 +136,6 @@ CpuPassStrategy::CpuPassStrategy() : PassStrategy({}) {
       "conv_bn_fuse_pass",             //
       "conv_eltwiseadd_bn_fuse_pass",  //
       "is_test_pass",                  //
-      "runtime_context_cache_pass",    //
   });
   use_gpu_ = false;
...
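Per the TODO above, runtime_context_cache_pass is placed at the front for now and is expected to be refined later (see the linked issue #16609). If its front placement ever needs to be worked around for a particular model, the pass can be dropped from the default order per config. A hedged sketch, assuming the pass builder's DeletePass helper is available in this version:

AnalysisConfig cfg;
cfg.SwitchIrOptim();  // use the default pass strategy
// Remove the cache pass from the active (CPU or GPU) default order.
cfg.pass_builder()->DeletePass("runtime_context_cache_pass");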
@@ -23,18 +23,11 @@ namespace analysis {
 void SetConfig(AnalysisConfig *cfg) {
   cfg->SetModel(FLAGS_infer_model);
-  cfg->SetProgFile("__model__");
   cfg->DisableGpu();
   cfg->SwitchIrOptim();
-  cfg->SwitchSpecifyInputNames(false);
+  cfg->SwitchSpecifyInputNames();
   cfg->SetCpuMathLibraryNumThreads(FLAGS_paddle_num_threads);
   cfg->EnableMKLDNN();
-  cfg->pass_builder()->SetPasses(
-      {"infer_clean_graph_pass", "mkldnn_placement_pass",
-       "depthwise_conv_mkldnn_pass", "conv_bn_fuse_pass",
-       "conv_eltwiseadd_bn_fuse_pass", "conv_bias_mkldnn_fuse_pass",
-       "conv_elementwise_add_mkldnn_fuse_pass", "conv_relu_mkldnn_fuse_pass",
-       "fc_fuse_pass", "is_test_pass"});
 }
 
 template <typename T>
@@ -84,13 +77,13 @@ std::shared_ptr<std::vector<PaddleTensor>> GetWarmupData(
       std::to_string(num_images) + " is bigger than all test data size.");
 
   PaddleTensor images;
-  images.name = "input";
+  images.name = "image";
   images.shape = {num_images, 3, 224, 224};
   images.dtype = PaddleDType::FLOAT32;
   images.data.Resize(sizeof(float) * num_images * 3 * 224 * 224);
 
   PaddleTensor labels;
-  labels.name = "labels";
+  labels.name = "label";
   labels.shape = {num_images, 1};
   labels.dtype = PaddleDType::INT64;
   labels.data.Resize(sizeof(int64_t) * num_images);
@@ -132,7 +125,7 @@ void SetInput(std::vector<std::vector<PaddleTensor>> *inputs,
       images_offset_in_file + sizeof(float) * total_images * 3 * 224 * 224;
 
   TensorReader<float> image_reader(file, images_offset_in_file,
-                                   image_batch_shape, "input");
+                                   image_batch_shape, "image");
   TensorReader<int64_t> label_reader(file, labels_offset_in_file,
                                      label_batch_shape, "label");
...
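The warm-up and input tensors now use the names the model actually exposes ("image" and "label") rather than "input"/"labels". For reference, a minimal sketch of one feed tensor built with the same PaddleTensor fields used above (the zero fill is purely illustrative; requires <algorithm> and the Paddle inference API header):

// Illustrative one-image feed tensor matching the renamed model input.
paddle::PaddleTensor image;
image.name = "image";  // must match the model's input name
image.shape = {1, 3, 224, 224};
image.dtype = paddle::PaddleDType::FLOAT32;
image.data.Resize(sizeof(float) * 3 * 224 * 224);
std::fill_n(static_cast<float *>(image.data.data()), 3 * 224 * 224, 0.0f);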
@@ -316,7 +316,8 @@ void PredictionRun(PaddlePredictor *predictor,
                    int num_threads, int tid) {
   int num_times = FLAGS_repeat;
   int iterations = inputs.size();  // process the whole dataset ...
-  if (FLAGS_iterations > 0 && FLAGS_iterations < inputs.size())
+  if (FLAGS_iterations > 0 &&
+      FLAGS_iterations < static_cast<int64_t>(inputs.size()))
     iterations =
         FLAGS_iterations;  // ... unless the number of iterations is set
   outputs->resize(iterations);
@@ -329,14 +330,14 @@ void PredictionRun(PaddlePredictor *predictor,
 #endif
   if (!FLAGS_zero_copy) {
     run_timer.tic();
-    for (size_t i = 0; i < iterations; i++) {
+    for (int i = 0; i < iterations; i++) {
       for (int j = 0; j < num_times; j++) {
         predictor->Run(inputs[i], &(*outputs)[i], FLAGS_batch_size);
       }
     }
     elapsed_time = run_timer.toc();
   } else {
-    for (size_t i = 0; i < iterations; i++) {
+    for (int i = 0; i < iterations; i++) {
       ConvertPaddleTensorToZeroCopyTensor(predictor, inputs[i]);
       run_timer.tic();
       for (int j = 0; j < num_times; j++) {
@@ -366,9 +367,8 @@ void TestOneThreadPrediction(
     const std::vector<std::vector<PaddleTensor>> &inputs,
     std::vector<std::vector<PaddleTensor>> *outputs, bool use_analysis = true) {
   auto predictor = CreateTestPredictor(config, use_analysis);
-  PredictionWarmUp(predictor.get(), inputs, outputs, FLAGS_paddle_num_threads,
-                   0);
-  PredictionRun(predictor.get(), inputs, outputs, FLAGS_paddle_num_threads, 0);
+  PredictionWarmUp(predictor.get(), inputs, outputs, 1, 0);
+  PredictionRun(predictor.get(), inputs, outputs, 1, 0);
 }
 
 void TestMultiThreadPrediction(
...
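The remaining hunks tighten the test harness: the iteration bound is compared through an explicit static_cast to avoid a signed/unsigned mismatch, the loop indices switch to int to match it, and TestOneThreadPrediction now passes a thread count of 1, presumably because a single-threaded test should report one thread while FLAGS_paddle_num_threads is already applied through SetCpuMathLibraryNumThreads. A tiny self-contained sketch of the comparison pattern, with hypothetical names:

#include <cstdint>
#include <vector>

int main() {
  int64_t flag_iterations = 10;  // stands in for FLAGS_iterations
  std::vector<int> inputs(100);  // stands in for the test inputs
  int iterations = static_cast<int>(inputs.size());  // whole dataset by default
  if (flag_iterations > 0 &&
      flag_iterations < static_cast<int64_t>(inputs.size())) {
    iterations = static_cast<int>(flag_iterations);  // ... unless capped by the flag
  }
  return iterations > 0 ? 0 : 1;
}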