未验证 提交 503b5eb4 编写于 作者: T Tao Luo 提交者: GitHub

Merge pull request #16678 from luotao1/int8_cache

test_analyzer_int8 tests use default pass order
......@@ -87,6 +87,7 @@ const std::vector<std::string> kAnakinSubgraphPasses({
GpuPassStrategy::GpuPassStrategy() : PassStrategy({}) {
passes_.assign({
"infer_clean_graph_pass", //
"runtime_context_cache_pass", //
// "identity_scale_op_clean_pass", //
"conv_affine_channel_fuse_pass", //
"conv_eltwiseadd_affine_channel_fuse_pass", //
......@@ -96,7 +97,6 @@ GpuPassStrategy::GpuPassStrategy() : PassStrategy({}) {
"conv_elementwise_add_act_fuse_pass", //
"conv_elementwise_add2_act_fuse_pass", //
"conv_elementwise_add_fuse_pass", //
"runtime_context_cache_pass", //
#endif //
"transpose_flatten_concat_fuse_pass",
});
......@@ -117,6 +117,10 @@ CpuPassStrategy::CpuPassStrategy() : PassStrategy({}) {
// not be damaged by smaller ones.
passes_.assign({
"infer_clean_graph_pass", //
// TODO(luotao): runtime_context_cache_pass should be located in the
// front, see https://github.com/PaddlePaddle/Paddle/issues/16609,
// will enhance this pass later.
"runtime_context_cache_pass", //
"attention_lstm_fuse_pass", //
"seqpool_concat_fuse_pass", //
"seqconv_eltadd_relu_fuse_pass", //
......@@ -132,7 +136,6 @@ CpuPassStrategy::CpuPassStrategy() : PassStrategy({}) {
"conv_bn_fuse_pass", //
"conv_eltwiseadd_bn_fuse_pass", //
"is_test_pass", //
"runtime_context_cache_pass", //
});
use_gpu_ = false;
......
......@@ -23,18 +23,11 @@ namespace analysis {
void SetConfig(AnalysisConfig *cfg) {
cfg->SetModel(FLAGS_infer_model);
cfg->SetProgFile("__model__");
cfg->DisableGpu();
cfg->SwitchIrOptim();
cfg->SwitchSpecifyInputNames(false);
cfg->SwitchSpecifyInputNames();
cfg->SetCpuMathLibraryNumThreads(FLAGS_paddle_num_threads);
cfg->EnableMKLDNN();
cfg->pass_builder()->SetPasses(
{"infer_clean_graph_pass", "mkldnn_placement_pass",
"depthwise_conv_mkldnn_pass", "conv_bn_fuse_pass",
"conv_eltwiseadd_bn_fuse_pass", "conv_bias_mkldnn_fuse_pass",
"conv_elementwise_add_mkldnn_fuse_pass", "conv_relu_mkldnn_fuse_pass",
"fc_fuse_pass", "is_test_pass"});
}
template <typename T>
......@@ -84,13 +77,13 @@ std::shared_ptr<std::vector<PaddleTensor>> GetWarmupData(
std::to_string(num_images) + " is bigger than all test data size.");
PaddleTensor images;
images.name = "input";
images.name = "image";
images.shape = {num_images, 3, 224, 224};
images.dtype = PaddleDType::FLOAT32;
images.data.Resize(sizeof(float) * num_images * 3 * 224 * 224);
PaddleTensor labels;
labels.name = "labels";
labels.name = "label";
labels.shape = {num_images, 1};
labels.dtype = PaddleDType::INT64;
labels.data.Resize(sizeof(int64_t) * num_images);
......@@ -132,7 +125,7 @@ void SetInput(std::vector<std::vector<PaddleTensor>> *inputs,
images_offset_in_file + sizeof(float) * total_images * 3 * 224 * 224;
TensorReader<float> image_reader(file, images_offset_in_file,
image_batch_shape, "input");
image_batch_shape, "image");
TensorReader<int64_t> label_reader(file, labels_offset_in_file,
label_batch_shape, "label");
......
......@@ -316,7 +316,8 @@ void PredictionRun(PaddlePredictor *predictor,
int num_threads, int tid) {
int num_times = FLAGS_repeat;
int iterations = inputs.size(); // process the whole dataset ...
if (FLAGS_iterations > 0 && FLAGS_iterations < inputs.size())
if (FLAGS_iterations > 0 &&
FLAGS_iterations < static_cast<int64_t>(inputs.size()))
iterations =
FLAGS_iterations; // ... unless the number of iterations is set
outputs->resize(iterations);
......@@ -329,14 +330,14 @@ void PredictionRun(PaddlePredictor *predictor,
#endif
if (!FLAGS_zero_copy) {
run_timer.tic();
for (size_t i = 0; i < iterations; i++) {
for (int i = 0; i < iterations; i++) {
for (int j = 0; j < num_times; j++) {
predictor->Run(inputs[i], &(*outputs)[i], FLAGS_batch_size);
}
}
elapsed_time = run_timer.toc();
} else {
for (size_t i = 0; i < iterations; i++) {
for (int i = 0; i < iterations; i++) {
ConvertPaddleTensorToZeroCopyTensor(predictor, inputs[i]);
run_timer.tic();
for (int j = 0; j < num_times; j++) {
......@@ -366,9 +367,8 @@ void TestOneThreadPrediction(
const std::vector<std::vector<PaddleTensor>> &inputs,
std::vector<std::vector<PaddleTensor>> *outputs, bool use_analysis = true) {
auto predictor = CreateTestPredictor(config, use_analysis);
PredictionWarmUp(predictor.get(), inputs, outputs, FLAGS_paddle_num_threads,
0);
PredictionRun(predictor.get(), inputs, outputs, FLAGS_paddle_num_threads, 0);
PredictionWarmUp(predictor.get(), inputs, outputs, 1, 0);
PredictionRun(predictor.get(), inputs, outputs, 1, 0);
}
void TestMultiThreadPrediction(
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册