diff --git a/paddle/fluid/framework/operator.cc b/paddle/fluid/framework/operator.cc index 1723a9a78a0da6e3eac7f823f79fe802a916e5b3..78410c0d094993a4b809fc608fd9b1fe9a28bca2 100644 --- a/paddle/fluid/framework/operator.cc +++ b/paddle/fluid/framework/operator.cc @@ -1095,6 +1095,17 @@ Scope* OperatorWithKernel::PrepareData( if (!new_scope) { new_scope = &scope.NewScope(); } + // For inference, if a GPU model has an op that can only run on CPU, + // the results for different inputs will all be the same as the first one. + // The reason is that if a GPU tensor is the input of a CPU kernel, + // we will create a new CPU tensor in a new scope. + // However, if enable_cache_runtime_context is set, we get the CPU tensor + // each time, not the GPU tensor. + // Thus, we set pre_scope_ = nullptr to trigger `new RuntimeContext()` in + // RunImpl(). + if (enable_cache_runtime_context) { + pre_scope_ = nullptr; + } auto* trans_var = new_scope->Var(var_name); input_vars[i] = trans_var; diff --git a/paddle/fluid/inference/api/paddle_pass_builder.cc b/paddle/fluid/inference/api/paddle_pass_builder.cc index 30966772ebfa413b779102d094f4f2ee394bcd59..2fba560ac2e29fd685c6afaee6055fc11ecd75fa 100644 --- a/paddle/fluid/inference/api/paddle_pass_builder.cc +++ b/paddle/fluid/inference/api/paddle_pass_builder.cc @@ -98,7 +98,10 @@ GpuPassStrategy::GpuPassStrategy() : PassStrategy({}) { "conv_elementwise_add_fuse_pass", // #endif // "transpose_flatten_concat_fuse_pass", + // The following two passes should be placed last, since they will + // work on all fused ops. "expected_kernel_cache_pass", // + "runtime_context_cache_pass" }); use_gpu_ = true; @@ -115,25 +118,26 @@ void GpuPassStrategy::EnableMkldnnQuantizer() { CpuPassStrategy::CpuPassStrategy() : PassStrategy({}) { // NOTE the large fusions should be located in the front, so that they will // not be damaged by smaller ones. 
- passes_.assign({ - "infer_clean_graph_pass", // - "attention_lstm_fuse_pass", // - "seqconv_eltadd_relu_fuse_pass", // - // "seqpool_concat_fuse_pass", // - // "embedding_fc_lstm_fuse_pass", // - "fc_lstm_fuse_pass", // - "mul_lstm_fuse_pass", // - "fc_gru_fuse_pass", // - "mul_gru_fuse_pass", // - "seq_concat_fc_fuse_pass", // - "fc_fuse_pass", // - "repeated_fc_relu_fuse_pass", // - "squared_mat_sub_fuse_pass", // - "conv_bn_fuse_pass", // - "conv_eltwiseadd_bn_fuse_pass", // - "is_test_pass", // - "expected_kernel_cache_pass", // - }); + passes_.assign({"infer_clean_graph_pass", // + "attention_lstm_fuse_pass", // + "seqconv_eltadd_relu_fuse_pass", // + // "seqpool_concat_fuse_pass", // + // "embedding_fc_lstm_fuse_pass", // + "fc_lstm_fuse_pass", // + "mul_lstm_fuse_pass", // + "fc_gru_fuse_pass", // + "mul_gru_fuse_pass", // + "seq_concat_fc_fuse_pass", // + "fc_fuse_pass", // + "repeated_fc_relu_fuse_pass", // + "squared_mat_sub_fuse_pass", // + "conv_bn_fuse_pass", // + "conv_eltwiseadd_bn_fuse_pass", // + "is_test_pass", // + // The following two passes should be placed last, since + // they will work on all fused ops. + "expected_kernel_cache_pass", // + "runtime_context_cache_pass"}); use_gpu_ = false; } diff --git a/paddle/fluid/inference/tests/api/analyzer_pyramid_dnn_tester.cc b/paddle/fluid/inference/tests/api/analyzer_pyramid_dnn_tester.cc index 1bb06aa21af55ce298756c1a95f4f95222218b6e..9443b08063b8f61d3d6b291a7217d645d8825c54 100644 --- a/paddle/fluid/inference/tests/api/analyzer_pyramid_dnn_tester.cc +++ b/paddle/fluid/inference/tests/api/analyzer_pyramid_dnn_tester.cc @@ -110,11 +110,6 @@ void SetConfig(AnalysisConfig *cfg) { if (FLAGS_zero_copy) { cfg->SwitchUseFeedFetchOps(false); } - // Enable runtime_context_cache_pass, disabled by default since it doesn't - // cover all the cases. 
- // See detail: https://github.com/PaddlePaddle/Paddle/issues/16609 - // https://github.com/PaddlePaddle/Paddle/issues/16841 - cfg->pass_builder()->AppendPass("runtime_context_cache_pass"); } void SetInput(std::vector> *inputs) {