diff --git a/paddle/fluid/imperative/prepared_operator.cc b/paddle/fluid/imperative/prepared_operator.cc index 8875ef74bce14eea9d78434f2a0aa174e8bfa092..54f46e49c4f730904056aa546a0c1d6ce51c7791 100644 --- a/paddle/fluid/imperative/prepared_operator.cc +++ b/paddle/fluid/imperative/prepared_operator.cc @@ -487,6 +487,14 @@ static void PreparedOpRunImpl( op.Type(), outs, dev_ctx->GetPlace()); } + if (FLAGS_benchmark) { + dev_ctx->Wait(); +#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP) + PADDLE_ENFORCE_GPU_SUCCESS(platform::GpuGetLastError()); + VLOG(4) << "Operator(" << op.Type() << "): context wait and get last error"; +#endif + } + /** * [ Why need handle complex gradient to real gradient? ] *