diff --git a/paddle/framework/operator.cc b/paddle/framework/operator.cc
index 35ebe48ba682f135b7f85edb3b2999db7c29e51a..c1a6d0221baa1238adb4efa46b1f071cf2efbaf7 100644
--- a/paddle/framework/operator.cc
+++ b/paddle/framework/operator.cc
@@ -543,8 +543,14 @@ void OperatorWithKernel::Run(const Scope& scope,
 
   auto kernel_iter = kernels.find(expected_kernel_key);
 
-  kernel_iter->second->Compute(ExecutionContext(
-      *this, new_scope, *pool.Get(expected_kernel_key.place_)));
+  auto* new_dev_ctx = pool.Get(expected_kernel_key.place_);
+  kernel_iter->second->Compute(
+      ExecutionContext(*this, new_scope, *new_dev_ctx));
+
+  /* For profiling/benchmark only */
+  if (FLAGS_op_sync) {
+    new_dev_ctx->Wait();
+  }
 }
 
 proto::DataType OperatorWithKernel::IndicateDataType(
diff --git a/paddle/platform/gpu_info.cc b/paddle/platform/gpu_info.cc
index 7037551d7544d6fea54e2f4bf887309b7dc5a52e..9d3147362ab18cc37dbf75b85c71cf9a307a16cf 100644
--- a/paddle/platform/gpu_info.cc
+++ b/paddle/platform/gpu_info.cc
@@ -22,6 +22,10 @@ DEFINE_double(fraction_of_gpu_memory_to_use, 0.92,
               "Default use 92% of GPU memory for PaddlePaddle,"
               "reserve the rest for page tables, etc");
 
+DEFINE_bool(op_sync, false,
+            "CUDA devices execute asynchronously by default; set to True "
+            "to force each op to run in synchronous mode.");
+
 namespace paddle {
 namespace platform {
 
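
For context: the patch names the device context returned by `pool.Get(...)` so that, when the new `FLAGS_op_sync` flag is set, a `Wait()` can be issued right after each op's `Compute` call. Because CUDA launches are asynchronous, draining the device queue after every op is what makes per-op wall-clock timings meaningful during profiling or benchmarking. Below is a minimal, self-contained sketch of the same define-a-flag / wait-after-launch pattern using gflags; the `DeviceContext` and `RunOneOp` names here are hypothetical stand-ins for illustration, not Paddle's actual types.

```cpp
// Sketch only: define a boolean gflag and use it to optionally block
// after asynchronous device work, mirroring the pattern in this patch.
#include <cstdio>
#include <gflags/gflags.h>

DEFINE_bool(op_sync, false,
            "CUDA devices execute asynchronously by default; set to True "
            "to force each op to run in synchronous mode.");
// In another translation unit the flag would be made visible with
// DECLARE_bool(op_sync); before reading FLAGS_op_sync.

// Hypothetical stand-in for paddle::platform::DeviceContext.
struct DeviceContext {
  // Block until all work queued on this device has finished.
  void Wait() const {}
};

void RunOneOp(const DeviceContext& dev_ctx) {
  // ... enqueue the op's kernel asynchronously on dev_ctx ...
  if (FLAGS_op_sync) {
    // For profiling/benchmark only: drain the device queue after each op
    // so the time spent in this op is attributed to this op.
    dev_ctx.Wait();
  }
}

int main(int argc, char* argv[]) {
  gflags::ParseCommandLineFlags(&argc, &argv, /*remove_flags=*/true);
  DeviceContext ctx;
  RunOneOp(ctx);
  std::printf("op_sync=%d\n", static_cast<int>(FLAGS_op_sync));
  return 0;
}
```

As with any gflags boolean, a binary built this way would accept `--op_sync=true` on its command line; how PaddlePaddle forwards the flag from its Python front end is outside the scope of this diff.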