提交 f0316bdb 编写于 作者: D dzhwinter

"add flags"

上级 42daf4c3
......@@ -543,8 +543,14 @@ void OperatorWithKernel::Run(const Scope& scope,
auto kernel_iter = kernels.find(expected_kernel_key);
kernel_iter->second->Compute(ExecutionContext(
*this, new_scope, *pool.Get(expected_kernel_key.place_)));
auto* new_dev_ctx = pool.Get(expected_kernel_key.place_);
kernel_iter->second->Compute(
ExecutionContext(*this, new_scope, *new_dev_ctx));
/*For profiling/benchmark only*/
if (FLAGS_op_sync) {
new_dev_ctx->Wait();
}
}
proto::DataType OperatorWithKernel::IndicateDataType(
......
......@@ -22,6 +22,10 @@ DEFINE_double(fraction_of_gpu_memory_to_use, 0.92,
"Default use 92% of GPU memory for PaddlePaddle,"
"reserve the rest for page tables, etc");
// Profiling/benchmark switch: when true, OperatorWithKernel::Run calls Wait()
// on the device context after each kernel's Compute, so the op finishes
// before Run returns. Defaults to false.
//
// Adjacent string literals are concatenated verbatim, so the first fragment
// must end with a space to keep "will" and "force" separate in --help output.
DEFINE_bool(op_sync, false,
            "Default cuda is asynchronous device, set to True will "
            "force op run in synchronous mode.");
namespace paddle {
namespace platform {
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册