Commit d6ad8937 authored by dingminghui

feat(subgraph): execute cnml compute forward asynchronously

Parent 26c8b551
@@ -125,10 +125,7 @@ class Graph {
                                             que));
 #if PRINT_HW_TIME
     CNRT_CALL(cnrtPlaceNotifier(notifier_end_, que));
-#endif
-    CNRT_CALL(cnrtSyncQueue(que));
-#if PRINT_HW_TIME
     CNRT_CALL(cnrtNotifierDuration(notifier_start_, notifier_end_, &hw_time));
     hw_time /= 1000.0f;
     DLOG(INFO) << "cnml hardware time " << hw_time << "ms" << std::endl;
......
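The first hunk makes the forward launch non-blocking: the unconditional cnrtSyncQueue(que) that used to follow the cnml fused-op forward is dropped, so Compute() only places work on the CNRT queue and returns. Below is a minimal sketch of that launch path, not the file's actual code: the free-function form, the parameter names, and the cnmlComputeFusionOpForward_V3 entry point (which is not visible in this excerpt) are assumptions.

#include <vector>

#include "cnml.h"  // Cambricon CNML/CNRT headers, assumed available in the MLU build
#include "cnrt.h"

// Hedged sketch: launch a prebuilt fusion op on `queue` and return immediately.
// Error checking via the CNML_CALL / CNRT_CALL macros used in the real code is elided.
void LaunchFusionOpAsync(cnmlFusionOp_t fusion_op,
                         std::vector<void*>& input_addrs,
                         std::vector<void*>& output_addrs,
                         cnrtInvokeFuncParam_t* forward_param,
                         cnrtQueue_t queue) {
  // Enqueue the fused compute; control returns to the host right away.
  cnmlComputeFusionOpForward_V3(fusion_op,
                                input_addrs.data(),
                                input_addrs.size(),
                                output_addrs.data(),
                                output_addrs.size(),
                                forward_param,
                                queue);
  // Deliberately no cnrtSyncQueue(queue) here: whoever consumes the outputs
  // (for example the MLU-to-host io_copy kernel below) syncs the queue first.
}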
@@ -79,6 +79,11 @@ class IoCopyMluToHostCompute
     CHECK(param.x->target() == TARGET(kMLU));
     auto mem_size = param.x->memory_size();
     auto* data = param.y->mutable_data(TARGET(kHost), mem_size);
+    // sync queue to ensure process done
+    auto& mlu_context = this->ctx_->template As<MLUContext>();
+    CNRT_CALL(cnrtSyncQueue(mlu_context.exec_queue()));
     CopyToHostSync(data, param.x->raw_data(), mem_size);
   }
......
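Because the forward is now asynchronous, the MLU-to-host io_copy kernel becomes the point where the exec queue is drained: it syncs mlu_context.exec_queue() before CopyToHostSync reads param.x, so the host never copies a buffer the device may still be writing. The same enqueue-then-sync-before-read pattern is sketched below in bare CNRT terms; the function name and parameters are illustrative, and CopyToHostSync in the kernel presumably wraps a device-to-host copy of this kind.

#include <cstddef>

#include "cnrt.h"  // Cambricon CNRT header, assumed available in the MLU build

// Hedged sketch of the read-back pattern this commit relies on: device work is
// enqueued asynchronously, and the queue is synchronized exactly once, right
// before the result is copied to host memory.
void ReadBackAfterSync(cnrtQueue_t queue,
                       void* host_dst,
                       void* mlu_src,
                       size_t bytes) {
  // Block until everything previously enqueued on `queue` has finished,
  // including the asynchronous cnml forward launched above.
  cnrtSyncQueue(queue);

  // The device buffer is now stable and can be copied back to the host.
  cnrtMemcpy(host_dst, mlu_src, bytes, CNRT_MEM_TRANS_DIR_DEV2HOST);
}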