未验证 提交 93d20c44 编写于 作者: Z ZhouMengLei1999 提交者: GitHub

[XPU] Fix AnalyseOpFuncType bug for XPU ops: memcpy_d2d on XPU is actually async (#52042)

上级 67a105f9
......@@ -191,7 +191,9 @@ void DataTranferHelper::RunAndConstructOpFuncNode(
(op_type == kMemcpyD2H ? OpFuncType::kGpuSync : OpFuncType::kGpuAsync);
} else if (platform::is_xpu_place(place)) {
// On xpu, memcpy D2H and H2D are synchronous; other memcpy ops (e.g. D2D) are asynchronous
new_op_func_node.type_ = OpFuncType::kGpuSync;
new_op_func_node.type_ = (op_type == kMemcpyD2H || op_type == kMemcpyH2D)
? OpFuncType::kGpuSync
: OpFuncType::kGpuAsync;
} else {
// Memcpy in npu and custom devices is asynchronous
new_op_func_node.type_ = OpFuncType::kGpuAsync;
......
......@@ -336,6 +336,8 @@ OpFuncType AnalyseOpFuncType(const OpFuncNode& op_func_node,
// and so that they would be dispatched to host thread.
std::shared_ptr<OperatorBase> op = op_func_node.operator_base_;
if (op->Type() == kCoalesceTensor &&
(!platform::is_xpu_place(place) ||
op->Attr<bool>("persist_output") == false) &&
op->Attr<bool>("set_constant") == false &&
op->Attr<bool>("copy_data") == false) {
return OpFuncType::kGpuSync;
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册