未验证 提交 2259ced1 编写于 作者: J JZ-LIANG 提交者: GitHub

[AutoParallel-Performance] AMP Flag Memcpy support newexe Overlap (#49219)

* memcpy overlap

* memcpy newexe
上级 fd1730e4
......@@ -320,6 +320,11 @@ OpFuncType AnalyseOpFuncType(const OpFuncNode& op_func_node,
return OpFuncType::kGpuSync;
}
// For memcpy ops explicitly called by the user.
if (platform::is_gpu_place(place) && op->Type() == interpreter::kMemcpyD2H) {
return OpFuncType::kGpuSync;
}
if (op->Type() == "shape") {
return OpFuncType::kGpuSync;
}
......
......@@ -663,8 +663,6 @@ def _insert_memcopy(block, idx, src_var, dist_context, direction="D2H"):
# TODO: support CUDAPinned/NPU/XPU places.
if direction == "D2H":
dst_place_type = 0
elif direction == "D2H":
dst_place_type = 1
else:
raise NotImplementedError(
"direction [{}] is not supported yet.".format(direction)
......@@ -673,7 +671,7 @@ def _insert_memcopy(block, idx, src_var, dist_context, direction="D2H"):
attrs = {'dst_place_type': dst_place_type}
new_op = block._insert_op_without_sync(
index=idx,
type='memcpy',
type='memcpy_d2h',
inputs={'X': [src_var]},
outputs={'Out': [output_var]},
attrs=attrs,
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册