Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
机器未来
Paddle
提交
03533b0c
P
Paddle
项目概览
机器未来
/
Paddle
与 Fork 源项目一致
Fork自
PaddlePaddle / Paddle
通知
1
Star
1
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
1
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
1
Issue
1
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
未验证
提交
03533b0c
编写于
4月 19, 2022
作者:
F
fwenguang
提交者:
GitHub
4月 19, 2022
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
[MLU] support add callback to stream (#41831)
上级
bb71d834
变更
9
显示空白变更内容
内联
并排
Showing
9 changed file
with
32 addition
and
28 deletion
+32
-28
paddle/fluid/framework/data_device_transform.cc
paddle/fluid/framework/data_device_transform.cc
+0
-8
paddle/fluid/operators/pool_op_mlu.cc
paddle/fluid/operators/pool_op_mlu.cc
+10
-5
paddle/fluid/platform/device/mlu/mlu_stream.h
paddle/fluid/platform/device/mlu/mlu_stream.h
+0
-1
paddle/fluid/platform/profiler/mlu/cnpapi_data_process.cc
paddle/fluid/platform/profiler/mlu/cnpapi_data_process.cc
+1
-0
paddle/fluid/platform/stream_callback_manager.cc
paddle/fluid/platform/stream_callback_manager.cc
+2
-4
paddle/phi/common/backend.h
paddle/phi/common/backend.h
+6
-0
paddle/phi/core/compat/convert_utils.cc
paddle/phi/core/compat/convert_utils.cc
+2
-0
python/paddle/fluid/dygraph/varbase_patch_methods.py
python/paddle/fluid/dygraph/varbase_patch_methods.py
+2
-1
tools/dockerfile/Dockerfile.mlu
tools/dockerfile/Dockerfile.mlu
+9
-9
未找到文件。
paddle/fluid/framework/data_device_transform.cc
浏览文件 @
03533b0c
...
...
@@ -34,14 +34,6 @@ void TransDataDevice(const Tensor &in, const platform::Place &dst_place,
return
;
}
// NOTE(hqp): Special case for CPU->MLU, avoid stream sync.
if
(
platform
::
is_cpu_place
(
in
.
place
())
&&
platform
::
is_mlu_place
(
dst_place
))
{
paddle
::
framework
::
TensorCopy
(
in
,
dst_place
,
*
platform
::
DeviceContextPool
::
Instance
().
Get
(
dst_place
),
out
);
return
;
}
// NOTE(yy): TransDataDevice should wait for computation of input.
if
(
!
platform
::
is_cuda_pinned_place
(
in
.
place
()))
{
platform
::
DeviceContextPool
::
Instance
().
Get
(
in
.
place
())
->
Wait
();
...
...
paddle/fluid/operators/pool_op_mlu.cc
浏览文件 @
03533b0c
...
...
@@ -116,11 +116,16 @@ class MLUPoolOpKernel : public framework::OpKernel<T> {
framework
::
Tensor
extra_device_tensor
=
ctx
.
AllocateTmpTensor
<
int8_t
,
MLUDeviceContext
>
(
{
static_cast
<
int64_t
>
(
extra_input_size
)},
dev_ctx
);
// TODO(fwg): use Async copy, and add a callback to stream that free
// host
// memory.
framework
::
TensorCopySync
(
extra_host_tensor
,
ctx
.
GetPlace
(),
framework
::
TensorCopy
(
extra_host_tensor
,
ctx
.
GetPlace
(),
&
extra_device_tensor
);
// Increase extra_host_tensor holder_ reference count until copy
// complete.
auto
increase_ref_count
=
[
extra_host_tensor
]()
{
VLOG
(
4
)
<<
"Finished copying extra_host_tensor["
<<
GetBasePtr
(
&
extra_host_tensor
)
<<
"] in mlu pooling kernel."
;
};
dev_ctx
.
AddStreamCallback
(
increase_ref_count
);
MLUCnnl
::
PoolingForward
(
ctx
,
pool_mode
,
out_h
,
out_w
,
pool_desc
.
get
(),
nullptr
/*alpha*/
,
in_x_desc
.
get
(),
GetBasePtr
(
in_x
),
nullptr
/*beta*/
,
...
...
paddle/fluid/platform/device/mlu/mlu_stream.h
浏览文件 @
03533b0c
...
...
@@ -40,7 +40,6 @@ class MLUStream final {
template
<
typename
Callback
>
void
AddCallback
(
Callback
&&
callback
)
const
{
// TODO(mlu): mlu not support AddCallback
callback_manager_
->
AddCallback
(
callback
);
}
...
...
paddle/fluid/platform/profiler/mlu/cnpapi_data_process.cc
浏览文件 @
03533b0c
...
...
@@ -202,6 +202,7 @@ CnpapiRuntimeCbidStr::CnpapiRuntimeCbidStr() {
REGISTER_RUNTIME_CBID_STR
(
cnCtxSetCurrent
);
REGISTER_RUNTIME_CBID_STR
(
cnCtxGetDevice
);
REGISTER_RUNTIME_CBID_STR
(
cnCtxSync
);
REGISTER_RUNTIME_CBID_STR
(
cnInvokeHostFunc
);
#undef REGISTER_RUNTIME_CBID_STR
}
...
...
paddle/fluid/platform/stream_callback_manager.cc
浏览文件 @
03533b0c
...
...
@@ -80,10 +80,8 @@ void StreamCallbackManager<Stream>::AddCallback(
#endif
#if PADDLE_WITH_MLU
VLOG
(
3
)
<<
"MLULaunchCallback at stream: "
<<
stream_
<<
" Failed to call MLULaunchCallback, "
<<
"because mlu not support StreamAddCallback yet. "
<<
"function: "
<<
func
;
VLOG
(
3
)
<<
"MLULaunchCallback at stream: "
<<
stream_
;
cnrtInvokeHostFunc
(
stream_
,
StreamCallbackFunc
,
func
);
#endif
}
...
...
paddle/phi/common/backend.h
浏览文件 @
03533b0c
...
...
@@ -47,6 +47,7 @@ enum class Backend : uint8_t {
GPU
,
XPU
,
// XPU currently does not exist at the same time as CUDA
NPU
,
// NPU currently does not exist at the same time as CUDA
MLU
,
// MLU currently does not exist at the same time as CUDA
// the third library backend
MKLDNN
,
...
...
@@ -114,6 +115,9 @@ inline std::ostream& operator<<(std::ostream& os, Backend backend) {
case
Backend
::
NPU
:
os
<<
"NPU"
;
break
;
case
Backend
::
MLU
:
os
<<
"MLU"
;
break
;
case
Backend
::
MKLDNN
:
os
<<
"MKLDNN"
;
break
;
...
...
@@ -154,6 +158,8 @@ inline Backend StringToBackend(const char* backend_cstr) {
return
Backend
::
XPU
;
}
else
if
(
s
==
std
::
string
(
"NPU"
))
{
return
Backend
::
NPU
;
}
else
if
(
s
==
std
::
string
(
"MLU"
))
{
return
Backend
::
MLU
;
}
else
if
(
s
==
std
::
string
(
"MKLDNN"
))
{
return
Backend
::
MKLDNN
;
}
else
if
(
s
==
std
::
string
(
"GPUDNN"
))
{
...
...
paddle/phi/core/compat/convert_utils.cc
浏览文件 @
03533b0c
...
...
@@ -40,6 +40,8 @@ Backend TransToPhiBackend(const phi::Place& place) {
return
Backend
::
NPU
;
}
else
if
(
allocation_type
==
phi
::
AllocationType
::
IPU
)
{
return
Backend
::
IPU
;
}
else
if
(
allocation_type
==
phi
::
AllocationType
::
MLU
)
{
return
Backend
::
MLU
;
}
else
if
(
allocation_type
==
phi
::
AllocationType
::
CUSTOM
)
{
return
static_cast
<
Backend
>
(
static_cast
<
size_t
>
(
Backend
::
NUM_BACKENDS
)
+
...
...
python/paddle/fluid/dygraph/varbase_patch_methods.py
浏览文件 @
03533b0c
...
...
@@ -271,7 +271,8 @@ def monkey_patch_varbase():
if
_grad_scalar
:
# When using amp with Fleet DistributedStrategy, we do loss scaling implicitly.
self
=
_grad_scalar
.
scale
(
self
)
if
paddle
.
is_compiled_with_xpu
()
or
paddle
.
is_compiled_with_npu
():
if
paddle
.
is_compiled_with_xpu
()
or
paddle
.
is_compiled_with_npu
(
)
or
paddle
.
is_compiled_with_mlu
():
# TODO(liuyuhui): Currently only for xpu. Will be removed in the future.
scaled_loss
=
scale_loss
(
self
)
if
framework
.
_in_eager_mode_
:
...
...
tools/dockerfile/Dockerfile.mlu
浏览文件 @
03533b0c
...
...
@@ -2,9 +2,9 @@
# Update CNTOOLKIT_VERSION, CNNL_VERSION and CNCL_VERSION if using other versions
#
# Build:
# - CNTOOLKIT_VERSION 2.
6.5
-1
# - CNNL_VERSION 1.
8
.3-1
# - CNCL_VERSION 1.0.
2
-1
# - CNTOOLKIT_VERSION 2.
8.1
-1
# - CNNL_VERSION 1.
9
.3-1
# - CNCL_VERSION 1.0.
4
-1
#
# Download three packages from FTP (need to connect cambricon AE to get FTP url)
# - cntoolkit_2.6.5-1.ubuntu18.04_amd64.deb
...
...
@@ -21,9 +21,9 @@
# (get cncl pkg)
#
# docker build -f Dockerfile.mlu \
# --build-arg CNTOOLKIT_VERSION=2.
6.5
-1 \
# --build-arg CNNL_VERSION=1.
8
.3-1 \
# --build-arg CNCL_VERSION=1.0.
2
-1 \
# --build-arg CNTOOLKIT_VERSION=2.
8.1
-1 \
# --build-arg CNNL_VERSION=1.
9
.3-1 \
# --build-arg CNCL_VERSION=1.0.
4
-1 \
# -t paddlepaddle/paddle:latest-dev-mlu .
#
# without mlu device:
...
...
@@ -40,9 +40,9 @@ MAINTAINER PaddlePaddle Authors <paddle-dev@baidu.com>
ENV WITH_GPU=OFF
ARG CNTOOLKIT_VERSION=2.
6.5
-1
ARG CNNL_VERSION=1.
8
.3-1
ARG CNCL_VERSION=1.0.
2
-1
ARG CNTOOLKIT_VERSION=2.
8.1
-1
ARG CNNL_VERSION=1.
9
.3-1
ARG CNCL_VERSION=1.0.
4
-1
ARG CNTOOLKIT_PKG=cntoolkit_$CNTOOLKIT_VERSION.ubuntu18.04_amd64.deb
ARG CNNL_PKG=cnnl_$CNNL_VERSION.ubuntu18.04_amd64.deb
ARG CNCL_PKG=cncl_$CNCL_VERSION.ubuntu18.04_amd64.deb
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录