Commit 1967c6a6 (unverified)

enable memory optimize when fp16. (#45792)

Authored by Wilber on Sep 06, 2022; committed via GitHub on Sep 06, 2022.
Parent commit: 8f37c66f
Showing 3 changed files with 46 additions and 9 deletions (+46 -9). The change drops the FP32-only guard around memory optimization in AnalysisPredictor::PrepareArgument(), adds DisplayMemoryInfo() logging around predictor initialization and each run, and fixes creation of the tensor-core cuBLAS handle in GPUContext:

paddle/fluid/inference/api/analysis_predictor.cc  (+4 -8)
paddle/fluid/inference/api/helper.h               (+41 -0)
paddle/phi/backends/gpu/gpu_context.cc            (+1 -1)
paddle/fluid/inference/api/analysis_predictor.cc
@@ -292,6 +292,7 @@ bool AnalysisPredictor::Init(
     }
   }
 #endif
 
+  inference::DisplayMemoryInfo(place_, "Init predictor");
   return true;
 }
@@ -1050,14 +1051,7 @@ void AnalysisPredictor::PrepareArgument() {
   argument_.SetUseFcPadding(config_.use_fc_padding());
   argument_.SetGPUDeviceId(config_.gpu_device_id());
   argument_.SetEnableAnalysisOptim(config_.enable_ir_optim_);
-  if (model_precision_ == phi::DataType::FLOAT32) {
-    argument_.SetEnableMemoryOptim(config_.enable_memory_optim());
-  } else {
-    // TODO(inference): mixed precision temporarily not support memory_optim
-    LOG_FIRST_N(WARNING, 1) << "mixed precision model temporarily not support "
-                               "memory optim, so we just turn off that.";
-    argument_.SetEnableMemoryOptim(false);
-  }
+  argument_.SetEnableMemoryOptim(config_.enable_memory_optim());
   argument_.SetModelFromMemory(config_.model_from_memory_);
   // Analyze inference_program
   argument_.SetPredictorID(predictor_id_);
@@ -1622,6 +1616,7 @@ std::unique_ptr<ZeroCopyTensor> AnalysisPredictor::GetOutputTensor(
 }
 
 bool AnalysisPredictor::ZeroCopyRun() {
+  inference::DisplayMemoryInfo(place_, "before run");
 #if defined(PADDLE_WITH_DISTRIBUTE) && defined(PADDLE_WITH_PSCORE)
   if (config_.dist_config().use_dist_model()) {
     VLOG(3) << "ZeroCopyRun will use the fleet executor.";
@@ -1659,6 +1654,7 @@ bool AnalysisPredictor::ZeroCopyRun() {
 #endif
 
   executor_->Run();
+  inference::DisplayMemoryInfo(place_, "after run");
 
   if (config_.shape_range_info_collected()) {
     CollectShapeRangeInfo();
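With the precision guard removed, whether memory optimization runs is once again controlled solely by the user-facing config. A minimal sketch against the public Paddle Inference C++ API exercising the now-honored switch (the model paths and pool size below are placeholders):

// Minimal sketch: paddle_infer::Config, CreatePredictor, and Predictor::Run
// are the public Paddle Inference API; file names are placeholders.
#include "paddle_inference_api.h"

int main() {
  paddle_infer::Config config("model.pdmodel", "model.pdiparams");
  config.EnableUseGpu(/*memory_pool_init_size_mb=*/500, /*device_id=*/0);
  config.EnableMemoryOptim();  // with this commit, honored for FP16 models too

  auto predictor = paddle_infer::CreatePredictor(config);
  // ... set inputs via predictor->GetInputHandle(name), then:
  predictor->Run();  // dispatches to AnalysisPredictor::ZeroCopyRun()
  return 0;
}

Before this commit, PrepareArgument() silently forced memory optimization off for any non-FP32 model and logged a warning; after it, the EnableMemoryOptim() request above takes effect for mixed-precision models as well.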
paddle/fluid/inference/api/helper.h
@@ -31,7 +31,9 @@
 #include "paddle/fluid/framework/data_type.h"
 #include "paddle/fluid/inference/api/paddle_inference_api.h"
+#include "paddle/fluid/memory/stats.h"
 #include "paddle/fluid/platform/enforce.h"
+#include "paddle/fluid/platform/place.h"
 #include "paddle/fluid/string/printf.h"
 #include "paddle/phi/backends/dynload/port.h"
@@ -421,5 +423,44 @@ static bool IsFileExists(const std::string &path) {
 
 void RegisterAllCustomOperator();
 
+static inline double ToMegaBytes(size_t bytes) {
+  return static_cast<double>(bytes) / (1 << 20);
+}
+
+static inline void DisplayMemoryInfo(platform::Place place,
+                                     const std::string &hint) {
+#ifdef PADDLE_WITH_CUDA
+  // size_t free, total;
+  // cudaSetDevice(place.GetDeviceId());
+  // cudaMemGetInfo(&free, &total);
+  // VLOG(1) << "[" << ToMegaBytes(total - free) << "MB/" << ToMegaBytes(total)
+  // << "MB]";
+  VLOG(1) << hint << " : [gpu current allocated memory: "
+          << ToMegaBytes(paddle::memory::DeviceMemoryStatCurrentValue(
+                 "Allocated", place.GetDeviceId()))
+          << "MB], [gpu current reserved memory: "
+          << ToMegaBytes(paddle::memory::DeviceMemoryStatCurrentValue(
+                 "Reserved", place.GetDeviceId()))
+          << "MB], [gpu peak allocated memory: "
+          << ToMegaBytes(paddle::memory::DeviceMemoryStatPeakValue(
+                 "Allocated", place.GetDeviceId()))
+          << "MB], [gpu peak reserved memory: "
+          << ToMegaBytes(paddle::memory::DeviceMemoryStatPeakValue(
+                 "Reserved", place.GetDeviceId()))
+          << "MB]";
+#endif
+  VLOG(1) << hint << " : [cpu current allocated memory: "
+          << ToMegaBytes(
+                 paddle::memory::HostMemoryStatCurrentValue("Allocated", 0))
+          << "MB], [cpu current reserved memory: "
+          << ToMegaBytes(
+                 paddle::memory::HostMemoryStatCurrentValue("Reserved", 0))
+          << "MB], [cpu peak allocated memory: "
+          << ToMegaBytes(
+                 paddle::memory::HostMemoryStatPeakValue("Allocated", 0))
+          << "MB], [cpu peak reserved memory: "
+          << ToMegaBytes(
+                 paddle::memory::HostMemoryStatPeakValue("Reserved", 0))
+          << "MB]";
+}
+
 }  // namespace inference
 }  // namespace paddle
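The new helpers convert allocator byte counters to binary megabytes (1 << 20 bytes) and emit them through VLOG(1), so the lines only appear at glog verbosity 1 or higher (e.g. running with GLOG_v=1). A self-contained sketch of the same conversion and log format, with printf standing in for VLOG and an invented byte count:

#include <cstddef>
#include <cstdio>

// Mirrors helper.h's ToMegaBytes: bytes -> binary megabytes (1 << 20 bytes).
static inline double ToMegaBytes(std::size_t bytes) {
  return static_cast<double>(bytes) / (1 << 20);
}

int main() {
  // Invented stat value: pretend the allocator reports 512 MiB in use.
  std::size_t allocated = 512ull * 1024 * 1024;
  // DisplayMemoryInfo emits this shape of line via VLOG(1); plain printf
  // stands in for glog in this sketch.
  std::printf("before run : [gpu current allocated memory: %.2fMB]\n",
              ToMegaBytes(allocated));  // prints 512.00
  return 0;
}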
paddle/phi/backends/gpu/gpu_context.cc
@@ -575,7 +575,7 @@ struct GPUContext::Impl {
     if (!blas_tensor_core_handle_creator_) {
       phi::InitBlasHandle(&blas_tensor_core_handle_, stream());
     } else {
-      phi::InitBlasHandle(&blas_tensor_core_handle_, stream());
+      blas_tensor_core_handle_ = blas_tensor_core_handle_creator_();
    }
     PADDLE_RETRY_CUDA_SUCCESS(phi::dynload::cublasSetMathMode(
         blas_tensor_core_handle_, CUBLAS_TENSOR_OP_MATH));
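For orientation, this one-line fix concerns an optionally injected creator: when blas_tensor_core_handle_creator_ is set, its result must actually be assigned to the handle instead of the default phi::InitBlasHandle path running again. A simplified standalone sketch of that pattern (names invented; int stands in for the real cuBLAS handle type):

#include <functional>
#include <iostream>

// Standalone sketch of the handle-initialization pattern this hunk fixes.
using Handle = int;

struct Context {
  Handle handle_ = 0;
  std::function<Handle()> handle_creator_;  // optionally injected

  void InitHandle() {
    if (!handle_creator_) {
      handle_ = 42;  // default path, standing in for phi::InitBlasHandle
    } else {
      // The fix: call the injected creator and keep its result, rather
      // than falling through to the default initializer again.
      handle_ = handle_creator_();
    }
  }
};

int main() {
  Context ctx;
  ctx.handle_creator_ = [] { return 7; };  // externally supplied handle
  ctx.InitHandle();
  std::cout << ctx.handle_ << "\n";  // prints 7, not the default 42
  return 0;
}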