Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
BaiXuePrincess
Paddle
提交
de6e7431
P
Paddle
项目概览
BaiXuePrincess
/
Paddle
与 Fork 源项目一致
Fork自
PaddlePaddle / Paddle
通知
1
Star
1
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
未验证
提交
de6e7431
编写于
10月 19, 2022
作者:
Y
Yuanle Liu
提交者:
GitHub
10月 19, 2022
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
add nvtxRangePush/Pop for naive_executor and refine some code (#47139)
上级
065608dd
变更
11
显示空白变更内容
内联
并排
Showing
11 changed file
with
66 addition
and
27 deletion
+66
-27
CMakeLists.txt
CMakeLists.txt
+1
-0
cmake/inference_lib.cmake
cmake/inference_lib.cmake
+4
-0
paddle/fluid/framework/naive_executor.cc
paddle/fluid/framework/naive_executor.cc
+17
-2
paddle/fluid/inference/CMakeLists.txt
paddle/fluid/inference/CMakeLists.txt
+4
-0
paddle/fluid/inference/api/analysis_config.cc
paddle/fluid/inference/api/analysis_config.cc
+1
-1
paddle/fluid/inference/api/analysis_predictor.cc
paddle/fluid/inference/api/analysis_predictor.cc
+6
-6
paddle/fluid/inference/api/analysis_predictor.h
paddle/fluid/inference/api/analysis_predictor.h
+1
-1
paddle/fluid/platform/device/gpu/cuda/cuda_profiler.cc
paddle/fluid/platform/device/gpu/cuda/cuda_profiler.cc
+13
-5
paddle/fluid/platform/device/gpu/cuda/cuda_profiler.h
paddle/fluid/platform/device/gpu/cuda/cuda_profiler.h
+14
-4
paddle/fluid/platform/dynload/nvtx.h
paddle/fluid/platform/dynload/nvtx.h
+4
-8
paddle/phi/backends/dynload/nvtx.h
paddle/phi/backends/dynload/nvtx.h
+1
-0
未找到文件。
CMakeLists.txt
浏览文件 @
de6e7431
...
...
@@ -268,6 +268,7 @@ option(WITH_PSCORE "Compile with parameter server support" ${WITH_DISTRIBUTE})
option
(
WITH_HETERPS
"Compile with heterps"
OFF}
)
option
(
WITH_INFERENCE_API_TEST
"Test fluid inference C++ high-level api interface"
OFF
)
option
(
WITH_INFERENCE_NVTX
"Paddle inference with nvtx for profiler"
OFF
)
option
(
PY_VERSION
"Compile PaddlePaddle with python3 support"
${
PY_VERSION
}
)
option
(
WITH_DGC
"Use DGC(Deep Gradient Compression) or not"
${
WITH_DISTRIBUTE
}
)
option
(
...
...
cmake/inference_lib.cmake
浏览文件 @
de6e7431
...
...
@@ -356,6 +356,10 @@ else()
)
endif
()
if
(
WITH_INFERENCE_NVTX AND NOT WIN32
)
add_definitions
(
-DPADDLE_WITH_INFERENCE_NVTX
)
endif
()
copy
(
inference_lib_dist
SRCS
${
src_dir
}
/inference/capi_exp/pd_*.h
${
paddle_inference_c_lib
}
...
...
paddle/fluid/framework/naive_executor.cc
浏览文件 @
de6e7431
...
...
@@ -22,9 +22,12 @@
#ifdef PADDLE_WITH_MKLDNN
#include "paddle/fluid/platform/mkldnn_helper.h"
#endif
#if PADDLE_WITH_TENSORRT
#if
def
PADDLE_WITH_TENSORRT
#include "paddle/fluid/operators/tensorrt/tensorrt_engine_op.h"
#endif
#ifdef PADDLE_WITH_INFERENCE_NVTX
#include "paddle/fluid/platform/device/gpu/cuda/cuda_profiler.h"
#endif
namespace
paddle
{
namespace
framework
{
...
...
@@ -48,12 +51,24 @@ void NaiveExecutor::Run() {
platform
::
RegisterModelLayout
(
ops_
,
place_
);
#endif
platform
::
ScopedFlushDenormal
flush
;
#ifdef PADDLE_WITH_INFERENCE_NVTX
platform
::
CudaNvtxRangePush
(
"model"
,
platform
::
NvtxRangeColor
::
Yellow
);
#endif
for
(
auto
&
op
:
ops_
)
{
VLOG
(
4
)
<<
std
::
this_thread
::
get_id
()
<<
" run "
<<
op
->
DebugStringEx
(
scope_
)
<<
" on scope "
<<
scope_
;
op
->
SetIsCalledByExecutor
(
false
);
#ifdef PADDLE_WITH_INFERENCE_NVTX
platform
::
CudaNvtxRangePush
(
op
->
Type
(),
platform
::
NvtxRangeColor
::
Green
);
#endif
op
->
Run
(
*
scope_
,
place_
);
#ifdef PADDLE_WITH_INFERENCE_NVTX
platform
::
CudaNvtxRangePop
();
#endif
}
#ifdef PADDLE_WITH_INFERENCE_NVTX
platform
::
CudaNvtxRangePop
();
#endif
}
void
NaiveExecutor
::
CreateVariables
(
const
ProgramDesc
&
desc
,
...
...
@@ -146,7 +161,7 @@ NaiveExecutor::~NaiveExecutor() {
}
void
NaiveExecutor
::
ResetTrtOps
(
int
num
)
{
#if PADDLE_WITH_TENSORRT
#if
def
PADDLE_WITH_TENSORRT
for
(
auto
&
op
:
ops_
)
{
if
(
op
->
Type
()
==
"tensorrt_engine"
)
{
operators
::
TensorRTEngineOp
*
trtop
=
...
...
paddle/fluid/inference/CMakeLists.txt
浏览文件 @
de6e7431
...
...
@@ -108,6 +108,10 @@ if(WITH_PSCORE)
tensor_table
)
endif
()
if
(
WITH_INFERENCE_NVTX AND NOT WIN32
)
set
(
SHARED_INFERENCE_DEPS
${
SHARED_INFERENCE_DEPS
}
cuda_profiler
)
endif
()
if
(
WITH_ONNXRUNTIME
)
set
(
SHARED_INFERENCE_SRCS
${
SHARED_INFERENCE_SRCS
}
...
...
paddle/fluid/inference/api/analysis_config.cc
浏览文件 @
de6e7431
...
...
@@ -655,7 +655,7 @@ void AnalysisConfig::EnableTensorRtEngine(
}
use_tensorrt_
=
true
;
#if PADDLE_WITH_TENSORRT
#if
def
PADDLE_WITH_TENSORRT
// https://forums.developer.nvidia.com/t/nvinfer1-createexecutioncontextwithoutdevicememory-returns-nullptr/111878/2
// when trt version less than 7.2,
// createExecutionContextWithoutDeviceMemory() has bug.
...
...
paddle/fluid/inference/api/analysis_predictor.cc
浏览文件 @
de6e7431
...
...
@@ -79,7 +79,7 @@
#include "paddle/fluid/inference/api/onnxruntime_predictor.h"
#endif
#if PADDLE_WITH_TENSORRT
#if
def
PADDLE_WITH_TENSORRT
#include "paddle/fluid/inference/tensorrt/convert/op_converter.h"
#include "paddle/fluid/inference/tensorrt/helper.h"
#include "paddle/fluid/inference/tensorrt/trt_int8_calibrator.h"
...
...
@@ -92,7 +92,7 @@
namespace
paddle
{
using
inference
::
Singleton
;
#if PADDLE_WITH_TENSORRT
#if
def
PADDLE_WITH_TENSORRT
using
inference
::
tensorrt
::
TRTCalibratorEngine
;
using
inference
::
tensorrt
::
TRTCalibratorEngineManager
;
using
inference
::
tensorrt
::
TRTInt8Calibrator
;
...
...
@@ -1271,7 +1271,7 @@ void AnalysisPredictor::OptimizeInferenceProgram() {
[](
framework
::
ProgramDesc
*
prog
)
{
// Note, please do NOT use any member variables, because member variables may
// have been destructed in multiple threads.
#if PADDLE_WITH_TENSORRT
#if
def
PADDLE_WITH_TENSORRT
auto
&
block
=
prog
->
Block
(
0
);
for
(
auto
&
op_desc
:
block
.
AllOps
())
{
if
(
op_desc
->
Type
()
==
"tensorrt_engine"
)
{
...
...
@@ -1977,7 +1977,7 @@ void AnalysisPredictor::ClearIntermediateTensor() {
}
}
#if PADDLE_WITH_TENSORRT
#if
def
PADDLE_WITH_TENSORRT
bool
AnalysisPredictor
::
SaveTrtCalibToDisk
()
{
PADDLE_ENFORCE_EQ
(
config_
.
tensorrt_engine_enabled
(),
true
,
...
...
@@ -2033,7 +2033,7 @@ bool AnalysisPredictor::SaveTrtCalibToDisk() {
#endif
AnalysisPredictor
::~
AnalysisPredictor
()
{
#if PADDLE_WITH_TENSORRT
#if
def
PADDLE_WITH_TENSORRT
if
(
config_
.
tensorrt_engine_enabled
()
&&
config_
.
tensorrt_precision_mode_
==
AnalysisConfig
::
Precision
::
kInt8
&&
Singleton
<
TRTCalibratorEngineManager
>::
Global
().
Has
())
{
...
...
@@ -2157,7 +2157,7 @@ std::unique_ptr<PaddlePredictor> CreatePaddlePredictor<AnalysisConfig>(
}
// namespace paddle
#if PADDLE_WITH_TENSORRT
#if
def
PADDLE_WITH_TENSORRT
USE_TRT_CONVERTER
(
elementwise_add_weight
);
USE_TRT_CONVERTER
(
elementwise_sub_weight
);
USE_TRT_CONVERTER
(
elementwise_mul_weight
);
...
...
paddle/fluid/inference/api/analysis_predictor.h
浏览文件 @
de6e7431
...
...
@@ -382,7 +382,7 @@ class AnalysisPredictor : public PaddlePredictor {
///
void
MkldnnPostReset
();
#if PADDLE_WITH_TENSORRT
#if
def
PADDLE_WITH_TENSORRT
///
/// \brief save calibration table
///
...
...
paddle/fluid/platform/device/gpu/cuda/cuda_profiler.cc
浏览文件 @
de6e7431
...
...
@@ -17,9 +17,9 @@
namespace
paddle
{
namespace
platform
{
void
CudaProfilerInit
(
std
::
string
output_file
,
std
::
string
output_mode
,
std
::
string
config_file
)
{
void
CudaProfilerInit
(
const
std
::
string
&
output_file
,
const
std
::
string
&
output_mode
,
const
std
::
string
&
config_file
)
{
PADDLE_ENFORCE
(
output_mode
==
"kvp"
||
output_mode
==
"csv"
,
platform
::
errors
::
InvalidArgument
(
"Unsupported cuda profiler output mode, expect `kvp` or "
...
...
@@ -35,8 +35,16 @@ void CudaProfilerStart() { PADDLE_ENFORCE_GPU_SUCCESS(cudaProfilerStart()); }
void
CudaProfilerStop
()
{
PADDLE_ENFORCE_GPU_SUCCESS
(
cudaProfilerStop
());
}
#ifndef _WIN32
void
CudaNvtxRangePush
(
std
::
string
name
)
{
dynload
::
nvtxRangePushA
(
name
.
c_str
());
void
CudaNvtxRangePush
(
const
std
::
string
&
name
,
const
NvtxRangeColor
color
)
{
nvtxEventAttributes_t
eventAttrib
;
eventAttrib
.
version
=
NVTX_VERSION
;
eventAttrib
.
size
=
NVTX_EVENT_ATTRIB_STRUCT_SIZE
;
eventAttrib
.
colorType
=
NVTX_COLOR_ARGB
;
eventAttrib
.
color
=
static_cast
<
uint32_t
>
(
color
);
eventAttrib
.
messageType
=
NVTX_MESSAGE_TYPE_ASCII
;
eventAttrib
.
message
.
ascii
=
name
.
c_str
();
dynload
::
nvtxRangePushEx
(
&
eventAttrib
);
}
void
CudaNvtxRangePop
()
{
dynload
::
nvtxRangePop
();
}
...
...
paddle/fluid/platform/device/gpu/cuda/cuda_profiler.h
浏览文件 @
de6e7431
...
...
@@ -23,16 +23,26 @@ limitations under the License. */
namespace
paddle
{
namespace
platform
{
void
CudaProfilerInit
(
std
::
string
output_file
,
std
::
string
output_mode
,
std
::
string
config_file
);
void
CudaProfilerInit
(
const
std
::
string
&
output_file
,
const
std
::
string
&
output_mode
,
const
std
::
string
&
config_file
);
void
CudaProfilerStart
();
void
CudaProfilerStop
();
#ifndef _WIN32
void
CudaNvtxRangePush
(
std
::
string
name
);
enum
class
NvtxRangeColor
:
uint32_t
{
Black
=
0x00000000
,
Red
=
0x00ff0000
,
Green
=
0x0000ff00
,
Blue
=
0x000000ff
,
White
=
0x00ffffff
,
Yellow
=
0x00ffff00
,
};
void
CudaNvtxRangePush
(
const
std
::
string
&
name
,
const
NvtxRangeColor
color
=
NvtxRangeColor
::
Blue
);
void
CudaNvtxRangePop
();
#endif
...
...
paddle/fluid/platform/dynload/nvtx.h
浏览文件 @
de6e7431
...
...
@@ -13,11 +13,6 @@ See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#ifndef _WIN32
#include <cuda.h>
#include <nvToolsExt.h>
#include <mutex> // NOLINT
#include "paddle/phi/backends/dynload/nvtx.h"
namespace
paddle
{
...
...
@@ -28,11 +23,12 @@ namespace dynload {
using DynLoad__##__name = phi::dynload::DynLoad__##__name; \
extern DynLoad__##__name __name
#define NVTX_ROUTINE_EACH(__macro) \
#define
PLATFORM_
NVTX_ROUTINE_EACH(__macro) \
__macro(nvtxRangePushA); \
__macro(nvtxRangePushEx); \
__macro(nvtxRangePop);
NVTX_ROUTINE_EACH
(
PLATFORM_DECLARE_DYNAMIC_LOAD_NVTX_WRAP
);
PLATFORM_
NVTX_ROUTINE_EACH
(
PLATFORM_DECLARE_DYNAMIC_LOAD_NVTX_WRAP
);
#undef PLATFORM_DECLARE_DYNAMIC_LOAD_NVTX_WRAP
}
// namespace dynload
...
...
paddle/phi/backends/dynload/nvtx.h
浏览文件 @
de6e7431
...
...
@@ -42,6 +42,7 @@ extern void *nvtx_dso_handle;
#define NVTX_ROUTINE_EACH(__macro) \
__macro(nvtxRangePushA); \
__macro(nvtxRangePushEx); \
__macro(nvtxRangePop);
NVTX_ROUTINE_EACH
(
DECLARE_DYNAMIC_LOAD_NVTX_WRAP
);
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录