Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
Crayon鑫
Paddle
提交
8037901b
P
Paddle
项目概览
Crayon鑫
/
Paddle
与 Fork 源项目一致
Fork自
PaddlePaddle / Paddle
通知
1
Star
1
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
1
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
1
Issue
1
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
未验证
提交
8037901b
编写于
7月 22, 2022
作者:
Y
yuguo
提交者:
GitHub
7月 22, 2022
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
Add code of occupancy computing on DCU and avoid threadID bug for DCU profiler (#44520)
上级
fcfaa104
变更
10
隐藏空白更改
内联
并排
Showing
10 changed file
with
108 addition
and
3 deletion
+108
-3
CMakeLists.txt
CMakeLists.txt
+1
-0
cmake/configure.cmake
cmake/configure.cmake
+7
-0
cmake/cupti.cmake
cmake/cupti.cmake
+1
-1
paddle/fluid/platform/dynload/CMakeLists.txt
paddle/fluid/platform/dynload/CMakeLists.txt
+3
-0
paddle/fluid/platform/profiler/chrometracing_logger.cc
paddle/fluid/platform/profiler/chrometracing_logger.cc
+15
-0
paddle/fluid/platform/profiler/cupti_data_process.cc
paddle/fluid/platform/profiler/cupti_data_process.cc
+8
-0
paddle/fluid/platform/profiler/trace_event.h
paddle/fluid/platform/profiler/trace_event.h
+4
-0
paddle/fluid/platform/profiler/utils.cc
paddle/fluid/platform/profiler/utils.cc
+55
-1
paddle/fluid/platform/profiler/utils.h
paddle/fluid/platform/profiler/utils.h
+11
-1
paddle/phi/backends/dynload/CMakeLists.txt
paddle/phi/backends/dynload/CMakeLists.txt
+3
-0
未找到文件。
CMakeLists.txt
浏览文件 @
8037901b
...
@@ -435,6 +435,7 @@ endif()
...
@@ -435,6 +435,7 @@ endif()
if
(
WITH_ROCM
)
if
(
WITH_ROCM
)
include
(
hip
)
include
(
hip
)
include
(
miopen
)
# set miopen libraries, must before configure
include
(
miopen
)
# set miopen libraries, must before configure
include
(
cupti
)
endif
()
endif
()
if
(
WITH_XPU_KP
)
if
(
WITH_XPU_KP
)
...
...
cmake/configure.cmake
浏览文件 @
8037901b
...
@@ -178,6 +178,13 @@ elseif(WITH_ROCM)
...
@@ -178,6 +178,13 @@ elseif(WITH_ROCM)
add_definitions
(
-DEIGEN_USE_GPU
)
add_definitions
(
-DEIGEN_USE_GPU
)
add_definitions
(
-DEIGEN_USE_HIP
)
add_definitions
(
-DEIGEN_USE_HIP
)
if
(
CUPTI_FOUND
)
include_directories
(
${
CUPTI_INCLUDE_DIR
}
)
add_definitions
(
-DPADDLE_WITH_CUPTI
)
else
()
message
(
STATUS
"Cannot find CUPTI, GPU Profiling is incorrect."
)
endif
()
if
(
NOT MIOPEN_FOUND
)
if
(
NOT MIOPEN_FOUND
)
message
(
FATAL_ERROR
"Paddle needs MIOpen to compile"
)
message
(
FATAL_ERROR
"Paddle needs MIOpen to compile"
)
endif
()
endif
()
...
...
cmake/cupti.cmake
浏览文件 @
8037901b
if
(
NOT WITH_GPU
)
if
(
NOT WITH_GPU
AND NOT WITH_ROCM
)
return
()
return
()
endif
()
endif
()
...
...
paddle/fluid/platform/dynload/CMakeLists.txt
浏览文件 @
8037901b
...
@@ -35,6 +35,9 @@ if(NOT APPLE)
...
@@ -35,6 +35,9 @@ if(NOT APPLE)
if
(
WITH_RCCL
)
if
(
WITH_RCCL
)
list
(
APPEND HIP_SRCS rccl.cc
)
list
(
APPEND HIP_SRCS rccl.cc
)
endif
()
endif
()
if
(
CUPTI_FOUND
)
list
(
APPEND HIP_SRCS cupti.cc
)
endif
()
endif
()
endif
()
endif
()
endif
()
...
...
paddle/fluid/platform/profiler/chrometracing_logger.cc
浏览文件 @
8037901b
...
@@ -401,7 +401,11 @@ void ChromeTracingLogger::HandleTypeKernel(
...
@@ -401,7 +401,11 @@ void ChromeTracingLogger::HandleTypeKernel(
float
warps_per_sm
=
0.0
;
float
warps_per_sm
=
0.0
;
float
occupancy
=
0.0
;
float
occupancy
=
0.0
;
#if defined(PADDLE_WITH_CUPTI)
#if defined(PADDLE_WITH_CUPTI)
#ifdef PADDLE_WITH_HIP
constexpr
int
threads_per_warp
=
64
;
#else
constexpr
int
threads_per_warp
=
32
;
constexpr
int
threads_per_warp
=
32
;
#endif
const
gpuDeviceProp
&
device_property
=
const
gpuDeviceProp
&
device_property
=
GetDeviceProperties
(
device_node
.
DeviceId
());
GetDeviceProperties
(
device_node
.
DeviceId
());
blocks_per_sm
=
static_cast
<
float
>
(
kernel_info
.
grid_x
*
kernel_info
.
grid_y
*
blocks_per_sm
=
static_cast
<
float
>
(
kernel_info
.
grid_x
*
kernel_info
.
grid_y
*
...
@@ -411,6 +415,15 @@ void ChromeTracingLogger::HandleTypeKernel(
...
@@ -411,6 +415,15 @@ void ChromeTracingLogger::HandleTypeKernel(
blocks_per_sm
*
blocks_per_sm
*
(
kernel_info
.
block_x
*
kernel_info
.
block_y
*
kernel_info
.
block_z
)
/
(
kernel_info
.
block_x
*
kernel_info
.
block_y
*
kernel_info
.
block_z
)
/
threads_per_warp
;
threads_per_warp
;
#ifdef PADDLE_WITH_HIP
occupancy
=
CalculateEstOccupancy
(
device_node
.
DeviceId
(),
kernel_info
.
dynamic_shared_memory
,
kernel_info
.
block_x
,
kernel_info
.
block_y
,
kernel_info
.
block_z
,
kernel_info
.
kernelFunc
,
kernel_info
.
launchType
);
#else
occupancy
=
CalculateEstOccupancy
(
device_node
.
DeviceId
(),
occupancy
=
CalculateEstOccupancy
(
device_node
.
DeviceId
(),
kernel_info
.
registers_per_thread
,
kernel_info
.
registers_per_thread
,
kernel_info
.
static_shared_memory
,
kernel_info
.
static_shared_memory
,
...
@@ -419,6 +432,8 @@ void ChromeTracingLogger::HandleTypeKernel(
...
@@ -419,6 +432,8 @@ void ChromeTracingLogger::HandleTypeKernel(
kernel_info
.
block_y
,
kernel_info
.
block_y
,
kernel_info
.
block_z
,
kernel_info
.
block_z
,
blocks_per_sm
);
blocks_per_sm
);
#endif // PADDLE_WITH_HIP
#endif
#endif
float
dur
=
nsToMsFloat
(
device_node
.
Duration
());
float
dur
=
nsToMsFloat
(
device_node
.
Duration
());
std
::
string
dur_display
;
std
::
string
dur_display
;
...
...
paddle/fluid/platform/profiler/cupti_data_process.cc
浏览文件 @
8037901b
...
@@ -52,6 +52,10 @@ void AddKernelRecord(const CUpti_ActivityKernel4* kernel,
...
@@ -52,6 +52,10 @@ void AddKernelRecord(const CUpti_ActivityKernel4* kernel,
event
.
kernel_info
.
queued
=
kernel
->
queued
;
event
.
kernel_info
.
queued
=
kernel
->
queued
;
event
.
kernel_info
.
submitted
=
kernel
->
submitted
;
event
.
kernel_info
.
submitted
=
kernel
->
submitted
;
event
.
kernel_info
.
completed
=
kernel
->
completed
;
event
.
kernel_info
.
completed
=
kernel
->
completed
;
#ifdef PADDLE_WITH_HIP
event
.
kernel_info
.
kernelFunc
=
kernel
->
kernelFunc
;
event
.
kernel_info
.
launchType
=
kernel
->
launchType
;
#endif
collector
->
AddDeviceEvent
(
std
::
move
(
event
));
collector
->
AddDeviceEvent
(
std
::
move
(
event
));
}
}
...
@@ -279,7 +283,11 @@ void AddApiRecord(const CUpti_ActivityAPI* api,
...
@@ -279,7 +283,11 @@ void AddApiRecord(const CUpti_ActivityAPI* api,
}
else
{
}
else
{
tid
=
iter
->
second
;
tid
=
iter
->
second
;
}
}
#ifdef PADDLE_WITH_HIP
event
.
thread_id
=
api
->
threadId
;
#else
event
.
thread_id
=
tid
;
event
.
thread_id
=
tid
;
#endif
event
.
correlation_id
=
api
->
correlationId
;
event
.
correlation_id
=
api
->
correlationId
;
event
.
callback_id
=
api
->
cbid
;
event
.
callback_id
=
api
->
cbid
;
collector
->
AddRuntimeEvent
(
std
::
move
(
event
));
collector
->
AddRuntimeEvent
(
std
::
move
(
event
));
...
...
paddle/fluid/platform/profiler/trace_event.h
浏览文件 @
8037901b
...
@@ -105,6 +105,10 @@ struct KernelEventInfo {
...
@@ -105,6 +105,10 @@ struct KernelEventInfo {
uint64_t
submitted
;
uint64_t
submitted
;
// The completed timestamp for the kernel execution, in ns.
// The completed timestamp for the kernel execution, in ns.
uint64_t
completed
;
uint64_t
completed
;
#ifdef PADDLE_WITH_HIP
void
*
kernelFunc
;
uint8_t
launchType
;
#endif
};
};
static
constexpr
size_t
kMemKindMaxLen
=
50
;
static
constexpr
size_t
kMemKindMaxLen
=
50
;
...
...
paddle/fluid/platform/profiler/utils.cc
浏览文件 @
8037901b
...
@@ -43,6 +43,58 @@ std::string json_vector<std::string>(
...
@@ -43,6 +43,58 @@ std::string json_vector<std::string>(
}
}
#ifdef PADDLE_WITH_CUPTI
#ifdef PADDLE_WITH_CUPTI
#ifdef PADDLE_WITH_HIP
#include "hip/hip_runtime.h"
float
CalculateEstOccupancy
(
uint32_t
DeviceId
,
int32_t
DynamicSharedMemory
,
int32_t
BlockX
,
int32_t
BlockY
,
int32_t
BlockZ
,
void
*
kernelFunc
,
uint8_t
launchType
)
{
float
occupancy
=
0.0
;
std
::
vector
<
int
>
device_ids
=
GetSelectedDevices
();
if
(
DeviceId
<
device_ids
.
size
())
{
const
gpuDeviceProp
&
device_property
=
GetDeviceProperties
(
DeviceId
);
int
blockSize
=
BlockX
*
BlockY
*
BlockZ
;
int
numBlock
=
0
;
hipError_t
status
;
if
(
launchType
==
0
)
{
status
=
hipOccupancyMaxActiveBlocksPerMultiprocessor
(
&
numBlock
,
kernelFunc
,
blockSize
,
DynamicSharedMemory
);
if
(
status
==
hipSuccess
)
{
occupancy
=
static_cast
<
double
>
(
numBlock
)
*
blockSize
/
device_property
.
maxThreadsPerMultiProcessor
;
}
else
{
LOG
(
WARNING
)
<<
"Failed to calculate estimated occupancy, status = "
<<
status
<<
std
::
endl
;
}
}
else
if
(
launchType
==
100
)
{
status
=
hipModuleOccupancyMaxActiveBlocksPerMultiprocessor
(
&
numBlock
,
reinterpret_cast
<
hipFunction_t
>
(
kernelFunc
),
blockSize
,
DynamicSharedMemory
);
if
(
status
==
hipSuccess
)
{
occupancy
=
static_cast
<
double
>
(
numBlock
)
*
blockSize
/
device_property
.
maxThreadsPerMultiProcessor
;
}
else
{
LOG
(
WARNING
)
<<
"Failed to calculate estimated occupancy, status = "
<<
status
<<
std
::
endl
;
}
}
else
{
LOG
(
WARNING
)
<<
"Failed to calculate estimated occupancy, can not "
"recognize launchType : "
<<
launchType
<<
std
::
endl
;
}
}
return
occupancy
;
}
#else
float
CalculateEstOccupancy
(
uint32_t
DeviceId
,
float
CalculateEstOccupancy
(
uint32_t
DeviceId
,
uint16_t
RegistersPerThread
,
uint16_t
RegistersPerThread
,
int32_t
StaticSharedMemory
,
int32_t
StaticSharedMemory
,
...
@@ -88,7 +140,9 @@ float CalculateEstOccupancy(uint32_t DeviceId,
...
@@ -88,7 +140,9 @@ float CalculateEstOccupancy(uint32_t DeviceId,
}
}
return
occupancy
;
return
occupancy
;
}
}
#endif
#endif // PADDLE_WITH_HIP
#endif // PADDLE_WITH_CUPTI
const
char
*
StringTracerMemEventType
(
TracerMemEventType
type
)
{
const
char
*
StringTracerMemEventType
(
TracerMemEventType
type
)
{
static
const
char
*
categary_name_
[]
=
{
static
const
char
*
categary_name_
[]
=
{
...
...
paddle/fluid/platform/profiler/utils.h
浏览文件 @
8037901b
...
@@ -125,6 +125,15 @@ static float nsToMsFloat(uint64_t end_ns, uint64_t start_ns = 0) {
...
@@ -125,6 +125,15 @@ static float nsToMsFloat(uint64_t end_ns, uint64_t start_ns = 0) {
}
}
#ifdef PADDLE_WITH_CUPTI
#ifdef PADDLE_WITH_CUPTI
#ifdef PADDLE_WITH_HIP
float
CalculateEstOccupancy
(
uint32_t
DeviceId
,
int32_t
DynamicSharedMemory
,
int32_t
BlockX
,
int32_t
BlockY
,
int32_t
BlockZ
,
void
*
kernelFunc
,
uint8_t
launchType
);
#else
float
CalculateEstOccupancy
(
uint32_t
deviceId
,
float
CalculateEstOccupancy
(
uint32_t
deviceId
,
uint16_t
registersPerThread
,
uint16_t
registersPerThread
,
int32_t
staticSharedMemory
,
int32_t
staticSharedMemory
,
...
@@ -133,7 +142,8 @@ float CalculateEstOccupancy(uint32_t deviceId,
...
@@ -133,7 +142,8 @@ float CalculateEstOccupancy(uint32_t deviceId,
int32_t
blockY
,
int32_t
blockY
,
int32_t
blockZ
,
int32_t
blockZ
,
float
blocksPerSm
);
float
blocksPerSm
);
#endif
#endif // PADDLE_WITH_HIP
#endif // PADDLE_WITH_CUPTI
}
// namespace platform
}
// namespace platform
}
// namespace paddle
}
// namespace paddle
paddle/phi/backends/dynload/CMakeLists.txt
浏览文件 @
8037901b
...
@@ -35,6 +35,9 @@ if(NOT APPLE)
...
@@ -35,6 +35,9 @@ if(NOT APPLE)
if
(
WITH_RCCL
)
if
(
WITH_RCCL
)
list
(
APPEND HIP_SRCS rccl.cc
)
list
(
APPEND HIP_SRCS rccl.cc
)
endif
()
endif
()
if
(
CUPTI_FOUND
)
list
(
APPEND HIP_SRCS cupti.cc
)
endif
()
endif
()
endif
()
endif
()
endif
()
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录