Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
BaiXuePrincess
Paddle
提交
cbe7466f
P
Paddle
项目概览
BaiXuePrincess
/
Paddle
与 Fork 源项目一致
Fork自
PaddlePaddle / Paddle
通知
1
Star
1
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
未验证
提交
cbe7466f
编写于
4月 14, 2022
作者:
L
liutiexing
提交者:
GitHub
4月 14, 2022
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
executor perf statistics (#41648)
* executor perf statistics * fix ut * fix ut * fix ut * add ut * add ut
上级
d0f3296b
变更
10
展开全部
隐藏空白更改
内联
并排
Showing
10 changed file
with
782 addition
and
10 deletion
+782
-10
paddle/fluid/framework/new_executor/CMakeLists.txt
paddle/fluid/framework/new_executor/CMakeLists.txt
+2
-0
paddle/fluid/framework/new_executor/executor_statistics.cc
paddle/fluid/framework/new_executor/executor_statistics.cc
+627
-0
paddle/fluid/framework/new_executor/executor_statistics.h
paddle/fluid/framework/new_executor/executor_statistics.h
+27
-0
paddle/fluid/framework/new_executor/standalone_executor.cc
paddle/fluid/framework/new_executor/standalone_executor.cc
+7
-0
paddle/fluid/framework/new_executor/workqueue/CMakeLists.txt
paddle/fluid/framework/new_executor/workqueue/CMakeLists.txt
+1
-1
paddle/fluid/framework/new_executor/workqueue/nonblocking_threadpool.h
...framework/new_executor/workqueue/nonblocking_threadpool.h
+6
-3
paddle/fluid/pybind/CMakeLists.txt
paddle/fluid/pybind/CMakeLists.txt
+1
-1
paddle/fluid/pybind/pybind.cc
paddle/fluid/pybind/pybind.cc
+5
-4
python/paddle/fluid/tests/unittests/interpreter/CMakeLists.txt
...n/paddle/fluid/tests/unittests/interpreter/CMakeLists.txt
+1
-1
python/paddle/fluid/tests/unittests/interpreter/test_standalone_executor.py
...d/tests/unittests/interpreter/test_standalone_executor.py
+105
-0
未找到文件。
paddle/fluid/framework/new_executor/CMakeLists.txt
浏览文件 @
cbe7466f
...
...
@@ -20,6 +20,8 @@ endif()
cc_library
(
standalone_executor SRCS standalone_executor.cc DEPS interpretercore
)
cc_library
(
staticgraph_executor_statistics SRCS executor_statistics.cc DEPS enforce glog os_info
)
# cc_binary(standalone_executor_test SRCS standalone_executor_test.cc DEPS interpretercore standalone_executor operator op_registry executor ${GLOB_OP_LIB} ${GLOB_OPERATOR_DEPS} profiler)
# skip win32 since wget is not installed by default on windows machine.
if
(
WITH_GPU AND WITH_TESTING AND NOT WIN32 AND NOT
"$ENV{CI_SKIP_CPP_TEST}"
STREQUAL
"ON"
)
...
...
paddle/fluid/framework/new_executor/executor_statistics.cc
0 → 100644
浏览文件 @
cbe7466f
此差异已折叠。
点击以展开。
paddle/fluid/framework/new_executor/executor_statistics.h
0 → 100644
浏览文件 @
cbe7466f
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <memory>
#include "paddle/fluid/platform/profiler/event_node.h"
namespace paddle {
namespace framework {

// Entry point for static-graph executor performance statistics.
//
// `profiling_data` is the set of event trees collected by the profiler; the
// caller shares ownership and the trees are taken as const.
//
// NOTE(review): the implementation lives in executor_statistics.cc and is not
// shown alongside this declaration. Judging from its caller (the profiler
// "stop" binding passes `result->GetNodeTrees()`) and from the unit test, it
// presumably writes a JSON report to the path given by
// FLAGS_static_executor_perfstat_filepath -- confirm against the .cc file.
void StaticGraphExecutorPerfStatistics(
    std::shared_ptr<const platform::NodeTrees> profiling_data);

}  // namespace framework
}  // namespace paddle
paddle/fluid/framework/new_executor/standalone_executor.cc
浏览文件 @
cbe7466f
...
...
@@ -13,6 +13,7 @@
// limitations under the License.
#include "paddle/fluid/framework/new_executor/standalone_executor.h"
#include "paddle/fluid/framework/new_executor/interpretercore_util.h"
#include "paddle/fluid/platform/profiler/event_tracing.h"
namespace
paddle
{
namespace
framework
{
...
...
@@ -59,6 +60,9 @@ paddle::framework::FetchList StandaloneExecutor::Run(
const
std
::
vector
<
std
::
string
>&
feed_names
,
const
std
::
vector
<
framework
::
LoDTensor
>&
feed_tensors
,
const
std
::
vector
<
std
::
string
>&
fetch_names
)
{
platform
::
RecordEvent
record_event
(
"StandaloneExecutor::run"
,
platform
::
TracerEventType
::
UserDefined
,
1
);
auto
core
=
GetInterpreterCore
(
feed_names
,
fetch_names
,
true
);
return
core
->
Run
(
feed_names
,
feed_tensors
);
...
...
@@ -67,6 +71,9 @@ paddle::framework::FetchList StandaloneExecutor::Run(
paddle
::
framework
::
FetchList
StandaloneExecutor
::
Run
(
const
std
::
vector
<
std
::
string
>&
feed_names
,
const
std
::
vector
<
std
::
string
>&
fetch_names
)
{
platform
::
RecordEvent
record_event
(
"StandaloneExecutor::run"
,
platform
::
TracerEventType
::
UserDefined
,
1
);
auto
core
=
GetInterpreterCore
(
feed_names
,
fetch_names
,
false
);
VLOG
(
4
)
<<
"StandaloneExecutor: "
<<
this
<<
", InterpreterCore: "
<<
core
;
return
core
->
Run
(
feed_names
);
...
...
paddle/fluid/framework/new_executor/workqueue/CMakeLists.txt
浏览文件 @
cbe7466f
cc_library
(
workqueue_utils SRCS workqueue_utils.cc events_waiter.cc DEPS enforce glog
)
cc_library
(
workqueue SRCS workqueue.cc DEPS workqueue_utils enforce glog
)
cc_library
(
workqueue SRCS workqueue.cc DEPS workqueue_utils enforce glog
os_info
)
cc_test
(
workqueue_test SRCS workqueue_test.cc DEPS workqueue
)
paddle/fluid/framework/new_executor/workqueue/nonblocking_threadpool.h
浏览文件 @
cbe7466f
...
...
@@ -129,6 +129,7 @@ class ThreadPoolTempl {
// this. We expect that such scenario is prevented by program, that is,
// this is kept alive while any threads can potentially be in Schedule.
if
(
!
t
.
f
)
{
// Allow 'false positive' which makes a redundant notification.
if
(
num_tasks
>
num_threads_
-
blocked_
)
{
VLOG
(
6
)
<<
"Add task, Notify"
;
ec_
.
Notify
(
false
);
...
...
@@ -379,9 +380,8 @@ class ThreadPoolTempl {
return
false
;
}
// Number of blocked threads is used as termination condition.
// If we are shutting down and all worker threads are blocked without work,
// then we are done.
// Number of blocked threads is used as notification condition.
// We must increase the counter before the emptiness check.
blocked_
++
;
// Now do a reliable emptiness check.
...
...
@@ -393,6 +393,9 @@ class ThreadPoolTempl {
return
true
;
}
// Number of blocked threads is used as termination condition.
// If we are shutting down and all worker threads are blocked without work,
// then we are done.
if
(
done_
&&
blocked_
==
static_cast
<
unsigned
>
(
num_threads_
))
{
ec_
.
CancelWait
();
// Almost done, but need to re-check queues.
...
...
paddle/fluid/pybind/CMakeLists.txt
浏览文件 @
cbe7466f
...
...
@@ -350,7 +350,7 @@ if(WITH_PYTHON)
add_custom_target
(
eager_op_function_generator_cmd ALL DEPENDS
${
eager_impl_file
}
)
endif
()
list
(
APPEND PYBIND_DEPS interpretercore standalone_executor
)
list
(
APPEND PYBIND_DEPS interpretercore standalone_executor
staticgraph_executor_statistics
)
cc_library
(
op_function_common SRCS op_function_common.cc DEPS
${
PYBIND_DEPS
}
)
list
(
APPEND PYBIND_DEPS op_function_common
)
...
...
paddle/fluid/pybind/pybind.cc
浏览文件 @
cbe7466f
...
...
@@ -46,6 +46,7 @@ limitations under the License. */
#include "paddle/fluid/framework/ir/pass_builder.h"
#include "paddle/fluid/framework/lod_rank_table.h"
#include "paddle/fluid/framework/lod_tensor_array.h"
#include "paddle/fluid/framework/new_executor/executor_statistics.h"
#include "paddle/fluid/framework/new_executor/standalone_executor.h"
#include "paddle/fluid/framework/op_info.h"
#include "paddle/fluid/framework/op_registry.h"
...
...
@@ -2903,9 +2904,6 @@ All parameter, weight, gradient are variables in Paddle.
.
def
(
"run"
,
[](
StandaloneExecutor
&
self
,
std
::
vector
<
std
::
string
>
feed_names
,
std
::
vector
<
std
::
string
>
fetch_names
)
{
platform
::
RecordEvent
record_event
(
"StandaloneExecutor::run"
,
platform
::
TracerEventType
::
UserDefined
,
1
);
paddle
::
framework
::
FetchList
ret
;
{
pybind11
::
gil_scoped_release
release
;
...
...
@@ -3380,7 +3378,10 @@ All parameter, weight, gradient are variables in Paddle.
.
def
(
"stop"
,
[](
paddle
::
platform
::
Profiler
*
profiler
)
{
platform
::
DisableHostEventRecorder
();
return
profiler
->
Stop
();
auto
result
=
profiler
->
Stop
();
framework
::
StaticGraphExecutorPerfStatistics
(
result
->
GetNodeTrees
());
return
result
;
},
py
::
return_value_policy
::
automatic_reference
);
...
...
python/paddle/fluid/tests/unittests/interpreter/CMakeLists.txt
浏览文件 @
cbe7466f
...
...
@@ -2,7 +2,7 @@ file(GLOB TEST_INTERP_CASES RELATIVE "${CMAKE_CURRENT_SOURCE_DIR}" "test_*.py")
string
(
REPLACE
".py"
""
TEST_INTERP_CASES
"
${
TEST_INTERP_CASES
}
"
)
foreach
(
target
${
TEST_INTERP_CASES
}
)
py_test_modules
(
${
target
}
MODULES
${
target
}
ENVS FLAGS_allocator_strategy=auto_growth FLAGS_use_stream_safe_cuda_allocator=true FLAGS_fast_eager_deletion_mode=false FLAGS_eager_delete_tensor_gb=0
)
py_test_modules
(
${
target
}
MODULES
${
target
}
ENVS FLAGS_
host_trace_level=10 FLAGS_static_executor_perfstat_filepath=./perfstat FLAGS_
allocator_strategy=auto_growth FLAGS_use_stream_safe_cuda_allocator=true FLAGS_fast_eager_deletion_mode=false FLAGS_eager_delete_tensor_gb=0
)
py_test_modules
(
${
target
}
_non_eager_deletion MODULES
${
target
}
ENVS FLAGS_allocator_strategy=auto_growth FLAGS_use_stream_safe_cuda_allocator=true FLAGS_fast_eager_deletion_mode=false FLAGS_eager_delete_tensor_gb=0.000001
)
py_test_modules
(
${
target
}
_fast_gc MODULES
${
target
}
ENVS FLAGS_allocator_strategy=auto_growth FLAGS_use_stream_safe_cuda_allocator=true FLAGS_fast_eager_deletion_mode=true FLAGS_eager_delete_tensor_gb=0
)
py_test_modules
(
${
target
}
_fast_gc_non_eager_deletion MODULES
${
target
}
ENVS FLAGS_allocator_strategy=auto_growth FLAGS_use_stream_safe_cuda_allocator=true FLAGS_fast_eager_deletion_mode=true FLAGS_eager_delete_tensor_gb=0.000001
)
...
...
python/paddle/fluid/tests/unittests/interpreter/test_standalone_executor.py
浏览文件 @
cbe7466f
...
...
@@ -15,10 +15,13 @@
import
os
os
.
environ
[
'FLAGS_use_stream_safe_cuda_allocator'
]
=
"true"
import
sys
import
shutil
import
unittest
import
paddle
import
json
from
paddle.fluid
import
core
from
paddle.fluid.core
import
StandaloneExecutor
from
paddle.profiler
import
profiler
import
numpy
as
np
...
...
@@ -116,6 +119,107 @@ def build_program():
return
main_program
,
startup_program
,
[
mean
]
class
ExecutorStatisticsTestCase
(
unittest
.
TestCase
):
def
setUp
(
self
):
self
.
iter_n
=
3
self
.
place
=
paddle
.
CUDAPlace
(
0
)
if
core
.
is_compiled_with_cuda
(
)
else
paddle
.
CPUPlace
()
def
test_standalone_executor_statistics
(
self
):
if
os
.
getenv
(
"FLAGS_static_executor_perfstat_filepath"
)
is
None
:
return
paddle
.
seed
(
2020
)
main_program
,
startup_program
,
fetch_list
=
build_program
()
fetch_list
=
[
x
.
name
for
x
in
fetch_list
]
p
=
core
.
Place
()
p
.
set_place
(
self
.
place
)
executor
=
StandaloneExecutor
(
p
,
startup_program
.
desc
,
main_program
.
desc
,
core
.
Scope
())
helper_profiler
=
profiler
.
Profiler
(
targets
=
[
profiler
.
ProfilerTarget
.
CPU
],
scheduler
=
(
1
,
2
))
helper_profiler
.
start
()
for
i
in
range
(
self
.
iter_n
):
executor
.
run
({},
fetch_list
)
helper_profiler
.
step
()
helper_profiler
.
stop
()
perfstat_filepath
=
os
.
environ
[
'FLAGS_static_executor_perfstat_filepath'
]
self
.
assertTrue
(
os
.
path
.
exists
(
perfstat_filepath
))
with
open
(
perfstat_filepath
,
'r'
)
as
load_f
:
stat_res
=
json
.
load
(
load_f
)
self
.
assertTrue
(
len
(
stat_res
)
>
0
)
os
.
remove
(
perfstat_filepath
)
shutil
.
rmtree
(
'./profiler_log'
)
def
test_parallel_executor_statistics
(
self
):
if
os
.
getenv
(
"FLAGS_static_executor_perfstat_filepath"
)
is
None
:
return
paddle
.
seed
(
2020
)
main_program
,
startup_program
,
fetch_list
=
build_program
()
fetch_list
=
[
x
.
name
for
x
in
fetch_list
]
main_program
=
paddle
.
fluid
.
compiler
.
CompiledProgram
(
main_program
)
os
.
environ
[
'FLAGS_USE_STANDALONE_EXECUTOR'
]
=
'0'
executor
=
paddle
.
static
.
Executor
(
self
.
place
)
os
.
environ
[
'FLAGS_USE_STANDALONE_EXECUTOR'
]
=
'1'
executor
.
run
(
startup_program
)
helper_profiler
=
profiler
.
Profiler
(
targets
=
[
profiler
.
ProfilerTarget
.
CPU
],
scheduler
=
(
1
,
2
))
helper_profiler
.
start
()
for
i
in
range
(
self
.
iter_n
):
executor
.
run
(
main_program
,
fetch_list
=
fetch_list
)
helper_profiler
.
step
()
helper_profiler
.
stop
()
perfstat_filepath
=
os
.
environ
[
'FLAGS_static_executor_perfstat_filepath'
]
self
.
assertTrue
(
os
.
path
.
exists
(
perfstat_filepath
))
with
open
(
perfstat_filepath
,
'r'
)
as
load_f
:
stat_res
=
json
.
load
(
load_f
)
self
.
assertTrue
(
len
(
stat_res
)
>
0
)
os
.
remove
(
perfstat_filepath
)
shutil
.
rmtree
(
'./profiler_log'
)
def
test_executor_statistics
(
self
):
if
os
.
getenv
(
"FLAGS_static_executor_perfstat_filepath"
)
is
None
:
return
paddle
.
seed
(
2020
)
main_program
,
startup_program
,
fetch_list
=
build_program
()
fetch_list
=
[
x
.
name
for
x
in
fetch_list
]
os
.
environ
[
'FLAGS_USE_STANDALONE_EXECUTOR'
]
=
'0'
executor
=
paddle
.
static
.
Executor
(
self
.
place
)
os
.
environ
[
'FLAGS_USE_STANDALONE_EXECUTOR'
]
=
'1'
executor
.
run
(
startup_program
)
helper_profiler
=
profiler
.
Profiler
(
targets
=
[
profiler
.
ProfilerTarget
.
CPU
],
scheduler
=
(
1
,
2
))
helper_profiler
.
start
()
for
i
in
range
(
self
.
iter_n
):
executor
.
run
(
main_program
,
fetch_list
=
fetch_list
)
helper_profiler
.
step
()
helper_profiler
.
stop
()
perfstat_filepath
=
os
.
environ
[
'FLAGS_static_executor_perfstat_filepath'
]
self
.
assertTrue
(
os
.
path
.
exists
(
perfstat_filepath
))
with
open
(
perfstat_filepath
,
'r'
)
as
load_f
:
stat_res
=
json
.
load
(
load_f
)
self
.
assertTrue
(
len
(
stat_res
)
>
0
)
os
.
remove
(
perfstat_filepath
)
shutil
.
rmtree
(
'./profiler_log'
)
class
MultiStreamModelTestCase
(
unittest
.
TestCase
):
def
setUp
(
self
):
self
.
iter_n
=
2
...
...
@@ -155,6 +259,7 @@ class MultiStreamModelTestCase(unittest.TestCase):
p
.
set_place
(
self
.
place
)
inter_core
=
StandaloneExecutor
(
p
,
startup_program
.
desc
,
main_program
.
desc
,
core
.
Scope
())
outs
=
[]
for
i
in
range
(
self
.
iter_n
):
outs
.
append
(
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录