Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
BaiXuePrincess
Paddle
提交
4e91d8d2
P
Paddle
项目概览
BaiXuePrincess
/
Paddle
与 Fork 源项目一致
Fork自
PaddlePaddle / Paddle
通知
1
Star
1
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
4e91d8d2
编写于
1月 22, 2019
作者:
W
WangZhen
浏览文件
操作
浏览文件
下载
差异文件
Merge branch 'develop' of
https://github.com/PaddlePaddle/Paddle
into graph_quantization
test=develop
上级
3b668c15
b3fdf708
变更
3
显示空白变更内容
内联
并排
Showing
3 changed file
with
26 addition
and
27 deletion
+26
-27
paddle/fluid/operators/distributed/CMakeLists.txt
paddle/fluid/operators/distributed/CMakeLists.txt
+1
-1
paddle/fluid/operators/group_norm_op.cu
paddle/fluid/operators/group_norm_op.cu
+8
-8
python/paddle/fluid/executor.py
python/paddle/fluid/executor.py
+17
-18
未找到文件。
paddle/fluid/operators/distributed/CMakeLists.txt
浏览文件 @
4e91d8d2
...
...
@@ -37,7 +37,7 @@ else()
variable_response.cc
collective_client.cc collective_server.cc
${
BRPC_SRCS
}
PROTO
${
CMAKE_CURRENT_BINARY_DIR
}
/
send_recv.proto
PROTO send_recv.proto
DEPS lod_tensor selected_rows memory
)
set
(
RPC_DEPS sendrecvop_rpc brpc ssl crypto protobuf leveldb snappystream snappy zlib
)
...
...
paddle/fluid/operators/group_norm_op.cu
浏览文件 @
4e91d8d2
...
...
@@ -21,20 +21,20 @@ namespace operators {
enum
GroupNormKernelFlags
{
kHasScale
=
1
,
kHasBias
=
2
};
#define CHECK_CASE(i, flags, kernel_name,
args...)
\
#define CHECK_CASE(i, flags, kernel_name,
...)
\
if (i == flags) { \
kernel_name<T, i><<<grid, threads, 0, dev_ctx.stream()>>>(
args
); \
kernel_name<T, i><<<grid, threads, 0, dev_ctx.stream()>>>(
__VA_ARGS__
); \
}
// 0 for no scale, no bias
// 1 for has scale, no bias
// 2 for no scale, has bias
// 3 for has scale, has bias
#define UNROLL_ALL_CASES(flags, kernel_name,
args
...) \
CHECK_CASE(0, flags, kernel_name,
args)
\
CHECK_CASE(1, flags, kernel_name,
args)
\
CHECK_CASE(2, flags, kernel_name,
args)
\
CHECK_CASE(3, flags, kernel_name,
args
)
#define UNROLL_ALL_CASES(flags, kernel_name, ...) \
CHECK_CASE(0, flags, kernel_name,
__VA_ARGS__)
\
CHECK_CASE(1, flags, kernel_name,
__VA_ARGS__)
\
CHECK_CASE(2, flags, kernel_name,
__VA_ARGS__)
\
CHECK_CASE(3, flags, kernel_name,
__VA_ARGS__
)
template
<
typename
T
>
__device__
__inline__
void
CudaAtomicAddWithWarp
(
T
*
sum
,
T
value
)
{
...
...
python/paddle/fluid/executor.py
浏览文件 @
4e91d8d2
...
...
@@ -305,7 +305,9 @@ class Executor(object):
def
__init__
(
self
,
place
):
self
.
place
=
place
self
.
program_caches
=
dict
()
self
.
executor
=
None
p
=
core
.
Place
()
p
.
set_place
(
self
.
place
)
self
.
_default_executor
=
core
.
Executor
(
p
)
self
.
_closed
=
False
def
_get_program_cache
(
self
,
program_cache_key
):
...
...
@@ -397,12 +399,13 @@ class Executor(object):
>>> ...
>>> exe.close()
"""
if
not
self
.
_closed
and
self
.
executor
:
self
.
executor
.
close
()
if
not
self
.
_closed
:
self
.
_default_
executor
.
close
()
self
.
_closed
=
True
def
_run_parallel
(
self
,
program
,
scope
,
feed
,
fetch_list
,
fetch_var_name
,
return_numpy
):
exe
=
program
.
_executor
if
isinstance
(
feed
,
dict
):
feed_tensor_dict
=
dict
()
for
feed_name
in
feed
:
...
...
@@ -414,8 +417,7 @@ class Executor(object):
feed_tensor
.
set
(
feed
[
feed_name
],
core
.
CPUPlace
())
feed_tensor_dict
[
feed_name
]
=
feed_tensor
self
.
executor
.
feed_and_split_tensor_into_local_scopes
(
feed_tensor_dict
)
exe
.
feed_and_split_tensor_into_local_scopes
(
feed_tensor_dict
)
elif
isinstance
(
feed
,
list
)
or
isinstance
(
feed
,
tuple
):
if
len
(
feed
)
!=
len
(
program
.
_places
):
raise
ValueError
(
...
...
@@ -436,10 +438,10 @@ class Executor(object):
tensor
=
tmp
res_dict
[
feed_name
]
=
tensor
res
.
append
(
res_dict
)
self
.
executor
.
feed_tensors_into_local_scopes
(
res
)
exe
.
feed_tensors_into_local_scopes
(
res
)
fetch_var_names
=
list
(
map
(
_to_name_str
,
fetch_list
))
self
.
executor
.
run
(
fetch_var_names
,
fetch_var_name
)
exe
.
run
(
fetch_var_names
,
fetch_var_name
)
arr
=
scope
.
find_var
(
fetch_var_name
).
get_lod_tensor_array
()
if
return_numpy
:
...
...
@@ -511,12 +513,9 @@ class Executor(object):
compiled
=
isinstance
(
program
,
compiler
.
CompiledProgram
)
# For backward compatibility, run directly.
if
not
compiled
:
if
not
self
.
executor
:
p
=
core
.
Place
()
p
.
set_place
(
self
.
place
)
self
.
executor
=
core
.
Executor
(
p
)
return
self
.
_run
(
program
,
self
.
_default_executor
,
feed
=
feed
,
fetch_list
=
fetch_list
,
feed_var_name
=
feed_var_name
,
...
...
@@ -526,7 +525,6 @@ class Executor(object):
use_program_cache
=
use_program_cache
)
program
.
_compile
(
scope
,
self
.
place
)
self
.
executor
=
program
.
_executor
if
program
.
_is_data_parallel
:
return
self
.
_run_parallel
(
program
,
...
...
@@ -536,12 +534,13 @@ class Executor(object):
fetch_var_name
=
fetch_var_name
,
return_numpy
=
return_numpy
)
elif
program
.
_is_inference
:
return
self
.
_run_inference
(
program
,
feed
)
return
self
.
_run_inference
(
program
.
_executor
,
feed
)
else
:
# TODO(panyx0718): Can compile program to optimize executor
# performance.
return
self
.
_run
(
program
.
_program
,
self
.
_default_executor
,
feed
=
feed
,
fetch_list
=
fetch_list
,
feed_var_name
=
feed_var_name
,
...
...
@@ -550,8 +549,8 @@ class Executor(object):
return_numpy
=
return_numpy
,
use_program_cache
=
use_program_cache
)
def
_run
(
self
,
program
,
feed
,
fetch_list
,
feed_var_name
,
fetch
_var_name
,
scope
,
return_numpy
,
use_program_cache
):
def
_run
(
self
,
program
,
exe
,
feed
,
fetch_list
,
feed
_var_name
,
fetch_var_name
,
scope
,
return_numpy
,
use_program_cache
):
if
feed
is
None
:
feed
=
{}
...
...
@@ -589,11 +588,11 @@ class Executor(object):
fetch_var_name
=
fetch_var_name
)
self
.
_feed_data
(
program
,
feed
,
feed_var_name
,
scope
)
self
.
executor
.
run
(
program
.
desc
,
scope
,
0
,
True
,
True
)
exe
.
run
(
program
.
desc
,
scope
,
0
,
True
,
True
)
outs
=
self
.
_fetch_data
(
fetch_list
,
fetch_var_name
,
scope
)
if
return_numpy
:
outs
=
as_numpy
(
outs
)
return
outs
def
_run_inference
(
self
,
program
,
feed
):
return
self
.
executor
.
run
(
feed
)
def
_run_inference
(
self
,
exe
,
feed
):
return
exe
.
run
(
feed
)
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录