Commit dca9941e
Authored Feb 12, 2018 by typhoonzero

pass size when copy

Parent: 67d6f3a8
Showing 4 changed files with 12 additions and 8 deletions (+12 -8)
paddle/fluid/operators/concat_op.h (+2 -2)
paddle/fluid/operators/split_op.h (+1 -1)
paddle/fluid/operators/strided_memcpy.h (+4 -5)
python/paddle/v2/fluid/distribute_transpiler.py (+5 -0)
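Taken together, the diffs below give StridedNumelCopyWithAxis an explicit element count: instead of deriving copy_size = std::min(src_after, dst_after) internally, the helper now takes a size argument, and the concat/split kernels pass the stride along the concatenation or split axis as that count. distribute_transpiler.py additionally gains a few debug prints around dense-variable splitting.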
paddle/fluid/operators/concat_op.h
@@ -38,7 +38,7 @@ class ConcatKernel : public framework::OpKernel<T> {
       auto in_stride = framework::stride_numel(in->dims());
       StridedNumelCopyWithAxis<T>(ctx.device_context(), axis,
                                   out->data<T>() + output_offset, out_stride,
-                                  in->data<T>(), in_stride);
+                                  in->data<T>(), in_stride, in_stride[axis]);
       output_offset += in_stride[axis];
     }
   }
@@ -59,7 +59,7 @@ class ConcatGradKernel : public framework::OpKernel<T> {
       auto out_stride = framework::stride_numel(out->dims());
       StridedNumelCopyWithAxis<T>(ctx.device_context(), axis, out->data<T>(),
                                   out_stride, in->data<T>() + input_offset,
-                                  in_stride);
+                                  in_stride, out_stride[axis]);
       input_offset += out_stride[axis];
     }
   }
paddle/fluid/operators/split_op.h
@@ -38,7 +38,7 @@ class SplitOpKernel : public framework::OpKernel<T> {
       auto out_stride = framework::stride_numel(out->dims());
       StridedNumelCopyWithAxis<T>(ctx.device_context(), axis, out->data<T>(),
                                   out_stride, in->data<T>() + input_offset,
-                                  in_stride);
+                                  in_stride, out_stride[axis]);
       input_offset += out_stride[axis];
     }
   }
paddle/fluid/operators/strided_memcpy.h
@@ -54,11 +54,11 @@ inline void StridedNumelCopyWithAxis(const platform::DeviceContext& ctx,
                                      int64_t axis, T* dst,
                                      const framework::DDim& dst_stride_numel,
                                      const T* src,
-                                     const framework::DDim& src_stride_numel) {
+                                     const framework::DDim& src_stride_numel,
+                                     int64_t size) {
   int64_t before = dst_stride_numel[0] / dst_stride_numel[axis];
   int64_t src_after = src_stride_numel[axis];
   int64_t dst_after = dst_stride_numel[axis];
-  int64_t copy_size = std::min(src_after, dst_after);
   auto place = ctx.GetPlace();

   PADDLE_ENFORCE_EQ(src_stride_numel.size(), dst_stride_numel.size(),
@@ -83,15 +83,14 @@ inline void StridedNumelCopyWithAxis(const platform::DeviceContext& ctx,
     if (platform::is_cpu_place(place)) {
       auto& cpu_place = boost::get<platform::CPUPlace>(place);
       memory::Copy(cpu_place, dst + i * dst_after, cpu_place,
-                   src + i * src_after, sizeof(T) * copy_size);
+                   src + i * src_after, sizeof(T) * size);
     } else {
 #ifdef PADDLE_WITH_CUDA
       auto& gpu_place = boost::get<platform::CUDAPlace>(place);
       auto& cuda_ctx =
           reinterpret_cast<const platform::CUDADeviceContext&>(ctx);
       memory::Copy(gpu_place, dst + i * dst_after, gpu_place,
-                   src + i * src_after, sizeof(T) * copy_size,
-                   cuda_ctx.stream());
+                   src + i * src_after, sizeof(T) * size, cuda_ctx.stream());
 #else
       PADDLE_THROW("Paddle is not compiled with GPU");
 #endif
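For readers skimming the change, here is a rough Python sketch (not the Paddle implementation) of what StridedNumelCopyWithAxis does after this commit. It assumes stride_numel(dims) holds, for each axis, the number of elements from that axis to the end (so a shape of (2, 3, 4) gives (24, 12, 4)), and it uses flat Python lists in place of the raw device pointers:

# Rough sketch only; the function name, arguments and the flat-list buffers
# mirror the C++ helper but are illustrative, not the actual Paddle code.
def strided_numel_copy_with_axis(dst, dst_stride_numel,
                                 src, src_stride_numel, axis, size):
    before = dst_stride_numel[0] // dst_stride_numel[axis]  # outer rows to copy
    src_after = src_stride_numel[axis]  # elements per row in the source
    dst_after = dst_stride_numel[axis]  # elements per row in the destination
    for i in range(before):
        # After this commit the caller says how many elements to move per row;
        # previously the helper copied min(src_after, dst_after) elements.
        dst[i * dst_after:i * dst_after + size] = \
            src[i * src_after:i * src_after + size]

In the concat kernel above, size is in_stride[axis], so each input contributes exactly its own row width at the current output offset even though src_after and dst_after differ.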
python/paddle/v2/fluid/distribute_transpiler.py
@@ -121,6 +121,7 @@ def split_dense_variable(var_list,
                 block_size += dim1 - remains
         # update split_count after aligning
         split_count = int(math.ceil(var_numel / float(block_size)))
+        print("###split var ", var.name, var.shape, block_size, split_count)
         for block_id in xrange(split_count):
             curr_block_size = min(block_size, var_numel - (
                 (block_id) * block_size))
@@ -255,6 +256,7 @@ class DistributeTranspiler:
                 splited_shape = [rows]
                 if len(orig_shape) >= 2:
                     splited_shape.extend(orig_shape[1:])
+                print("###splited: ", size, rows, splited_shape)
                 var = program.global_block().create_var(
                     name="%s.block%d" % (varname, i),
                     psersistable=False,
@@ -262,6 +264,7 @@ class DistributeTranspiler:
                     type=orig_var.type,
                     shape=splited_shape)  # flattend splited var
                 var_mapping[varname].append(var)
+                print("###created split var ", var)
         return var_mapping

     def _clone_var(self, block, var):
@@ -528,6 +531,8 @@ class DistributeTranspiler:
"""
# step5
pserver_program
=
Program
()
print
(
"param mapping on pserver: #### "
,
self
.
param_grad_ep_mapping
[
endpoint
][
"params"
])
for
v
in
self
.
param_grad_ep_mapping
[
endpoint
][
"params"
]:
self
.
_clone_var
(
pserver_program
.
global_block
(),
v
)
for
v
in
self
.
param_grad_ep_mapping
[
endpoint
][
"grads"
]:
...
...
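The new print in the first hunk reports the block-splitting arithmetic of split_dense_variable. As a worked example (the numbers are illustrative, not from the commit):

# Illustrative only -- these values are not from the commit; they just mirror
# the arithmetic that the new debug print reports.
import math

var_numel = 1000000   # total elements of the dense variable
block_size = 300000   # block size after alignment
split_count = int(math.ceil(var_numel / float(block_size)))  # -> 4
for block_id in range(split_count):
    curr_block_size = min(block_size, var_numel - block_id * block_size)
    # yields blocks of 300000, 300000, 300000 and 100000 elements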