Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
机器未来
Paddle
提交
9ca8124f
P
Paddle
项目概览
机器未来
/
Paddle
与 Fork 源项目一致
Fork自
PaddlePaddle / Paddle
通知
1
Star
1
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
1
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
1
Issue
1
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
9ca8124f
编写于
5月 09, 2018
作者:
Y
Yao Cheng
浏览文件
操作
浏览文件
下载
差异文件
Merge branch 'develop' of
https://github.com/PaddlePaddle/Paddle
into develop
yes
上级
8cbb49ce
8b1918f5
变更
18
隐藏空白更改
内联
并排
Showing
18 changed file
with
202 addition
and
39 deletion
+202
-39
doc/fluid/build_and_install/paddleci.png
doc/fluid/build_and_install/paddleci.png
+1
-0
doc/fluid/design/motivation/refactorization.md
doc/fluid/design/motivation/refactorization.md
+3
-3
doc/fluid/images/op.dot
doc/fluid/images/op.dot
+4
-0
doc/fluid/images/op_op_with_kern_class_diagram.dot
doc/fluid/images/op_op_with_kern_class_diagram.dot
+38
-0
doc/fluid/images/op_with_kernel.dot
doc/fluid/images/op_with_kernel.dot
+26
-0
doc/v2/api/config/layer.rst
doc/v2/api/config/layer.rst
+8
-3
paddle/fluid/framework/details/fetch_op_handle.cc
paddle/fluid/framework/details/fetch_op_handle.cc
+3
-1
paddle/fluid/framework/details/nccl_all_reduce_op_handle.cc
paddle/fluid/framework/details/nccl_all_reduce_op_handle.cc
+3
-1
paddle/fluid/framework/details/send_op_handle.cc
paddle/fluid/framework/details/send_op_handle.cc
+3
-1
paddle/fluid/inference/tests/book/CMakeLists.txt
paddle/fluid/inference/tests/book/CMakeLists.txt
+1
-1
paddle/fluid/operators/conv_op.h
paddle/fluid/operators/conv_op.h
+6
-4
paddle/fluid/operators/conv_transpose_op.h
paddle/fluid/operators/conv_transpose_op.h
+6
-3
paddle/fluid/platform/cuda_device_function.h
paddle/fluid/platform/cuda_device_function.h
+1
-0
paddle/fluid/platform/profiler.cc
paddle/fluid/platform/profiler.cc
+1
-1
python/paddle/fluid/tests/book/notest_understand_sentiment.py
...on/paddle/fluid/tests/book/notest_understand_sentiment.py
+0
-0
python/paddle/fluid/tests/unittests/test_memory_optimization_transpiler.py
...id/tests/unittests/test_memory_optimization_transpiler.py
+1
-1
python/paddle/fluid/tests/unittests/test_parallel_executor.py
...on/paddle/fluid/tests/unittests/test_parallel_executor.py
+96
-19
python/paddle/fluid/tests/unittests/test_split_var.py
python/paddle/fluid/tests/unittests/test_split_var.py
+1
-1
未找到文件。
doc/fluid/build_and_install/paddleci.png
0 → 120000
浏览文件 @
9ca8124f
../../v2/build_and_install/paddleci.png
\ No newline at end of file
doc/fluid/design/motivation/refactorization.md
浏览文件 @
9ca8124f
...
@@ -125,12 +125,12 @@ Compile Time -> IR -> Runtime
...
@@ -125,12 +125,12 @@ Compile Time -> IR -> Runtime
## Operator/OpWithKernel/OpKernel
## Operator/OpWithKernel/OpKernel


---
---
## Operator
## Operator


*
`Operator`
is the fundamental building block of the user interface.
*
`Operator`
is the fundamental building block of the user interface.
*
Operator stores input/output variable names and attributes.
*
Operator stores input/output variable names and attributes.
...
@@ -141,7 +141,7 @@ Compile Time -> IR -> Runtime
...
@@ -141,7 +141,7 @@ Compile Time -> IR -> Runtime
## OpWithKernel/Kernel
## OpWithKernel/Kernel


*
`OpWithKernel`
inherits
`Operator`
.
*
`OpWithKernel`
inherits
`Operator`
.
*
`OpWithKernel`
contains a Kernel map.
*
`OpWithKernel`
contains a Kernel map.
...
...
doc/fluid/images/op.dot
0 → 100644
浏览文件 @
9ca8124f
digraph
sample
{
graph
[
rankdir
=
TD
]
;
node
[
shape
=
record
]
;
op
[
label
=
"{Operator| InferShape()=0\lRun()=0\l | map<string, string[]> inputs_\lmap<string, string[]> outputs_ \l AttributeMap attrs_\l}"
]
;
}
\ No newline at end of file
doc/fluid/images/op_op_with_kern_class_diagram.dot
0 → 100644
浏览文件 @
9ca8124f
digraph
sample
{
graph
[
rankdir
=
TD
]
;
node
[
shape
=
record
]
;
op
[
label
=
"{Operator| InferShape()=0\lRun()=0\l | map<string, string[]> inputs_\lmap<string, string[]> outputs_ \l AttributeMap attrs_\l}"
]
;
op_with_kern
[
label
=
"{OpWithKernel | InferShape()=0\lRun()\l | map<OpKernelKey,OpKernel>kernels_ }"
]
op_kernel
[
label
=
"{OpKernel | Compute()=0}"
]
op_kernel_key
[
label
=
"{OpKernelKey| Place place\n...}"
]
op
->
op_with_kern
[
dir
=
back
,
arrowtail
=
onormal
]
op_with_kern
->
op_kernel
[
arrowhead
=
vee
,
label
=
"contains many"
]
{
rank
=
same
;
op_with_kern
op_kernel
}
op_kernel
->
op_kernel_key
[
style
=
invis
]
{
rank
=
same
;
op_kernel
op_kernel_key
}
op_with_kern
->
op_kernel_key
[
arrowhead
=
vee
,
label
=
"\nas map key"
]
mul_op
[
label
=
"MulOp"
]
op_with_kern
->
mul_op
[
dir
=
back
,
arrowtail
=
onormal
]
mul_kernel
[
label
=
"template <typename Place>\lclass MulOpKernel\l"
]
op_kernel
->
mul_kernel
[
dir
=
back
,
arrowtail
=
onormal
]
mul_op
->
mul_kernel
[
arrowhead
=
vee
,
label
=
"register many"
]
{
rank
=
same
;
mul_op
;
mul_kernel
;
}
}
\ No newline at end of file
doc/fluid/images/op_with_kernel.dot
0 → 100644
浏览文件 @
9ca8124f
digraph
sample
{
graph
[
rankdir
=
TD
]
;
node
[
shape
=
record
]
;
op
[
label
=
"{Operator}"
]
;
op_with_kern
[
label
=
"{OpWithKernel | InferShape()=0\lRun()\l | map<OpKernelKey,OpKernel>kernels_ }"
]
op_kernel
[
label
=
"{OpKernel | Compute()=0}"
]
op_kernel_key
[
label
=
"{OpKernelKey| Place place\n...}"
]
op
->
op_with_kern
[
dir
=
back
,
arrowtail
=
onormal
]
op_with_kern
->
op_kernel
[
arrowhead
=
vee
,
label
=
"contains many"
]
{
rank
=
same
;
op_with_kern
op_kernel
}
op_kernel
->
op_kernel_key
[
style
=
invis
]
{
rank
=
same
;
op_kernel
op_kernel_key
}
op_with_kern
->
op_kernel_key
[
arrowhead
=
vee
,
label
=
"\nas map key"
]
}
\ No newline at end of file
doc/v2/api/config/layer.rst
浏览文件 @
9ca8124f
...
@@ -142,7 +142,7 @@ gated_unit
...
@@ -142,7 +142,7 @@ gated_unit
-----------
-----------
.. autoclass:: paddle.v2.layer.gated_unit
.. autoclass:: paddle.v2.layer.gated_unit
:noindex:
:noindex:
Recurrent Layer Group
Recurrent Layer Group
=====================
=====================
...
@@ -354,7 +354,7 @@ dropout
...
@@ -354,7 +354,7 @@ dropout
--------
--------
.. autoclass:: paddle.v2.layer.dropout
.. autoclass:: paddle.v2.layer.dropout
:noindex:
:noindex:
dot_prod
dot_prod
---------
---------
.. autoclass:: paddle.v2.layer.dot_prod
.. autoclass:: paddle.v2.layer.dot_prod
...
@@ -460,6 +460,11 @@ multi_binary_label_cross_entropy_cost
...
@@ -460,6 +460,11 @@ multi_binary_label_cross_entropy_cost
.. autoclass:: paddle.v2.layer.multi_binary_label_cross_entropy_cost
.. autoclass:: paddle.v2.layer.multi_binary_label_cross_entropy_cost
:noindex:
:noindex:
classification_cost
-------------------
.. autoclass:: paddle.v2.layer.classification_cost
:noindex:
huber_regression_cost
huber_regression_cost
-------------------------
-------------------------
.. autoclass:: paddle.v2.layer.huber_regression_cost
.. autoclass:: paddle.v2.layer.huber_regression_cost
...
@@ -534,7 +539,7 @@ detection_output
...
@@ -534,7 +539,7 @@ detection_output
----------------
----------------
.. autoclass:: paddle.v2.layer.detection_output
.. autoclass:: paddle.v2.layer.detection_output
:noindex:
:noindex:
Check Layer
Check Layer
============
============
...
...
paddle/fluid/framework/details/fetch_op_handle.cc
浏览文件 @
9ca8124f
...
@@ -49,7 +49,9 @@ void FetchOpHandle::RunImpl() {
...
@@ -49,7 +49,9 @@ void FetchOpHandle::RunImpl() {
platform
::
DeviceContextPool
::
Instance
().
Get
(
platform
::
CPUPlace
());
platform
::
DeviceContextPool
::
Instance
().
Get
(
platform
::
CPUPlace
());
for
(
auto
*
input
:
inputs_
)
{
for
(
auto
*
input
:
inputs_
)
{
auto
*
var
=
static_cast
<
VarHandle
*>
(
input
);
auto
*
var
=
static_cast
<
VarHandle
*>
(
input
);
var
->
generated_op_
->
Wait
(
cpu_ctx
);
if
(
var
->
generated_op_
)
{
var
->
generated_op_
->
Wait
(
cpu_ctx
);
}
}
}
tensors_
.
resize
(
inputs_
.
size
());
tensors_
.
resize
(
inputs_
.
size
());
auto
*
var_handle
=
static_cast
<
VarHandle
*>
(
inputs_
[
0
]);
auto
*
var_handle
=
static_cast
<
VarHandle
*>
(
inputs_
[
0
]);
...
...
paddle/fluid/framework/details/nccl_all_reduce_op_handle.cc
浏览文件 @
9ca8124f
...
@@ -36,7 +36,9 @@ void NCCLAllReduceOpHandle::RunImpl() {
...
@@ -36,7 +36,9 @@ void NCCLAllReduceOpHandle::RunImpl() {
// Wait input done
// Wait input done
for
(
auto
*
in
:
inputs_
)
{
for
(
auto
*
in
:
inputs_
)
{
auto
&
p
=
static_cast
<
VarHandle
*>
(
in
)
->
place_
;
auto
&
p
=
static_cast
<
VarHandle
*>
(
in
)
->
place_
;
in
->
generated_op_
->
Wait
(
dev_ctxes_
[
p
]);
if
(
in
->
generated_op_
)
{
in
->
generated_op_
->
Wait
(
dev_ctxes_
[
p
]);
}
}
}
auto
&
var_name
=
static_cast
<
VarHandle
*>
(
this
->
inputs_
[
0
])
->
name_
;
auto
&
var_name
=
static_cast
<
VarHandle
*>
(
this
->
inputs_
[
0
])
->
name_
;
...
...
paddle/fluid/framework/details/send_op_handle.cc
浏览文件 @
9ca8124f
...
@@ -32,7 +32,9 @@ void SendOpHandle::RunImpl() {
...
@@ -32,7 +32,9 @@ void SendOpHandle::RunImpl() {
if
(
in
->
DebugString
()
==
"dummy"
)
{
// HACK
if
(
in
->
DebugString
()
==
"dummy"
)
{
// HACK
continue
;
continue
;
}
}
in
->
generated_op_
->
Wait
(
dev_ctxes_
[
p
]);
if
(
in
->
generated_op_
)
{
in
->
generated_op_
->
Wait
(
dev_ctxes_
[
p
]);
}
}
}
auto
&
tmp_scope
=
local_scope_
->
FindVar
(
kLocalExecScopeName
)
->
Get
<
Scope
*>
();
auto
&
tmp_scope
=
local_scope_
->
FindVar
(
kLocalExecScopeName
)
->
Get
<
Scope
*>
();
// FIXME(wuyi): can not use RunAndRecordEvent here, for it will cause dead
// FIXME(wuyi): can not use RunAndRecordEvent here, for it will cause dead
...
...
paddle/fluid/inference/tests/book/CMakeLists.txt
浏览文件 @
9ca8124f
...
@@ -36,5 +36,5 @@ inference_test(label_semantic_roles)
...
@@ -36,5 +36,5 @@ inference_test(label_semantic_roles)
inference_test
(
recognize_digits ARGS mlp conv
)
inference_test
(
recognize_digits ARGS mlp conv
)
inference_test
(
recommender_system
)
inference_test
(
recommender_system
)
#inference_test(rnn_encoder_decoder)
#inference_test(rnn_encoder_decoder)
inference_test
(
understand_sentiment ARGS conv
)
#
inference_test(understand_sentiment ARGS conv)
inference_test
(
word2vec
)
inference_test
(
word2vec
)
paddle/fluid/operators/conv_op.h
浏览文件 @
9ca8124f
...
@@ -187,7 +187,8 @@ class GemmConvKernel : public framework::OpKernel<T> {
...
@@ -187,7 +187,8 @@ class GemmConvKernel : public framework::OpKernel<T> {
// gemm
// gemm
Tensor
out_slice
=
out_batch
.
Slice
(
g
*
out_step
,
(
g
+
1
)
*
out_step
);
Tensor
out_slice
=
out_batch
.
Slice
(
g
*
out_step
,
(
g
+
1
)
*
out_step
);
Tensor
filter_slice
=
filter
.
Slice
(
g
*
out_step
,
(
g
+
1
)
*
out_step
);
Tensor
filter_slice
=
filter
.
Slice
(
g
*
out_step
,
(
g
+
1
)
*
out_step
);
blas
.
MatMul
(
filter_slice
,
col_matrix
,
&
out_slice
);
blas
.
MatMul
(
filter_slice
,
false
,
col_matrix
,
false
,
T
(
1.0
),
&
out_slice
,
T
(
0.0
));
}
}
}
}
}
}
...
@@ -304,7 +305,8 @@ class GemmConvGradKernel : public framework::OpKernel<T> {
...
@@ -304,7 +305,8 @@ class GemmConvGradKernel : public framework::OpKernel<T> {
col_matrix
.
ShareDataWith
(
in_grad_slice
);
col_matrix
.
ShareDataWith
(
in_grad_slice
);
col_matrix
.
Resize
(
col_matrix_shape
);
col_matrix
.
Resize
(
col_matrix_shape
);
}
}
blas
.
MatMul
(
filter_slice
,
true
,
out_grad_slice
,
false
,
&
col_matrix
);
blas
.
MatMul
(
filter_slice
,
true
,
out_grad_slice
,
false
,
T
(
1.0
),
&
col_matrix
,
T
(
0.0
));
if
(
is_expand
&&
data_dim
==
2U
)
{
if
(
is_expand
&&
data_dim
==
2U
)
{
col2im
(
dev_ctx
,
col
,
dilations
,
strides
,
col2im
(
dev_ctx
,
col
,
dilations
,
strides
,
...
@@ -351,8 +353,8 @@ class GemmConvGradKernel : public framework::OpKernel<T> {
...
@@ -351,8 +353,8 @@ class GemmConvGradKernel : public framework::OpKernel<T> {
// gemm
// gemm
Tensor
filter_grad_slice
=
Tensor
filter_grad_slice
=
filter_grad_
.
Slice
(
g
*
out_step
,
(
g
+
1
)
*
out_step
);
filter_grad_
.
Slice
(
g
*
out_step
,
(
g
+
1
)
*
out_step
);
blas
.
MatMul
(
out_grad_slice
,
false
,
col_matrix
,
true
,
blas
.
MatMul
(
out_grad_slice
,
false
,
col_matrix
,
true
,
T
(
1.0
),
&
filter_grad_slice
);
&
filter_grad_slice
,
T
(
1.0
)
);
}
}
}
}
}
}
...
...
paddle/fluid/operators/conv_transpose_op.h
浏览文件 @
9ca8124f
...
@@ -135,7 +135,8 @@ class GemmConvTransposeKernel : public framework::OpKernel<T> {
...
@@ -135,7 +135,8 @@ class GemmConvTransposeKernel : public framework::OpKernel<T> {
// col_matrix = filter * input_batch
// col_matrix = filter * input_batch
// of shape (c * k_h * k_w, h * w) or (c * k_d * k_h * k_w, d * h * w)
// of shape (c * k_h * k_w, h * w) or (c * k_d * k_h * k_w, d * h * w)
blas
.
MatMul
(
filter
,
true
,
input_batch
,
false
,
&
col_matrix
);
blas
.
MatMul
(
filter
,
true
,
input_batch
,
false
,
static_cast
<
T
>
(
1.0
),
&
col_matrix
,
static_cast
<
T
>
(
0.0
));
if
(
data_dim
==
2U
)
{
if
(
data_dim
==
2U
)
{
// col2im: col_matrix -> dy
// col2im: col_matrix -> dy
...
@@ -267,7 +268,8 @@ class GemmConvTransposeGradKernel : public framework::OpKernel<T> {
...
@@ -267,7 +268,8 @@ class GemmConvTransposeGradKernel : public framework::OpKernel<T> {
// or
// or
// (m, c * k_d * k_h * k_w) * (c * k_d * k_h * k_w, d * h * w) -> (m,
// (m, c * k_d * k_h * k_w) * (c * k_d * k_h * k_w, d * h * w) -> (m,
// d, h, w)
// d, h, w)
blas
.
MatMul
(
filter
,
false
,
col_matrix
,
false
,
&
input_grad_batch
);
blas
.
MatMul
(
filter
,
false
,
col_matrix
,
false
,
static_cast
<
T
>
(
1.0
),
&
input_grad_batch
,
static_cast
<
T
>
(
0.0
));
}
}
if
(
filter_grad
)
{
if
(
filter_grad
)
{
// input batch
// input batch
...
@@ -277,7 +279,8 @@ class GemmConvTransposeGradKernel : public framework::OpKernel<T> {
...
@@ -277,7 +279,8 @@ class GemmConvTransposeGradKernel : public framework::OpKernel<T> {
// or
// or
// (m, d * h * w) * (d * h * w, c * k_d * k_h * k_w) -> (m, c * k_d *
// (m, d * h * w) * (d * h * w, c * k_d * k_h * k_w) -> (m, c * k_d *
// k_h * k_w)
// k_h * k_w)
blas
.
MatMul
(
in_batch
,
false
,
col_matrix
,
true
,
&
filter_grad_
);
blas
.
MatMul
(
in_batch
,
false
,
col_matrix
,
true
,
static_cast
<
T
>
(
1.0
),
&
filter_grad_
,
static_cast
<
T
>
(
1.0
));
}
}
}
}
}
}
...
...
paddle/fluid/platform/cuda_device_function.h
浏览文件 @
9ca8124f
...
@@ -63,6 +63,7 @@ __device__ T reduceSum(T val, int tid, int len) {
...
@@ -63,6 +63,7 @@ __device__ T reduceSum(T val, int tid, int len) {
val
+=
platform
::
CudaShuffleDownSync
(
mask
,
val
,
offset
);
val
+=
platform
::
CudaShuffleDownSync
(
mask
,
val
,
offset
);
if
(
tid
<
warpSize
)
shm
[
tid
]
=
0
;
if
(
tid
<
warpSize
)
shm
[
tid
]
=
0
;
__syncthreads
();
if
(
tid
%
warpSize
==
0
)
{
if
(
tid
%
warpSize
==
0
)
{
shm
[
tid
/
warpSize
]
=
val
;
shm
[
tid
/
warpSize
]
=
val
;
...
...
paddle/fluid/platform/profiler.cc
浏览文件 @
9ca8124f
...
@@ -463,7 +463,7 @@ void SetProfileListener() {
...
@@ -463,7 +463,7 @@ void SetProfileListener() {
std
::
mt19937
rng
;
std
::
mt19937
rng
;
rng
.
seed
(
std
::
random_device
()());
rng
.
seed
(
std
::
random_device
()());
std
::
uniform_int_distribution
<
std
::
mt19937
::
result_type
>
dist6
(
std
::
uniform_int_distribution
<
std
::
mt19937
::
result_type
>
dist6
(
1
,
std
::
numeric_limits
<
int64_t
>::
max
());
1
,
std
::
numeric_limits
<
std
::
mt19937
::
result_type
>::
max
());
profiler_lister_id
=
dist6
(
rng
);
profiler_lister_id
=
dist6
(
rng
);
}
}
int64_t
ListenerId
()
{
return
profiler_lister_id
;
}
int64_t
ListenerId
()
{
return
profiler_lister_id
;
}
...
...
python/paddle/fluid/tests/book/test_understand_sentiment.py
→
python/paddle/fluid/tests/book/
no
test_understand_sentiment.py
浏览文件 @
9ca8124f
文件已移动
python/paddle/fluid/tests/unittests/test_memory_optimization_transpiler.py
浏览文件 @
9ca8124f
...
@@ -18,7 +18,7 @@ import unittest
...
@@ -18,7 +18,7 @@ import unittest
import
paddle.fluid.layers
as
layers
import
paddle.fluid.layers
as
layers
import
paddle.fluid.optimizer
as
optimizer
import
paddle.fluid.optimizer
as
optimizer
from
paddle.fluid.framework
import
Program
,
program_guard
from
paddle.fluid.framework
import
Program
,
program_guard
from
paddle.fluid.
memory_optimization_
transpiler
import
memory_optimize
from
paddle.fluid.transpiler
import
memory_optimize
class
TestControlFlowGraph
(
unittest
.
TestCase
):
class
TestControlFlowGraph
(
unittest
.
TestCase
):
...
...
python/paddle/fluid/tests/unittests/test_parallel_executor.py
浏览文件 @
9ca8124f
...
@@ -12,7 +12,7 @@
...
@@ -12,7 +12,7 @@
# See the License for the specific language governing permissions and
# See the License for the specific language governing permissions and
# limitations under the License.
# limitations under the License.
import
numpy
import
numpy
as
np
import
unittest
import
unittest
import
paddle.fluid
as
fluid
import
paddle.fluid
as
fluid
...
@@ -243,7 +243,7 @@ class TestParallelExecutorBase(unittest.TestCase):
...
@@ -243,7 +243,7 @@ class TestParallelExecutorBase(unittest.TestCase):
begin
=
time
.
time
()
begin
=
time
.
time
()
first_loss
,
=
run_executor
(
first_loss
,
=
run_executor
(
exe
=
exe
,
feed
=
feed_dict
,
fetch_list
=
[
loss
.
name
])
exe
=
exe
,
feed
=
feed_dict
,
fetch_list
=
[
loss
.
name
])
first_loss
=
n
umpy
.
array
(
first_loss
)
first_loss
=
n
p
.
array
(
first_loss
)
for
i
in
xrange
(
iter
):
for
i
in
xrange
(
iter
):
run_executor
(
exe
=
exe
,
feed
=
feed_dict
,
fetch_list
=
[])
run_executor
(
exe
=
exe
,
feed
=
feed_dict
,
fetch_list
=
[])
...
@@ -256,7 +256,7 @@ class TestParallelExecutorBase(unittest.TestCase):
...
@@ -256,7 +256,7 @@ class TestParallelExecutorBase(unittest.TestCase):
print
"%.4f Instance per second"
%
(
print
"%.4f Instance per second"
%
(
(
batch_size
*
iter
+
2
)
/
(
end
-
begin
))
(
batch_size
*
iter
+
2
)
/
(
end
-
begin
))
last_loss
=
n
umpy
.
array
(
last_loss
)
last_loss
=
n
p
.
array
(
last_loss
)
print
first_loss
,
last_loss
print
first_loss
,
last_loss
# self.assertGreater(first_loss[0], last_loss[0])
# self.assertGreater(first_loss[0], last_loss[0])
...
@@ -284,8 +284,8 @@ class TestMNIST(TestParallelExecutorBase):
...
@@ -284,8 +284,8 @@ class TestMNIST(TestParallelExecutorBase):
self
.
check_network_convergence
(
simple_fc_net
)
self
.
check_network_convergence
(
simple_fc_net
)
self
.
check_network_convergence
(
simple_fc_net
,
allow_op_delay
=
True
)
self
.
check_network_convergence
(
simple_fc_net
,
allow_op_delay
=
True
)
img
=
n
umpy
.
zeros
(
shape
=
[
32
,
784
],
dtype
=
'float32'
)
img
=
n
p
.
zeros
(
shape
=
[
32
,
784
],
dtype
=
'float32'
)
label
=
n
umpy
.
ones
(
shape
=
[
32
,
1
],
dtype
=
'int64'
)
label
=
n
p
.
ones
(
shape
=
[
32
,
1
],
dtype
=
'int64'
)
self
.
check_network_convergence
(
self
.
check_network_convergence
(
simple_fc_net
,
feed_dict
=
{
"image"
:
img
,
simple_fc_net
,
feed_dict
=
{
"image"
:
img
,
"label"
:
label
})
"label"
:
label
})
...
@@ -294,8 +294,8 @@ class TestMNIST(TestParallelExecutorBase):
...
@@ -294,8 +294,8 @@ class TestMNIST(TestParallelExecutorBase):
self
.
check_simple_fc_convergence
()
self
.
check_simple_fc_convergence
()
def
check_simple_fc_parallel_accuracy
(
self
):
def
check_simple_fc_parallel_accuracy
(
self
):
img
=
n
umpy
.
zeros
(
shape
=
[
32
,
784
],
dtype
=
'float32'
)
img
=
n
p
.
zeros
(
shape
=
[
32
,
784
],
dtype
=
'float32'
)
label
=
n
umpy
.
ones
(
shape
=
[
32
,
1
],
dtype
=
'int64'
)
label
=
n
p
.
ones
(
shape
=
[
32
,
1
],
dtype
=
'int64'
)
single_first_loss
,
single_last_loss
=
self
.
check_network_convergence
(
single_first_loss
,
single_last_loss
=
self
.
check_network_convergence
(
method
=
simple_fc_net
,
method
=
simple_fc_net
,
seed
=
1000
,
seed
=
1000
,
...
@@ -319,8 +319,8 @@ class TestMNIST(TestParallelExecutorBase):
...
@@ -319,8 +319,8 @@ class TestMNIST(TestParallelExecutorBase):
def
check_batchnorm_fc_convergence
(
self
):
def
check_batchnorm_fc_convergence
(
self
):
self
.
check_network_convergence
(
fc_with_batchnorm
)
self
.
check_network_convergence
(
fc_with_batchnorm
)
img
=
n
umpy
.
zeros
(
shape
=
[
32
,
784
],
dtype
=
'float32'
)
img
=
n
p
.
zeros
(
shape
=
[
32
,
784
],
dtype
=
'float32'
)
label
=
n
umpy
.
ones
(
shape
=
[
32
,
1
],
dtype
=
'int64'
)
label
=
n
p
.
ones
(
shape
=
[
32
,
1
],
dtype
=
'int64'
)
self
.
check_network_convergence
(
self
.
check_network_convergence
(
fc_with_batchnorm
,
feed_dict
=
{
"image"
:
img
,
fc_with_batchnorm
,
feed_dict
=
{
"image"
:
img
,
"label"
:
label
})
"label"
:
label
})
...
@@ -404,9 +404,6 @@ class ModelHyperParams(object):
...
@@ -404,9 +404,6 @@ class ModelHyperParams(object):
dropout
=
0.1
dropout
=
0.1
import
numpy
as
np
def
prepare_batch_input
(
insts
,
src_pad_idx
,
trg_pad_idx
,
n_head
):
def
prepare_batch_input
(
insts
,
src_pad_idx
,
trg_pad_idx
,
n_head
):
"""
"""
Pad the instances to the max sequence length in batch, and generate the
Pad the instances to the max sequence length in batch, and generate the
...
@@ -533,9 +530,8 @@ class ParallelExecutorTestingDuringTraining(unittest.TestCase):
...
@@ -533,9 +530,8 @@ class ParallelExecutorTestingDuringTraining(unittest.TestCase):
opt
.
minimize
(
loss
)
opt
.
minimize
(
loss
)
batch_size
=
32
batch_size
=
32
image
=
numpy
.
random
.
normal
(
size
=
(
batch_size
,
image
=
np
.
random
.
normal
(
size
=
(
batch_size
,
784
)).
astype
(
'float32'
)
784
)).
astype
(
'float32'
)
label
=
np
.
random
.
randint
(
0
,
10
,
(
batch_size
,
1
),
dtype
=
"int64"
)
label
=
numpy
.
random
.
randint
(
0
,
10
,
(
batch_size
,
1
),
dtype
=
"int64"
)
place
=
fluid
.
CUDAPlace
(
0
)
place
=
fluid
.
CUDAPlace
(
0
)
exe
=
fluid
.
Executor
(
place
)
exe
=
fluid
.
Executor
(
place
)
...
@@ -552,12 +548,12 @@ class ParallelExecutorTestingDuringTraining(unittest.TestCase):
...
@@ -552,12 +548,12 @@ class ParallelExecutorTestingDuringTraining(unittest.TestCase):
for
i
in
xrange
(
5
):
for
i
in
xrange
(
5
):
test_loss
,
=
test_exe
.
run
([
loss
.
name
],
feed
=
feed_dict
)
test_loss
,
=
test_exe
.
run
([
loss
.
name
],
feed
=
feed_dict
)
test_loss
=
n
umpy
.
array
(
test_loss
)
test_loss
=
n
p
.
array
(
test_loss
)
train_loss
,
=
train_exe
.
run
([
loss
.
name
],
feed
=
feed_dict
)
train_loss
,
=
train_exe
.
run
([
loss
.
name
],
feed
=
feed_dict
)
train_loss
=
n
umpy
.
array
(
train_loss
)
train_loss
=
n
p
.
array
(
train_loss
)
self
.
assertTrue
(
self
.
assertTrue
(
n
umpy
.
allclose
(
n
p
.
allclose
(
train_loss
,
test_loss
,
atol
=
1e-8
),
train_loss
,
test_loss
,
atol
=
1e-8
),
"Train loss: "
+
str
(
train_loss
)
+
"
\n
Test loss:"
+
"Train loss: "
+
str
(
train_loss
)
+
"
\n
Test loss:"
+
str
(
test_loss
))
str
(
test_loss
))
...
@@ -712,7 +708,7 @@ class TestCRFModel(unittest.TestCase):
...
@@ -712,7 +708,7 @@ class TestCRFModel(unittest.TestCase):
data
=
train_data
()
data
=
train_data
()
for
i
in
xrange
(
10
):
for
i
in
xrange
(
10
):
cur_batch
=
next
(
data
)
cur_batch
=
next
(
data
)
print
map
(
n
umpy
.
array
,
print
map
(
n
p
.
array
,
pe
.
run
(
feed
=
feeder
.
feed
(
cur_batch
),
pe
.
run
(
feed
=
feeder
.
feed
(
cur_batch
),
fetch_list
=
[
avg_cost
.
name
]))[
0
]
fetch_list
=
[
avg_cost
.
name
]))[
0
]
...
@@ -721,3 +717,84 @@ class TestCRFModel(unittest.TestCase):
...
@@ -721,3 +717,84 @@ class TestCRFModel(unittest.TestCase):
def
test_update_dense_parameter
(
self
):
def
test_update_dense_parameter
(
self
):
self
.
check_network_convergence
(
is_sparse
=
False
)
self
.
check_network_convergence
(
is_sparse
=
False
)
# test fetch all the variables of global_block
import
paddle.dataset.flowers
as
flowers
import
math
def
Lenet
(
data
,
class_dim
):
conv1
=
fluid
.
layers
.
conv2d
(
data
,
32
,
5
,
1
,
act
=
None
)
bn1
=
fluid
.
layers
.
batch_norm
(
conv1
,
act
=
'relu'
)
pool1
=
fluid
.
layers
.
pool2d
(
bn1
,
2
,
'max'
,
2
)
conv2
=
fluid
.
layers
.
conv2d
(
pool1
,
50
,
5
,
1
,
act
=
None
)
bn2
=
fluid
.
layers
.
batch_norm
(
conv2
,
act
=
'relu'
)
pool2
=
fluid
.
layers
.
pool2d
(
bn2
,
2
,
'max'
,
2
)
fc1
=
fluid
.
layers
.
fc
(
pool2
,
size
=
500
,
act
=
'relu'
)
fc2
=
fluid
.
layers
.
fc
(
fc1
,
size
=
class_dim
,
act
=
'softmax'
)
return
fc2
class
TestFetchOp
(
unittest
.
TestCase
):
def
parallel_exe
(
self
,
train_inputs
,
seed
):
main
=
fluid
.
Program
()
startup
=
fluid
.
Program
()
startup
.
random_seed
=
seed
with
fluid
.
program_guard
(
main
,
startup
):
data
=
fluid
.
layers
.
data
(
name
=
'image'
,
shape
=
[
3
,
224
,
224
],
dtype
=
'float32'
)
label
=
fluid
.
layers
.
data
(
name
=
'label'
,
shape
=
[
1
],
dtype
=
'int64'
)
out
=
Lenet
(
data
,
class_dim
=
102
)
loss
=
fluid
.
layers
.
cross_entropy
(
input
=
out
,
label
=
label
)
loss
=
fluid
.
layers
.
mean
(
loss
)
opt
=
fluid
.
optimizer
.
Momentum
(
learning_rate
=
0.1
,
momentum
=
0.9
,
regularization
=
fluid
.
regularizer
.
L2Decay
(
1e-4
))
opt
.
minimize
(
loss
)
# TODO(zcd): I found that onece the memory optimizer is open,
# parallel_exe doesn't fetch some variable, such as conv2d_0.b_0@GRAD,
# conv2d_1.b_0@GRAD. Those variables should not be pruned.
# fluid.memory_optimize(main)
place
=
fluid
.
CUDAPlace
(
0
)
exe
=
fluid
.
Executor
(
place
)
exe
.
run
(
startup
)
feeder
=
fluid
.
DataFeeder
(
place
=
place
,
feed_list
=
[
data
,
label
])
pe
=
fluid
.
ParallelExecutor
(
use_cuda
=
True
,
loss_name
=
loss
.
name
,
main_program
=
main
)
fetch_list
=
[]
all_vars
=
main
.
global_block
().
vars
for
k
,
v
in
all_vars
.
iteritems
():
if
'tmp'
not
in
k
and
k
[
0
]
is
not
'_'
or
v
.
persistable
:
fetch_list
.
append
(
k
)
for
data
in
train_inputs
:
ret
=
pe
.
run
(
fetch_list
,
feed
=
feeder
.
feed
(
data
))
for
i
in
range
(
len
(
fetch_list
)):
assert
not
math
.
isnan
(
np
.
sum
(
ret
[
i
]))
and
\
not
math
.
isinf
(
np
.
sum
(
ret
[
i
]))
def
test_update_sparse_parameter
(
self
):
tst_reader
=
paddle
.
batch
(
flowers
.
test
(
use_xmap
=
False
),
batch_size
=
16
)
tst_reader_iter
=
tst_reader
()
iters
=
3
train_inputs
=
[]
for
i
in
range
(
iters
):
train_inputs
.
append
(
tst_reader_iter
.
next
())
self
.
parallel_exe
(
train_inputs
,
seed
=
1
)
if
__name__
==
'__main__'
:
unittest
.
main
()
python/paddle/fluid/tests/unittests/test_split_var.py
浏览文件 @
9ca8124f
...
@@ -14,7 +14,7 @@
...
@@ -14,7 +14,7 @@
import
math
import
math
import
unittest
import
unittest
from
paddle.fluid.distribute_transpiler
import
split_dense_variable
from
paddle.fluid.
transpiler.
distribute_transpiler
import
split_dense_variable
import
paddle.fluid
as
fluid
import
paddle.fluid
as
fluid
import
paddle.fluid.core
as
core
import
paddle.fluid.core
as
core
import
random
import
random
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录