Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
PaddlePaddle
Paddle
提交
018e2f3a
P
Paddle
项目概览
PaddlePaddle
/
Paddle
大约 2 年 前同步成功
通知
2325
Star
20933
Fork
5424
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
1423
列表
看板
标记
里程碑
合并请求
543
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
1,423
Issue
1,423
列表
看板
标记
里程碑
合并请求
543
合并请求
543
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
018e2f3a
编写于
7月 25, 2018
作者:
T
tangwei12
浏览文件
操作
浏览文件
下载
差异文件
Merge branch 'dis_ckpt_fix' of github.com:seiriosPlus/Paddle into dis_ckpt_fix
上级
74c5476f
04b1df2a
变更
52
隐藏空白更改
内联
并排
Showing
52 changed file
with
513 addition
and
487 deletion
+513
-487
paddle/fluid/API.spec
paddle/fluid/API.spec
+1
-20
paddle/fluid/framework/executor.cc
paddle/fluid/framework/executor.cc
+3
-9
paddle/fluid/framework/executor.h
paddle/fluid/framework/executor.h
+3
-9
paddle/fluid/inference/api/api_impl.cc
paddle/fluid/inference/api/api_impl.cc
+1
-0
paddle/fluid/inference/tensorrt/convert/fc_op.cc
paddle/fluid/inference/tensorrt/convert/fc_op.cc
+6
-8
paddle/fluid/inference/tensorrt/convert/test_activation_op.cc
...le/fluid/inference/tensorrt/convert/test_activation_op.cc
+1
-1
paddle/fluid/inference/tensorrt/convert/test_fc_op.cc
paddle/fluid/inference/tensorrt/convert/test_fc_op.cc
+7
-6
paddle/fluid/inference/tensorrt/convert/test_mul_op.cc
paddle/fluid/inference/tensorrt/convert/test_mul_op.cc
+1
-1
paddle/fluid/inference/tensorrt/convert/ut_helper.h
paddle/fluid/inference/tensorrt/convert/ut_helper.h
+5
-5
paddle/fluid/inference/tensorrt/engine.cc
paddle/fluid/inference/tensorrt/engine.cc
+39
-23
paddle/fluid/inference/tensorrt/engine.h
paddle/fluid/inference/tensorrt/engine.h
+4
-0
paddle/fluid/inference/tensorrt/test_engine.cc
paddle/fluid/inference/tensorrt/test_engine.cc
+37
-3
paddle/fluid/inference/tests/test_helper.h
paddle/fluid/inference/tests/test_helper.h
+6
-4
paddle/fluid/operators/distributed/CMakeLists.txt
paddle/fluid/operators/distributed/CMakeLists.txt
+1
-1
paddle/fluid/operators/distributed/grpc_client.cc
paddle/fluid/operators/distributed/grpc_client.cc
+11
-28
paddle/fluid/operators/distributed/grpc_client.h
paddle/fluid/operators/distributed/grpc_client.h
+8
-9
paddle/fluid/operators/distributed/request_handler.h
paddle/fluid/operators/distributed/request_handler.h
+0
-2
paddle/fluid/operators/distributed/request_handler_impl.cc
paddle/fluid/operators/distributed/request_handler_impl.cc
+4
-7
paddle/fluid/operators/distributed/rpc_client.h
paddle/fluid/operators/distributed/rpc_client.h
+5
-9
paddle/fluid/operators/distributed/rpc_server.cc
paddle/fluid/operators/distributed/rpc_server.cc
+6
-12
paddle/fluid/operators/distributed/rpc_server.h
paddle/fluid/operators/distributed/rpc_server.h
+2
-3
paddle/fluid/operators/distributed/rpc_server_test.cc
paddle/fluid/operators/distributed/rpc_server_test.cc
+25
-4
paddle/fluid/operators/reader/lod_tensor_blocking_queue.h
paddle/fluid/operators/reader/lod_tensor_blocking_queue.h
+0
-17
paddle/fluid/operators/reshape_op.cc
paddle/fluid/operators/reshape_op.cc
+1
-1
paddle/fluid/operators/tensorrt_engine_op.cc
paddle/fluid/operators/tensorrt_engine_op.cc
+4
-3
paddle/fluid/operators/tensorrt_engine_op.h
paddle/fluid/operators/tensorrt_engine_op.h
+3
-1
paddle/fluid/operators/tensorrt_engine_op_test.cc
paddle/fluid/operators/tensorrt_engine_op_test.cc
+16
-16
paddle/fluid/pybind/pybind.cc
paddle/fluid/pybind/pybind.cc
+1
-4
python/paddle/fluid/executor.py
python/paddle/fluid/executor.py
+21
-4
python/paddle/fluid/framework.py
python/paddle/fluid/framework.py
+2
-3
python/paddle/fluid/io.py
python/paddle/fluid/io.py
+0
-98
python/paddle/fluid/layers/control_flow.py
python/paddle/fluid/layers/control_flow.py
+3
-11
python/paddle/fluid/layers/learning_rate_scheduler.py
python/paddle/fluid/layers/learning_rate_scheduler.py
+52
-59
python/paddle/fluid/tests/demo/file_reader/.gitignore
python/paddle/fluid/tests/demo/file_reader/.gitignore
+0
-0
python/paddle/fluid/tests/demo/file_reader/convert_data_to_recordio.py
.../fluid/tests/demo/file_reader/convert_data_to_recordio.py
+2
-2
python/paddle/fluid/tests/demo/file_reader/train.py
python/paddle/fluid/tests/demo/file_reader/train.py
+138
-0
python/paddle/fluid/tests/test_error_clip.py
python/paddle/fluid/tests/test_error_clip.py
+1
-1
python/paddle/fluid/tests/test_if_else_op.py
python/paddle/fluid/tests/test_if_else_op.py
+8
-5
python/paddle/fluid/tests/unittests/op_test.py
python/paddle/fluid/tests/unittests/op_test.py
+1
-1
python/paddle/fluid/tests/unittests/test_conditional_block.py
...on/paddle/fluid/tests/unittests/test_conditional_block.py
+3
-2
python/paddle/fluid/tests/unittests/test_const_value.py
python/paddle/fluid/tests/unittests/test_const_value.py
+1
-1
python/paddle/fluid/tests/unittests/test_dyn_rnn.py
python/paddle/fluid/tests/unittests/test_dyn_rnn.py
+11
-7
python/paddle/fluid/tests/unittests/test_learning_rate_scheduler.py
...dle/fluid/tests/unittests/test_learning_rate_scheduler.py
+7
-6
python/paddle/fluid/tests/unittests/test_lod_rank_table.py
python/paddle/fluid/tests/unittests/test_lod_rank_table.py
+2
-1
python/paddle/fluid/tests/unittests/test_lod_tensor_array_ops.py
...paddle/fluid/tests/unittests/test_lod_tensor_array_ops.py
+12
-7
python/paddle/fluid/tests/unittests/test_parallel_executor_mnist.py
...dle/fluid/tests/unittests/test_parallel_executor_mnist.py
+28
-57
python/paddle/fluid/tests/unittests/test_parallel_op.py
python/paddle/fluid/tests/unittests/test_parallel_op.py
+1
-1
python/paddle/fluid/tests/unittests/test_reorder_lod_tensor.py
...n/paddle/fluid/tests/unittests/test_reorder_lod_tensor.py
+2
-1
python/paddle/fluid/tests/unittests/test_shrink_rnn_memory.py
...on/paddle/fluid/tests/unittests/test_shrink_rnn_memory.py
+7
-4
python/paddle/fluid/tests/unittests/test_split_and_merge_lod_tensor_op.py
...uid/tests/unittests/test_split_and_merge_lod_tensor_op.py
+6
-6
python/paddle/fluid/transpiler/distribute_transpiler.py
python/paddle/fluid/transpiler/distribute_transpiler.py
+3
-3
python/paddle/fluid/transpiler/memory_optimization_transpiler.py
...paddle/fluid/transpiler/memory_optimization_transpiler.py
+1
-1
未找到文件。
paddle/fluid/API.spec
浏览文件 @
018e2f3a
paddle.fluid.Variable.__init__ ArgSpec(args=['self', 'block', 'type', 'name', 'shape', 'dtype', 'lod_level', 'capacity', 'persistable', 'error_clip', 'stop_gradient', 'is_data'], varargs=None, keywords='kwargs', defaults=(VarType.LOD_TENSOR, None, None, None, None, None, None, None, False, False))
paddle.fluid.Variable.astype ArgSpec(args=['self', 'dtype'], varargs=None, keywords=None, defaults=None)
paddle.fluid.Variable.set_desc ArgSpec(args=['self', 'input'], varargs=None, keywords=None, defaults=None)
paddle.fluid.Variable.set_error_clip ArgSpec(args=['self', 'error_clip'], varargs=None, keywords=None, defaults=None)
paddle.fluid.Variable.to_string ArgSpec(args=['self', 'throw_on_error', 'with_details'], varargs=None, keywords=None, defaults=(False,))
paddle.fluid.Program.__init__ ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None)
paddle.fluid.Program.__init__ ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None)
paddle.fluid.Program.block ArgSpec(args=['self', 'index'], varargs=None, keywords=None, defaults=None)
paddle.fluid.Program.block ArgSpec(args=['self', 'index'], varargs=None, keywords=None, defaults=None)
paddle.fluid.Program.clone ArgSpec(args=['self', 'for_test'], varargs=None, keywords=None, defaults=(False,))
paddle.fluid.Program.clone ArgSpec(args=['self', 'for_test'], varargs=None, keywords=None, defaults=(False,))
...
@@ -33,8 +28,6 @@ paddle.fluid.Operator.set_attr ArgSpec(args=['self', 'name', 'val'], varargs=Non
...
@@ -33,8 +28,6 @@ paddle.fluid.Operator.set_attr ArgSpec(args=['self', 'name', 'val'], varargs=Non
paddle.fluid.Operator.to_string ArgSpec(args=['self', 'throw_on_error'], varargs=None, keywords=None, defaults=None)
paddle.fluid.Operator.to_string ArgSpec(args=['self', 'throw_on_error'], varargs=None, keywords=None, defaults=None)
paddle.fluid.Parameter.__init__ ArgSpec(args=['self', 'block', 'shape', 'dtype'], varargs=None, keywords='kwargs', defaults=None)
paddle.fluid.Parameter.__init__ ArgSpec(args=['self', 'block', 'shape', 'dtype'], varargs=None, keywords='kwargs', defaults=None)
paddle.fluid.Parameter.astype ArgSpec(args=['self', 'dtype'], varargs=None, keywords=None, defaults=None)
paddle.fluid.Parameter.astype ArgSpec(args=['self', 'dtype'], varargs=None, keywords=None, defaults=None)
paddle.fluid.Parameter.set_desc ArgSpec(args=['self', 'input'], varargs=None, keywords=None, defaults=None)
paddle.fluid.Parameter.set_error_clip ArgSpec(args=['self', 'error_clip'], varargs=None, keywords=None, defaults=None)
paddle.fluid.Parameter.to_string ArgSpec(args=['self', 'throw_on_error', 'with_details'], varargs=None, keywords=None, defaults=(False,))
paddle.fluid.Parameter.to_string ArgSpec(args=['self', 'throw_on_error', 'with_details'], varargs=None, keywords=None, defaults=(False,))
paddle.fluid.default_startup_program ArgSpec(args=[], varargs=None, keywords=None, defaults=None)
paddle.fluid.default_startup_program ArgSpec(args=[], varargs=None, keywords=None, defaults=None)
paddle.fluid.default_main_program ArgSpec(args=[], varargs=None, keywords=None, defaults=None)
paddle.fluid.default_main_program ArgSpec(args=[], varargs=None, keywords=None, defaults=None)
...
@@ -42,8 +35,7 @@ paddle.fluid.program_guard ArgSpec(args=[], varargs='args', keywords='kwds', def
...
@@ -42,8 +35,7 @@ paddle.fluid.program_guard ArgSpec(args=[], varargs='args', keywords='kwds', def
paddle.fluid.get_var ArgSpec(args=['name', 'program'], varargs=None, keywords=None, defaults=(None,))
paddle.fluid.get_var ArgSpec(args=['name', 'program'], varargs=None, keywords=None, defaults=(None,))
paddle.fluid.Executor.__init__ ArgSpec(args=['self', 'place'], varargs=None, keywords=None, defaults=None)
paddle.fluid.Executor.__init__ ArgSpec(args=['self', 'place'], varargs=None, keywords=None, defaults=None)
paddle.fluid.Executor.as_lodtensor ArgSpec(args=['self', 'data'], varargs=None, keywords=None, defaults=None)
paddle.fluid.Executor.as_lodtensor ArgSpec(args=['self', 'data'], varargs=None, keywords=None, defaults=None)
paddle.fluid.Executor.begin_pass ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None)
paddle.fluid.Executor.close ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None)
paddle.fluid.Executor.end_pass ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None)
paddle.fluid.Executor.run ArgSpec(args=['self', 'program', 'feed', 'fetch_list', 'feed_var_name', 'fetch_var_name', 'scope', 'return_numpy', 'use_program_cache'], varargs=None, keywords=None, defaults=(None, None, None, 'feed', 'fetch', None, True, False))
paddle.fluid.Executor.run ArgSpec(args=['self', 'program', 'feed', 'fetch_list', 'feed_var_name', 'fetch_var_name', 'scope', 'return_numpy', 'use_program_cache'], varargs=None, keywords=None, defaults=(None, None, None, 'feed', 'fetch', None, True, False))
paddle.fluid.global_scope ArgSpec(args=[], varargs=None, keywords=None, defaults=None)
paddle.fluid.global_scope ArgSpec(args=[], varargs=None, keywords=None, defaults=None)
paddle.fluid.scope_guard ArgSpec(args=[], varargs='args', keywords='kwds', defaults=None)
paddle.fluid.scope_guard ArgSpec(args=[], varargs='args', keywords='kwds', defaults=None)
...
@@ -207,31 +199,23 @@ paddle.fluid.layers.argsort ArgSpec(args=['input', 'axis', 'name'], varargs=None
...
@@ -207,31 +199,23 @@ paddle.fluid.layers.argsort ArgSpec(args=['input', 'axis', 'name'], varargs=None
paddle.fluid.layers.ones ArgSpec(args=['shape', 'dtype', 'force_cpu'], varargs=None, keywords=None, defaults=(False,))
paddle.fluid.layers.ones ArgSpec(args=['shape', 'dtype', 'force_cpu'], varargs=None, keywords=None, defaults=(False,))
paddle.fluid.layers.zeros ArgSpec(args=['shape', 'dtype', 'force_cpu'], varargs=None, keywords=None, defaults=(False,))
paddle.fluid.layers.zeros ArgSpec(args=['shape', 'dtype', 'force_cpu'], varargs=None, keywords=None, defaults=(False,))
paddle.fluid.layers.reverse ArgSpec(args=['x', 'axis'], varargs=None, keywords=None, defaults=None)
paddle.fluid.layers.reverse ArgSpec(args=['x', 'axis'], varargs=None, keywords=None, defaults=None)
paddle.fluid.layers.split_lod_tensor ArgSpec(args=['input', 'mask', 'level'], varargs=None, keywords=None, defaults=(0,))
paddle.fluid.layers.merge_lod_tensor ArgSpec(args=['in_true', 'in_false', 'x', 'mask', 'level'], varargs=None, keywords=None, defaults=(0,))
paddle.fluid.layers.While.__init__ ArgSpec(args=['self', 'cond', 'name'], varargs=None, keywords=None, defaults=(None,))
paddle.fluid.layers.While.__init__ ArgSpec(args=['self', 'cond', 'name'], varargs=None, keywords=None, defaults=(None,))
paddle.fluid.layers.While.block ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None)
paddle.fluid.layers.While.block ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None)
paddle.fluid.layers.While.complete ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None)
paddle.fluid.layers.While.complete ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None)
paddle.fluid.layers.Switch.__init__ ArgSpec(args=['self', 'name'], varargs=None, keywords=None, defaults=(None,))
paddle.fluid.layers.Switch.__init__ ArgSpec(args=['self', 'name'], varargs=None, keywords=None, defaults=(None,))
paddle.fluid.layers.Switch.case ArgSpec(args=['self', 'condition'], varargs=None, keywords=None, defaults=None)
paddle.fluid.layers.Switch.case ArgSpec(args=['self', 'condition'], varargs=None, keywords=None, defaults=None)
paddle.fluid.layers.Switch.default ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None)
paddle.fluid.layers.Switch.default ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None)
paddle.fluid.layers.lod_rank_table ArgSpec(args=['x', 'level'], varargs=None, keywords=None, defaults=(0,))
paddle.fluid.layers.max_sequence_len ArgSpec(args=['rank_table'], varargs=None, keywords=None, defaults=None)
paddle.fluid.layers.lod_tensor_to_array ArgSpec(args=['x', 'table'], varargs=None, keywords=None, defaults=None)
paddle.fluid.layers.array_to_lod_tensor ArgSpec(args=['x', 'table'], varargs=None, keywords=None, defaults=None)
paddle.fluid.layers.increment ArgSpec(args=['x', 'value', 'in_place'], varargs=None, keywords=None, defaults=(1.0, True))
paddle.fluid.layers.increment ArgSpec(args=['x', 'value', 'in_place'], varargs=None, keywords=None, defaults=(1.0, True))
paddle.fluid.layers.array_write ArgSpec(args=['x', 'i', 'array'], varargs=None, keywords=None, defaults=(None,))
paddle.fluid.layers.array_write ArgSpec(args=['x', 'i', 'array'], varargs=None, keywords=None, defaults=(None,))
paddle.fluid.layers.create_array ArgSpec(args=['dtype'], varargs=None, keywords=None, defaults=None)
paddle.fluid.layers.create_array ArgSpec(args=['dtype'], varargs=None, keywords=None, defaults=None)
paddle.fluid.layers.less_than ArgSpec(args=['x', 'y', 'force_cpu', 'cond'], varargs=None, keywords='ignored', defaults=(None, None))
paddle.fluid.layers.less_than ArgSpec(args=['x', 'y', 'force_cpu', 'cond'], varargs=None, keywords='ignored', defaults=(None, None))
paddle.fluid.layers.equal ArgSpec(args=['x', 'y', 'cond'], varargs=None, keywords='ignored', defaults=(None,))
paddle.fluid.layers.equal ArgSpec(args=['x', 'y', 'cond'], varargs=None, keywords='ignored', defaults=(None,))
paddle.fluid.layers.array_read ArgSpec(args=['array', 'i'], varargs=None, keywords=None, defaults=None)
paddle.fluid.layers.array_read ArgSpec(args=['array', 'i'], varargs=None, keywords=None, defaults=None)
paddle.fluid.layers.shrink_memory ArgSpec(args=['x', 'i', 'table'], varargs=None, keywords=None, defaults=None)
paddle.fluid.layers.array_length ArgSpec(args=['array'], varargs=None, keywords=None, defaults=None)
paddle.fluid.layers.array_length ArgSpec(args=['array'], varargs=None, keywords=None, defaults=None)
paddle.fluid.layers.IfElse.__init__ ArgSpec(args=['self', 'cond', 'name'], varargs=None, keywords=None, defaults=(None,))
paddle.fluid.layers.IfElse.__init__ ArgSpec(args=['self', 'cond', 'name'], varargs=None, keywords=None, defaults=(None,))
paddle.fluid.layers.IfElse.false_block ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None)
paddle.fluid.layers.IfElse.false_block ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None)
paddle.fluid.layers.IfElse.input ArgSpec(args=['self', 'x'], varargs=None, keywords=None, defaults=None)
paddle.fluid.layers.IfElse.input ArgSpec(args=['self', 'x'], varargs=None, keywords=None, defaults=None)
paddle.fluid.layers.IfElse.output ArgSpec(args=['self'], varargs='outs', keywords=None, defaults=None)
paddle.fluid.layers.IfElse.output ArgSpec(args=['self'], varargs='outs', keywords=None, defaults=None)
paddle.fluid.layers.IfElse.parent_block ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None)
paddle.fluid.layers.IfElse.true_block ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None)
paddle.fluid.layers.IfElse.true_block ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None)
paddle.fluid.layers.DynamicRNN.__init__ ArgSpec(args=['self', 'name'], varargs=None, keywords=None, defaults=(None,))
paddle.fluid.layers.DynamicRNN.__init__ ArgSpec(args=['self', 'name'], varargs=None, keywords=None, defaults=(None,))
paddle.fluid.layers.DynamicRNN.block ArgSpec(args=[], varargs='args', keywords='kwds', defaults=None)
paddle.fluid.layers.DynamicRNN.block ArgSpec(args=[], varargs='args', keywords='kwds', defaults=None)
...
@@ -240,9 +224,6 @@ paddle.fluid.layers.DynamicRNN.output ArgSpec(args=['self'], varargs='outputs',
...
@@ -240,9 +224,6 @@ paddle.fluid.layers.DynamicRNN.output ArgSpec(args=['self'], varargs='outputs',
paddle.fluid.layers.DynamicRNN.static_input ArgSpec(args=['self', 'x'], varargs=None, keywords=None, defaults=None)
paddle.fluid.layers.DynamicRNN.static_input ArgSpec(args=['self', 'x'], varargs=None, keywords=None, defaults=None)
paddle.fluid.layers.DynamicRNN.step_input ArgSpec(args=['self', 'x'], varargs=None, keywords=None, defaults=None)
paddle.fluid.layers.DynamicRNN.step_input ArgSpec(args=['self', 'x'], varargs=None, keywords=None, defaults=None)
paddle.fluid.layers.DynamicRNN.update_memory ArgSpec(args=['self', 'ex_mem', 'new_mem'], varargs=None, keywords=None, defaults=None)
paddle.fluid.layers.DynamicRNN.update_memory ArgSpec(args=['self', 'ex_mem', 'new_mem'], varargs=None, keywords=None, defaults=None)
paddle.fluid.layers.ConditionalBlock.__init__ ArgSpec(args=['self', 'inputs', 'is_scalar_condition', 'name'], varargs=None, keywords=None, defaults=(False, None))
paddle.fluid.layers.ConditionalBlock.block ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None)
paddle.fluid.layers.ConditionalBlock.complete ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None)
paddle.fluid.layers.StaticRNN.__init__ ArgSpec(args=['self', 'name'], varargs=None, keywords=None, defaults=(None,))
paddle.fluid.layers.StaticRNN.__init__ ArgSpec(args=['self', 'name'], varargs=None, keywords=None, defaults=(None,))
paddle.fluid.layers.StaticRNN.complete_op ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None)
paddle.fluid.layers.StaticRNN.complete_op ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None)
paddle.fluid.layers.StaticRNN.memory ArgSpec(args=['self', 'init', 'shape', 'batch_ref', 'init_value', 'init_batch_dim_idx', 'ref_batch_dim_idx'], varargs=None, keywords=None, defaults=(None, None, None, 0.0, 0, 1))
paddle.fluid.layers.StaticRNN.memory ArgSpec(args=['self', 'init', 'shape', 'batch_ref', 'init_value', 'init_batch_dim_idx', 'ref_batch_dim_idx'], varargs=None, keywords=None, defaults=(None, None, None, 0.0, 0, 1))
...
...
paddle/fluid/framework/executor.cc
浏览文件 @
018e2f3a
...
@@ -45,19 +45,13 @@ ExecutorPrepareContext::~ExecutorPrepareContext() {
...
@@ -45,19 +45,13 @@ ExecutorPrepareContext::~ExecutorPrepareContext() {
Executor
::
Executor
(
const
platform
::
Place
&
place
)
:
place_
(
place
)
{}
Executor
::
Executor
(
const
platform
::
Place
&
place
)
:
place_
(
place
)
{}
void
Executor
::
Close
()
{
#ifdef PADDLE_WITH_DISTRIBUTE
#ifdef PADDLE_WITH_DISTRIBUTE
void
Executor
::
BeginPass
()
{
::
paddle
::
operators
::
distributed
::
RPCClient
::
GetInstance
<
::
paddle
::
operators
::
distributed
::
RPCClient
::
GetInstance
<
::
paddle
::
operators
::
distributed
::
GRPCClient
>
()
::
paddle
::
operators
::
distributed
::
GRPCClient
>
()
->
SendBeginPass
();
->
SendComplete
();
}
void
Executor
::
EndPass
()
{
::
paddle
::
operators
::
distributed
::
RPCClient
::
GetInstance
<
::
paddle
::
operators
::
distributed
::
GRPCClient
>
()
->
SendEndPass
();
}
#endif
#endif
}
void
InitializeVariable
(
Variable
*
var
,
proto
::
VarType
::
Type
var_type
)
{
void
InitializeVariable
(
Variable
*
var
,
proto
::
VarType
::
Type
var_type
)
{
if
(
var_type
==
proto
::
VarType
::
LOD_TENSOR
)
{
if
(
var_type
==
proto
::
VarType
::
LOD_TENSOR
)
{
...
...
paddle/fluid/framework/executor.h
浏览文件 @
018e2f3a
...
@@ -44,17 +44,11 @@ class Executor {
...
@@ -44,17 +44,11 @@ class Executor {
explicit
Executor
(
const
platform
::
Place
&
place
);
explicit
Executor
(
const
platform
::
Place
&
place
);
#ifdef PADDLE_WITH_DISTRIBUTE
/*
/*
* Sending signal to pserver to mark current pass started.
* Close this Executor.
* Calling this method will send complete messages to all pserver instances.
*/
*/
void
BeginPass
();
void
Close
();
/*
* Sending signal to pserver to mark current pass finished.
*/
void
EndPass
();
#endif
/* @Brief
/* @Brief
* Runtime evaluation of the given ProgramDesc under certain Scope
* Runtime evaluation of the given ProgramDesc under certain Scope
...
...
paddle/fluid/inference/api/api_impl.cc
浏览文件 @
018e2f3a
...
@@ -137,6 +137,7 @@ bool NativePaddlePredictor::Run(const std::vector<PaddleTensor> &inputs,
...
@@ -137,6 +137,7 @@ bool NativePaddlePredictor::Run(const std::vector<PaddleTensor> &inputs,
executor_
->
RunPreparedContext
(
executor_
->
RunPreparedContext
(
ctx_
.
get
(),
sub_scope_
!=
nullptr
?
sub_scope_
:
scope_
.
get
(),
ctx_
.
get
(),
sub_scope_
!=
nullptr
?
sub_scope_
:
scope_
.
get
(),
&
feed_targets
,
&
fetch_targets
,
&
feed_targets
,
&
fetch_targets
,
false
,
/* don't create local scope each time*/
false
/* don't create variable eatch time */
);
false
/* don't create variable eatch time */
);
VLOG
(
4
)
<<
"Finish prepared context"
;
VLOG
(
4
)
<<
"Finish prepared context"
;
if
(
!
GetFetch
(
fetchs
,
output_data
))
{
if
(
!
GetFetch
(
fetchs
,
output_data
))
{
...
...
paddle/fluid/inference/tensorrt/convert/fc_op.cc
浏览文件 @
018e2f3a
...
@@ -32,11 +32,11 @@ void Reorder2(nvinfer1::DimsHW shape, const T* idata, nvinfer1::DimsHW istrides,
...
@@ -32,11 +32,11 @@ void Reorder2(nvinfer1::DimsHW shape, const T* idata, nvinfer1::DimsHW istrides,
for
(
int
h
=
0
;
h
<
shape
.
h
();
++
h
)
{
for
(
int
h
=
0
;
h
<
shape
.
h
();
++
h
)
{
for
(
int
w
=
0
;
w
<
shape
.
w
();
++
w
)
{
for
(
int
w
=
0
;
w
<
shape
.
w
();
++
w
)
{
odata
[
h
*
ostrides
.
h
()
+
w
*
ostrides
.
w
()]
=
odata
[
h
*
ostrides
.
h
()
+
w
*
ostrides
.
w
()]
=
idata
[
h
*
ostrides
.
h
()
+
w
*
o
strides
.
w
()];
idata
[
h
*
istrides
.
h
()
+
w
*
i
strides
.
w
()];
}
}
}
}
}
}
// indata c * k
// Reorder the data layout from CK to KC.
// Reorder the data layout from CK to KC.
void
ReorderCKtoKC
(
TensorRTEngine
::
Weight
&
iweights
,
void
ReorderCKtoKC
(
TensorRTEngine
::
Weight
&
iweights
,
TensorRTEngine
::
Weight
*
oweights
)
{
TensorRTEngine
::
Weight
*
oweights
)
{
...
@@ -79,9 +79,8 @@ class FcOpConverter : public OpConverter {
...
@@ -79,9 +79,8 @@ class FcOpConverter : public OpConverter {
framework
::
LoDTensor
tmp
;
framework
::
LoDTensor
tmp
;
tmp
.
Resize
(
Y_t
->
dims
());
tmp
.
Resize
(
Y_t
->
dims
());
memcpy
(
tmp
.
mutable_data
<
float
>
(
platform
::
CPUPlace
()),
Y_t
->
data
<
float
>
(),
memcpy
(
tmp
.
mutable_data
<
float
>
(
platform
::
CPUPlace
()),
weight_data
,
Y_t
->
dims
()[
0
]
*
Y_t
->
dims
()[
1
]);
Y_t
->
dims
()[
0
]
*
Y_t
->
dims
()[
1
]
*
sizeof
(
float
));
TensorRTEngine
::
Weight
weight
{
nvinfer1
::
DataType
::
kFLOAT
,
TensorRTEngine
::
Weight
weight
{
nvinfer1
::
DataType
::
kFLOAT
,
static_cast
<
void
*>
(
weight_data
),
static_cast
<
void
*>
(
weight_data
),
Y_t
->
memory_size
()
/
sizeof
(
float
)};
Y_t
->
memory_size
()
/
sizeof
(
float
)};
...
@@ -93,7 +92,7 @@ class FcOpConverter : public OpConverter {
...
@@ -93,7 +92,7 @@ class FcOpConverter : public OpConverter {
// The data layout of TRT FC layer's weight is different from fluid's FC,
// The data layout of TRT FC layer's weight is different from fluid's FC,
// need to reorder the elements.
// need to reorder the elements.
ReorderCKtoKC
(
tmp_weight
,
&
weight
);
ReorderCKtoKC
(
weight
,
&
tmp_
weight
);
// Currently, the framework can only handle one fluid op -> one TRT layer,
// Currently, the framework can only handle one fluid op -> one TRT layer,
// but fc fuses `mul` and `bias` (2 fluid ops), so here is a trick, just
// but fc fuses `mul` and `bias` (2 fluid ops), so here is a trick, just
...
@@ -103,7 +102,7 @@ class FcOpConverter : public OpConverter {
...
@@ -103,7 +102,7 @@ class FcOpConverter : public OpConverter {
auto
*
layer
=
TRT_ENGINE_ADD_LAYER
(
engine_
,
FullyConnected
,
auto
*
layer
=
TRT_ENGINE_ADD_LAYER
(
engine_
,
FullyConnected
,
*
const_cast
<
nvinfer1
::
ITensor
*>
(
X
),
*
const_cast
<
nvinfer1
::
ITensor
*>
(
X
),
n_output
,
weight
.
get
(),
bias
.
get
());
n_output
,
tmp_
weight
.
get
(),
bias
.
get
());
auto
output_name
=
op_desc
.
Output
(
"Out"
).
front
();
auto
output_name
=
op_desc
.
Output
(
"Out"
).
front
();
engine_
->
SetITensor
(
output_name
,
layer
->
getOutput
(
0
));
engine_
->
SetITensor
(
output_name
,
layer
->
getOutput
(
0
));
...
@@ -118,4 +117,3 @@ class FcOpConverter : public OpConverter {
...
@@ -118,4 +117,3 @@ class FcOpConverter : public OpConverter {
}
// namespace paddle
}
// namespace paddle
REGISTER_TRT_OP_CONVERTER
(
fc
,
FcOpConverter
);
REGISTER_TRT_OP_CONVERTER
(
fc
,
FcOpConverter
);
USE_OP
(
mul
);
paddle/fluid/inference/tensorrt/convert/test_activation_op.cc
浏览文件 @
018e2f3a
...
@@ -37,7 +37,7 @@ TEST(ReluOpConverter, main) {
...
@@ -37,7 +37,7 @@ TEST(ReluOpConverter, main) {
validator
.
SetOp
(
*
desc
.
Proto
());
validator
.
SetOp
(
*
desc
.
Proto
());
LOG
(
INFO
)
<<
"execute"
;
LOG
(
INFO
)
<<
"execute"
;
validator
.
Execute
(
1
0
);
validator
.
Execute
(
1
);
}
}
}
// namespace tensorrt
}
// namespace tensorrt
...
...
paddle/fluid/inference/tensorrt/convert/test_fc_op.cc
浏览文件 @
018e2f3a
...
@@ -23,11 +23,11 @@ namespace tensorrt {
...
@@ -23,11 +23,11 @@ namespace tensorrt {
TEST
(
fc_op
,
test
)
{
TEST
(
fc_op
,
test
)
{
std
::
unordered_set
<
std
::
string
>
parameters
({
"mul-Y"
});
std
::
unordered_set
<
std
::
string
>
parameters
({
"mul-Y"
});
framework
::
Scope
scope
;
framework
::
Scope
scope
;
TRTConvertValidation
validator
(
2
0
,
parameters
,
scope
,
1000
);
TRTConvertValidation
validator
(
1
0
,
parameters
,
scope
,
1000
);
validator
.
DeclInputVar
(
"mul-X"
,
nvinfer1
::
Dims4
(
1
,
10
,
1
,
1
));
validator
.
Decl
InputVar
(
"mul-X"
,
nvinfer1
::
Dims4
(
8
,
3
,
1
,
1
));
validator
.
Decl
ParamVar
(
"mul-Y"
,
nvinfer1
::
Dims2
(
10
,
2
));
validator
.
DeclParamVar
(
"mul-Y"
,
nvinfer1
::
Dims2
(
3
,
2
));
// validator.DeclParamVar("mul-Y", nvinfer1::Dims2(8
, 2));
validator
.
DeclOutputVar
(
"mul-Out"
,
nvinfer1
::
Dims2
(
8
,
2
));
validator
.
DeclOutputVar
(
"mul-Out"
,
nvinfer1
::
Dims2
(
1
,
2
));
// Prepare Op description
// Prepare Op description
framework
::
OpDesc
desc
;
framework
::
OpDesc
desc
;
...
@@ -38,9 +38,10 @@ TEST(fc_op, test) {
...
@@ -38,9 +38,10 @@ TEST(fc_op, test) {
validator
.
SetOp
(
*
desc
.
Proto
());
validator
.
SetOp
(
*
desc
.
Proto
());
validator
.
Execute
(
1
0
);
validator
.
Execute
(
1
);
}
}
}
// namespace tensorrt
}
// namespace tensorrt
}
// namespace inference
}
// namespace inference
}
// namespace paddle
}
// namespace paddle
USE_OP
(
mul
);
paddle/fluid/inference/tensorrt/convert/test_mul_op.cc
浏览文件 @
018e2f3a
...
@@ -39,7 +39,7 @@ TEST(MulOpConverter, main) {
...
@@ -39,7 +39,7 @@ TEST(MulOpConverter, main) {
validator
.
SetOp
(
*
desc
.
Proto
());
validator
.
SetOp
(
*
desc
.
Proto
());
LOG
(
INFO
)
<<
"execute"
;
LOG
(
INFO
)
<<
"execute"
;
validator
.
Execute
(
1
0
);
validator
.
Execute
(
1
);
}
}
}
// namespace tensorrt
}
// namespace tensorrt
...
...
paddle/fluid/inference/tensorrt/convert/ut_helper.h
浏览文件 @
018e2f3a
...
@@ -39,7 +39,7 @@ namespace tensorrt {
...
@@ -39,7 +39,7 @@ namespace tensorrt {
float
random
(
float
low
,
float
high
)
{
float
random
(
float
low
,
float
high
)
{
static
std
::
random_device
rd
;
static
std
::
random_device
rd
;
static
std
::
mt19937
mt
(
rd
());
static
std
::
mt19937
mt
(
rd
());
std
::
uniform_real_distribution
<
double
>
dist
(
1.0
,
10.0
);
std
::
uniform_real_distribution
<
double
>
dist
(
low
,
high
);
return
dist
(
mt
);
return
dist
(
mt
);
}
}
...
@@ -49,6 +49,7 @@ void RandomizeTensor(framework::LoDTensor* tensor, const platform::Place& place,
...
@@ -49,6 +49,7 @@ void RandomizeTensor(framework::LoDTensor* tensor, const platform::Place& place,
size_t
num_elements
=
analysis
::
AccuDims
(
dims
,
dims
.
size
());
size_t
num_elements
=
analysis
::
AccuDims
(
dims
,
dims
.
size
());
PADDLE_ENFORCE_GT
(
num_elements
,
0
);
PADDLE_ENFORCE_GT
(
num_elements
,
0
);
auto
*
data
=
tensor
->
mutable_data
<
float
>
(
place
);
auto
*
data
=
tensor
->
mutable_data
<
float
>
(
place
);
for
(
size_t
i
=
0
;
i
<
num_elements
;
i
++
)
{
for
(
size_t
i
=
0
;
i
<
num_elements
;
i
++
)
{
*
(
data
+
i
)
=
random
(
0.
,
1.
);
*
(
data
+
i
)
=
random
(
0.
,
1.
);
}
}
...
@@ -68,7 +69,7 @@ class TRTConvertValidation {
...
@@ -68,7 +69,7 @@ class TRTConvertValidation {
int
workspace_size
=
1
<<
10
)
int
workspace_size
=
1
<<
10
)
:
parameters_
(
parameters
),
scope_
(
scope
)
{
:
parameters_
(
parameters
),
scope_
(
scope
)
{
// create engine.
// create engine.
engine_
.
reset
(
new
TensorRTEngine
(
10
,
1
<<
10
,
&
stream_
));
engine_
.
reset
(
new
TensorRTEngine
(
batch_size
,
workspace_size
,
&
stream_
));
engine_
->
InitNetwork
();
engine_
->
InitNetwork
();
PADDLE_ENFORCE_EQ
(
cudaStreamCreate
(
&
stream_
),
0
);
PADDLE_ENFORCE_EQ
(
cudaStreamCreate
(
&
stream_
),
0
);
...
@@ -138,12 +139,11 @@ class TRTConvertValidation {
...
@@ -138,12 +139,11 @@ class TRTConvertValidation {
cudaStreamSynchronize
(
*
engine_
->
stream
());
cudaStreamSynchronize
(
*
engine_
->
stream
());
ASSERT_FALSE
(
op_desc_
->
OutputArgumentNames
().
empty
());
ASSERT_FALSE
(
op_desc_
->
OutputArgumentNames
().
empty
());
const
size_t
output_space_size
=
200
;
const
size_t
output_space_size
=
200
0
;
for
(
const
auto
&
output
:
op_desc_
->
OutputArgumentNames
())
{
for
(
const
auto
&
output
:
op_desc_
->
OutputArgumentNames
())
{
std
::
vector
<
float
>
fluid_out
;
std
::
vector
<
float
>
fluid_out
;
std
::
vector
<
float
>
trt_out
(
output_space_size
);
std
::
vector
<
float
>
trt_out
(
output_space_size
);
engine_
->
GetOutputInCPU
(
output
,
&
trt_out
[
0
],
engine_
->
GetOutputInCPU
(
output
,
&
trt_out
[
0
],
output_space_size
);
output_space_size
*
sizeof
(
float
));
cudaStreamSynchronize
(
*
engine_
->
stream
());
cudaStreamSynchronize
(
*
engine_
->
stream
());
auto
*
var
=
scope_
.
FindVar
(
output
);
auto
*
var
=
scope_
.
FindVar
(
output
);
...
...
paddle/fluid/inference/tensorrt/engine.cc
浏览文件 @
018e2f3a
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
Licensed under the Apache License, Version 2.0 (the "License");
you may not use
you may not use
this file except in compliance with the License.
this file except in compliance with the License.
You may obtain a copy of the License at
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
http://www.apache.org/licenses/LICENSE-2.0
...
@@ -26,6 +26,8 @@ namespace paddle {
...
@@ -26,6 +26,8 @@ namespace paddle {
namespace
inference
{
namespace
inference
{
namespace
tensorrt
{
namespace
tensorrt
{
int
TensorRTEngine
::
runtime_batch_
=
1
;
void
TensorRTEngine
::
Build
(
const
DescType
&
paddle_model
)
{
void
TensorRTEngine
::
Build
(
const
DescType
&
paddle_model
)
{
PADDLE_ENFORCE
(
false
,
"not implemented"
);
PADDLE_ENFORCE
(
false
,
"not implemented"
);
}
}
...
@@ -42,6 +44,7 @@ void TensorRTEngine::Execute(int batch_size) {
...
@@ -42,6 +44,7 @@ void TensorRTEngine::Execute(int batch_size) {
PADDLE_ENFORCE_NOT_NULL
(
stream_
);
PADDLE_ENFORCE_NOT_NULL
(
stream_
);
infer_context_
->
enqueue
(
batch_size
,
buffers
.
data
(),
*
stream_
,
nullptr
);
infer_context_
->
enqueue
(
batch_size
,
buffers
.
data
(),
*
stream_
,
nullptr
);
cudaStreamSynchronize
(
*
stream_
);
cudaStreamSynchronize
(
*
stream_
);
SetRuntimeBatch
(
batch_size
);
}
}
TensorRTEngine
::~
TensorRTEngine
()
{
TensorRTEngine
::~
TensorRTEngine
()
{
...
@@ -80,17 +83,17 @@ void TensorRTEngine::FreezeNetwork() {
...
@@ -80,17 +83,17 @@ void TensorRTEngine::FreezeNetwork() {
auto
dims
=
infer_engine_
->
getBindingDimensions
(
slot_offset
);
auto
dims
=
infer_engine_
->
getBindingDimensions
(
slot_offset
);
item
.
second
=
kDataTypeSize
[
static_cast
<
int
>
(
item
.
second
=
kDataTypeSize
[
static_cast
<
int
>
(
infer_engine_
->
getBindingDataType
(
slot_offset
))]
*
infer_engine_
->
getBindingDataType
(
slot_offset
))]
*
analysis
::
AccuDims
(
dims
.
d
,
dims
.
nbDims
);
analysis
::
AccuDims
(
dims
.
d
,
dims
.
nbDims
)
*
max_batch_
;
PADDLE_ENFORCE_GT
(
item
.
second
,
0
);
PADDLE_ENFORCE_GT
(
item
.
second
,
0
);
}
}
auto
&
buf
=
buffer
(
item
.
first
);
auto
&
buf
=
buffer
(
item
.
first
);
buf
.
max_size
=
item
.
second
*
max_batch_
;
buf
.
max_size
=
item
.
second
*
max_batch_
;
CHECK
(
buf
.
buffer
==
nullptr
);
// buffer should be allocated only once.
CHECK
(
buf
.
buffer
==
nullptr
);
// buffer should be allocated only once.
PADDLE_ENFORCE_EQ
(
0
,
cudaMalloc
(
&
buf
.
buffer
,
buf
.
max_size
));
PADDLE_ENFORCE_LE
(
buf
.
max_size
,
1
<<
30
);
// 10G
PADDLE_ENFORCE_EQ
(
0
,
cudaMalloc
(
&
buf
.
buffer
,
item
.
second
*
max_batch_
));
// buf.size will changed in the runtime.
buf
.
size
=
0
;
buf
.
size
=
0
;
PADDLE_ENFORCE_LE
(
buf
.
max_size
,
1
<<
30
);
// 10G
buf
.
device
=
DeviceType
::
GPU
;
buf
.
device
=
DeviceType
::
GPU
;
}
}
}
}
...
@@ -105,7 +108,7 @@ nvinfer1::ITensor *TensorRTEngine::DeclareInput(const std::string &name,
...
@@ -105,7 +108,7 @@ nvinfer1::ITensor *TensorRTEngine::DeclareInput(const std::string &name,
auto
*
input
=
infer_network_
->
addInput
(
name
.
c_str
(),
dtype
,
dims
);
auto
*
input
=
infer_network_
->
addInput
(
name
.
c_str
(),
dtype
,
dims
);
PADDLE_ENFORCE
(
input
,
"infer network add input %s failed"
,
name
);
PADDLE_ENFORCE
(
input
,
"infer network add input %s failed"
,
name
);
buffer_sizes_
[
name
]
=
kDataTypeSize
[
static_cast
<
int
>
(
dtype
)]
*
buffer_sizes_
[
name
]
=
kDataTypeSize
[
static_cast
<
int
>
(
dtype
)]
*
analysis
::
AccuDims
(
dims
.
d
,
dims
.
nbDims
);
analysis
::
AccuDims
(
dims
.
d
,
dims
.
nbDims
)
*
max_batch_
;
PADDLE_ENFORCE
(
input
->
isNetworkInput
());
PADDLE_ENFORCE
(
input
->
isNetworkInput
());
TensorRTEngine
::
SetITensor
(
name
,
input
);
TensorRTEngine
::
SetITensor
(
name
,
input
);
return
input
;
return
input
;
...
@@ -149,35 +152,42 @@ void *TensorRTEngine::GetOutputInGPU(const std::string &name) {
...
@@ -149,35 +152,42 @@ void *TensorRTEngine::GetOutputInGPU(const std::string &name) {
void
TensorRTEngine
::
GetOutputInGPU
(
const
std
::
string
&
name
,
void
*
dst
,
void
TensorRTEngine
::
GetOutputInGPU
(
const
std
::
string
&
name
,
void
*
dst
,
size_t
max_size
)
{
size_t
max_size
)
{
// determine data size
// determine data size
auto
*
output
=
TensorRTEngine
::
GetITensor
(
name
);
nvinfer1
::
Dims
dims
=
output
->
getDimensions
();
auto
dim_size
=
analysis
::
AccuDims
(
dims
.
d
,
dims
.
nbDims
);
size_t
dst_size
=
dim_size
*
runtime_batch_
*
kDataTypeSize
[
static_cast
<
int
>
(
output
->
getType
())];
auto
it
=
buffer_sizes_
.
find
(
name
);
auto
it
=
buffer_sizes_
.
find
(
name
);
PADDLE_ENFORCE
(
it
!=
buffer_sizes_
.
end
());
PADDLE_ENFORCE
(
it
!=
buffer_sizes_
.
end
());
PADDLE_ENFORCE_GT
(
it
->
second
,
0
);
PADDLE_ENFORCE_GT
(
it
->
second
,
0
);
PADDLE_ENFORCE_GE
(
max_size
,
it
->
second
);
PADDLE_ENFORCE_LE
(
dst_size
,
it
->
second
);
PADDLE_ENFORCE_GE
(
max_size
,
dst_size
);
auto
&
buf
=
buffer
(
name
);
auto
&
buf
=
buffer
(
name
);
PADDLE_ENFORCE_NOT_NULL
(
buf
.
buffer
,
"buffer should be allocated before"
);
PADDLE_ENFORCE_NOT_NULL
(
buf
.
buffer
,
"buffer should be allocated before"
);
PADDLE_ENFORCE_EQ
(
cudaMemcpyAsync
(
dst
,
buf
.
buffer
,
it
->
second
,
PADDLE_ENFORCE_EQ
(
cudaMemcpyAsync
(
dst
,
buf
.
buffer
,
dst_size
,
cudaMemcpyDeviceToDevice
,
*
stream_
),
cudaMemcpyDeviceToDevice
,
*
stream_
),
0
);
0
);
}
}
void
TensorRTEngine
::
GetOutputInCPU
(
const
std
::
string
&
name
,
void
*
dst
,
void
TensorRTEngine
::
GetOutputInCPU
(
const
std
::
string
&
name
,
void
*
dst
,
size_t
max_size
)
{
size_t
max_size
)
{
VLOG
(
4
)
<<
"get output in cpu"
;
auto
&
buf
=
buffer
(
name
);
// Update needed buffer size.
auto
slot_offset
=
infer_engine_
->
getBindingIndex
(
name
.
c_str
());
auto
dims
=
infer_engine_
->
getBindingDimensions
(
slot_offset
);
buf
.
size
=
kDataTypeSize
[
static_cast
<
int
>
(
infer_engine_
->
getBindingDataType
(
slot_offset
))]
*
analysis
::
AccuDims
(
dims
.
d
,
dims
.
nbDims
);
PADDLE_ENFORCE_LE
(
buf
.
size
,
buf
.
max_size
);
// determine data size
// determine data size
auto
*
output
=
TensorRTEngine
::
GetITensor
(
name
);
nvinfer1
::
Dims
dims
=
output
->
getDimensions
();
auto
dim_size
=
analysis
::
AccuDims
(
dims
.
d
,
dims
.
nbDims
);
size_t
dst_size
=
dim_size
*
runtime_batch_
*
kDataTypeSize
[
static_cast
<
int
>
(
output
->
getType
())];
auto
it
=
buffer_sizes_
.
find
(
name
);
PADDLE_ENFORCE
(
it
!=
buffer_sizes_
.
end
());
PADDLE_ENFORCE_GT
(
it
->
second
,
0
);
PADDLE_ENFORCE_LE
(
dst_size
,
it
->
second
);
PADDLE_ENFORCE_GE
(
max_size
,
dst_size
);
auto
&
buf
=
buffer
(
name
);
PADDLE_ENFORCE_NOT_NULL
(
buf
.
buffer
,
"buffer should be allocated before"
);
PADDLE_ENFORCE_NOT_NULL
(
buf
.
buffer
,
"buffer should be allocated before"
);
// DEBUG
PADDLE_ENFORCE_EQ
(
0
,
cudaMemcpyAsync
(
dst
,
buf
.
buffer
,
dst_size
,
memset
(
dst
,
0
,
buf
.
size
);
cudaMemcpyDeviceToHost
,
*
stream_
));
PADDLE_ENFORCE_EQ
(
0
,
cudaMemcpy
(
dst
,
buf
.
buffer
,
buf
.
size
,
cudaMemcpyDeviceToHost
));
}
}
Buffer
&
TensorRTEngine
::
buffer
(
const
std
::
string
&
name
)
{
Buffer
&
TensorRTEngine
::
buffer
(
const
std
::
string
&
name
)
{
...
@@ -225,6 +235,12 @@ nvinfer1::ITensor *TensorRTEngine::GetITensor(const std::string &name) {
...
@@ -225,6 +235,12 @@ nvinfer1::ITensor *TensorRTEngine::GetITensor(const std::string &name) {
return
itensor_map_
[
name
];
return
itensor_map_
[
name
];
}
}
void
TensorRTEngine
::
SetRuntimeBatch
(
size_t
batch_size
)
{
runtime_batch_
=
batch_size
;
}
int
TensorRTEngine
::
GetRuntimeBatch
()
{
return
runtime_batch_
;
}
}
// namespace tensorrt
}
// namespace tensorrt
}
// namespace inference
}
// namespace inference
}
// namespace paddle
}
// namespace paddle
paddle/fluid/inference/tensorrt/engine.h
浏览文件 @
018e2f3a
...
@@ -117,10 +117,14 @@ class TensorRTEngine : public EngineBase {
...
@@ -117,10 +117,14 @@ class TensorRTEngine : public EngineBase {
nvinfer1
::
ICudaEngine
*
engine
()
{
return
infer_engine_
.
get
();
}
nvinfer1
::
ICudaEngine
*
engine
()
{
return
infer_engine_
.
get
();
}
nvinfer1
::
INetworkDefinition
*
network
()
{
return
infer_network_
.
get
();
}
nvinfer1
::
INetworkDefinition
*
network
()
{
return
infer_network_
.
get
();
}
void
SetRuntimeBatch
(
size_t
batch_size
);
int
GetRuntimeBatch
();
private:
private:
// the max batch size
// the max batch size
int
max_batch_
;
int
max_batch_
;
// the runtime batch size
static
int
runtime_batch_
;
// the max memory size the engine uses
// the max memory size the engine uses
int
max_workspace_
;
int
max_workspace_
;
...
...
paddle/fluid/inference/tensorrt/test_engine.cc
浏览文件 @
018e2f3a
...
@@ -28,7 +28,7 @@ class TensorRTEngineTest : public ::testing::Test {
...
@@ -28,7 +28,7 @@ class TensorRTEngineTest : public ::testing::Test {
protected:
protected:
void
SetUp
()
override
{
void
SetUp
()
override
{
ASSERT_EQ
(
0
,
cudaStreamCreate
(
&
stream_
));
ASSERT_EQ
(
0
,
cudaStreamCreate
(
&
stream_
));
engine_
=
new
TensorRTEngine
(
1
,
1
<<
10
,
&
stream_
);
engine_
=
new
TensorRTEngine
(
1
0
,
1
<<
10
,
&
stream_
);
engine_
->
InitNetwork
();
engine_
->
InitNetwork
();
}
}
...
@@ -71,7 +71,7 @@ TEST_F(TensorRTEngineTest, add_layer) {
...
@@ -71,7 +71,7 @@ TEST_F(TensorRTEngineTest, add_layer) {
LOG
(
INFO
)
<<
"to get output"
;
LOG
(
INFO
)
<<
"to get output"
;
float
y_cpu
;
float
y_cpu
;
engine_
->
GetOutputInCPU
(
"y"
,
&
y_cpu
,
sizeof
(
float
));
engine_
->
GetOutputInCPU
(
"y"
,
&
y_cpu
,
1
*
sizeof
(
float
));
LOG
(
INFO
)
<<
"to checkout output"
;
LOG
(
INFO
)
<<
"to checkout output"
;
ASSERT_EQ
(
y_cpu
,
x_v
*
2
+
3
);
ASSERT_EQ
(
y_cpu
,
x_v
*
2
+
3
);
...
@@ -103,15 +103,49 @@ TEST_F(TensorRTEngineTest, add_layer_multi_dim) {
...
@@ -103,15 +103,49 @@ TEST_F(TensorRTEngineTest, add_layer_multi_dim) {
LOG
(
INFO
)
<<
"to get output"
;
LOG
(
INFO
)
<<
"to get output"
;
float
y_cpu
[
2
]
=
{
-
1.
,
-
1.
};
float
y_cpu
[
2
]
=
{
-
1.
,
-
1.
};
auto
dims
=
engine_
->
GetITensor
(
"y"
)
->
getDimensions
();
auto
dims
=
engine_
->
GetITensor
(
"y"
)
->
getDimensions
();
ASSERT_EQ
(
dims
.
nbDims
,
3
);
ASSERT_EQ
(
dims
.
nbDims
,
3
);
ASSERT_EQ
(
dims
.
d
[
0
],
2
);
ASSERT_EQ
(
dims
.
d
[
0
],
2
);
ASSERT_EQ
(
dims
.
d
[
1
],
1
);
ASSERT_EQ
(
dims
.
d
[
1
],
1
);
engine_
->
GetOutputInCPU
(
"y"
,
&
y_cpu
[
0
],
sizeof
(
float
)
*
2
);
engine_
->
GetOutputInCPU
(
"y"
,
&
y_cpu
[
0
],
2
*
sizeof
(
float
)
);
ASSERT_EQ
(
y_cpu
[
0
],
4.5
);
ASSERT_EQ
(
y_cpu
[
0
],
4.5
);
ASSERT_EQ
(
y_cpu
[
1
],
14.5
);
ASSERT_EQ
(
y_cpu
[
1
],
14.5
);
}
}
TEST_F
(
TensorRTEngineTest
,
test_conv2d_temp
)
{
// Weight in CPU memory.
float
raw_weight
[
9
]
=
{
1.0
,
1.0
,
1.0
,
1.0
,
1.0
,
1.0
,
1.0
,
1.0
,
1.0
};
float
raw_bias
[
1
]
=
{
0
};
TensorRTEngine
::
Weight
weight
(
nvinfer1
::
DataType
::
kFLOAT
,
raw_weight
,
9
);
TensorRTEngine
::
Weight
bias
(
nvinfer1
::
DataType
::
kFLOAT
,
raw_bias
,
1
);
auto
*
x
=
engine_
->
DeclareInput
(
"x"
,
nvinfer1
::
DataType
::
kFLOAT
,
nvinfer1
::
Dims3
{
1
,
3
,
3
});
auto
*
conv_layer
=
TRT_ENGINE_ADD_LAYER
(
engine_
,
Convolution
,
*
x
,
1
,
nvinfer1
::
DimsHW
{
3
,
3
},
weight
.
get
(),
bias
.
get
());
PADDLE_ENFORCE
(
conv_layer
!=
nullptr
);
conv_layer
->
setStride
(
nvinfer1
::
DimsHW
{
1
,
1
});
conv_layer
->
setPadding
(
nvinfer1
::
DimsHW
{
1
,
1
});
engine_
->
DeclareOutput
(
conv_layer
,
0
,
"y"
);
engine_
->
FreezeNetwork
();
ASSERT_EQ
(
engine_
->
engine
()
->
getNbBindings
(),
2
);
float
x_v
[
18
]
=
{
1.0
,
1.0
,
1.0
,
1.0
,
1.0
,
1.0
,
1.0
,
1.0
,
1.0
,
1.0
,
1.0
,
1.0
,
1.0
,
1.0
,
1.0
,
1.0
,
1.0
,
1.0
};
engine_
->
SetInputFromCPU
(
"x"
,
reinterpret_cast
<
void
*>
(
&
x_v
),
18
*
sizeof
(
float
));
engine_
->
Execute
(
2
);
LOG
(
INFO
)
<<
"to get output"
;
float
*
y_cpu
=
new
float
[
18
];
engine_
->
GetOutputInCPU
(
"y"
,
&
y_cpu
[
0
],
18
*
sizeof
(
float
));
ASSERT_EQ
(
y_cpu
[
0
],
4.0
);
ASSERT_EQ
(
y_cpu
[
1
],
6.0
);
}
}
// namespace tensorrt
}
// namespace tensorrt
}
// namespace inference
}
// namespace inference
}
// namespace paddle
}
// namespace paddle
paddle/fluid/inference/tests/test_helper.h
浏览文件 @
018e2f3a
...
@@ -210,13 +210,14 @@ void TestInference(const std::string& dirname,
...
@@ -210,13 +210,14 @@ void TestInference(const std::string& dirname,
// Ignore the profiling results of the first run
// Ignore the profiling results of the first run
std
::
unique_ptr
<
paddle
::
framework
::
ExecutorPrepareContext
>
ctx
;
std
::
unique_ptr
<
paddle
::
framework
::
ExecutorPrepareContext
>
ctx
;
bool
CreateLocalScope
=
CreateVars
;
if
(
PrepareContext
)
{
if
(
PrepareContext
)
{
ctx
=
executor
.
Prepare
(
*
inference_program
,
0
);
ctx
=
executor
.
Prepare
(
*
inference_program
,
0
);
executor
.
RunPreparedContext
(
ctx
.
get
(),
scope
,
&
feed_targets
,
executor
.
RunPreparedContext
(
ctx
.
get
(),
scope
,
&
feed_targets
,
&
fetch_targets
,
tru
e
,
CreateVars
);
&
fetch_targets
,
CreateLocalScop
e
,
CreateVars
);
}
else
{
}
else
{
executor
.
Run
(
*
inference_program
,
scope
,
&
feed_targets
,
&
fetch_targets
,
executor
.
Run
(
*
inference_program
,
scope
,
&
feed_targets
,
&
fetch_targets
,
tru
e
,
CreateVars
);
CreateLocalScop
e
,
CreateVars
);
}
}
// Enable the profiler
// Enable the profiler
...
@@ -232,10 +233,11 @@ void TestInference(const std::string& dirname,
...
@@ -232,10 +233,11 @@ void TestInference(const std::string& dirname,
// Note: if you change the inference_program, you need to call
// Note: if you change the inference_program, you need to call
// executor.Prepare() again to get a new ExecutorPrepareContext.
// executor.Prepare() again to get a new ExecutorPrepareContext.
executor
.
RunPreparedContext
(
ctx
.
get
(),
scope
,
&
feed_targets
,
executor
.
RunPreparedContext
(
ctx
.
get
(),
scope
,
&
feed_targets
,
&
fetch_targets
,
CreateVars
);
&
fetch_targets
,
CreateLocalScope
,
CreateVars
);
}
else
{
}
else
{
executor
.
Run
(
*
inference_program
,
scope
,
&
feed_targets
,
&
fetch_targets
,
executor
.
Run
(
*
inference_program
,
scope
,
&
feed_targets
,
&
fetch_targets
,
CreateVars
);
Create
LocalScope
,
Create
Vars
);
}
}
}
}
...
...
paddle/fluid/operators/distributed/CMakeLists.txt
浏览文件 @
018e2f3a
...
@@ -18,7 +18,7 @@ if(WITH_GRPC)
...
@@ -18,7 +18,7 @@ if(WITH_GRPC)
set_source_files_properties
(
grpc_serde_test.cc rpc_server_test.cc PROPERTIES COMPILE_FLAGS
${
DISTRIBUTE_COMPILE_FLAGS
}
)
set_source_files_properties
(
grpc_serde_test.cc rpc_server_test.cc PROPERTIES COMPILE_FLAGS
${
DISTRIBUTE_COMPILE_FLAGS
}
)
cc_test
(
grpc_serde_test SRCS grpc_serde_test.cc
cc_test
(
grpc_serde_test SRCS grpc_serde_test.cc
DEPS grpc++_unsecure grpc_unsecure gpr cares zlib protobuf sendrecvop_grpc scope profiler math_function SERIAL
)
DEPS grpc++_unsecure grpc_unsecure gpr cares zlib protobuf sendrecvop_grpc scope profiler math_function SERIAL
)
cc_test
(
g
rpc_server_test SRCS rpc_server_test.cc
cc_test
(
rpc_server_test SRCS rpc_server_test.cc
DEPS sendrecvop_grpc grpc++_unsecure grpc_unsecure gpr cares zlib protobuf executor proto_desc lookup_table_op SERIAL
)
DEPS sendrecvop_grpc grpc++_unsecure grpc_unsecure gpr cares zlib protobuf executor proto_desc lookup_table_op SERIAL
)
return
()
return
()
endif
()
endif
()
...
...
paddle/fluid/operators/distributed/grpc_client.cc
浏览文件 @
018e2f3a
...
@@ -36,20 +36,16 @@ void GRPCClient::InitEventLoop() {
...
@@ -36,20 +36,16 @@ void GRPCClient::InitEventLoop() {
client_thread_
.
reset
(
new
std
::
thread
(
std
::
bind
(
&
GRPCClient
::
Proceed
,
this
)));
client_thread_
.
reset
(
new
std
::
thread
(
std
::
bind
(
&
GRPCClient
::
Proceed
,
this
)));
}
}
void
GRPCClient
::
SendBeginPass
()
{
void
GRPCClient
::
SendComplete
()
{
for
(
auto
&
it
:
channels_
)
{
std
::
unique_lock
<
std
::
mutex
>
lk
(
completed_mutex_
);
VLOG
(
3
)
<<
"send begin pass to: "
<<
it
.
first
;
if
(
!
completed_
)
{
this
->
AsyncSendBeginPass
(
it
.
first
);
for
(
auto
&
it
:
channels_
)
{
}
VLOG
(
3
)
<<
"send complete message to "
<<
it
.
first
;
this
->
Wait
();
this
->
AsyncSendComplete
(
it
.
first
);
}
}
PADDLE_ENFORCE
(
this
->
Wait
(),
"internal grpc error"
);
void
GRPCClient
::
SendEndPass
()
{
completed_
=
true
;
for
(
auto
&
it
:
channels_
)
{
VLOG
(
3
)
<<
"send end pass to "
<<
it
.
first
;
this
->
AsyncSendEndPass
(
it
.
first
);
}
}
this
->
Wait
();
}
}
GRPCClient
::~
GRPCClient
()
{
GRPCClient
::~
GRPCClient
()
{
...
@@ -239,32 +235,19 @@ void GRPCClient::AsyncSendFetchBarrier(const std::string& ep,
...
@@ -239,32 +235,19 @@ void GRPCClient::AsyncSendFetchBarrier(const std::string& ep,
req_count_
++
;
req_count_
++
;
}
}
void
GRPCClient
::
AsyncSend
BeginPass
(
const
std
::
string
&
ep
,
int64_t
time_out
)
{
void
GRPCClient
::
AsyncSend
Complete
(
const
std
::
string
&
ep
,
int64_t
time_out
)
{
const
auto
ch
=
GetChannel
(
ep
);
const
auto
ch
=
GetChannel
(
ep
);
BatchBarrierProcessor
*
s
=
new
BatchBarrierProcessor
(
ch
);
BatchBarrierProcessor
*
s
=
new
BatchBarrierProcessor
(
ch
);
s
->
Prepare
(
time_out
);
s
->
Prepare
(
time_out
);
sendrecv
::
VariableMessage
req
;
sendrecv
::
VariableMessage
req
;
req
.
set_varname
(
BEGIN_PASS
_MESSAGE
);
req
.
set_varname
(
COMPLETE
_MESSAGE
);
auto
rpc
=
s
->
stub_
->
AsyncSendVariable
(
s
->
context_
.
get
(),
req
,
&
cq_
);
auto
rpc
=
s
->
stub_
->
AsyncSendVariable
(
s
->
context_
.
get
(),
req
,
&
cq_
);
rpc
->
Finish
(
&
s
->
reply_
,
&
s
->
status_
,
reinterpret_cast
<
void
*>
(
s
));
rpc
->
Finish
(
&
s
->
reply_
,
&
s
->
status_
,
reinterpret_cast
<
void
*>
(
s
));
req_count_
++
;
req_count_
++
;
}
}
void
GRPCClient
::
AsyncSendEndPass
(
const
std
::
string
&
ep
,
int64_t
time_out
)
{
const
auto
ch
=
GetChannel
(
ep
);
FetchBarrierProcessor
*
s
=
new
FetchBarrierProcessor
(
ch
);
s
->
Prepare
(
time_out
);
sendrecv
::
VariableMessage
req
;
req
.
set_varname
(
END_PASS_MESSAGE
);
auto
rpc
=
s
->
stub_
->
AsyncGetVariable
(
s
->
context_
.
get
(),
req
,
&
cq_
);
rpc
->
Finish
(
&
s
->
reply_
,
&
s
->
status_
,
reinterpret_cast
<
void
*>
(
s
));
req_count_
++
;
}
void
GRPCClient
::
AsyncCheckpointNotify
(
const
std
::
string
&
ep
,
void
GRPCClient
::
AsyncCheckpointNotify
(
const
std
::
string
&
ep
,
const
std
::
string
&
dir
,
const
std
::
string
&
dir
,
int64_t
time_out
)
{
int64_t
time_out
)
{
...
...
paddle/fluid/operators/distributed/grpc_client.h
浏览文件 @
018e2f3a
...
@@ -174,7 +174,7 @@ class CheckpointNotifyProcessor : public BaseProcessor {
...
@@ -174,7 +174,7 @@ class CheckpointNotifyProcessor : public BaseProcessor {
class
GRPCClient
:
public
RPCClient
{
class
GRPCClient
:
public
RPCClient
{
public:
public:
GRPCClient
()
:
ok_
(
true
)
{}
GRPCClient
()
:
ok_
(
true
)
,
completed_
(
false
)
{}
virtual
~
GRPCClient
();
virtual
~
GRPCClient
();
bool
AsyncSendVar
(
const
std
::
string
&
ep
,
const
platform
::
DeviceContext
&
ctx
,
bool
AsyncSendVar
(
const
std
::
string
&
ep
,
const
platform
::
DeviceContext
&
ctx
,
...
@@ -201,17 +201,12 @@ class GRPCClient : public RPCClient {
...
@@ -201,17 +201,12 @@ class GRPCClient : public RPCClient {
void
AsyncCheckpointNotify
(
const
std
::
string
&
ep
,
const
std
::
string
&
dir
,
void
AsyncCheckpointNotify
(
const
std
::
string
&
ep
,
const
std
::
string
&
dir
,
int64_t
time_out
=
FLAGS_rpc_deadline
)
override
;
int64_t
time_out
=
FLAGS_rpc_deadline
)
override
;
void
AsyncSendBeginPass
(
const
std
::
string
&
ep
,
void
AsyncSendComplete
(
const
std
::
string
&
ep
,
int64_t
time_out
=
FLAGS_rpc_deadline
)
override
;
int64_t
time_out
=
FLAGS_rpc_deadline
)
override
;
void
AsyncSendEndPass
(
const
std
::
string
&
ep
,
int64_t
time_out
=
FLAGS_rpc_deadline
)
override
;
bool
Wait
()
override
;
bool
Wait
()
override
;
void
SendBeginPass
()
override
;
void
SendComplete
()
override
;
void
SendEndPass
()
override
;
protected:
protected:
void
InitImpl
()
override
;
void
InitImpl
()
override
;
...
@@ -238,6 +233,10 @@ class GRPCClient : public RPCClient {
...
@@ -238,6 +233,10 @@ class GRPCClient : public RPCClient {
// mutex for GetChannel thread safety
// mutex for GetChannel thread safety
std
::
mutex
chan_mutex_
;
std
::
mutex
chan_mutex_
;
DISABLE_COPY_AND_ASSIGN
(
GRPCClient
);
DISABLE_COPY_AND_ASSIGN
(
GRPCClient
);
// mutex for sending complete message only once
std
::
mutex
completed_mutex_
;
bool
completed_
;
};
};
}
// namespace distributed
}
// namespace distributed
...
...
paddle/fluid/operators/distributed/request_handler.h
浏览文件 @
018e2f3a
...
@@ -43,8 +43,6 @@ constexpr char kRequestPassBarrier[] = "RequestPassBarrier";
...
@@ -43,8 +43,6 @@ constexpr char kRequestPassBarrier[] = "RequestPassBarrier";
#define BATCH_BARRIER_MESSAGE "BATCH_BARRIER@RECV"
#define BATCH_BARRIER_MESSAGE "BATCH_BARRIER@RECV"
#define FETCH_BARRIER_MESSAGE "FETCH_BARRIER@RECV"
#define FETCH_BARRIER_MESSAGE "FETCH_BARRIER@RECV"
#define COMPLETE_MESSAGE "COMPLETE@RECV"
#define COMPLETE_MESSAGE "COMPLETE@RECV"
#define BEGIN_PASS_MESSAGE "BEGIN_PASS@RECV"
#define END_PASS_MESSAGE "END_PASS@RECV"
#define CHECKPOINT_SAVE_MESSAGE "SAVE@CHECKPOINTNOTIFY"
#define CHECKPOINT_SAVE_MESSAGE "SAVE@CHECKPOINTNOTIFY"
#define CHECKPOINT_LOAD_MESSAGE "LOAD@CHECKPOINTNOTIFY"
#define CHECKPOINT_LOAD_MESSAGE "LOAD@CHECKPOINTNOTIFY"
...
...
paddle/fluid/operators/distributed/request_handler_impl.cc
浏览文件 @
018e2f3a
...
@@ -55,10 +55,9 @@ bool RequestSendHandler::Handle(const std::string& varname,
...
@@ -55,10 +55,9 @@ bool RequestSendHandler::Handle(const std::string& varname,
if
(
varname
==
BATCH_BARRIER_MESSAGE
)
{
if
(
varname
==
BATCH_BARRIER_MESSAGE
)
{
VLOG
(
3
)
<<
"sync: recv BATCH_BARRIER_MESSAGE"
;
VLOG
(
3
)
<<
"sync: recv BATCH_BARRIER_MESSAGE"
;
rpc_server_
->
IncreaseBatchBarrier
(
kRequestSend
);
rpc_server_
->
IncreaseBatchBarrier
(
kRequestSend
);
}
else
if
(
varname
==
BEGIN_PASS_MESSAGE
)
{
}
else
if
(
varname
==
COMPLETE_MESSAGE
)
{
VLOG
(
3
)
<<
"sync: recv begin pass message"
;
VLOG
(
3
)
<<
"sync: recv complete message"
;
rpc_server_
->
WaitCond
(
kRequestSend
);
rpc_server_
->
Complete
();
rpc_server_
->
BeginPass
();
}
else
{
}
else
{
VLOG
(
3
)
<<
"sync: received var_name: "
<<
varname
;
VLOG
(
3
)
<<
"sync: received var_name: "
<<
varname
;
rpc_server_
->
WaitCond
(
kRequestSend
);
rpc_server_
->
WaitCond
(
kRequestSend
);
...
@@ -94,14 +93,12 @@ bool RequestGetHandler::Handle(const std::string& varname,
...
@@ -94,14 +93,12 @@ bool RequestGetHandler::Handle(const std::string& varname,
if
(
varname
==
FETCH_BARRIER_MESSAGE
)
{
if
(
varname
==
FETCH_BARRIER_MESSAGE
)
{
VLOG
(
3
)
<<
"sync: recv fetch barrier message"
;
VLOG
(
3
)
<<
"sync: recv fetch barrier message"
;
rpc_server_
->
IncreaseBatchBarrier
(
kRequestGet
);
rpc_server_
->
IncreaseBatchBarrier
(
kRequestGet
);
}
else
if
(
varname
==
END_PASS_MESSAGE
)
{
rpc_server_
->
EndPass
();
}
else
{
}
else
{
rpc_server_
->
WaitCond
(
kRequestGet
);
rpc_server_
->
WaitCond
(
kRequestGet
);
*
outvar
=
scope_
->
FindVar
(
varname
);
*
outvar
=
scope_
->
FindVar
(
varname
);
}
}
}
else
{
}
else
{
if
(
varname
!=
FETCH_BARRIER_MESSAGE
&&
varname
!=
END_PASS
_MESSAGE
)
{
if
(
varname
!=
FETCH_BARRIER_MESSAGE
&&
varname
!=
COMPLETE
_MESSAGE
)
{
*
outvar
=
scope_
->
FindVar
(
varname
);
*
outvar
=
scope_
->
FindVar
(
varname
);
}
}
}
}
...
...
paddle/fluid/operators/distributed/rpc_client.h
浏览文件 @
018e2f3a
...
@@ -60,17 +60,13 @@ class RPCClient {
...
@@ -60,17 +60,13 @@ class RPCClient {
const
std
::
string
&
dir
,
const
std
::
string
&
dir
,
int64_t
time_out
=
FLAGS_rpc_deadline
)
=
0
;
int64_t
time_out
=
FLAGS_rpc_deadline
)
=
0
;
virtual
void
AsyncSend
BeginPass
(
const
std
::
string
&
ep
,
virtual
void
AsyncSend
Complete
(
const
std
::
string
&
ep
,
int64_t
time_out
=
FLAGS_rpc_deadline
)
=
0
;
int64_t
time_out
=
FLAGS_rpc_deadline
)
=
0
;
virtual
void
AsyncSendEndPass
(
const
std
::
string
&
ep
,
// Complete tells all the pserver instances that finishe the training,
int64_t
time_out
=
FLAGS_rpc_deadline
)
=
0
;
// the pserver can reduce it's barrier count, and continue to train
// BeginePass/EndPass tells all the pserver that start/end a pass, so that
// the pserver can increase/reduce it's barrier count, and continue to train
// with other trainers.
// with other trainers.
virtual
void
SendBeginPass
()
=
0
;
virtual
void
SendComplete
()
=
0
;
virtual
void
SendEndPass
()
=
0
;
virtual
bool
Wait
()
=
0
;
virtual
bool
Wait
()
=
0
;
...
...
paddle/fluid/operators/distributed/rpc_server.cc
浏览文件 @
018e2f3a
...
@@ -64,18 +64,7 @@ void RPCServer::IncreaseBatchBarrier(const std::string rpc_name) {
...
@@ -64,18 +64,7 @@ void RPCServer::IncreaseBatchBarrier(const std::string rpc_name) {
}
}
}
}
void
RPCServer
::
BeginPass
()
{
void
RPCServer
::
Complete
()
{
VLOG
(
4
)
<<
"RPCServer begin increase pass barrier"
;
{
std
::
unique_lock
<
std
::
mutex
>
lock
(
mutex_
);
client_num_
++
;
VLOG
(
4
)
<<
"increase client_num to: "
<<
client_num_
;
}
barrier_cond_
.
notify_all
();
}
void
RPCServer
::
EndPass
()
{
VLOG
(
4
)
<<
"RPCServer begin increase pass barrier"
;
{
{
std
::
unique_lock
<
std
::
mutex
>
lock
(
mutex_
);
std
::
unique_lock
<
std
::
mutex
>
lock
(
mutex_
);
client_num_
--
;
client_num_
--
;
...
@@ -87,6 +76,11 @@ void RPCServer::EndPass() {
...
@@ -87,6 +76,11 @@ void RPCServer::EndPass() {
barrier_cond_
.
notify_all
();
barrier_cond_
.
notify_all
();
}
}
int
RPCServer
::
GetClientNum
()
{
std
::
unique_lock
<
std
::
mutex
>
lock
(
mutex_
);
return
client_num_
;
}
void
RPCServer
::
ResetBarrierCounter
()
{
void
RPCServer
::
ResetBarrierCounter
()
{
VLOG
(
3
)
<<
"RPCServer ResetBarrierCounter "
;
VLOG
(
3
)
<<
"RPCServer ResetBarrierCounter "
;
std
::
unique_lock
<
std
::
mutex
>
lock
(
mutex_
);
std
::
unique_lock
<
std
::
mutex
>
lock
(
mutex_
);
...
...
paddle/fluid/operators/distributed/rpc_server.h
浏览文件 @
018e2f3a
...
@@ -44,7 +44,7 @@ class RPCServer {
...
@@ -44,7 +44,7 @@ class RPCServer {
int
GetSelectedPort
()
const
{
return
selected_port_
;
}
int
GetSelectedPort
()
const
{
return
selected_port_
;
}
int
GetClientNum
()
const
;
int
GetClientNum
();
void
SavePort
()
const
;
void
SavePort
()
const
;
...
@@ -64,8 +64,7 @@ class RPCServer {
...
@@ -64,8 +64,7 @@ class RPCServer {
void
WaitCond
(
const
std
::
string
&
rpc_name
);
void
WaitCond
(
const
std
::
string
&
rpc_name
);
void
IncreaseBatchBarrier
(
const
std
::
string
rpc_name
);
void
IncreaseBatchBarrier
(
const
std
::
string
rpc_name
);
void
BeginPass
();
void
Complete
();
void
EndPass
();
void
ResetBarrierCounter
();
void
ResetBarrierCounter
();
...
...
paddle/fluid/operators/distributed/rpc_server_test.cc
浏览文件 @
018e2f3a
...
@@ -91,7 +91,7 @@ void InitTensorsOnServer(framework::Scope* scope, platform::CPUPlace* place,
...
@@ -91,7 +91,7 @@ void InitTensorsOnServer(framework::Scope* scope, platform::CPUPlace* place,
}
}
}
}
void
StartServer
()
{
void
StartServer
(
const
std
::
string
&
rpc_name
)
{
framework
::
ProgramDesc
program
;
framework
::
ProgramDesc
program
;
framework
::
Scope
scope
;
framework
::
Scope
scope
;
platform
::
CPUPlace
place
;
platform
::
CPUPlace
place
;
...
@@ -107,14 +107,14 @@ void StartServer() {
...
@@ -107,14 +107,14 @@ void StartServer() {
std
::
shared_ptr
<
framework
::
ExecutorPrepareContext
>>
std
::
shared_ptr
<
framework
::
ExecutorPrepareContext
>>
prefetch_var_name_to_prepared
;
prefetch_var_name_to_prepared
;
prefetch_var_name_to_prepared
[
in_var_name
]
=
prepared
[
0
];
prefetch_var_name_to_prepared
[
in_var_name
]
=
prepared
[
0
];
g_req_handler
->
SetProgram
(
&
program
);
g_req_handler
->
SetProgram
(
&
program
);
g_req_handler
->
SetPrefetchPreparedCtx
(
&
prefetch_var_name_to_prepared
);
g_req_handler
->
SetPrefetchPreparedCtx
(
&
prefetch_var_name_to_prepared
);
g_req_handler
->
SetDevCtx
(
&
ctx
);
g_req_handler
->
SetDevCtx
(
&
ctx
);
g_req_handler
->
SetScope
(
&
scope
);
g_req_handler
->
SetScope
(
&
scope
);
g_req_handler
->
SetExecutor
(
&
exe
);
g_req_handler
->
SetExecutor
(
&
exe
);
g_rpc_service
->
RegisterRPC
(
distributed
::
kRequestPrefetch
,
g_rpc_service
->
RegisterRPC
(
rpc_name
,
g_req_handler
.
get
());
g_req_handler
.
get
());
g_req_handler
->
SetRPCServer
(
g_rpc_service
.
get
());
g_req_handler
->
SetRPCServer
(
g_rpc_service
.
get
());
std
::
thread
server_thread
(
std
::
thread
server_thread
(
...
@@ -129,7 +129,7 @@ TEST(PREFETCH, CPU) {
...
@@ -129,7 +129,7 @@ TEST(PREFETCH, CPU) {
distributed
::
RPCClient
*
client
=
distributed
::
RPCClient
*
client
=
distributed
::
RPCClient
::
GetInstance
<
RPCCLIENT_T
>
();
distributed
::
RPCClient
::
GetInstance
<
RPCCLIENT_T
>
();
std
::
thread
server_thread
(
StartServer
);
std
::
thread
server_thread
(
StartServer
,
distributed
::
kRequestPrefetch
);
g_rpc_service
->
WaitServerReady
();
g_rpc_service
->
WaitServerReady
();
int
port
=
g_rpc_service
->
GetSelectedPort
();
int
port
=
g_rpc_service
->
GetSelectedPort
();
...
@@ -162,3 +162,24 @@ TEST(PREFETCH, CPU) {
...
@@ -162,3 +162,24 @@ TEST(PREFETCH, CPU) {
g_rpc_service
.
reset
(
nullptr
);
g_rpc_service
.
reset
(
nullptr
);
g_req_handler
.
reset
(
nullptr
);
g_req_handler
.
reset
(
nullptr
);
}
}
TEST
(
COMPLETE
,
CPU
)
{
g_req_handler
.
reset
(
new
distributed
::
RequestSendHandler
(
true
));
g_rpc_service
.
reset
(
new
RPCSERVER_T
(
"127.0.0.1:0"
,
2
));
distributed
::
RPCClient
*
client
=
distributed
::
RPCClient
::
GetInstance
<
RPCCLIENT_T
>
();
PADDLE_ENFORCE
(
client
!=
nullptr
);
std
::
thread
server_thread
(
StartServer
,
distributed
::
kRequestSend
);
g_rpc_service
->
WaitServerReady
();
int
port
=
g_rpc_service
->
GetSelectedPort
();
std
::
string
ep
=
paddle
::
string
::
Sprintf
(
"127.0.0.1:%d"
,
port
);
client
->
AsyncSendComplete
(
ep
);
client
->
Wait
();
EXPECT_EQ
(
g_rpc_service
->
GetClientNum
(),
1
);
g_rpc_service
->
ShutDown
();
server_thread
.
join
();
g_rpc_service
.
reset
(
nullptr
);
g_req_handler
.
reset
(
nullptr
);
}
paddle/fluid/operators/reader/lod_tensor_blocking_queue.h
浏览文件 @
018e2f3a
...
@@ -38,12 +38,10 @@ class LoDTensorBlockingQueue {
...
@@ -38,12 +38,10 @@ class LoDTensorBlockingQueue {
public:
public:
bool
Push
(
const
std
::
vector
<
framework
::
LoDTensor
>&
lod_tensor_vec
)
{
bool
Push
(
const
std
::
vector
<
framework
::
LoDTensor
>&
lod_tensor_vec
)
{
CheckDims
(
lod_tensor_vec
);
return
queue_
.
Send
(
lod_tensor_vec
);
return
queue_
.
Send
(
lod_tensor_vec
);
}
}
bool
Push
(
std
::
vector
<
framework
::
LoDTensor
>&&
lod_tensor_vec
)
{
bool
Push
(
std
::
vector
<
framework
::
LoDTensor
>&&
lod_tensor_vec
)
{
CheckDims
(
lod_tensor_vec
);
return
queue_
.
Send
(
std
::
move
(
lod_tensor_vec
));
return
queue_
.
Send
(
std
::
move
(
lod_tensor_vec
));
}
}
...
@@ -65,21 +63,6 @@ class LoDTensorBlockingQueue {
...
@@ -65,21 +63,6 @@ class LoDTensorBlockingQueue {
inline
bool
IsClosed
()
const
{
return
queue_
.
IsClosed
();
}
inline
bool
IsClosed
()
const
{
return
queue_
.
IsClosed
();
}
private:
private:
void
CheckDims
(
const
std
::
vector
<
framework
::
LoDTensor
>&
lod_tensor_vec
)
const
{
PADDLE_ENFORCE
(
dims_
.
size
()
==
lod_tensor_vec
.
size
(),
"Expect input size is %d but found %s"
,
dims_
.
size
(),
lod_tensor_vec
.
size
());
for
(
size_t
i
=
0
;
i
<
dims_
.
size
();
++
i
)
{
const
auto
&
in_dims
=
framework
::
slice_ddim
(
lod_tensor_vec
[
i
].
dims
(),
1
,
lod_tensor_vec
[
i
].
dims
().
size
());
const
auto
&
expect_dims
=
framework
::
slice_ddim
(
dims_
[
i
],
1
,
dims_
[
i
].
size
());
PADDLE_ENFORCE
(
in_dims
==
expect_dims
,
"Dims of the %d-th input tensor do not match"
,
i
);
}
}
BlockingQueue
<
std
::
vector
<
framework
::
LoDTensor
>>
queue_
;
BlockingQueue
<
std
::
vector
<
framework
::
LoDTensor
>>
queue_
;
std
::
vector
<
framework
::
DDim
>
dims_
;
std
::
vector
<
framework
::
DDim
>
dims_
;
};
};
...
...
paddle/fluid/operators/reshape_op.cc
浏览文件 @
018e2f3a
...
@@ -216,7 +216,7 @@ class ReshapeKernel {
...
@@ -216,7 +216,7 @@ class ReshapeKernel {
if
(
shape_tensor
)
{
if
(
shape_tensor
)
{
auto
*
shape_data
=
shape_tensor
->
data
<
int
>
();
auto
*
shape_data
=
shape_tensor
->
data
<
int
>
();
framework
::
Tensor
cpu_shape_tensor
;
framework
::
Tensor
cpu_shape_tensor
;
if
(
platform
::
is_gpu_place
(
ctx
.
GetP
lace
()))
{
if
(
platform
::
is_gpu_place
(
shape_tensor
->
p
lace
()))
{
TensorCopySync
(
*
shape_tensor
,
platform
::
CPUPlace
(),
&
cpu_shape_tensor
);
TensorCopySync
(
*
shape_tensor
,
platform
::
CPUPlace
(),
&
cpu_shape_tensor
);
shape_data
=
cpu_shape_tensor
.
data
<
int
>
();
shape_data
=
cpu_shape_tensor
.
data
<
int
>
();
}
}
...
...
paddle/fluid/operators/tensorrt_engine_op.cc
浏览文件 @
018e2f3a
...
@@ -55,13 +55,14 @@ nvinfer1::Dims Vec2TRT_Dims(const std::vector<int64_t> &shape) {
...
@@ -55,13 +55,14 @@ nvinfer1::Dims Vec2TRT_Dims(const std::vector<int64_t> &shape) {
"TensorRT' tensor input requires at least 2 dimensions"
);
"TensorRT' tensor input requires at least 2 dimensions"
);
PADDLE_ENFORCE_LE
(
shape
.
size
(),
4UL
,
PADDLE_ENFORCE_LE
(
shape
.
size
(),
4UL
,
"TensorRT' tensor input requires at most 4 dimensions"
);
"TensorRT' tensor input requires at most 4 dimensions"
);
switch
(
shape
.
size
())
{
switch
(
shape
.
size
())
{
case
2
:
case
2
:
return
nvinfer1
::
Dims2
(
shape
[
0
]
,
shape
[
1
]);
return
nvinfer1
::
Dims2
(
1
,
shape
[
1
]);
case
3
:
case
3
:
return
nvinfer1
::
Dims3
(
shape
[
0
]
,
shape
[
1
],
shape
[
2
]);
return
nvinfer1
::
Dims3
(
1
,
shape
[
1
],
shape
[
2
]);
case
4
:
case
4
:
return
nvinfer1
::
Dims4
(
shape
[
0
]
,
shape
[
1
],
shape
[
2
],
shape
[
3
]);
return
nvinfer1
::
Dims4
(
1
,
shape
[
1
],
shape
[
2
],
shape
[
3
]);
default:
default:
return
nvinfer1
::
Dims
();
return
nvinfer1
::
Dims
();
}
}
...
...
paddle/fluid/operators/tensorrt_engine_op.h
浏览文件 @
018e2f3a
...
@@ -93,13 +93,15 @@ class TensorRTEngineKernel : public framework::OpKernel<T> {
...
@@ -93,13 +93,15 @@ class TensorRTEngineKernel : public framework::OpKernel<T> {
auto
*
fluid_v
=
context
.
scope
().
FindVar
(
y
);
auto
*
fluid_v
=
context
.
scope
().
FindVar
(
y
);
PADDLE_ENFORCE_NOT_NULL
(
fluid_v
,
"no output variable called %s"
,
y
);
PADDLE_ENFORCE_NOT_NULL
(
fluid_v
,
"no output variable called %s"
,
y
);
auto
*
fluid_t
=
fluid_v
->
GetMutable
<
framework
::
LoDTensor
>
();
auto
*
fluid_t
=
fluid_v
->
GetMutable
<
framework
::
LoDTensor
>
();
auto
size
=
inference
::
analysis
::
AccuDims
(
dims
.
d
,
dims
.
nbDims
);
fluid_t
->
Resize
(
framework
::
make_ddim
(
ddim
));
fluid_t
->
Resize
(
framework
::
make_ddim
(
ddim
));
// TODO(Superjomn) find some way to determine which device to output the
// TODO(Superjomn) find some way to determine which device to output the
// tensor.
// tensor.
// if (platform::is_cpu_place(fluid_t->place())) {
// if (platform::is_cpu_place(fluid_t->place())) {
// TODO(Superjomn) change this float to dtype size.
// TODO(Superjomn) change this float to dtype size.
auto
size
=
inference
::
analysis
::
AccuDims
(
dims
.
d
,
dims
.
nbDims
)
*
FLAGS_tensorrt_engine_batch_size
;
engine
->
GetOutputInCPU
(
y
,
engine
->
GetOutputInCPU
(
y
,
fluid_t
->
mutable_data
<
float
>
(
platform
::
CPUPlace
()),
fluid_t
->
mutable_data
<
float
>
(
platform
::
CPUPlace
()),
size
*
sizeof
(
float
));
size
*
sizeof
(
float
));
...
...
paddle/fluid/operators/tensorrt_engine_op_test.cc
浏览文件 @
018e2f3a
...
@@ -64,36 +64,37 @@ TEST(TensorRTEngineOp, manual) {
...
@@ -64,36 +64,37 @@ TEST(TensorRTEngineOp, manual) {
LOG
(
INFO
)
<<
"create block desc"
;
LOG
(
INFO
)
<<
"create block desc"
;
framework
::
BlockDesc
block_desc
(
&
program
,
block_
);
framework
::
BlockDesc
block_desc
(
&
program
,
block_
);
LOG
(
INFO
)
<<
"create
mul
op"
;
LOG
(
INFO
)
<<
"create
fc
op"
;
auto
*
mul
=
block_desc
.
AppendOp
();
auto
*
fc0
=
block_desc
.
AppendOp
();
mul
->
SetType
(
"mul
"
);
fc0
->
SetType
(
"fc
"
);
mul
->
SetInput
(
"X"
,
std
::
vector
<
std
::
string
>
({
"x"
}));
// 2 x 4
fc0
->
SetInput
(
"X"
,
std
::
vector
<
std
::
string
>
({
"x"
}));
// 4 x 1 x 1
mul
->
SetInput
(
"Y"
,
std
::
vector
<
std
::
string
>
({
"y"
}));
// 4 x 6
fc0
->
SetInput
(
"Y"
,
std
::
vector
<
std
::
string
>
({
"y"
}));
// 4 x 6
mul
->
SetOutput
(
"Out"
,
std
::
vector
<
std
::
string
>
({
"z"
}));
// 2 x 6
fc0
->
SetOutput
(
"Out"
,
std
::
vector
<
std
::
string
>
({
"z"
}));
// 6 x 1 x 1
LOG
(
INFO
)
<<
"create fc op"
;
LOG
(
INFO
)
<<
"create fc op"
;
auto
*
fc
=
block_desc
.
AppendOp
();
auto
*
fc
1
=
block_desc
.
AppendOp
();
fc
->
SetType
(
"mul
"
);
fc
1
->
SetType
(
"fc
"
);
fc
->
SetInput
(
"X"
,
std
::
vector
<
std
::
string
>
({
"z"
}));
fc
1
->
SetInput
(
"X"
,
std
::
vector
<
std
::
string
>
({
"z"
}));
fc
->
SetInput
(
"Y"
,
std
::
vector
<
std
::
string
>
({
"y0"
}));
// 6 x 8
fc
1
->
SetInput
(
"Y"
,
std
::
vector
<
std
::
string
>
({
"y0"
}));
// 6 x 8
fc
->
SetOutput
(
"Out"
,
std
::
vector
<
std
::
string
>
({
"z0"
}));
// 2 x 8
fc
1
->
SetOutput
(
"Out"
,
std
::
vector
<
std
::
string
>
({
"z0"
}));
// 8 x 1 x 1
// Set inputs' variable shape in BlockDesc
// Set inputs' variable shape in BlockDesc
AddTensorToBlockDesc
(
block_
,
"x"
,
std
::
vector
<
int64_t
>
({
2
,
4
}));
// the batch size is 2, so the dims of 'x' is {2, 4, 1, 1}
AddTensorToBlockDesc
(
block_
,
"x"
,
std
::
vector
<
int64_t
>
({
2
,
4
,
1
,
1
}));
AddTensorToBlockDesc
(
block_
,
"y"
,
std
::
vector
<
int64_t
>
({
4
,
6
}));
AddTensorToBlockDesc
(
block_
,
"y"
,
std
::
vector
<
int64_t
>
({
4
,
6
}));
AddTensorToBlockDesc
(
block_
,
"y0"
,
std
::
vector
<
int64_t
>
({
6
,
8
}));
AddTensorToBlockDesc
(
block_
,
"y0"
,
std
::
vector
<
int64_t
>
({
6
,
8
}));
AddTensorToBlockDesc
(
block_
,
"z"
,
std
::
vector
<
int64_t
>
({
2
,
6
}));
AddTensorToBlockDesc
(
block_
,
"z"
,
std
::
vector
<
int64_t
>
({
2
,
6
}));
// It is wired, need to copy manually.
// It is wired, need to copy manually.
*
block_
->
add_ops
()
=
*
mul
->
Proto
();
*
block_
->
add_ops
()
=
*
fc0
->
Proto
();
*
block_
->
add_ops
()
=
*
fc
->
Proto
();
*
block_
->
add_ops
()
=
*
fc
1
->
Proto
();
ASSERT_EQ
(
block_
->
ops_size
(),
2
);
ASSERT_EQ
(
block_
->
ops_size
(),
2
);
LOG
(
INFO
)
<<
"create tensorrt desc"
;
LOG
(
INFO
)
<<
"create tensorrt desc"
;
framework
::
OpDesc
engine_op_desc
(
nullptr
);
framework
::
OpDesc
engine_op_desc
(
nullptr
);
engine_op_desc
.
SetType
(
"tensorrt_engine"
);
engine_op_desc
.
SetType
(
"tensorrt_engine"
);
engine_op_desc
.
SetInput
(
"Xs"
,
std
::
vector
<
std
::
string
>
({
"x"
,
"y"
,
"y0"
}));
engine_op_desc
.
SetInput
(
"Xs"
,
std
::
vector
<
std
::
string
>
({
"x"
}));
engine_op_desc
.
SetOutput
(
"Ys"
,
std
::
vector
<
std
::
string
>
({
"z0"
}));
engine_op_desc
.
SetOutput
(
"Ys"
,
std
::
vector
<
std
::
string
>
({
"z0"
}));
SetAttr
<
std
::
string
>
(
engine_op_desc
.
Proto
(),
"subgraph"
,
SetAttr
<
std
::
string
>
(
engine_op_desc
.
Proto
(),
"subgraph"
,
block_
->
SerializeAsString
());
block_
->
SerializeAsString
());
...
@@ -207,5 +208,4 @@ TEST(TensorRTEngineOp, fc) { Execute(40, 28, 28); }
...
@@ -207,5 +208,4 @@ TEST(TensorRTEngineOp, fc) { Execute(40, 28, 28); }
}
// namespace operators
}
// namespace operators
}
// namespace paddle
}
// namespace paddle
USE_TRT_CONVERTER
(
mul
)
USE_TRT_CONVERTER
(
fc
)
USE_TRT_CONVERTER
(
fc
)
paddle/fluid/pybind/pybind.cc
浏览文件 @
018e2f3a
...
@@ -498,10 +498,7 @@ All parameter, weight, gradient are variables in Paddle.
...
@@ -498,10 +498,7 @@ All parameter, weight, gradient are variables in Paddle.
py
::
class_
<
framework
::
Executor
>
(
m
,
"Executor"
)
py
::
class_
<
framework
::
Executor
>
(
m
,
"Executor"
)
.
def
(
py
::
init
<
const
platform
::
Place
&>
())
.
def
(
py
::
init
<
const
platform
::
Place
&>
())
#ifdef PADDLE_WITH_DISTRIBUTE
.
def
(
"close"
,
&
Executor
::
Close
)
.
def
(
"begin_pass"
,
&
Executor
::
BeginPass
)
.
def
(
"end_pass"
,
&
Executor
::
EndPass
)
#endif
.
def
(
"run"
,
[](
Executor
&
self
,
const
ProgramDesc
&
prog
,
Scope
*
scope
,
.
def
(
"run"
,
[](
Executor
&
self
,
const
ProgramDesc
&
prog
,
Scope
*
scope
,
int
block_id
,
bool
create_local_scope
,
bool
create_vars
)
{
int
block_id
,
bool
create_local_scope
,
bool
create_vars
)
{
pybind11
::
gil_scoped_release
release
;
pybind11
::
gil_scoped_release
release
;
...
...
python/paddle/fluid/executor.py
浏览文件 @
018e2f3a
...
@@ -247,6 +247,7 @@ class Executor(object):
...
@@ -247,6 +247,7 @@ class Executor(object):
p
.
set_place
(
place
)
p
.
set_place
(
place
)
self
.
executor
=
core
.
Executor
(
p
)
self
.
executor
=
core
.
Executor
(
p
)
self
.
program_caches
=
dict
()
self
.
program_caches
=
dict
()
self
.
_closed
=
False
def
as_lodtensor
(
self
,
data
):
def
as_lodtensor
(
self
,
data
):
"""
"""
...
@@ -348,11 +349,23 @@ class Executor(object):
...
@@ -348,11 +349,23 @@ class Executor(object):
]
]
return
outs
return
outs
def
begin_pass
(
self
):
def
close
(
self
):
self
.
executor
.
begin_pass
()
"""
Close this executor.
def
end_pass
(
self
):
You can no long use this executor after calling this method.
self
.
executor
.
end_pass
()
For the distributed training, this method would free the resource on PServers related to
the current Trainer.
Example:
>>> cpu = core.CPUPlace()
>>> exe = Executor(cpu)
>>> ...
>>> exe.close()
"""
if
not
self
.
_closed
:
self
.
executor
.
close
()
self
.
_closed
=
True
def
run
(
self
,
def
run
(
self
,
program
=
None
,
program
=
None
,
...
@@ -405,6 +418,10 @@ class Executor(object):
...
@@ -405,6 +418,10 @@ class Executor(object):
>>> feed={'X': x},
>>> feed={'X': x},
>>> fetch_list=[loss.name])
>>> fetch_list=[loss.name])
"""
"""
if
self
.
_closed
:
raise
RuntimeError
(
"Attempted to use a closed Executor"
)
if
feed
is
None
:
if
feed
is
None
:
feed
=
{}
feed
=
{}
if
not
isinstance
(
feed
,
dict
):
if
not
isinstance
(
feed
,
dict
):
...
...
python/paddle/fluid/framework.py
浏览文件 @
018e2f3a
...
@@ -32,7 +32,6 @@ except Exception, e:
...
@@ -32,7 +32,6 @@ except Exception, e:
import
unique_name
import
unique_name
__all__
=
[
__all__
=
[
'Variable'
,
'Program'
,
'Program'
,
'Operator'
,
'Operator'
,
'Parameter'
,
'Parameter'
,
...
@@ -302,7 +301,7 @@ class Variable(object):
...
@@ -302,7 +301,7 @@ class Variable(object):
__repr__
=
__str__
__repr__
=
__str__
def
set_desc
(
self
,
input
):
def
_
set_desc
(
self
,
input
):
"""
"""
Set the variable description.
Set the variable description.
...
@@ -347,7 +346,7 @@ class Variable(object):
...
@@ -347,7 +346,7 @@ class Variable(object):
def
type
(
self
):
def
type
(
self
):
return
self
.
desc
.
type
()
return
self
.
desc
.
type
()
def
set_error_clip
(
self
,
error_clip
):
def
_
set_error_clip
(
self
,
error_clip
):
"""
"""
Set the error_clip.
Set the error_clip.
...
...
python/paddle/fluid/io.py
浏览文件 @
018e2f3a
...
@@ -796,104 +796,6 @@ def get_parameter_value_by_name(name, executor, program=None):
...
@@ -796,104 +796,6 @@ def get_parameter_value_by_name(name, executor, program=None):
return
get_parameter_value
(
var
,
executor
)
return
get_parameter_value
(
var
,
executor
)
def
get_test_program
(
filelist
,
program
=
None
,
startup_program
=
None
):
"""
Transpile current train program to a program to read test dataset
if the program is using reader ops like "open_files_op".
"""
def
_copy_reader_var_
(
block
,
var
,
new_name
=
None
):
if
new_name
==
None
:
new_name
=
var
.
name
new_var
=
block
.
create_var
(
name
=
str
(
new_name
),
type
=
core
.
VarDesc
.
VarType
.
READER
)
new_var
.
desc
.
set_shapes
(
var
.
desc
.
shapes
())
new_var
.
desc
.
set_dtypes
(
var
.
desc
.
dtypes
())
new_var
.
persistable
=
True
return
new_var
def
_get_test_reader_name
(
train_reader_name
):
return
train_reader_name
+
"_test"
def
_is_reader_op
(
op
):
block
=
op
.
block
if
"Out"
in
op
.
output_names
:
reader_out
=
block
.
vars
[
op
.
output
(
"Out"
)[
0
]]
if
reader_out
.
type
==
core
.
VarDesc
.
VarType
.
READER
:
return
True
return
False
if
program
==
None
:
program
=
default_main_program
()
if
startup_program
==
None
:
startup_program
=
default_startup_program
()
startup_block
=
startup_program
.
global_block
()
# 1. find out the orignal reader var name
startup_reader_op_list
=
[]
for
op
in
startup_block
.
ops
:
if
_is_reader_op
(
op
):
startup_reader_op_list
.
append
(
op
)
if
len
(
startup_reader_op_list
)
==
0
:
return
program
root_reader_op
=
startup_reader_op_list
[
0
]
train_test_reader_map
=
{}
# 2. add operators to startup to read open and read test data files
for
op
in
startup_reader_op_list
:
assert
(
len
(
op
.
output
(
"Out"
))
==
1
)
train_reader_name
=
op
.
output
(
"Out"
)[
0
]
train_reader
=
startup_block
.
vars
[
train_reader_name
]
test_reader
=
_copy_reader_var_
(
startup_block
,
train_reader
,
new_name
=
_get_test_reader_name
(
train_reader_name
))
train_test_reader_map
[
train_reader
.
name
]
=
test_reader
test_op_inputs
=
{}
for
name
in
op
.
input_names
:
train_arg_names
=
op
.
input
(
name
)
test_arg_vars
=
[]
for
arg_name
in
train_arg_names
:
arg_var
=
train_test_reader_map
[
arg_name
]
if
name
==
"UnderlyingReader"
else
startup_block
.
vars
[
arg_name
]
test_arg_vars
.
append
(
arg_var
)
test_op_inputs
[
name
]
=
test_arg_vars
test_op
=
startup_block
.
append_op
(
type
=
op
.
type
,
inputs
=
test_op_inputs
,
outputs
=
{
'Out'
:
[
test_reader
]},
attrs
=
op
.
attrs
)
# root reader op's filelist attr for read test files
if
op
.
type
==
root_reader_op
.
type
:
test_op
.
set_attr
(
"file_names"
,
filelist
)
if
op
.
type
==
"create_multi_pass_reader"
:
test_op
.
set_attr
(
"pass_num"
,
1
)
# 3. rename reader vars in inference program to different name
# to avoid read from train data.
main_block
=
program
.
global_block
()
for
var
in
main_block
.
vars
.
values
():
if
var
.
type
==
core
.
VarDesc
.
VarType
.
READER
:
main_block
.
_rename_var
(
str
(
var
.
name
),
str
(
_get_test_reader_name
(
var
.
name
)))
for
op
in
main_block
.
ops
:
if
op
.
type
==
root_reader_op
.
type
:
test_op
.
set_attr
(
"file_names"
,
filelist
)
if
op
.
type
==
"create_multi_pass_reader"
:
test_op
.
set_attr
(
"pass_num"
,
1
)
startup_program
.
_sync_with_cpp
()
program
.
_sync_with_cpp
()
return
program
def
_load_slice_up_vars
(
executor
,
dirname
,
slice_vars_and_atts
):
def
_load_slice_up_vars
(
executor
,
dirname
,
slice_vars_and_atts
):
if
slice_vars_and_atts
==
None
or
len
(
slice_vars_and_atts
)
==
0
:
if
slice_vars_and_atts
==
None
or
len
(
slice_vars_and_atts
)
==
0
:
return
return
...
...
python/paddle/fluid/layers/control_flow.py
浏览文件 @
018e2f3a
...
@@ -23,25 +23,17 @@ from ops import logical_and, logical_not, logical_or
...
@@ -23,25 +23,17 @@ from ops import logical_and, logical_not, logical_or
import
numpy
import
numpy
__all__
=
[
__all__
=
[
'split_lod_tensor'
,
'merge_lod_tensor'
,
'While'
,
'While'
,
'Switch'
,
'Switch'
,
'lod_rank_table'
,
'max_sequence_len'
,
'lod_tensor_to_array'
,
'array_to_lod_tensor'
,
'increment'
,
'increment'
,
'array_write'
,
'array_write'
,
'create_array'
,
'create_array'
,
'less_than'
,
'less_than'
,
'equal'
,
'equal'
,
'array_read'
,
'array_read'
,
'shrink_memory'
,
'array_length'
,
'array_length'
,
'IfElse'
,
'IfElse'
,
'DynamicRNN'
,
'DynamicRNN'
,
'ConditionalBlock'
,
'StaticRNN'
,
'StaticRNN'
,
'reorder_lod_tensor_by_rank'
,
'reorder_lod_tensor_by_rank'
,
'ParallelDo'
,
'ParallelDo'
,
...
@@ -1457,7 +1449,7 @@ class IfElse(object):
...
@@ -1457,7 +1449,7 @@ class IfElse(object):
if
self
.
status
==
IfElse
.
OUT_IF_ELSE_BLOCKS
:
if
self
.
status
==
IfElse
.
OUT_IF_ELSE_BLOCKS
:
raise
ValueError
(
"input must in true/false blocks"
)
raise
ValueError
(
"input must in true/false blocks"
)
if
id
(
x
)
not
in
self
.
input_table
:
if
id
(
x
)
not
in
self
.
input_table
:
parent_block
=
self
.
parent_block
()
parent_block
=
self
.
_
parent_block
()
out_true
=
parent_block
.
create_var
(
out_true
=
parent_block
.
create_var
(
name
=
unique_name
.
generate
(
'ifelse_input'
+
self
.
helper
.
name
),
name
=
unique_name
.
generate
(
'ifelse_input'
+
self
.
helper
.
name
),
dtype
=
x
.
dtype
)
dtype
=
x
.
dtype
)
...
@@ -1483,7 +1475,7 @@ class IfElse(object):
...
@@ -1483,7 +1475,7 @@ class IfElse(object):
else
:
else
:
return
out_false
return
out_false
def
parent_block
(
self
):
def
_
parent_block
(
self
):
current_block
=
self
.
helper
.
main_program
.
current_block
()
current_block
=
self
.
helper
.
main_program
.
current_block
()
return
self
.
helper
.
main_program
.
block
(
current_block
.
parent_idx
)
return
self
.
helper
.
main_program
.
block
(
current_block
.
parent_idx
)
...
@@ -1499,7 +1491,7 @@ class IfElse(object):
...
@@ -1499,7 +1491,7 @@ class IfElse(object):
out_table
=
self
.
output_table
[
1
if
self
.
status
==
out_table
=
self
.
output_table
[
1
if
self
.
status
==
self
.
IN_IF_ELSE_TRUE_BLOCKS
else
0
]
self
.
IN_IF_ELSE_TRUE_BLOCKS
else
0
]
parent_block
=
self
.
parent_block
()
parent_block
=
self
.
_
parent_block
()
for
each_out
in
outs
:
for
each_out
in
outs
:
if
not
isinstance
(
each_out
,
Variable
):
if
not
isinstance
(
each_out
,
Variable
):
raise
TypeError
(
"Each output should be a variable"
)
raise
TypeError
(
"Each output should be a variable"
)
...
...
python/paddle/fluid/layers/learning_rate_scheduler.py
浏览文件 @
018e2f3a
...
@@ -62,10 +62,10 @@ def noam_decay(d_model, warmup_steps):
...
@@ -62,10 +62,10 @@ def noam_decay(d_model, warmup_steps):
The decayed learning rate.
The decayed learning rate.
"""
"""
global_step
=
_decay_step_counter
(
1
)
global_step
=
_decay_step_counter
(
1
)
with
init_on_cpu
():
a
=
global_step
**-
0.5
a
=
global_step
**-
0.5
b
=
(
warmup_steps
**-
1.5
)
*
global_step
b
=
(
warmup_steps
**-
1.5
)
*
global_step
lr_value
=
(
d_model
**-
0.5
)
*
ops
.
elementwise_min
(
a
,
b
)
lr_value
=
(
d_model
**-
0.5
)
*
ops
.
elementwise_min
(
a
,
b
)
return
lr_value
return
lr_value
...
@@ -108,12 +108,10 @@ def exponential_decay(learning_rate, decay_steps, decay_rate, staircase=False):
...
@@ -108,12 +108,10 @@ def exponential_decay(learning_rate, decay_steps, decay_rate, staircase=False):
"""
"""
global_step
=
_decay_step_counter
()
global_step
=
_decay_step_counter
()
with
init_on_cpu
():
div_res
=
global_step
/
decay_steps
# update learning_rate
if
staircase
:
div_res
=
global_step
/
decay_steps
div_res
=
ops
.
floor
(
div_res
)
if
staircase
:
decayed_lr
=
learning_rate
*
(
decay_rate
**
div_res
)
div_res
=
ops
.
floor
(
div_res
)
decayed_lr
=
learning_rate
*
(
decay_rate
**
div_res
)
return
decayed_lr
return
decayed_lr
...
@@ -138,11 +136,10 @@ def natural_exp_decay(learning_rate, decay_steps, decay_rate, staircase=False):
...
@@ -138,11 +136,10 @@ def natural_exp_decay(learning_rate, decay_steps, decay_rate, staircase=False):
"""
"""
global_step
=
_decay_step_counter
()
global_step
=
_decay_step_counter
()
with
init_on_cpu
():
div_res
=
global_step
/
decay_steps
div_res
=
global_step
/
decay_steps
if
staircase
:
if
staircase
:
div_res
=
ops
.
floor
(
div_res
)
div_res
=
ops
.
floor
(
div_res
)
decayed_lr
=
learning_rate
*
ops
.
exp
(
-
1
*
decay_rate
*
div_res
)
decayed_lr
=
learning_rate
*
ops
.
exp
(
-
1
*
decay_rate
*
div_res
)
return
decayed_lr
return
decayed_lr
...
@@ -184,12 +181,11 @@ def inverse_time_decay(learning_rate, decay_steps, decay_rate, staircase=False):
...
@@ -184,12 +181,11 @@ def inverse_time_decay(learning_rate, decay_steps, decay_rate, staircase=False):
"""
"""
global_step
=
_decay_step_counter
()
global_step
=
_decay_step_counter
()
with
init_on_cpu
():
div_res
=
global_step
/
decay_steps
div_res
=
global_step
/
decay_steps
if
staircase
:
if
staircase
:
div_res
=
ops
.
floor
(
div_res
)
div_res
=
ops
.
floor
(
div_res
)
decayed_lr
=
learning_rate
/
(
1
+
decay_rate
*
div_res
)
decayed_lr
=
learning_rate
/
(
1
+
decay_rate
*
div_res
)
return
decayed_lr
return
decayed_lr
...
@@ -224,25 +220,22 @@ def polynomial_decay(learning_rate,
...
@@ -224,25 +220,22 @@ def polynomial_decay(learning_rate,
"""
"""
global_step
=
_decay_step_counter
()
global_step
=
_decay_step_counter
()
with
init_on_cpu
():
if
cycle
:
if
cycle
:
div_res
=
ops
.
ceil
(
global_step
/
decay_steps
)
div_res
=
ops
.
ceil
(
global_step
/
decay_steps
)
zero_var
=
tensor
.
fill_constant
(
shape
=
[
1
],
dtype
=
'float32'
,
value
=
0.0
)
zero_var
=
tensor
.
fill_constant
(
one_var
=
tensor
.
fill_constant
(
shape
=
[
1
],
dtype
=
'float32'
,
value
=
1.0
)
shape
=
[
1
],
dtype
=
'float32'
,
value
=
0.0
)
one_var
=
tensor
.
fill_constant
(
shape
=
[
1
],
dtype
=
'float32'
,
value
=
1.0
)
with
control_flow
.
Switch
()
as
switch
:
with
switch
.
case
(
global_step
==
zero_var
):
tensor
.
assign
(
input
=
one_var
,
output
=
div_res
)
decay_steps
=
decay_steps
*
div_res
else
:
decay_steps_var
=
tensor
.
fill_constant
(
shape
=
[
1
],
dtype
=
'float32'
,
value
=
float
(
decay_steps
))
global_step
=
ops
.
elementwise_min
(
x
=
global_step
,
y
=
decay_steps_var
)
decayed_lr
=
(
learning_rate
-
end_learning_rate
)
*
\
with
control_flow
.
Switch
()
as
switch
:
((
1
-
global_step
/
decay_steps
)
**
power
)
+
end_learning_rate
with
switch
.
case
(
global_step
==
zero_var
):
tensor
.
assign
(
input
=
one_var
,
output
=
div_res
)
decay_steps
=
decay_steps
*
div_res
else
:
decay_steps_var
=
tensor
.
fill_constant
(
shape
=
[
1
],
dtype
=
'float32'
,
value
=
float
(
decay_steps
))
global_step
=
ops
.
elementwise_min
(
x
=
global_step
,
y
=
decay_steps_var
)
decayed_lr
=
(
learning_rate
-
end_learning_rate
)
*
\
((
1
-
global_step
/
decay_steps
)
**
power
)
+
end_learning_rate
return
decayed_lr
return
decayed_lr
...
@@ -277,28 +270,28 @@ def piecewise_decay(boundaries, values):
...
@@ -277,28 +270,28 @@ def piecewise_decay(boundaries, values):
global_step
=
_decay_step_counter
()
global_step
=
_decay_step_counter
()
with
init_on_cpu
():
lr
=
tensor
.
create_global_var
(
lr
=
tensor
.
create_global_var
(
shape
=
[
1
],
shape
=
[
1
],
value
=
0.0
,
value
=
0.0
,
dtype
=
'float32'
,
dtype
=
'float32'
,
persistable
=
True
,
persistable
=
True
,
name
=
"learning_rate"
)
name
=
"learning_rate"
)
with
control_flow
.
Switch
()
as
switch
:
with
control_flow
.
Switch
()
as
switch
:
for
i
in
range
(
len
(
boundaries
)):
for
i
in
range
(
len
(
boundaries
)):
boundary_val
=
tensor
.
fill_constant
(
boundary_val
=
tensor
.
fill_constant
(
shape
=
[
1
],
dtype
=
'float32'
,
value
=
float
(
boundaries
[
i
]))
value_var
=
tensor
.
fill_constant
(
shape
=
[
1
],
dtype
=
'float32'
,
value
=
float
(
values
[
i
]))
with
switch
.
case
(
global_step
<
boundary_val
):
tensor
.
assign
(
value_var
,
lr
)
last_value_var
=
tensor
.
fill_constant
(
shape
=
[
1
],
shape
=
[
1
],
dtype
=
'float32'
,
dtype
=
'float32'
,
value
=
float
(
values
[
len
(
values
)
-
1
]))
value
=
float
(
boundaries
[
i
]),
with
switch
.
default
():
force_cpu
=
True
)
tensor
.
assign
(
last_value_var
,
lr
)
value_var
=
tensor
.
fill_constant
(
shape
=
[
1
],
dtype
=
'float32'
,
value
=
float
(
values
[
i
]))
with
switch
.
case
(
global_step
<
boundary_val
):
tensor
.
assign
(
value_var
,
lr
)
last_value_var
=
tensor
.
fill_constant
(
shape
=
[
1
],
dtype
=
'float32'
,
value
=
float
(
values
[
len
(
values
)
-
1
]))
with
switch
.
default
():
tensor
.
assign
(
last_value_var
,
lr
)
return
lr
return
lr
...
@@ -333,9 +326,9 @@ def append_LARS(params_grads, learning_rate, weight_decay):
...
@@ -333,9 +326,9 @@ def append_LARS(params_grads, learning_rate, weight_decay):
grad_norm
=
ops
.
sqrt
(
nn
.
reduce_sum
(
input
=
ops
.
square
(
grad
)))
grad_norm
=
ops
.
sqrt
(
nn
.
reduce_sum
(
input
=
ops
.
square
(
grad
)))
if
type
(
param_lr
)
==
float
and
param_lr
==
1.0
:
if
type
(
param_lr
)
==
float
and
param_lr
==
1.0
:
decayed_lr
=
learning_rate
*
param_norm
\
decayed_lr
=
learning_rate
*
param_norm
\
/
_balanced_weight
(
param_norm
,
grad_norm
)
/
_balanced_weight
(
param_norm
,
grad_norm
)
else
:
else
:
decayed_lr
=
learning_rate
*
param_lr
*
param_norm
\
decayed_lr
=
learning_rate
*
param_lr
*
param_norm
\
/
_balanced_weight
(
param_norm
,
grad_norm
)
/
_balanced_weight
(
param_norm
,
grad_norm
)
# set back param local learning rate
# set back param local learning rate
param
.
optimize_attr
[
'learning_rate'
]
=
decayed_lr
param
.
optimize_attr
[
'learning_rate'
]
=
decayed_lr
python/paddle/fluid/tests/demo/
text_classification
/.gitignore
→
python/paddle/fluid/tests/demo/
file_reader
/.gitignore
浏览文件 @
018e2f3a
文件已移动
python/paddle/fluid/tests/demo/
text_classification
/convert_data_to_recordio.py
→
python/paddle/fluid/tests/demo/
file_reader
/convert_data_to_recordio.py
浏览文件 @
018e2f3a
...
@@ -35,7 +35,7 @@ if len(sys.argv) == 1:
...
@@ -35,7 +35,7 @@ if len(sys.argv) == 1:
word_dict
=
paddle
.
dataset
.
imdb
.
word_dict
()
word_dict
=
paddle
.
dataset
.
imdb
.
word_dict
()
else
:
else
:
word_dict
=
load_vocab
(
sys
.
argv
[
1
])
word_dict
=
load_vocab
(
sys
.
argv
[
1
])
word_dict
[
"<unk>"
]
=
len
(
word_dict
)
word_dict
[
"<unk>"
]
=
len
(
word_dict
)
print
"Dict dim = "
,
len
(
word_dict
)
print
"Dict dim = "
,
len
(
word_dict
)
# input text data
# input text data
...
@@ -50,7 +50,7 @@ feeder = fluid.DataFeeder(feed_list=[data, label], place=fluid.CPUPlace())
...
@@ -50,7 +50,7 @@ feeder = fluid.DataFeeder(feed_list=[data, label], place=fluid.CPUPlace())
BATCH_SIZE
=
128
BATCH_SIZE
=
128
train_reader
=
paddle
.
batch
(
train_reader
=
paddle
.
batch
(
paddle
.
reader
.
shuffle
(
paddle
.
reader
.
shuffle
(
paddle
.
dataset
.
imdb
.
train
(
word_dict
),
buf_size
=
10
000
),
paddle
.
dataset
.
imdb
.
train
(
word_dict
),
buf_size
=
25
000
),
batch_size
=
BATCH_SIZE
)
batch_size
=
BATCH_SIZE
)
test_reader
=
paddle
.
batch
(
test_reader
=
paddle
.
batch
(
...
...
python/paddle/fluid/tests/demo/
text_classification
/train.py
→
python/paddle/fluid/tests/demo/
file_reader
/train.py
浏览文件 @
018e2f3a
...
@@ -19,7 +19,7 @@ import sys
...
@@ -19,7 +19,7 @@ import sys
TRAIN_FILES
=
[
'train.recordio'
]
TRAIN_FILES
=
[
'train.recordio'
]
TEST_FILES
=
[
'test.recordio'
]
TEST_FILES
=
[
'test.recordio'
]
DICT_DIM
=
89528
DICT_DIM
=
5147
# embedding dim
# embedding dim
emb_dim
=
128
emb_dim
=
128
...
@@ -27,58 +27,46 @@ emb_dim = 128
...
@@ -27,58 +27,46 @@ emb_dim = 128
# hidden dim
# hidden dim
hid_dim
=
128
hid_dim
=
128
# hidden dim2
hid_dim2
=
96
# class num
# class num
class_dim
=
2
class_dim
=
2
# epoch num
epoch_num
=
10
def
network_cfg
(
is_train
,
pass_num
=
100
):
with
fluid
.
unique_name
.
guard
():
train_file_obj
=
fluid
.
layers
.
open_files
(
filenames
=
TRAIN_FILES
,
pass_num
=
pass_num
,
shapes
=
[[
-
1
,
1
],
[
-
1
,
1
]],
lod_levels
=
[
1
,
0
],
dtypes
=
[
'int64'
,
'int64'
])
test_file_obj
=
fluid
.
layers
.
open_files
(
filenames
=
TEST_FILES
,
pass_num
=
1
,
shapes
=
[[
-
1
,
1
],
[
-
1
,
1
]],
lod_levels
=
[
1
,
0
],
dtypes
=
[
'int64'
,
'int64'
])
if
is_train
:
def
build_program
(
is_train
):
file_obj
=
fluid
.
layers
.
shuffle
(
train_file_obj
,
buffer_size
=
1000
)
file_obj_handle
=
fluid
.
layers
.
io
.
open_files
(
else
:
filenames
=
TRAIN_FILES
if
is_train
else
TEST_FILES
,
file_obj
=
test_file_obj
shapes
=
[[
-
1
,
1
],
[
-
1
,
1
]],
lod_levels
=
[
1
,
0
],
dtypes
=
[
'int64'
,
'int64'
])
file_obj
=
fluid
.
layers
.
double_buffer
(
file_obj
=
fluid
.
layers
.
io
.
double_buffer
(
file_obj_handle
)
file_obj
,
name
=
"train_double_buffer"
if
is_train
else
'test_double_buffer'
)
with
fluid
.
unique_name
.
guard
():
data
,
label
=
fluid
.
layers
.
read_file
(
file_obj
)
data
,
label
=
fluid
.
layers
.
read_file
(
file_obj
)
emb
=
fluid
.
layers
.
embedding
(
input
=
data
,
size
=
[
DICT_DIM
,
emb_dim
])
emb
=
fluid
.
layers
.
embedding
(
input
=
data
,
size
=
[
DICT_DIM
,
emb_dim
])
# sequence conv with window size = 3
win_size
=
3
conv_3
=
fluid
.
nets
.
sequence_conv_pool
(
conv_3
=
fluid
.
nets
.
sequence_conv_pool
(
input
=
emb
,
input
=
emb
,
num_filters
=
hid_dim
,
num_filters
=
hid_dim
,
filter_size
=
win_size
,
filter_size
=
3
,
act
=
"tanh"
,
act
=
"tanh"
,
pool_type
=
"
max
"
)
pool_type
=
"
sqrt
"
)
# fc layer after conv
conv_4
=
fluid
.
nets
.
sequence_conv_pool
(
fc_1
=
fluid
.
layers
.
fc
(
input
=
[
conv_3
],
size
=
hid_dim2
)
input
=
emb
,
num_filters
=
hid_dim
,
filter_size
=
4
,
act
=
"tanh"
,
pool_type
=
"sqrt"
)
# probability of each class
prediction
=
fluid
.
layers
.
fc
(
input
=
[
conv_3
,
conv_4
],
prediction
=
fluid
.
layers
.
fc
(
input
=
[
fc_1
],
size
=
class_dim
,
size
=
class_dim
,
act
=
"softmax"
)
act
=
"softmax"
)
# cross entropy loss
# cross entropy loss
cost
=
fluid
.
layers
.
cross_entropy
(
input
=
prediction
,
label
=
label
)
cost
=
fluid
.
layers
.
cross_entropy
(
input
=
prediction
,
label
=
label
)
...
@@ -88,58 +76,62 @@ def network_cfg(is_train, pass_num=100):
...
@@ -88,58 +76,62 @@ def network_cfg(is_train, pass_num=100):
if
is_train
:
if
is_train
:
# SGD optimizer
# SGD optimizer
sgd_optimizer
=
fluid
.
optimizer
.
Adagrad
(
learning_rate
=
0.01
)
sgd_optimizer
=
fluid
.
optimizer
.
Adagrad
(
learning_rate
=
0.0
0
1
)
sgd_optimizer
.
minimize
(
avg_cost
)
sgd_optimizer
.
minimize
(
avg_cost
)
return
{
return
{
'loss'
:
avg_cost
,
'log'
:
[
avg_cost
,
acc
],
'file'
:
file_obj_handle
}
'loss'
:
avg_cost
,
'log'
:
[
avg_cost
,
acc
],
'file'
:
train_file_obj
if
is_train
else
test_file_obj
}
def
main
():
def
main
():
train
=
fluid
.
Program
()
train
=
fluid
.
Program
()
startup
=
fluid
.
Program
()
startup
=
fluid
.
Program
()
test
=
fluid
.
Program
()
with
fluid
.
program_guard
(
train
,
startup
):
with
fluid
.
program_guard
(
train
,
startup
):
train_args
=
network_cfg
(
is_train
=
True
)
train_args
=
build_program
(
is_train
=
True
)
test
=
fluid
.
Program
()
with
fluid
.
program_guard
(
test
,
fluid
.
Program
()
):
with
fluid
.
program_guard
(
test
,
startup
):
test_args
=
network_cfg
(
is_train
=
False
)
test_args
=
build_program
(
is_train
=
False
)
use_cuda
=
fluid
.
core
.
is_compiled_with_cuda
()
# startup
# startup
place
=
fluid
.
CUDAPlace
(
0
)
place
=
fluid
.
CUDAPlace
(
0
)
if
use_cuda
else
fluid
.
CPUPlace
()
exe
=
fluid
.
Executor
(
place
=
place
)
exe
=
fluid
.
Executor
(
place
=
place
)
exe
.
run
(
startup
)
exe
.
run
(
startup
)
train_exe
=
fluid
.
ParallelExecutor
(
train_exe
=
fluid
.
ParallelExecutor
(
use_cuda
=
True
,
loss_name
=
train_args
[
'loss'
].
name
,
main_program
=
train
)
use_cuda
=
use_cuda
,
loss_name
=
train_args
[
'loss'
].
name
,
main_program
=
train
)
test_exe
=
fluid
.
ParallelExecutor
(
use_cuda
=
use_cuda
,
main_program
=
test
,
share_vars_from
=
train_exe
)
fetch_var_list
=
[
var
.
name
for
var
in
train_args
[
'log'
]]
fetch_var_list
=
[
var
.
name
for
var
in
train_args
[
'log'
]]
for
i
in
xrange
(
sys
.
maxint
):
for
epoch_id
in
range
(
epoch_num
):
result
=
map
(
numpy
.
array
,
# train
train_exe
.
run
(
fetch_list
=
fetch_var_list
try
:
if
i
%
1000
==
0
else
[]))
batch_id
=
0
if
len
(
result
)
!=
0
:
while
True
:
print
'Train: '
,
result
loss
,
acc
=
map
(
numpy
.
array
,
train_exe
.
run
(
fetch_list
=
fetch_var_list
))
if
i
%
1000
==
0
:
print
'Train epoch'
,
epoch_id
,
'batch'
,
batch_id
,
'loss:'
,
loss
,
'acc:'
,
acc
test_exe
=
fluid
.
ParallelExecutor
(
batch_id
+=
1
use_cuda
=
True
,
main_program
=
test
,
share_vars_from
=
train_exe
)
except
fluid
.
core
.
EOFException
:
loss
=
[]
print
'End of epoch'
,
epoch_id
acc
=
[]
train_args
[
'file'
].
reset
()
try
:
while
True
:
# test
loss_np
,
acc_np
=
map
(
loss
=
[]
numpy
.
array
,
test_exe
.
run
(
fetch_list
=
fetch_var_list
))
acc
=
[]
loss
.
append
(
loss_np
[
0
])
try
:
acc
.
append
(
acc_np
[
0
])
while
True
:
except
:
loss_np
,
acc_np
=
map
(
numpy
.
array
,
test_args
[
'file'
].
reset
()
test_exe
.
run
(
fetch_list
=
fetch_var_list
))
print
'TEST: '
,
numpy
.
mean
(
loss
),
numpy
.
mean
(
acc
)
loss
.
append
(
loss_np
[
0
])
acc
.
append
(
acc_np
[
0
])
except
:
test_args
[
'file'
].
reset
()
print
'Test loss:'
,
numpy
.
mean
(
loss
),
'acc:'
,
numpy
.
mean
(
acc
)
if
__name__
==
'__main__'
:
if
__name__
==
'__main__'
:
...
...
python/paddle/fluid/tests/test_error_clip.py
浏览文件 @
018e2f3a
...
@@ -36,7 +36,7 @@ with fluid.program_guard(main_program=prog):
...
@@ -36,7 +36,7 @@ with fluid.program_guard(main_program=prog):
avg_cost
=
fluid
.
layers
.
mean
(
cost
)
avg_cost
=
fluid
.
layers
.
mean
(
cost
)
prog_clip
=
prog
.
clone
()
prog_clip
=
prog
.
clone
()
prog_clip
.
block
(
0
).
var
(
hidden1
.
name
).
set_error_clip
(
prog_clip
.
block
(
0
).
var
(
hidden1
.
name
).
_
set_error_clip
(
fluid
.
clip
.
ErrorClipByValue
(
fluid
.
clip
.
ErrorClipByValue
(
max
=
CLIP_MAX
,
min
=
CLIP_MIN
))
max
=
CLIP_MAX
,
min
=
CLIP_MIN
))
...
...
python/paddle/fluid/tests/test_if_else_op.py
浏览文件 @
018e2f3a
...
@@ -19,6 +19,10 @@ from paddle.fluid.executor import Executor
...
@@ -19,6 +19,10 @@ from paddle.fluid.executor import Executor
from
paddle.fluid.optimizer
import
MomentumOptimizer
from
paddle.fluid.optimizer
import
MomentumOptimizer
import
paddle.fluid.core
as
core
import
paddle.fluid.core
as
core
import
paddle.fluid
as
fluid
import
paddle.fluid
as
fluid
from
paddle.fluid.layers.control_flow
import
split_lod_tensor
from
paddle.fluid.layers.control_flow
import
merge_lod_tensor
from
paddle.fluid.layers.control_flow
import
ConditionalBlock
import
unittest
import
unittest
import
numpy
as
np
import
numpy
as
np
...
@@ -34,11 +38,10 @@ class TestMNISTIfElseOp(unittest.TestCase):
...
@@ -34,11 +38,10 @@ class TestMNISTIfElseOp(unittest.TestCase):
limit
=
layers
.
fill_constant
(
shape
=
[
1
],
dtype
=
'int64'
,
value
=
5
)
limit
=
layers
.
fill_constant
(
shape
=
[
1
],
dtype
=
'int64'
,
value
=
5
)
cond
=
layers
.
less_than
(
x
=
label
,
y
=
limit
)
cond
=
layers
.
less_than
(
x
=
label
,
y
=
limit
)
true_image
,
false_image
=
layers
.
split_lod_tensor
(
true_image
,
false_image
=
split_lod_tensor
(
input
=
image
,
mask
=
cond
)
input
=
image
,
mask
=
cond
)
true_out
=
layers
.
create_tensor
(
dtype
=
'float32'
)
true_out
=
layers
.
create_tensor
(
dtype
=
'float32'
)
true_cond
=
layers
.
ConditionalBlock
([
cond
])
true_cond
=
ConditionalBlock
([
cond
])
with
true_cond
.
block
():
with
true_cond
.
block
():
hidden
=
layers
.
fc
(
input
=
true_image
,
size
=
100
,
act
=
'tanh'
)
hidden
=
layers
.
fc
(
input
=
true_image
,
size
=
100
,
act
=
'tanh'
)
...
@@ -46,14 +49,14 @@ class TestMNISTIfElseOp(unittest.TestCase):
...
@@ -46,14 +49,14 @@ class TestMNISTIfElseOp(unittest.TestCase):
layers
.
assign
(
input
=
prob
,
output
=
true_out
)
layers
.
assign
(
input
=
prob
,
output
=
true_out
)
false_out
=
layers
.
create_tensor
(
dtype
=
'float32'
)
false_out
=
layers
.
create_tensor
(
dtype
=
'float32'
)
false_cond
=
layers
.
ConditionalBlock
([
cond
])
false_cond
=
ConditionalBlock
([
cond
])
with
false_cond
.
block
():
with
false_cond
.
block
():
hidden
=
layers
.
fc
(
input
=
false_image
,
size
=
200
,
act
=
'tanh'
)
hidden
=
layers
.
fc
(
input
=
false_image
,
size
=
200
,
act
=
'tanh'
)
prob
=
layers
.
fc
(
input
=
hidden
,
size
=
10
,
act
=
'softmax'
)
prob
=
layers
.
fc
(
input
=
hidden
,
size
=
10
,
act
=
'softmax'
)
layers
.
assign
(
input
=
prob
,
output
=
false_out
)
layers
.
assign
(
input
=
prob
,
output
=
false_out
)
prob
=
layers
.
merge_lod_tensor
(
prob
=
merge_lod_tensor
(
in_true
=
true_out
,
in_false
=
false_out
,
mask
=
cond
,
x
=
image
)
in_true
=
true_out
,
in_false
=
false_out
,
mask
=
cond
,
x
=
image
)
loss
=
layers
.
cross_entropy
(
input
=
prob
,
label
=
label
)
loss
=
layers
.
cross_entropy
(
input
=
prob
,
label
=
label
)
avg_loss
=
layers
.
mean
(
loss
)
avg_loss
=
layers
.
mean
(
loss
)
...
...
python/paddle/fluid/tests/unittests/op_test.py
浏览文件 @
018e2f3a
...
@@ -251,7 +251,7 @@ class OpTest(unittest.TestCase):
...
@@ -251,7 +251,7 @@ class OpTest(unittest.TestCase):
for
out_name
,
out_dup
in
Operator
.
get_op_outputs
(
self
.
op_type
):
for
out_name
,
out_dup
in
Operator
.
get_op_outputs
(
self
.
op_type
):
fetch_list
.
append
(
str
(
out_name
))
fetch_list
.
append
(
str
(
out_name
))
# fetch_list = map(block.var, fetch_list)
# fetch_list = map(block.var, fetch_list)
if
not
isinstance
(
fetch_list
[
0
],
Variable
):
if
not
isinstance
(
fetch_list
[
0
],
fluid
.
framework
.
Variable
):
fetch_list
=
map
(
block
.
var
,
fetch_list
)
fetch_list
=
map
(
block
.
var
,
fetch_list
)
outs
=
executor
.
run
(
program
,
outs
=
executor
.
run
(
program
,
feed
=
feed_map
,
feed
=
feed_map
,
...
...
python/paddle/fluid/tests/unittests/test_conditional_block.py
浏览文件 @
018e2f3a
...
@@ -18,14 +18,15 @@ import paddle.fluid.core as core
...
@@ -18,14 +18,15 @@ import paddle.fluid.core as core
from
paddle.fluid.framework
import
default_startup_program
,
default_main_program
from
paddle.fluid.framework
import
default_startup_program
,
default_main_program
from
paddle.fluid.executor
import
Executor
from
paddle.fluid.executor
import
Executor
from
paddle.fluid.backward
import
append_backward
from
paddle.fluid.backward
import
append_backward
from
paddle.fluid.layers.control_flow
import
ConditionalBlock
import
numpy
import
numpy
class
ConditionalBlock
(
unittest
.
TestCase
):
class
ConditionalBlock
Test
(
unittest
.
TestCase
):
def
test_forward
(
self
):
def
test_forward
(
self
):
data
=
layers
.
data
(
name
=
'X'
,
shape
=
[
1
],
dtype
=
'float32'
)
data
=
layers
.
data
(
name
=
'X'
,
shape
=
[
1
],
dtype
=
'float32'
)
data
.
stop_gradient
=
False
data
.
stop_gradient
=
False
cond
=
layers
.
ConditionalBlock
(
inputs
=
[
data
])
cond
=
ConditionalBlock
(
inputs
=
[
data
])
out
=
layers
.
create_tensor
(
dtype
=
'float32'
)
out
=
layers
.
create_tensor
(
dtype
=
'float32'
)
with
cond
.
block
():
with
cond
.
block
():
hidden
=
layers
.
fc
(
input
=
data
,
size
=
10
)
hidden
=
layers
.
fc
(
input
=
data
,
size
=
10
)
...
...
python/paddle/fluid/tests/unittests/test_const_value.py
浏览文件 @
018e2f3a
...
@@ -16,7 +16,7 @@ import unittest
...
@@ -16,7 +16,7 @@ import unittest
import
paddle.fluid.framework
as
framework
import
paddle.fluid.framework
as
framework
class
Con
ditionalBlock
(
unittest
.
TestCase
):
class
Con
stantTest
(
unittest
.
TestCase
):
def
test_const_value
(
self
):
def
test_const_value
(
self
):
self
.
assertEqual
(
framework
.
GRAD_VAR_SUFFIX
,
"@GRAD"
)
self
.
assertEqual
(
framework
.
GRAD_VAR_SUFFIX
,
"@GRAD"
)
self
.
assertEqual
(
framework
.
TEMP_VAR_NAME
,
"@TEMP@"
)
self
.
assertEqual
(
framework
.
TEMP_VAR_NAME
,
"@TEMP@"
)
...
...
python/paddle/fluid/tests/unittests/test_dyn_rnn.py
浏览文件 @
018e2f3a
...
@@ -17,6 +17,12 @@ import paddle
...
@@ -17,6 +17,12 @@ import paddle
import
unittest
import
unittest
import
numpy
import
numpy
from
paddle.fluid.layers.control_flow
import
lod_rank_table
from
paddle.fluid.layers.control_flow
import
max_sequence_len
from
paddle.fluid.layers.control_flow
import
lod_tensor_to_array
from
paddle.fluid.layers.control_flow
import
array_to_lod_tensor
from
paddle.fluid.layers.control_flow
import
shrink_memory
class
TestDynRNN
(
unittest
.
TestCase
):
class
TestDynRNN
(
unittest
.
TestCase
):
def
setUp
(
self
):
def
setUp
(
self
):
...
@@ -38,12 +44,11 @@ class TestDynRNN(unittest.TestCase):
...
@@ -38,12 +44,11 @@ class TestDynRNN(unittest.TestCase):
label
=
fluid
.
layers
.
data
(
name
=
'label'
,
shape
=
[
1
],
dtype
=
'float32'
)
label
=
fluid
.
layers
.
data
(
name
=
'label'
,
shape
=
[
1
],
dtype
=
'float32'
)
rank_table
=
fluid
.
layers
.
lod_rank_table
(
x
=
sent_emb
)
rank_table
=
lod_rank_table
(
x
=
sent_emb
)
sent_emb_array
=
fluid
.
layers
.
lod_tensor_to_array
(
sent_emb_array
=
lod_tensor_to_array
(
x
=
sent_emb
,
table
=
rank_table
)
x
=
sent_emb
,
table
=
rank_table
)
seq_len
=
fluid
.
layers
.
max_sequence_len
(
rank_table
=
rank_table
)
seq_len
=
max_sequence_len
(
rank_table
=
rank_table
)
i
=
fluid
.
layers
.
fill_constant
(
shape
=
[
1
],
dtype
=
'int64'
,
value
=
0
)
i
=
fluid
.
layers
.
fill_constant
(
shape
=
[
1
],
dtype
=
'int64'
,
value
=
0
)
i
.
stop_gradient
=
False
i
.
stop_gradient
=
False
...
@@ -66,7 +71,7 @@ class TestDynRNN(unittest.TestCase):
...
@@ -66,7 +71,7 @@ class TestDynRNN(unittest.TestCase):
mem
=
fluid
.
layers
.
array_read
(
array
=
mem_array
,
i
=
i
)
mem
=
fluid
.
layers
.
array_read
(
array
=
mem_array
,
i
=
i
)
ipt
=
fluid
.
layers
.
array_read
(
array
=
sent_emb_array
,
i
=
i
)
ipt
=
fluid
.
layers
.
array_read
(
array
=
sent_emb_array
,
i
=
i
)
mem
=
fluid
.
layers
.
shrink_memory
(
x
=
mem
,
i
=
i
,
table
=
rank_table
)
mem
=
shrink_memory
(
x
=
mem
,
i
=
i
,
table
=
rank_table
)
hidden
=
fluid
.
layers
.
fc
(
input
=
[
mem
,
ipt
],
size
=
100
,
act
=
'tanh'
)
hidden
=
fluid
.
layers
.
fc
(
input
=
[
mem
,
ipt
],
size
=
100
,
act
=
'tanh'
)
...
@@ -75,8 +80,7 @@ class TestDynRNN(unittest.TestCase):
...
@@ -75,8 +80,7 @@ class TestDynRNN(unittest.TestCase):
fluid
.
layers
.
array_write
(
x
=
hidden
,
i
=
i
,
array
=
mem_array
)
fluid
.
layers
.
array_write
(
x
=
hidden
,
i
=
i
,
array
=
mem_array
)
fluid
.
layers
.
less_than
(
x
=
i
,
y
=
seq_len
,
cond
=
cond
)
fluid
.
layers
.
less_than
(
x
=
i
,
y
=
seq_len
,
cond
=
cond
)
all_timesteps
=
fluid
.
layers
.
array_to_lod_tensor
(
all_timesteps
=
array_to_lod_tensor
(
x
=
out
,
table
=
rank_table
)
x
=
out
,
table
=
rank_table
)
last
=
fluid
.
layers
.
sequence_last_step
(
input
=
all_timesteps
)
last
=
fluid
.
layers
.
sequence_last_step
(
input
=
all_timesteps
)
logits
=
fluid
.
layers
.
fc
(
input
=
last
,
size
=
1
,
act
=
None
)
logits
=
fluid
.
layers
.
fc
(
input
=
last
,
size
=
1
,
act
=
None
)
loss
=
fluid
.
layers
.
sigmoid_cross_entropy_with_logits
(
loss
=
fluid
.
layers
.
sigmoid_cross_entropy_with_logits
(
...
...
python/paddle/fluid/tests/unittests/test_learning_rate_scheduler.py
浏览文件 @
018e2f3a
...
@@ -91,20 +91,21 @@ class TestLearningRateDecay(unittest.TestCase):
...
@@ -91,20 +91,21 @@ class TestLearningRateDecay(unittest.TestCase):
def
check_decay_with_place
(
self
,
place
,
python_decay_fn
,
fluid_decay_fn
,
def
check_decay_with_place
(
self
,
place
,
python_decay_fn
,
fluid_decay_fn
,
kwargs
):
kwargs
):
main_prog
=
fluid
.
Program
()
startup_prog
=
fluid
.
Program
()
decayed_lr
=
fluid_decay_fn
(
**
kwargs
)
with
fluid
.
program_guard
(
main_prog
,
startup_prog
):
decayed_lr
=
fluid_decay_fn
(
**
kwargs
)
place
=
fluid
.
CPUPlace
()
place
=
fluid
.
CPUPlace
()
exe
=
fluid
.
Executor
(
place
)
exe
=
fluid
.
Executor
(
place
)
exe
.
run
(
fluid
.
default_startup_program
()
)
exe
.
run
(
startup_prog
)
fluid
.
memory_optimize
(
fluid
.
default_main_program
()
)
fluid
.
memory_optimize
(
main_prog
)
for
step
in
range
(
10
):
for
step
in
range
(
10
):
lr_val
,
=
exe
.
run
(
fluid
.
default_main_program
(),
lr_val
,
=
exe
.
run
(
main_prog
,
feed
=
{},
fetch_list
=
[
decayed_lr
])
feed
=
{},
fetch_list
=
[
decayed_lr
])
python_decayed_lr
=
python_decay_fn
(
python_decayed_lr
=
python_decay_fn
(
global_step
=
float
(
step
),
**
kwargs
)
global_step
=
float
(
step
),
**
kwargs
)
self
.
assertAlmostEqual
(
self
.
assertAlmostEqual
(
...
...
python/paddle/fluid/tests/unittests/test_lod_rank_table.py
浏览文件 @
018e2f3a
...
@@ -12,7 +12,8 @@
...
@@ -12,7 +12,8 @@
# See the License for the specific language governing permissions and
# See the License for the specific language governing permissions and
# limitations under the License.
# limitations under the License.
from
paddle.fluid.layers
import
lod_rank_table
,
data
from
paddle.fluid.layers
import
data
from
paddle.fluid.layers.control_flow
import
lod_rank_table
from
paddle.fluid.executor
import
Executor
from
paddle.fluid.executor
import
Executor
import
paddle.fluid.core
as
core
import
paddle.fluid.core
as
core
import
numpy
import
numpy
...
...
python/paddle/fluid/tests/unittests/test_lod_tensor_array_ops.py
浏览文件 @
018e2f3a
...
@@ -20,6 +20,11 @@ from paddle.fluid.framework import Program, program_guard
...
@@ -20,6 +20,11 @@ from paddle.fluid.framework import Program, program_guard
from
paddle.fluid.executor
import
Executor
from
paddle.fluid.executor
import
Executor
from
paddle.fluid.backward
import
append_backward
from
paddle.fluid.backward
import
append_backward
from
paddle.fluid.layers.control_flow
import
lod_rank_table
from
paddle.fluid.layers.control_flow
import
max_sequence_len
from
paddle.fluid.layers.control_flow
import
lod_tensor_to_array
from
paddle.fluid.layers.control_flow
import
array_to_lod_tensor
class
TestCPULoDTensorArrayOps
(
unittest
.
TestCase
):
class
TestCPULoDTensorArrayOps
(
unittest
.
TestCase
):
def
place
(
self
):
def
place
(
self
):
...
@@ -137,13 +142,13 @@ class TestCPULoDTensorArrayOps(unittest.TestCase):
...
@@ -137,13 +142,13 @@ class TestCPULoDTensorArrayOps(unittest.TestCase):
with
program_guard
(
program
):
with
program_guard
(
program
):
x
=
layers
.
data
(
name
=
'x'
,
shape
=
[
10
])
x
=
layers
.
data
(
name
=
'x'
,
shape
=
[
10
])
x
.
persistable
=
True
x
.
persistable
=
True
table
=
l
ayers
.
l
od_rank_table
(
x
,
level
=
level
)
table
=
lod_rank_table
(
x
,
level
=
level
)
max_len
=
layers
.
max_sequence_len
(
table
)
max_len
=
max_sequence_len
(
table
)
max_len
.
persistable
=
True
max_len
.
persistable
=
True
array
=
l
ayers
.
l
od_tensor_to_array
(
x
,
table
)
array
=
lod_tensor_to_array
(
x
,
table
)
array
.
persistable
=
True
array
.
persistable
=
True
result
=
layers
.
array_to_lod_tensor
(
array
,
table
)
result
=
array_to_lod_tensor
(
array
,
table
)
result
.
persistable
=
True
result
.
persistable
=
True
exe
=
Executor
(
place
)
exe
=
Executor
(
place
)
scope
=
core
.
Scope
()
scope
=
core
.
Scope
()
...
@@ -181,9 +186,9 @@ class TestCPULoDTensorArrayOpGrad(unittest.TestCase):
...
@@ -181,9 +186,9 @@ class TestCPULoDTensorArrayOpGrad(unittest.TestCase):
with
program_guard
(
program
):
with
program_guard
(
program
):
x
=
layers
.
data
(
x
=
layers
.
data
(
name
=
'x'
,
shape
=
[
1
],
dtype
=
'float32'
,
stop_gradient
=
False
)
name
=
'x'
,
shape
=
[
1
],
dtype
=
'float32'
,
stop_gradient
=
False
)
table
=
l
ayers
.
l
od_rank_table
(
x
,
level
=
0
)
table
=
lod_rank_table
(
x
,
level
=
0
)
array
=
l
ayers
.
l
od_tensor_to_array
(
x
,
table
)
array
=
lod_tensor_to_array
(
x
,
table
)
result
=
layers
.
array_to_lod_tensor
(
array
,
table
)
result
=
array_to_lod_tensor
(
array
,
table
)
mean
=
layers
.
mean
(
result
)
mean
=
layers
.
mean
(
result
)
...
...
python/paddle/fluid/tests/unittests/test_parallel_executor_mnist.py
浏览文件 @
018e2f3a
...
@@ -107,44 +107,24 @@ class TestMNIST(TestParallelExecutorBase):
...
@@ -107,44 +107,24 @@ class TestMNIST(TestParallelExecutorBase):
label
=
np
.
ones
(
shape
=
[
32
,
1
],
dtype
=
'int64'
)
label
=
np
.
ones
(
shape
=
[
32
,
1
],
dtype
=
'int64'
)
return
img
,
label
return
img
,
label
# simple_fc
def
_compare_reduce_and_allreduce
(
self
,
model
,
use_cuda
,
random_data
=
True
):
def
check_simple_fc_convergence
(
self
,
use_cuda
,
use_reduce
=
False
):
if
use_cuda
and
not
core
.
is_compiled_with_cuda
():
if
use_cuda
and
not
core
.
is_compiled_with_cuda
():
return
return
self
.
check_network_convergence
(
simple_fc_net
,
use_cuda
=
use_cuda
)
self
.
check_network_convergence
(
self
.
check_network_convergence
(
simple_fc_net
,
use_cuda
=
use_cuda
,
allow_op_delay
=
True
)
model
,
use_cuda
=
use_cuda
,
use_reduce
=
True
)
img
,
label
=
self
.
_init_data
()
self
.
check_network_convergence
(
self
.
check_network_convergence
(
simple_fc_net
,
model
,
use_cuda
=
use_cuda
,
allow_op_delay
=
True
,
use_reduce
=
True
)
feed_dict
=
{
"image"
:
img
,
"label"
:
label
},
use_cuda
=
use_cuda
,
use_reduce
=
use_reduce
)
def
check_simple_fc_convergence_with_Reduce
(
self
,
use_cuda
):
img
,
label
=
self
.
_init_data
(
random_data
)
if
use_cuda
and
not
core
.
is_compiled_with_cuda
():
return
self
.
check_network_convergence
(
simple_fc_net
,
use_cuda
=
use_cuda
,
use_reduce
=
True
)
self
.
check_network_convergence
(
simple_fc_net
,
use_cuda
=
use_cuda
,
allow_op_delay
=
True
,
use_reduce
=
True
)
img
,
label
=
self
.
_init_data
()
all_reduce_first_loss
,
all_reduce_last_loss
=
self
.
check_network_convergence
(
all_reduce_first_loss
,
all_reduce_last_loss
=
self
.
check_network_convergence
(
simple_fc_net
,
model
,
feed_dict
=
{
"image"
:
img
,
feed_dict
=
{
"image"
:
img
,
"label"
:
label
},
"label"
:
label
},
use_cuda
=
use_cuda
,
use_cuda
=
use_cuda
,
use_reduce
=
False
)
use_reduce
=
False
)
reduce_first_loss
,
reduce_last_loss
=
self
.
check_network_convergence
(
reduce_first_loss
,
reduce_last_loss
=
self
.
check_network_convergence
(
simple_fc_net
,
model
,
feed_dict
=
{
"image"
:
img
,
feed_dict
=
{
"image"
:
img
,
"label"
:
label
},
"label"
:
label
},
use_cuda
=
use_cuda
,
use_cuda
=
use_cuda
,
...
@@ -153,7 +133,24 @@ class TestMNIST(TestParallelExecutorBase):
...
@@ -153,7 +133,24 @@ class TestMNIST(TestParallelExecutorBase):
for
loss
in
zip
(
all_reduce_first_loss
,
reduce_first_loss
):
for
loss
in
zip
(
all_reduce_first_loss
,
reduce_first_loss
):
self
.
assertAlmostEquals
(
loss
[
0
],
loss
[
1
],
delta
=
1e-6
)
self
.
assertAlmostEquals
(
loss
[
0
],
loss
[
1
],
delta
=
1e-6
)
for
loss
in
zip
(
all_reduce_last_loss
,
reduce_last_loss
):
for
loss
in
zip
(
all_reduce_last_loss
,
reduce_last_loss
):
self
.
assertAlmostEquals
(
loss
[
0
],
loss
[
1
],
delta
=
1e-6
)
self
.
assertAlmostEquals
(
loss
[
0
],
loss
[
1
],
delta
=
1e-4
)
# simple_fc
def
check_simple_fc_convergence
(
self
,
use_cuda
,
use_reduce
=
False
):
if
use_cuda
and
not
core
.
is_compiled_with_cuda
():
return
self
.
check_network_convergence
(
simple_fc_net
,
use_cuda
=
use_cuda
)
self
.
check_network_convergence
(
simple_fc_net
,
use_cuda
=
use_cuda
,
allow_op_delay
=
True
)
img
,
label
=
self
.
_init_data
()
self
.
check_network_convergence
(
simple_fc_net
,
feed_dict
=
{
"image"
:
img
,
"label"
:
label
},
use_cuda
=
use_cuda
,
use_reduce
=
use_reduce
)
def
test_simple_fc
(
self
):
def
test_simple_fc
(
self
):
# use_cuda
# use_cuda
...
@@ -162,8 +159,8 @@ class TestMNIST(TestParallelExecutorBase):
...
@@ -162,8 +159,8 @@ class TestMNIST(TestParallelExecutorBase):
def
test_simple_fc_with_new_strategy
(
self
):
def
test_simple_fc_with_new_strategy
(
self
):
# use_cuda, use_reduce
# use_cuda, use_reduce
self
.
check_simple_fc_convergence_with_Reduce
(
True
)
self
.
_compare_reduce_and_allreduce
(
simple_fc_net
,
True
)
self
.
check_simple_fc_convergence_with_Reduce
(
False
)
self
.
_compare_reduce_and_allreduce
(
simple_fc_net
,
False
)
def
check_simple_fc_parallel_accuracy
(
self
,
use_cuda
):
def
check_simple_fc_parallel_accuracy
(
self
,
use_cuda
):
if
use_cuda
and
not
core
.
is_compiled_with_cuda
():
if
use_cuda
and
not
core
.
is_compiled_with_cuda
():
...
@@ -209,39 +206,13 @@ class TestMNIST(TestParallelExecutorBase):
...
@@ -209,39 +206,13 @@ class TestMNIST(TestParallelExecutorBase):
"label"
:
label
},
"label"
:
label
},
use_cuda
=
use_cuda
)
use_cuda
=
use_cuda
)
def
check_batchnorm_fc_convergence_use_reduce
(
self
,
use_cuda
):
if
use_cuda
and
not
core
.
is_compiled_with_cuda
():
return
self
.
check_network_convergence
(
fc_with_batchnorm
,
use_cuda
=
use_cuda
,
use_reduce
=
True
)
img
,
label
=
self
.
_init_data
()
all_reduce_first_loss
,
all_reduce_last_loss
=
self
.
check_network_convergence
(
fc_with_batchnorm
,
feed_dict
=
{
"image"
:
img
,
"label"
:
label
},
use_cuda
=
use_cuda
,
use_reduce
=
False
)
reduce_first_loss
,
reduce_last_loss
=
self
.
check_network_convergence
(
fc_with_batchnorm
,
feed_dict
=
{
"image"
:
img
,
"label"
:
label
},
use_cuda
=
use_cuda
,
use_reduce
=
True
)
for
loss
in
zip
(
all_reduce_first_loss
,
reduce_first_loss
):
self
.
assertAlmostEquals
(
loss
[
0
],
loss
[
1
],
delta
=
1e-6
)
for
loss
in
zip
(
all_reduce_last_loss
,
reduce_last_loss
):
self
.
assertAlmostEquals
(
loss
[
0
],
loss
[
1
],
delta
=
1e-4
)
def
test_batchnorm_fc
(
self
):
def
test_batchnorm_fc
(
self
):
self
.
check_batchnorm_fc_convergence
(
True
)
self
.
check_batchnorm_fc_convergence
(
True
)
self
.
check_batchnorm_fc_convergence
(
False
)
self
.
check_batchnorm_fc_convergence
(
False
)
def
test_batchnorm_fc_with_new_strategy
(
self
):
def
test_batchnorm_fc_with_new_strategy
(
self
):
self
.
check_batchnorm_fc_convergence_use_reduce
(
True
)
self
.
_compare_reduce_and_allreduce
(
fc_with_batchnorm
,
True
)
self
.
check_batchnorm_fc_convergence_use_reduce
(
False
)
self
.
_compare_reduce_and_allreduce
(
fc_with_batchnorm
,
False
)
if
__name__
==
'__main__'
:
if
__name__
==
'__main__'
:
...
...
python/paddle/fluid/tests/unittests/test_parallel_op.py
浏览文件 @
018e2f3a
...
@@ -120,7 +120,7 @@ class BaseParallelForTest(unittest.TestCase):
...
@@ -120,7 +120,7 @@ class BaseParallelForTest(unittest.TestCase):
pd
=
fluid
.
layers
.
ParallelDo
(
places
,
use_nccl
=
use_nccl
)
pd
=
fluid
.
layers
.
ParallelDo
(
places
,
use_nccl
=
use_nccl
)
data
=
next
(
generator
)
data
=
next
(
generator
)
if
isinstance
(
data
,
fluid
.
Variable
):
if
isinstance
(
data
,
fluid
.
framework
.
Variable
):
data
=
[
data
]
data
=
[
data
]
with
pd
.
do
():
with
pd
.
do
():
...
...
python/paddle/fluid/tests/unittests/test_reorder_lod_tensor.py
浏览文件 @
018e2f3a
...
@@ -15,6 +15,7 @@
...
@@ -15,6 +15,7 @@
import
unittest
import
unittest
import
paddle.fluid
as
fluid
import
paddle.fluid
as
fluid
import
paddle.fluid.core
as
core
import
paddle.fluid.core
as
core
from
paddle.fluid.layers.control_flow
import
lod_rank_table
import
numpy
import
numpy
...
@@ -34,7 +35,7 @@ class TestReorderLoDTensor(unittest.TestCase):
...
@@ -34,7 +35,7 @@ class TestReorderLoDTensor(unittest.TestCase):
dat
.
stop_gradient
=
False
dat
.
stop_gradient
=
False
rank_dat
=
fluid
.
layers
.
data
(
rank_dat
=
fluid
.
layers
.
data
(
name
=
cls
.
data_desc
[
1
][
0
],
shape
=
cls
.
data_desc
[
1
][
1
])
name
=
cls
.
data_desc
[
1
][
0
],
shape
=
cls
.
data_desc
[
1
][
1
])
table
=
fluid
.
layers
.
lod_rank_table
(
rank_dat
)
table
=
lod_rank_table
(
rank_dat
)
new_dat
=
fluid
.
layers
.
reorder_lod_tensor_by_rank
(
new_dat
=
fluid
.
layers
.
reorder_lod_tensor_by_rank
(
x
=
dat
,
rank_table
=
table
)
x
=
dat
,
rank_table
=
table
)
loss
=
fluid
.
layers
.
reduce_sum
(
new_dat
)
loss
=
fluid
.
layers
.
reduce_sum
(
new_dat
)
...
...
python/paddle/fluid/tests/unittests/test_shrink_rnn_memory.py
浏览文件 @
018e2f3a
...
@@ -21,6 +21,9 @@ from paddle.fluid.framework import default_main_program, switch_main_program
...
@@ -21,6 +21,9 @@ from paddle.fluid.framework import default_main_program, switch_main_program
from
paddle.fluid.framework
import
Program
from
paddle.fluid.framework
import
Program
import
numpy
as
np
import
numpy
as
np
from
paddle.fluid.layers.control_flow
import
shrink_memory
from
paddle.fluid.layers.control_flow
import
lod_rank_table
class
TestShrinkRNNMemoryBase
(
unittest
.
TestCase
):
class
TestShrinkRNNMemoryBase
(
unittest
.
TestCase
):
def
setUp
(
self
):
def
setUp
(
self
):
...
@@ -30,15 +33,15 @@ class TestShrinkRNNMemoryBase(unittest.TestCase):
...
@@ -30,15 +33,15 @@ class TestShrinkRNNMemoryBase(unittest.TestCase):
x
.
stop_gradient
=
False
x
.
stop_gradient
=
False
rank_table_tensor
=
layers
.
data
(
rank_table_tensor
=
layers
.
data
(
'rank_table_tensor'
,
shape
=
[
1
],
dtype
=
'float32'
,
lod_level
=
1
)
'rank_table_tensor'
,
shape
=
[
1
],
dtype
=
'float32'
,
lod_level
=
1
)
table
=
l
ayers
.
l
od_rank_table
(
x
=
rank_table_tensor
)
table
=
lod_rank_table
(
x
=
rank_table_tensor
)
i
=
layers
.
zeros
(
dtype
=
'int64'
,
shape
=
[
1
])
i
=
layers
.
zeros
(
dtype
=
'int64'
,
shape
=
[
1
])
self
.
mem1
=
layers
.
shrink_memory
(
x
=
x
,
i
=
i
,
table
=
table
)
self
.
mem1
=
shrink_memory
(
x
=
x
,
i
=
i
,
table
=
table
)
i
=
layers
.
increment
(
x
=
i
)
i
=
layers
.
increment
(
x
=
i
)
i
.
stop_gradient
=
True
i
.
stop_gradient
=
True
self
.
mem2
=
layers
.
shrink_memory
(
x
=
self
.
mem1
,
i
=
i
,
table
=
table
)
self
.
mem2
=
shrink_memory
(
x
=
self
.
mem1
,
i
=
i
,
table
=
table
)
i
=
layers
.
increment
(
x
=
i
)
i
=
layers
.
increment
(
x
=
i
)
i
.
stop_gradient
=
True
i
.
stop_gradient
=
True
self
.
mem3
=
layers
.
shrink_memory
(
x
=
self
.
mem2
,
i
=
i
,
table
=
table
)
self
.
mem3
=
shrink_memory
(
x
=
self
.
mem2
,
i
=
i
,
table
=
table
)
mem3_mean
=
layers
.
mean
(
self
.
mem3
)
mem3_mean
=
layers
.
mean
(
self
.
mem3
)
append_backward
(
loss
=
mem3_mean
)
append_backward
(
loss
=
mem3_mean
)
self
.
x_grad
=
self
.
main_program
.
global_block
().
var
(
'x@GRAD'
)
self
.
x_grad
=
self
.
main_program
.
global_block
().
var
(
'x@GRAD'
)
...
...
python/paddle/fluid/tests/unittests/test_split_and_merge_lod_tensor_op.py
浏览文件 @
018e2f3a
...
@@ -19,6 +19,8 @@ import paddle.fluid.layers as layers
...
@@ -19,6 +19,8 @@ import paddle.fluid.layers as layers
from
paddle.fluid.framework
import
Program
,
program_guard
from
paddle.fluid.framework
import
Program
,
program_guard
from
paddle.fluid.executor
import
Executor
from
paddle.fluid.executor
import
Executor
from
paddle.fluid.backward
import
append_backward
from
paddle.fluid.backward
import
append_backward
from
paddle.fluid.layers.control_flow
import
split_lod_tensor
from
paddle.fluid.layers.control_flow
import
merge_lod_tensor
class
TestCPULoDTensorArrayOps
(
unittest
.
TestCase
):
class
TestCPULoDTensorArrayOps
(
unittest
.
TestCase
):
...
@@ -96,12 +98,11 @@ class TestCPULoDTensorArrayOps(unittest.TestCase):
...
@@ -96,12 +98,11 @@ class TestCPULoDTensorArrayOps(unittest.TestCase):
y
=
layers
.
data
(
name
=
'y'
,
shape
=
[
1
])
y
=
layers
.
data
(
name
=
'y'
,
shape
=
[
1
])
y
.
persistable
=
True
y
.
persistable
=
True
out_true
,
out_false
=
layers
.
split_lod_tensor
(
out_true
,
out_false
=
split_lod_tensor
(
input
=
x
,
mask
=
y
,
level
=
level
)
input
=
x
,
mask
=
y
,
level
=
level
)
out_true
.
persistable
=
True
out_true
.
persistable
=
True
out_false
.
persistable
=
True
out_false
.
persistable
=
True
out
=
layers
.
merge_lod_tensor
(
out
=
merge_lod_tensor
(
in_true
=
out_true
,
in_false
=
out_false
,
mask
=
y
,
x
=
x
,
level
=
level
)
in_true
=
out_true
,
in_false
=
out_false
,
mask
=
y
,
x
=
x
,
level
=
level
)
out
.
persistable
=
True
out
.
persistable
=
True
...
@@ -142,9 +143,8 @@ class TestCPUSplitMergeLoDTensorGrad(unittest.TestCase):
...
@@ -142,9 +143,8 @@ class TestCPUSplitMergeLoDTensorGrad(unittest.TestCase):
level
=
0
level
=
0
out_true
,
out_false
=
layers
.
split_lod_tensor
(
out_true
,
out_false
=
split_lod_tensor
(
input
=
x
,
mask
=
y
,
level
=
level
)
input
=
x
,
mask
=
y
,
level
=
level
)
out
=
merge_lod_tensor
(
out
=
layers
.
merge_lod_tensor
(
in_true
=
out_true
,
in_false
=
out_false
,
mask
=
y
,
x
=
x
,
level
=
level
)
in_true
=
out_true
,
in_false
=
out_false
,
mask
=
y
,
x
=
x
,
level
=
level
)
mean
=
layers
.
mean
(
out
)
mean
=
layers
.
mean
(
out
)
...
...
python/paddle/fluid/transpiler/distribute_transpiler.py
浏览文件 @
018e2f3a
...
@@ -38,7 +38,7 @@ from ps_dispatcher import RoundRobin, HashName, PSDispatcher
...
@@ -38,7 +38,7 @@ from ps_dispatcher import RoundRobin, HashName, PSDispatcher
from
..
import
core
,
framework
from
..
import
core
,
framework
from
..framework
import
Program
,
default_main_program
,
\
from
..framework
import
Program
,
default_main_program
,
\
default_startup_program
,
Block
,
\
default_startup_program
,
Block
,
\
Variable
,
Parameter
,
grad_var_name
Parameter
,
grad_var_name
from
details
import
*
from
details
import
*
LOOKUP_TABLE_TYPE
=
"lookup_table"
LOOKUP_TABLE_TYPE
=
"lookup_table"
...
@@ -918,7 +918,8 @@ class DistributeTranspiler(object):
...
@@ -918,7 +918,8 @@ class DistributeTranspiler(object):
# create table optimize block in pserver program
# create table optimize block in pserver program
table_opt_op
=
[
table_opt_op
=
[
op
for
op
in
self
.
optimize_ops
op
for
op
in
self
.
optimize_ops
if
op
.
input
(
"Param"
)[
0
]
==
self
.
table_name
if
'Param'
in
op
.
input_names
and
op
.
input
(
"Param"
)[
0
]
==
self
.
table_name
][
0
]
][
0
]
table_opt_block
=
pserver_program
.
create_block
(
pre_block_idx
)
table_opt_block
=
pserver_program
.
create_block
(
pre_block_idx
)
# only support sgd now
# only support sgd now
...
@@ -1075,7 +1076,6 @@ class DistributeTranspiler(object):
...
@@ -1075,7 +1076,6 @@ class DistributeTranspiler(object):
]
]
def
_clone_var
(
self
,
block
,
var
,
persistable
=
True
):
def
_clone_var
(
self
,
block
,
var
,
persistable
=
True
):
assert
isinstance
(
var
,
Variable
)
return
block
.
create_var
(
return
block
.
create_var
(
name
=
var
.
name
,
name
=
var
.
name
,
shape
=
var
.
shape
,
shape
=
var
.
shape
,
...
...
python/paddle/fluid/transpiler/memory_optimization_transpiler.py
浏览文件 @
018e2f3a
...
@@ -14,7 +14,7 @@
...
@@ -14,7 +14,7 @@
from
collections
import
defaultdict
from
collections
import
defaultdict
from
..
import
core
from
..
import
core
from
..framework
import
Program
,
default_main_program
,
Parameter
,
Variable
from
..framework
import
Program
,
default_main_program
,
Parameter
from
..backward
import
_rename_arg_
from
..backward
import
_rename_arg_
dtype_to_size
=
{
dtype_to_size
=
{
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录