Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
PaddlePaddle
Paddle
提交
2e0b8713
P
Paddle
项目概览
PaddlePaddle
/
Paddle
大约 1 年 前同步成功
通知
2298
Star
20931
Fork
5422
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
1423
列表
看板
标记
里程碑
合并请求
543
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
1,423
Issue
1,423
列表
看板
标记
里程碑
合并请求
543
合并请求
543
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
2e0b8713
编写于
4月 04, 2019
作者:
M
minqiyang
浏览文件
操作
浏览文件
下载
差异文件
Merge branch 'develop' of
https://github.com/PaddlePaddle/Paddle
into imperative_dqn
上级
b2924940
b4c3a6aa
变更
75
展开全部
隐藏空白更改
内联
并排
Showing
75 changed file
with
2468 addition
and
371 deletion
+2468
-371
paddle/fluid/API.spec
paddle/fluid/API.spec
+1
-0
paddle/fluid/framework/details/all_reduce_deps_pass.cc
paddle/fluid/framework/details/all_reduce_deps_pass.cc
+151
-90
paddle/fluid/framework/details/all_reduce_op_handle.cc
paddle/fluid/framework/details/all_reduce_op_handle.cc
+1
-1
paddle/fluid/framework/details/build_strategy.cc
paddle/fluid/framework/details/build_strategy.cc
+8
-8
paddle/fluid/framework/details/op_handle_base.cc
paddle/fluid/framework/details/op_handle_base.cc
+1
-1
paddle/fluid/framework/op_desc.cc
paddle/fluid/framework/op_desc.cc
+26
-2
paddle/fluid/framework/parallel_executor.cc
paddle/fluid/framework/parallel_executor.cc
+2
-5
paddle/fluid/imperative/CMakeLists.txt
paddle/fluid/imperative/CMakeLists.txt
+3
-0
paddle/fluid/imperative/nccl_context.cc
paddle/fluid/imperative/nccl_context.cc
+134
-0
paddle/fluid/imperative/nccl_context.h
paddle/fluid/imperative/nccl_context.h
+81
-0
paddle/fluid/imperative/nccl_context_test.cc
paddle/fluid/imperative/nccl_context_test.cc
+52
-0
paddle/fluid/imperative/tracer.cc
paddle/fluid/imperative/tracer.cc
+2
-1
paddle/fluid/inference/api/analysis_config.cc
paddle/fluid/inference/api/analysis_config.cc
+3
-10
paddle/fluid/inference/api/helper.h
paddle/fluid/inference/api/helper.h
+11
-10
paddle/fluid/inference/api/paddle_pass_builder.cc
paddle/fluid/inference/api/paddle_pass_builder.cc
+44
-8
paddle/fluid/inference/api/paddle_pass_builder.h
paddle/fluid/inference/api/paddle_pass_builder.h
+7
-34
paddle/fluid/inference/tests/api/CMakeLists.txt
paddle/fluid/inference/tests/api/CMakeLists.txt
+9
-5
paddle/fluid/inference/tests/api/analyzer_bert_tester.cc
paddle/fluid/inference/tests/api/analyzer_bert_tester.cc
+1
-1
paddle/fluid/inference/tests/api/analyzer_dam_tester.cc
paddle/fluid/inference/tests/api/analyzer_dam_tester.cc
+5
-3
paddle/fluid/inference/tests/api/analyzer_int8_image_classification_tester.cc
...ce/tests/api/analyzer_int8_image_classification_tester.cc
+29
-24
paddle/fluid/inference/tests/api/analyzer_lac_tester.cc
paddle/fluid/inference/tests/api/analyzer_lac_tester.cc
+6
-4
paddle/fluid/inference/tests/api/analyzer_mm_dnn_tester.cc
paddle/fluid/inference/tests/api/analyzer_mm_dnn_tester.cc
+4
-3
paddle/fluid/inference/tests/api/analyzer_ner_tester.cc
paddle/fluid/inference/tests/api/analyzer_ner_tester.cc
+6
-4
paddle/fluid/inference/tests/api/analyzer_pyramid_dnn_tester.cc
.../fluid/inference/tests/api/analyzer_pyramid_dnn_tester.cc
+6
-4
paddle/fluid/inference/tests/api/analyzer_resnet50_tester.cc
paddle/fluid/inference/tests/api/analyzer_resnet50_tester.cc
+1
-1
paddle/fluid/inference/tests/api/analyzer_rnn1_tester.cc
paddle/fluid/inference/tests/api/analyzer_rnn1_tester.cc
+2
-2
paddle/fluid/inference/tests/api/analyzer_rnn2_tester.cc
paddle/fluid/inference/tests/api/analyzer_rnn2_tester.cc
+5
-3
paddle/fluid/inference/tests/api/analyzer_seq_conv1_tester.cc
...le/fluid/inference/tests/api/analyzer_seq_conv1_tester.cc
+6
-4
paddle/fluid/inference/tests/api/analyzer_seq_pool1_tester.cc
...le/fluid/inference/tests/api/analyzer_seq_pool1_tester.cc
+1
-1
paddle/fluid/inference/tests/api/analyzer_text_classification_tester.cc
...nference/tests/api/analyzer_text_classification_tester.cc
+4
-3
paddle/fluid/inference/tests/api/analyzer_transformer_tester.cc
.../fluid/inference/tests/api/analyzer_transformer_tester.cc
+1
-1
paddle/fluid/inference/tests/api/analyzer_vis_tester.cc
paddle/fluid/inference/tests/api/analyzer_vis_tester.cc
+3
-2
paddle/fluid/inference/tests/api/tester_helper.h
paddle/fluid/inference/tests/api/tester_helper.h
+92
-55
paddle/fluid/inference/tests/api/trt_models_tester.cc
paddle/fluid/inference/tests/api/trt_models_tester.cc
+1
-1
paddle/fluid/op_use_default_grad_op_maker.spec
paddle/fluid/op_use_default_grad_op_maker.spec
+0
-6
paddle/fluid/operators/batch_size_like.h
paddle/fluid/operators/batch_size_like.h
+3
-0
paddle/fluid/operators/controlflow/conditional_block_op.cc
paddle/fluid/operators/controlflow/conditional_block_op.cc
+28
-8
paddle/fluid/operators/elementwise/elementwise_div_op.cc
paddle/fluid/operators/elementwise/elementwise_div_op.cc
+38
-1
paddle/fluid/operators/elementwise/elementwise_div_op.h
paddle/fluid/operators/elementwise/elementwise_div_op.h
+4
-2
paddle/fluid/operators/elementwise/elementwise_max_op.cc
paddle/fluid/operators/elementwise/elementwise_max_op.cc
+40
-1
paddle/fluid/operators/elementwise/elementwise_max_op.h
paddle/fluid/operators/elementwise/elementwise_max_op.h
+1
-1
paddle/fluid/operators/elementwise/elementwise_min_op.cc
paddle/fluid/operators/elementwise/elementwise_min_op.cc
+40
-1
paddle/fluid/operators/elementwise/elementwise_min_op.h
paddle/fluid/operators/elementwise/elementwise_min_op.h
+1
-1
paddle/fluid/operators/elementwise/elementwise_op.h
paddle/fluid/operators/elementwise/elementwise_op.h
+5
-5
paddle/fluid/operators/fill_constant_batch_size_like_op.cc
paddle/fluid/operators/fill_constant_batch_size_like_op.cc
+3
-1
paddle/fluid/operators/fill_zeros_like_op.cc
paddle/fluid/operators/fill_zeros_like_op.cc
+45
-0
paddle/fluid/operators/fill_zeros_like_op.cu.cc
paddle/fluid/operators/fill_zeros_like_op.cu.cc
+10
-0
paddle/fluid/operators/gaussian_random_batch_size_like_op.cc
paddle/fluid/operators/gaussian_random_batch_size_like_op.cc
+5
-9
paddle/fluid/operators/hinge_loss_op.cc
paddle/fluid/operators/hinge_loss_op.cc
+21
-1
paddle/fluid/operators/huber_loss_op.cc
paddle/fluid/operators/huber_loss_op.cc
+23
-13
paddle/fluid/operators/load_op.cc
paddle/fluid/operators/load_op.cc
+1
-1
paddle/fluid/operators/pixel_shuffle_op.cc
paddle/fluid/operators/pixel_shuffle_op.cc
+135
-0
paddle/fluid/operators/pixel_shuffle_op.cu
paddle/fluid/operators/pixel_shuffle_op.cu
+26
-0
paddle/fluid/operators/pixel_shuffle_op.h
paddle/fluid/operators/pixel_shuffle_op.h
+82
-0
paddle/fluid/operators/row_conv_op.cc
paddle/fluid/operators/row_conv_op.cc
+26
-4
paddle/fluid/operators/uniform_random_batch_size_like_op.cc
paddle/fluid/operators/uniform_random_batch_size_like_op.cc
+5
-4
paddle/fluid/pybind/CMakeLists.txt
paddle/fluid/pybind/CMakeLists.txt
+1
-1
paddle/fluid/pybind/imperative.cc
paddle/fluid/pybind/imperative.cc
+42
-1
paddle/fluid/pybind/imperative.h
paddle/fluid/pybind/imperative.h
+2
-1
paddle/fluid/pybind/pybind.cc
paddle/fluid/pybind/pybind.cc
+1
-1
python/paddle/distributed/launch.py
python/paddle/distributed/launch.py
+1
-0
python/paddle/fluid/backward.py
python/paddle/fluid/backward.py
+10
-3
python/paddle/fluid/dygraph/__init__.py
python/paddle/fluid/dygraph/__init__.py
+4
-0
python/paddle/fluid/dygraph/nn.py
python/paddle/fluid/dygraph/nn.py
+540
-8
python/paddle/fluid/dygraph/parallel.py
python/paddle/fluid/dygraph/parallel.py
+60
-0
python/paddle/fluid/layers/nn.py
python/paddle/fluid/layers/nn.py
+60
-0
python/paddle/fluid/metrics.py
python/paddle/fluid/metrics.py
+5
-3
python/paddle/fluid/tests/unittests/CMakeLists.txt
python/paddle/fluid/tests/unittests/CMakeLists.txt
+2
-1
python/paddle/fluid/tests/unittests/test_eager_deletion_conditional_block.py
.../tests/unittests/test_eager_deletion_conditional_block.py
+23
-0
python/paddle/fluid/tests/unittests/test_eager_deletion_no_need_buffer_vars_inference.py
...ests/test_eager_deletion_no_need_buffer_vars_inference.py
+3
-0
python/paddle/fluid/tests/unittests/test_fill_zeros_like2_op.py
.../paddle/fluid/tests/unittests/test_fill_zeros_like2_op.py
+50
-0
python/paddle/fluid/tests/unittests/test_imperative_basic.py
python/paddle/fluid/tests/unittests/test_imperative_basic.py
+49
-0
python/paddle/fluid/tests/unittests/test_imperative_transformer.py
...ddle/fluid/tests/unittests/test_imperative_transformer.py
+5
-2
python/paddle/fluid/tests/unittests/test_layers.py
python/paddle/fluid/tests/unittests/test_layers.py
+309
-0
python/paddle/fluid/tests/unittests/test_pixel_shuffle.py
python/paddle/fluid/tests/unittests/test_pixel_shuffle.py
+50
-0
未找到文件。
paddle/fluid/API.spec
浏览文件 @
2e0b8713
...
...
@@ -235,6 +235,7 @@ paddle.fluid.layers.huber_loss (ArgSpec(args=['input', 'label', 'delta'], vararg
paddle.fluid.layers.kldiv_loss (ArgSpec(args=['x', 'target', 'reduction', 'name'], varargs=None, keywords=None, defaults=('mean', None)), ('document', '776d536cac47c89073abc7ee524d5aec'))
paddle.fluid.layers.tree_conv (ArgSpec(args=['nodes_vector', 'edge_set', 'output_size', 'num_filters', 'max_depth', 'act', 'param_attr', 'bias_attr', 'name'], varargs=None, keywords=None, defaults=(1, 2, 'tanh', None, None, None)), ('document', '34ea12ac9f10a65dccbc50100d12e607'))
paddle.fluid.layers.npair_loss (ArgSpec(args=['anchor', 'positive', 'labels', 'l2_reg'], varargs=None, keywords=None, defaults=(0.002,)), ('document', '46994d10276dd4cb803b4062b5d14329'))
paddle.fluid.layers.pixel_shuffle (ArgSpec(args=['x', 'upscale_factor'], varargs=None, keywords=None, defaults=None), ('document', 'ad669cdf83e72a69ebc5ed79e36486de'))
paddle.fluid.layers.fsp_matrix (ArgSpec(args=['x', 'y'], varargs=None, keywords=None, defaults=None), ('document', 'b76ccca3735bea4a58a0dbf0d77c5393'))
paddle.fluid.layers.data (ArgSpec(args=['name', 'shape', 'append_batch_size', 'dtype', 'lod_level', 'type', 'stop_gradient'], varargs=None, keywords=None, defaults=(True, 'float32', 0, VarType.LOD_TENSOR, True)), ('document', '33bbd42027d872b3818b3d64ec52e139'))
paddle.fluid.layers.open_files (ArgSpec(args=['filenames', 'shapes', 'lod_levels', 'dtypes', 'thread_num', 'buffer_size', 'pass_num', 'is_test'], varargs=None, keywords=None, defaults=(None, None, 1, None)), ('document', 'b1ae2e1cc0750e58726374061ea90ecc'))
...
...
paddle/fluid/framework/details/all_reduce_deps_pass.cc
浏览文件 @
2e0b8713
...
...
@@ -13,125 +13,186 @@
// limitations under the License.
#include <algorithm>
#include <m
emory
>
#include <m
ap
>
#include <string>
#include <unordered_map>
#include <unordered_set>
#include <utility>
#include <vector>
#include "paddle/fluid/framework/details/all_reduce_deps_pass.h"
#include "paddle/fluid/framework/details/all_reduce_op_handle.h"
#include "paddle/fluid/framework/details/container_cast.h"
#include "paddle/fluid/framework/details/multi_devices_helper.h"
#include "paddle/fluid/framework/details/op_graph_view.h"
#include "paddle/fluid/framework/
details/var_handle
.h"
#include "paddle/fluid/framework/
ir/graph
.h"
#include "paddle/fluid/framework/ir/graph_helper.h"
#include "paddle/fluid/framework/ir/pass.h"
#include "paddle/fluid/framework/op_proto_maker.h"
namespace
paddle
{
namespace
framework
{
namespace
details
{
VarHandle
*
GetValidInput
(
const
OpHandleBase
*
a
)
{
for
(
auto
p
:
a
->
Inputs
())
{
VarHandle
*
b
=
dynamic_cast
<
VarHandle
*>
(
p
);
if
(
b
)
{
return
b
;
class
AllReduceDepsPass
:
public
ir
::
Pass
{
protected:
void
ApplyImpl
(
ir
::
Graph
*
graph
)
const
override
{
std
::
vector
<
AllReduceOpHandle
*>
all_reduce_op_handles
=
GetSortedAllReduceOps
(
*
graph
);
for
(
size_t
i
=
1
;
i
<
all_reduce_op_handles
.
size
();
++
i
)
{
auto
*
dep_var
=
new
DummyVarHandle
(
graph
->
CreateControlDepVar
());
graph
->
Get
<
GraphDepVars
>
(
kGraphDepVars
).
emplace
(
dep_var
);
all_reduce_op_handles
[
i
-
1
]
->
AddOutput
(
dep_var
);
all_reduce_op_handles
[
i
]
->
AddInput
(
dep_var
);
}
}
return
nullptr
;
}
void
AllReduceDepsPass
::
ApplyImpl
(
ir
::
Graph
*
graph
)
const
{
auto
graph_ops
=
ir
::
FilterByNodeWrapper
<
OpHandleBase
>
(
*
graph
);
// get vars order
int
order
=
0
;
std
::
unordered_map
<
std
::
string
,
int
>
vars
;
// TODO(gongwb): use graph topology sort to find the order of operators.
// Note that must assert topology sort is stable
auto
&
ops
=
graph
->
Get
<
const
std
::
vector
<
OpDesc
*>>
(
kStaleProgramOpDescs
);
for
(
auto
*
op_desc
:
ops
)
{
try
{
bool
is_bk_op
=
static_cast
<
bool
>
(
boost
::
get
<
int
>
(
op_desc
->
GetAttr
(
OpProtoAndCheckerMaker
::
OpRoleAttrName
()))
&
static_cast
<
int
>
(
OpRole
::
kBackward
));
if
(
!
is_bk_op
)
continue
;
auto
backward_vars
=
boost
::
get
<
std
::
vector
<
std
::
string
>>
(
op_desc
->
GetNullableAttr
(
OpProtoAndCheckerMaker
::
OpRoleVarAttrName
()));
PADDLE_ENFORCE_EQ
(
backward_vars
.
size
()
%
2
,
0
);
auto
outputs
=
op_desc
->
Outputs
();
for
(
auto
&
o_it
:
outputs
)
{
for
(
auto
&
v
:
o_it
.
second
)
{
// values
vars
[
v
]
=
order
;
VLOG
(
10
)
<<
"in all_reduce_deps_pass:"
<<
v
;
}
}
order
++
;
}
catch
(
boost
::
bad_get
e
)
{
if
(
VLOG_IS_ON
(
10
))
{
DebugString
(
*
graph
,
all_reduce_op_handles
);
}
}
std
::
vector
<
OpHandleBase
*>
dist_ops
;
// get allreduce ops.
for
(
auto
&
op
:
graph_ops
)
{
// FIXME(gongwb):add broad cast.
if
(
op
->
Name
()
==
"all_reduce"
||
op
->
Name
()
==
"reduce"
)
{
dist_ops
.
push_back
(
op
);
std
::
vector
<
AllReduceOpHandle
*>
GetSortedAllReduceOps
(
const
ir
::
Graph
&
graph
)
const
{
std
::
vector
<
AllReduceOpHandle
*>
all_reduce_op_handles
;
std
::
unordered_map
<
OpHandleBase
*
,
size_t
>
pending_ops
;
std
::
unordered_set
<
OpHandleBase
*>
ready_ops
;
std
::
unordered_set
<
OpHandleBase
*>
next_ready_ops
;
auto
op_handles
=
ir
::
FilterByNodeWrapper
<
OpHandleBase
>
(
graph
);
size_t
num_of_ops
=
op_handles
.
size
();
for
(
OpHandleBase
*
op
:
op_handles
)
{
size_t
not_ready_vars
=
op
->
NotReadyInputSize
();
if
(
not_ready_vars
)
{
pending_ops
.
insert
({
op
,
not_ready_vars
});
}
else
{
ready_ops
.
insert
(
op
);
}
}
}
VLOG
(
10
)
<<
"dist_ops size:"
<<
dist_ops
.
size
()
<<
", outputs size:"
<<
vars
.
size
()
<<
", ops size:"
<<
ops
.
size
();
std
::
sort
(
dist_ops
.
begin
(),
dist_ops
.
end
(),
[
&
](
OpHandleBase
*
op1
,
OpHandleBase
*
op2
)
{
VarHandle
*
i0
=
dynamic_cast
<
VarHandle
*>
(
GetValidInput
(
op1
));
VarHandle
*
i1
=
dynamic_cast
<
VarHandle
*>
(
GetValidInput
(
op2
));
PADDLE_ENFORCE
(
i0
!=
nullptr
&&
i1
!=
nullptr
,
"%s convert to %s error"
,
op1
->
DebugString
(),
op2
->
DebugString
());
auto
l_it
=
vars
.
find
(
i0
->
name
());
auto
r_it
=
vars
.
find
(
i1
->
name
());
PADDLE_ENFORCE
(
l_it
!=
vars
.
end
()
&&
r_it
!=
vars
.
end
(),
"can't find var's name %s and %s in opdesc"
,
i0
->
name
(),
i1
->
name
());
if
(
l_it
->
second
<
r_it
->
second
)
return
true
;
GetSortedAllReduceOps
(
ready_ops
,
&
all_reduce_op_handles
);
size_t
has_run_ops
=
ready_ops
.
size
();
while
(
has_run_ops
!=
num_of_ops
)
{
for
(
auto
*
op
:
ready_ops
)
{
for
(
auto
&
ready_var
:
op
->
Outputs
())
{
for
(
auto
*
pend_op
:
ready_var
->
PendingOps
())
{
auto
&
deps
=
--
pending_ops
[
pend_op
];
if
(
deps
==
0
)
{
next_ready_ops
.
insert
(
pend_op
);
}
}
}
}
if
(
l_it
->
second
==
r_it
->
second
)
{
return
i0
->
name
()
<
i1
->
name
();
PADDLE_ENFORCE_NE
(
next_ready_ops
.
size
(),
0
,
"There maybe have a cycle."
);
ready_ops
.
clear
();
std
::
swap
(
ready_ops
,
next_ready_ops
);
GetSortedAllReduceOps
(
ready_ops
,
&
all_reduce_op_handles
);
has_run_ops
+=
ready_ops
.
size
();
}
return
all_reduce_op_handles
;
}
return
false
;
});
// add dependency.
auto
&
sorted_ops
=
dist_ops
;
for
(
size_t
i
=
1
;
i
<
sorted_ops
.
size
();
++
i
)
{
auto
*
dep_var
=
new
DummyVarHandle
(
graph
->
CreateControlDepVar
());
auto
*
pre_op
=
sorted_ops
[
i
-
1
];
auto
*
op
=
sorted_ops
[
i
];
pre_op
->
AddOutput
(
dep_var
);
op
->
AddInput
(
dep_var
);
graph
->
Get
<
GraphDepVars
>
(
kGraphDepVars
).
emplace
(
dep_var
);
void
GetSortedAllReduceOps
(
const
std
::
unordered_set
<
OpHandleBase
*>&
ready_ops
,
std
::
vector
<
AllReduceOpHandle
*>*
all_reduce_op_handles
)
const
{
std
::
vector
<
AllReduceOpHandle
*>
current_all_reduce_op_handles
;
for
(
auto
&
op_handle
:
ready_ops
)
{
auto
all_reduce_op_handle
=
dynamic_cast
<
AllReduceOpHandle
*>
(
op_handle
);
if
(
all_reduce_op_handle
)
{
current_all_reduce_op_handles
.
emplace_back
(
all_reduce_op_handle
);
}
}
VLOG
(
10
)
<<
"add all_reduce sequential dependencies between "
<<
pre_op
<<
" and "
<<
op
;
// NOTE(zcd): For distributed training, it is important to keep the order of
// allReduce on each node consistent. Otherwise, hang may occur.
// Sort the current_all_reduce_op_handles according to the name of input.
sort
(
current_all_reduce_op_handles
.
begin
(),
current_all_reduce_op_handles
.
end
(),
[](
const
AllReduceOpHandle
*
left
,
const
AllReduceOpHandle
*
right
)
->
bool
{
auto
left_in_vars
=
DynamicCast
<
VarHandle
>
(
left
->
Inputs
());
auto
right_in_vars
=
DynamicCast
<
VarHandle
>
(
right
->
Inputs
());
PADDLE_ENFORCE_GT
(
left_in_vars
.
size
(),
0
);
PADDLE_ENFORCE_EQ
(
left_in_vars
.
size
(),
right_in_vars
.
size
());
return
left_in_vars
[
0
]
->
Name
()
>
right_in_vars
[
0
]
->
Name
();
});
all_reduce_op_handles
->
insert
(
all_reduce_op_handles
->
end
(),
current_all_reduce_op_handles
.
begin
(),
current_all_reduce_op_handles
.
end
());
}
VLOG
(
10
)
<<
"pre_op:"
<<
pre_op
->
DebugString
()
<<
", op:"
<<
op
->
DebugString
();
void
DebugString
(
const
ir
::
Graph
&
graph
,
const
std
::
vector
<
AllReduceOpHandle
*>&
all_reduce_op_handles
)
const
{
// get vars order
std
::
map
<
int
,
std
::
vector
<
std
::
string
>>
vars
=
GetSoredGradientsFromStaleProgram
(
graph
);
std
::
stringstream
out
;
size_t
grads_of_stale_program
=
0
;
out
<<
"Get Order From kStaleProgramOpDescs: "
;
for
(
auto
&
var
:
vars
)
{
out
<<
"Order "
<<
var
.
first
<<
" ["
;
for
(
auto
&
var_name
:
var
.
second
)
{
out
<<
var_name
<<
", "
;
++
grads_of_stale_program
;
}
out
<<
"], "
;
}
VLOG
(
10
)
<<
out
.
str
();
std
::
stringstream
out2
;
out2
<<
"Get Order From Topological order: "
;
for
(
auto
&
op
:
all_reduce_op_handles
)
{
bool
find_valid_input
=
false
;
for
(
auto
&
in_var
:
op
->
Inputs
())
{
if
(
dynamic_cast
<
VarHandle
*>
(
in_var
))
{
out2
<<
in_var
->
Name
()
<<
", "
;
find_valid_input
=
true
;
break
;
}
}
PADDLE_ENFORCE
(
find_valid_input
,
"Doesn't find valid input."
);
}
VLOG
(
10
)
<<
out2
.
str
();
if
(
grads_of_stale_program
!=
all_reduce_op_handles
.
size
())
{
VLOG
(
10
)
<<
"The gradients number of stale program and graph is not equal."
;
}
}
}
std
::
map
<
int
,
std
::
vector
<
std
::
string
>>
GetSoredGradientsFromStaleProgram
(
const
ir
::
Graph
&
graph
)
const
{
std
::
map
<
int
,
std
::
vector
<
std
::
string
>>
vars
;
auto
ops
=
graph
.
Get
<
const
std
::
vector
<
OpDesc
*>>
(
kStaleProgramOpDescs
);
int
order
=
0
;
for
(
auto
*
op_desc
:
ops
)
{
try
{
bool
is_bk_op
=
static_cast
<
bool
>
(
boost
::
get
<
int
>
(
op_desc
->
GetAttr
(
OpProtoAndCheckerMaker
::
OpRoleAttrName
()))
&
static_cast
<
int
>
(
OpRole
::
kBackward
));
if
(
!
is_bk_op
)
continue
;
auto
backward_vars
=
boost
::
get
<
std
::
vector
<
std
::
string
>>
(
op_desc
->
GetNullableAttr
(
OpProtoAndCheckerMaker
::
OpRoleVarAttrName
()));
if
(
backward_vars
.
empty
())
continue
;
PADDLE_ENFORCE_EQ
(
backward_vars
.
size
()
%
2
,
0
);
for
(
size_t
i
=
1
;
i
<
backward_vars
.
size
();
i
+=
2
)
{
vars
[
order
].
emplace_back
(
backward_vars
[
i
]);
VLOG
(
1
)
<<
"get parameter and gradient: "
<<
backward_vars
[
i
-
1
]
<<
", "
<<
backward_vars
[
i
];
}
order
++
;
}
catch
(
boost
::
bad_get
e
)
{
}
}
return
vars
;
}
};
}
// namespace details
}
// namespace framework
}
// namespace paddle
...
...
paddle/fluid/framework/details/all_reduce_op_handle.cc
浏览文件 @
2e0b8713
...
...
@@ -28,7 +28,7 @@
// asynchronous nccl allreduce or synchronous issue:
// https://github.com/PaddlePaddle/Paddle/issues/15049
DEFINE_bool
(
sync_nccl_allreduce
,
fals
e
,
sync_nccl_allreduce
,
tru
e
,
"If set true, will call `cudaStreamSynchronize(nccl_stream)`"
"after allreduce, this mode can get better performance in some scenarios."
);
...
...
paddle/fluid/framework/details/build_strategy.cc
浏览文件 @
2e0b8713
...
...
@@ -163,14 +163,11 @@ class ParallelExecutorPassBuilder : public ir::PassBuilder {
"graph_printer"
,
new
details
::
GraphvizSSAGraphPrinter
);
}
// Verify that the graph is correct for multi-device executor.
AppendPass
(
"multi_devices_check_pass"
);
if
(
VLOG_IS_ON
(
2
))
{
AppendPass
(
"all_reduce_deps_pass"
);
}
if
(
SeqOnlyAllReduceOps
(
strategy_
))
{
// experimental shows that the program will be faster if append
// all_reduce_deps_pass here.
if
(
!
strategy_
.
enable_parallel_graph_
&&
(
SeqOnlyAllReduceOps
(
strategy_
)
||
strategy
.
reduce_
==
BuildStrategy
::
ReduceStrategy
::
kAllReduce
))
{
VLOG
(
10
)
<<
"Add all_reduce_deps_pass"
;
AppendPass
(
"all_reduce_deps_pass"
);
}
...
...
@@ -179,6 +176,9 @@ class ParallelExecutorPassBuilder : public ir::PassBuilder {
VLOG
(
10
)
<<
"Add modify_op_lock_and_record_event_pass"
;
AppendPass
(
"modify_op_lock_and_record_event_pass"
);
}
// Verify that the graph is correct for multi-device executor.
AppendPass
(
"multi_devices_check_pass"
);
}
// Convert graph to run on multi-devices.
...
...
paddle/fluid/framework/details/op_handle_base.cc
浏览文件 @
2e0b8713
...
...
@@ -68,7 +68,7 @@ void OpHandleBase::Run(bool use_cuda) {
if
(
out_var_handle
)
{
PADDLE_ENFORCE
(
platform
::
is_same_place
(
place
,
out_var_handle
->
place
()),
"The place of
in
put(%s) is not consistent with the "
"The place of
out
put(%s) is not consistent with the "
"place of current op(%s)."
,
out_var_handle
->
Name
(),
Name
());
out_var_handle
->
SetGenerateEvent
(
events_
.
at
(
dev_id
));
...
...
paddle/fluid/framework/op_desc.cc
浏览文件 @
2e0b8713
...
...
@@ -617,6 +617,25 @@ void OpDesc::Flush() {
static
std
::
once_flag
init_infer_shape_funcs
;
/**
* NOTE(paddle-dev): Very tricky code here. Maybe we should find a
* better way to register compile-time infershape method gentlely.
*
* Normally, we can register a class derived from InferShapeBase, so that
* we can set the field of `infer_shape_` inside OpInfo when registering op.
*
* However, there is another way we can set the field of `infer_shape_` inside
* OpInfo. Usually, we overload InferShape method of OperatorWithKernel. After
* running the following method InitInferShapeFuncs, `infer_shape_` would be set
* to be the InferShape method of OperatorWithKernel. That is to say, we borrow
* the run-time InferShape method of OperatorWithKernel to be the compile-time
* InferShape method.
*
* However, during compiling time, we may not know inputs, outputs and attrs of
* run-time OperatorWithKernel. So the following code creates a fake
* OperatorWithKernel object. That is why the field info_ of OperatorBase
* would be null.
*/
static
void
InitInferShapeFuncs
()
{
std
::
call_once
(
init_infer_shape_funcs
,
[]
{
auto
&
map
=
OpInfoMap
::
Instance
();
...
...
@@ -628,11 +647,16 @@ static void InitInferShapeFuncs() {
PADDLE_ENFORCE
(
it
!=
info_map
.
end
(),
"%s has not been registered"
,
op_type
);
auto
&
op_info
=
it
->
second
;
auto
op
=
static_cast
<
OperatorWithKernel
*>
(
op_info
.
Creator
()(
""
,
VariableNameMap
{},
VariableNameMap
{},
AttributeMap
{}));
if
(
op_info
.
infer_shape_
)
{
// infer_shape has been registered.
continue
;
}
auto
op
=
dynamic_cast
<
OperatorWithKernel
*>
(
op_info
.
Creator
()(
""
,
VariableNameMap
{},
VariableNameMap
{},
AttributeMap
{}));
PADDLE_ENFORCE_NOT_NULL
(
op
,
"InferShapeBase is not registered to Operator %s"
,
op_type
);
op_info
.
infer_shape_
=
[
op
](
InferShapeContext
*
ctx
)
{
op
->
InferShape
(
ctx
);
};
...
...
paddle/fluid/framework/parallel_executor.cc
浏览文件 @
2e0b8713
...
...
@@ -19,11 +19,6 @@ limitations under the License. */
#include <tuple>
#include <utility>
#include <vector>
#include "paddle/fluid/framework/ir/graph_helper.h"
#include "paddle/fluid/framework/ir/graph.h"
#include "paddle/fluid/framework/details/all_reduce_deps_pass.h"
#include "paddle/fluid/framework/details/async_ssa_graph_executor.h"
#include "paddle/fluid/framework/details/fast_threaded_ssa_graph_executor.h"
#include "paddle/fluid/framework/details/multi_devices_helper.h"
...
...
@@ -31,6 +26,8 @@ limitations under the License. */
#include "paddle/fluid/framework/details/reference_count_pass_helper.h"
#include "paddle/fluid/framework/details/scope_buffered_ssa_graph_executor.h"
#include "paddle/fluid/framework/details/threaded_ssa_graph_executor.h"
#include "paddle/fluid/framework/ir/graph.h"
#include "paddle/fluid/framework/ir/graph_helper.h"
#include "paddle/fluid/platform/profiler.h"
#ifdef WITH_GPERFTOOLS
...
...
paddle/fluid/imperative/CMakeLists.txt
浏览文件 @
2e0b8713
...
...
@@ -3,4 +3,7 @@ cc_library(layer SRCS layer.cc DEPS proto_desc operator device_context blas pybi
cc_library
(
tracer SRCS tracer.cc DEPS proto_desc device_context pybind
)
cc_library
(
engine SRCS engine.cc
)
cc_library
(
imperative_profiler SRCS profiler.cc
)
cc_library
(
nccl_context SRCS nccl_context.cc DEPS device_context
)
cc_test
(
nccl_context_test SRCS nccl_context_test.cc DEPS nccl_context
)
endif
()
paddle/fluid/imperative/nccl_context.cc
0 → 100644
浏览文件 @
2e0b8713
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/fluid/imperative/nccl_context.h"
namespace
paddle
{
namespace
imperative
{
#if defined(PADDLE_WITH_CUDA) && !defined(_WIN32)
void
NCCLParallelContext
::
RecvNCCLID
(
const
std
::
string
&
ep
,
ncclUniqueId
*
nccl_id
)
{
auto
addr
=
paddle
::
string
::
Split
(
ep
,
':'
);
PADDLE_ENFORCE_EQ
(
addr
.
size
(),
2UL
,
"The endpoint should contain host and port: %s"
,
ep
);
std
::
string
host
=
addr
[
0
];
int
port
=
std
::
stoi
(
addr
[
1
]);
int
server_fd
,
new_socket
;
struct
sockaddr_in
address
;
int
addrlen
=
sizeof
(
address
);
char
buffer
[
1024
]
=
{
0
};
int
opt
=
0
;
// creating socket fd
if
((
server_fd
=
socket
(
AF_INET
,
SOCK_STREAM
,
0
))
==
0
)
PADDLE_THROW
(
"create server fd failed"
);
if
(
setsockopt
(
server_fd
,
SOL_SOCKET
,
SO_REUSEADDR
|
SO_REUSEPORT
,
&
opt
,
sizeof
(
opt
)))
PADDLE_THROW
(
"set socket opt failed"
);
address
.
sin_family
=
AF_INET
;
address
.
sin_addr
.
s_addr
=
INADDR_ANY
;
address
.
sin_port
=
htons
(
port
);
if
(
bind
(
server_fd
,
(
struct
sockaddr
*
)
&
address
,
sizeof
(
address
))
<
0
)
PADDLE_THROW
(
"binding failed on ep: %s"
,
ep
);
VLOG
(
3
)
<<
"listening on: "
<<
ep
;
if
(
listen
(
server_fd
,
3
)
<
0
)
PADDLE_THROW
(
"listen on server fd failed"
);
if
((
new_socket
=
accept
(
server_fd
,
reinterpret_cast
<
struct
sockaddr
*>
(
&
address
),
reinterpret_cast
<
socklen_t
*>
(
&
addrlen
)))
<
0
)
PADDLE_THROW
(
"accept the new socket fd failed"
);
if
(
read
(
new_socket
,
buffer
,
1024
)
<
0
)
PADDLE_THROW
(
"reading the ncclUniqueId from socket failed"
);
VLOG
(
3
)
<<
"recevived the ncclUniqueId"
;
memcpy
(
nccl_id
,
buffer
,
NCCL_UNIQUE_ID_BYTES
);
VLOG
(
3
)
<<
"closing the socket server: "
<<
ep
;
close
(
server_fd
);
}
void
NCCLParallelContext
::
SendNCCLID
(
const
std
::
string
&
ep
,
ncclUniqueId
*
nccl_id
)
{
auto
addr
=
paddle
::
string
::
Split
(
ep
,
':'
);
PADDLE_ENFORCE_EQ
(
addr
.
size
(),
2UL
,
"The endpoint should contain host and port: %s"
,
ep
);
std
::
string
host
=
addr
[
0
];
int
port
=
std
::
stoi
(
addr
[
1
]);
// struct sockaddr_in address;
int
sock
=
0
;
struct
sockaddr_in
serv_addr
;
char
buffer
[
1024
]
=
{
0
};
memcpy
(
buffer
,
nccl_id
,
NCCL_UNIQUE_ID_BYTES
);
if
((
sock
=
socket
(
AF_INET
,
SOCK_STREAM
,
0
))
<
0
)
PADDLE_THROW
(
"create socket failed"
);
memset
(
&
serv_addr
,
'0'
,
sizeof
(
serv_addr
));
serv_addr
.
sin_family
=
AF_INET
;
serv_addr
.
sin_port
=
htons
(
port
);
if
(
inet_pton
(
AF_INET
,
host
.
c_str
(),
&
serv_addr
.
sin_addr
)
<=
0
)
PADDLE_THROW
(
"invalied address: %s"
,
ep
);
while
(
true
)
{
if
(
connect
(
sock
,
(
struct
sockaddr
*
)
&
serv_addr
,
sizeof
(
serv_addr
))
<
0
)
{
VLOG
(
0
)
<<
"worker: "
<<
ep
<<
" is not ready, will retry after 3 seconds..."
;
std
::
this_thread
::
sleep_for
(
std
::
chrono
::
seconds
(
3
));
continue
;
}
VLOG
(
3
)
<<
"sending the ncclUniqueId to "
<<
ep
;
send
(
sock
,
buffer
,
NCCL_UNIQUE_ID_BYTES
,
0
);
break
;
}
}
void
NCCLParallelContext
::
BcastNCCLId
(
ncclUniqueId
*
nccl_id
,
int
root
)
{
if
(
strategy_
.
local_rank_
==
root
)
{
for
(
auto
ep
:
strategy_
.
trainer_endpoints_
)
{
if
(
ep
!=
strategy_
.
current_endpoint_
)
SendNCCLID
(
ep
,
nccl_id
);
}
}
else
{
RecvNCCLID
(
strategy_
.
current_endpoint_
,
nccl_id
);
}
}
void
NCCLParallelContext
::
Init
()
{
ncclUniqueId
nccl_id
;
ncclComm_t
comm
;
if
(
strategy_
.
local_rank_
==
0
)
{
// generate the unique ncclid on the root worker
platform
::
dynload
::
ncclGetUniqueId
(
&
nccl_id
);
BcastNCCLId
(
&
nccl_id
,
0
);
}
else
{
BcastNCCLId
(
&
nccl_id
,
0
);
}
int
gpu_id
=
boost
::
get
<
platform
::
CUDAPlace
>
(
place_
).
device
;
VLOG
(
0
)
<<
"init nccl context nranks: "
<<
strategy_
.
nranks_
<<
" local rank: "
<<
strategy_
.
local_rank_
<<
" gpu id: "
<<
gpu_id
;
PADDLE_ENFORCE
(
cudaSetDevice
(
gpu_id
));
PADDLE_ENFORCE
(
platform
::
dynload
::
ncclCommInitRank
(
&
comm
,
strategy_
.
nranks_
,
nccl_id
,
strategy_
.
local_rank_
));
platform
::
DeviceContextPool
&
pool
=
platform
::
DeviceContextPool
::
Instance
();
auto
*
dev_ctx
=
static_cast
<
platform
::
CUDADeviceContext
*>
(
pool
.
Get
(
place_
));
dev_ctx
->
set_nccl_comm
(
comm
);
}
#endif
}
// namespace imperative
}
// namespace paddle
paddle/fluid/imperative/nccl_context.h
0 → 100644
浏览文件 @
2e0b8713
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
// network header files
#ifndef _WIN32
#include <arpa/inet.h>
#include <netinet/in.h>
#include <stdlib.h>
#include <sys/socket.h>
#endif
#include <string>
#include <vector>
#include "paddle/fluid/framework/variable.h"
#include "paddle/fluid/platform/device_context.h"
#ifdef PADDLE_WITH_CUDA
#include "paddle/fluid/platform/dynload/nccl.h"
#endif
#include "paddle/fluid/platform/place.h"
#include "paddle/fluid/string/split.h"
namespace
paddle
{
namespace
imperative
{
struct
ParallelStrategy
{
int
nranks_
{
1
};
int
local_rank_
{
0
};
std
::
vector
<
std
::
string
>
trainer_endpoints_
{};
std
::
string
current_endpoint_
{
""
};
};
class
ParallelContext
{
public:
explicit
ParallelContext
(
const
ParallelStrategy
&
strategy
,
const
platform
::
Place
&
place
)
:
strategy_
(
strategy
),
place_
(
place
)
{}
virtual
~
ParallelContext
()
{}
virtual
void
Init
()
=
0
;
protected:
ParallelStrategy
strategy_
;
platform
::
Place
place_
;
};
#if defined(PADDLE_WITH_CUDA) && !defined(_WIN32)
class
NCCLParallelContext
:
ParallelContext
{
public:
explicit
NCCLParallelContext
(
const
ParallelStrategy
&
strategy
,
const
platform
::
Place
&
place
)
:
ParallelContext
(
strategy
,
place
)
{}
~
NCCLParallelContext
()
{}
void
BcastNCCLId
(
ncclUniqueId
*
nccl_id
,
int
root
);
void
Init
()
override
;
protected:
void
RecvNCCLID
(
const
std
::
string
&
endpoint
,
ncclUniqueId
*
nccl_id
);
void
SendNCCLID
(
const
std
::
string
&
endpoint
,
ncclUniqueId
*
nccl_id
);
};
#endif
}
// namespace imperative
}
// namespace paddle
paddle/fluid/
framework/details/all_reduce_deps_pass.h
→
paddle/fluid/
imperative/nccl_context_test.cc
浏览文件 @
2e0b8713
//
Copyright (c) 2018
PaddlePaddle Authors. All Rights Reserved.
//
Copyright (c) 2019
PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
...
...
@@ -12,21 +12,41 @@
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include "paddle/fluid/imperative/nccl_context.h"
#include "gtest/gtest.h"
#include "paddle/fluid/platform/device_context.h"
#include "paddle/fluid/framework/ir/graph.h"
#include "paddle/fluid/framework/ir/pass.h"
namespace
imperative
=
paddle
::
imperative
;
namespace
platform
=
paddle
::
platform
;
namespace
paddle
{
namespace
framework
{
namespace
details
{
imperative
::
ParallelStrategy
GetStrategy
(
int
local_rank
)
{
std
::
vector
<
std
::
string
>
eps
=
{
"127.0.0.1:9866"
,
"127.0.0.1:9867"
};
imperative
::
ParallelStrategy
strategy
;
strategy
.
trainer_endpoints_
=
eps
;
strategy
.
current_endpoint_
=
eps
[
local_rank
];
strategy
.
nranks_
=
2
;
strategy
.
local_rank_
=
local_rank
;
return
strategy
;
}
// TODO(gongwb): overlap allreduce with backward computation.
class
AllReduceDepsPass
:
public
ir
::
Pass
{
protected:
void
ApplyImpl
(
ir
::
Graph
*
graph
)
const
override
;
};
#if defined(PADDLE_WITH_CUDA) && !defined(_WIN32)
void
BcastNCCLId
(
int
local_rank
,
ncclUniqueId
*
nccl_id
)
{
auto
strategy
=
GetStrategy
(
local_rank
);
platform
::
CUDAPlace
gpu
(
local_rank
);
imperative
::
NCCLParallelContext
ctx
(
strategy
,
gpu
);
ctx
.
BcastNCCLId
(
nccl_id
,
0
);
}
}
// namespace details
}
// namespace framework
}
// namespace paddle
TEST
(
BcastNCCLId
,
Run
)
{
ncclUniqueId
nccl_id
;
platform
::
dynload
::
ncclGetUniqueId
(
&
nccl_id
);
std
::
thread
t
(
BcastNCCLId
,
0
,
&
nccl_id
);
ncclUniqueId
recv_nccl_id
;
BcastNCCLId
(
1
,
&
recv_nccl_id
);
t
.
join
();
EXPECT_EQ
(
0
,
std
::
memcmp
(
nccl_id
.
internal
,
recv_nccl_id
.
internal
,
NCCL_UNIQUE_ID_BYTES
));
}
#endif
paddle/fluid/imperative/tracer.cc
浏览文件 @
2e0b8713
...
...
@@ -177,7 +177,7 @@ std::set<std::string> Tracer::Trace(OpBase* op, const VarBasePtrMap& inputs,
current_vars_map
[
out
->
Name
()]
=
out
;
}
VLOG
(
3
)
<<
"
in
put var name: "
<<
out
->
Name
()
VLOG
(
3
)
<<
"
out
put var name: "
<<
out
->
Name
()
<<
" inited: "
<<
out
->
var_
->
IsInitialized
()
<<
" stop_grad: "
<<
out
->
IsStopGradient
();
}
...
...
@@ -215,6 +215,7 @@ std::set<std::string> Tracer::Trace(OpBase* op, const VarBasePtrMap& inputs,
framework
::
Scope
scope
;
op
->
place_
=
GetExpectedPlace
(
expected_place
,
inputs
);
PreparedOp
prepared_op
=
PreparedOp
::
Prepare
(
ctx
,
*
op_kernel
,
op
->
place_
);
prepared_op
.
op
.
RuntimeInferShape
(
scope
,
op
->
place_
,
ctx
);
prepared_op
.
func
(
...
...
paddle/fluid/inference/api/analysis_config.cc
浏览文件 @
2e0b8713
...
...
@@ -142,7 +142,6 @@ AnalysisConfig::AnalysisConfig(const AnalysisConfig &other) {
void
AnalysisConfig
::
EnableMKLDNN
()
{
#ifdef PADDLE_WITH_MKLDNN
pass_builder
()
->
EnableMKLDNN
();
use_mkldnn_
=
true
;
#else
LOG
(
ERROR
)
<<
"Please compile with MKLDNN first to use MKLDNN"
;
...
...
@@ -235,16 +234,13 @@ void AnalysisConfig::Update() {
}
if
(
use_mkldnn_
)
{
#ifdef PADDLE_WITH_MKLDNN
if
(
!
enable_ir_optim_
)
{
LOG
(
ERROR
)
<<
"EnableMKLDNN() only works when IR optimization is enabled."
;
}
else
{
pass_builder
()
->
EnableMKLDNN
();
}
#ifdef PADDLE_WITH_MKLDNN
pass_builder
()
->
EnableMKLDNN
();
use_mkldnn_
=
true
;
#else
LOG
(
ERROR
)
<<
"Please compile with MKLDNN first to use MKLDNN"
;
use_mkldnn_
=
false
;
#endif
}
...
...
@@ -256,9 +252,6 @@ void AnalysisConfig::Update() {
}
#ifdef PADDLE_WITH_MKLDNN
pass_builder
()
->
EnableMkldnnQuantizer
();
#else
LOG
(
ERROR
)
<<
"Please compile with MKLDNN first to use MkldnnQuantizer"
;
use_mkldnn_quantizer_
=
false
;
#endif
}
...
...
paddle/fluid/inference/api/helper.h
浏览文件 @
2e0b8713
...
...
@@ -27,6 +27,7 @@
#include <string>
#include <vector>
#include "paddle/fluid/inference/api/paddle_inference_api.h"
#include "paddle/fluid/platform/enforce.h"
#include "paddle/fluid/platform/port.h"
#include "paddle/fluid/string/printf.h"
...
...
@@ -266,17 +267,17 @@ static std::string DescribeZeroCopyTensor(const ZeroCopyTensor &tensor) {
}
static
void
PrintTime
(
int
batch_size
,
int
repeat
,
int
num_threads
,
int
tid
,
double
latency
,
int
epoch
=
1
)
{
LOG
(
INFO
)
<<
"====== batch_size: "
<<
batch_size
<<
", repeat: "
<<
repeat
<<
", threads: "
<<
num_threads
<<
", thread id: "
<<
tid
<<
", latency: "
<<
latency
<<
"ms, fps: "
<<
1
/
(
latency
/
1000.
f
)
double
batch_
latency
,
int
epoch
=
1
)
{
PADDLE_ENFORCE
(
batch_size
>
0
,
"Non-positive batch size."
);
double
sample_latency
=
batch_latency
/
batch_size
;
LOG
(
INFO
)
<<
"====== threads: "
<<
num_threads
<<
", thread id: "
<<
tid
<<
" ======"
;
if
(
epoch
>
1
)
{
int
samples
=
batch_size
*
epoch
;
LOG
(
INFO
)
<<
"====== sample number: "
<<
samples
<<
", average latency of each sample: "
<<
latency
/
samples
<<
"ms ======"
;
}
LOG
(
INFO
)
<<
"====== batch_size: "
<<
batch_size
<<
", iterations: "
<<
epoch
<<
", repetitions: "
<<
repeat
<<
" ======"
;
LOG
(
INFO
)
<<
"====== batch latency: "
<<
batch_latency
<<
"ms, number of samples: "
<<
batch_size
*
epoch
<<
", sample latency: "
<<
sample_latency
<<
"ms, fps: "
<<
1000.
f
/
sample_latency
<<
" ======"
;
}
static
bool
IsFileExists
(
const
std
::
string
&
path
)
{
...
...
paddle/fluid/inference/api/paddle_pass_builder.cc
浏览文件 @
2e0b8713
...
...
@@ -64,10 +64,12 @@ void PaddlePassBuilder::DeletePass(size_t idx) {
passes_
.
erase
(
std
::
begin
(
passes_
)
+
idx
);
}
void
GpuPassStrategy
::
EnableMKLDNN
(
)
{
LOG
(
ERROR
)
<<
"GPU not support MKLDNN yet"
;
void
PaddlePassBuilder
::
AppendAnalysisPass
(
const
std
::
string
&
pass
)
{
analysis_passes_
.
push_back
(
pass
)
;
}
void
PaddlePassBuilder
::
ClearPasses
()
{
passes_
.
clear
();
}
// The following passes works for Anakin sub-graph engine.
const
std
::
vector
<
std
::
string
>
kAnakinSubgraphPasses
({
"infer_clean_graph_pass"
,
//
...
...
@@ -102,12 +104,12 @@ GpuPassStrategy::GpuPassStrategy() : PassStrategy({}) {
use_gpu_
=
true
;
}
void
GpuPassStrategy
::
EnableM
kldnnQuantizer
()
{
LOG
(
ERROR
)
<<
"GPU not support MKL
-DNN quantization
"
;
void
GpuPassStrategy
::
EnableM
KLDNN
()
{
LOG
(
ERROR
)
<<
"GPU not support MKL
DNN yet
"
;
}
void
PaddlePassBuilder
::
AppendAnalysisPass
(
const
std
::
string
&
pass
)
{
analysis_passes_
.
push_back
(
pass
)
;
void
GpuPassStrategy
::
EnableMkldnnQuantizer
(
)
{
LOG
(
ERROR
)
<<
"GPU not support MKL-DNN quantization"
;
}
CpuPassStrategy
::
CpuPassStrategy
()
:
PassStrategy
({})
{
...
...
@@ -130,10 +132,44 @@ CpuPassStrategy::CpuPassStrategy() : PassStrategy({}) {
"conv_bn_fuse_pass"
,
//
"conv_eltwiseadd_bn_fuse_pass"
,
//
"is_test_pass"
,
//
"identity_scale_op_clean_pass"
,
//
"runtime_context_cache_pass"
,
//
});
use_gpu_
=
false
;
}
void
PaddlePassBuilder
::
ClearPasses
()
{
passes_
.
clear
();
}
void
CpuPassStrategy
::
EnableMKLDNN
()
{
// TODO(Superjomn) Consider the way to mix CPU with GPU.
#ifdef PADDLE_WITH_MKLDNN
if
(
!
use_mkldnn_
)
{
passes_
.
insert
(
passes_
.
begin
(),
"mkldnn_placement_pass"
);
for
(
auto
&
pass
:
std
::
vector
<
std
::
string
>
(
{
"depthwise_conv_mkldnn_pass"
,
//
"conv_bn_fuse_pass"
,
// Execute BN passes again to
"conv_eltwiseadd_bn_fuse_pass"
,
// preserve correct pass order
"conv_bias_mkldnn_fuse_pass"
,
//
"conv3d_bias_mkldnn_fuse_pass"
,
//
"conv_elementwise_add_mkldnn_fuse_pass"
,
"conv_relu_mkldnn_fuse_pass"
}))
{
passes_
.
push_back
(
pass
);
}
}
use_mkldnn_
=
true
;
#else
use_mkldnn_
=
false
;
#endif
}
void
CpuPassStrategy
::
EnableMkldnnQuantizer
()
{
#ifdef PADDLE_WITH_MKLDNN
if
(
!
use_mkldnn_quantizer_
)
{
passes_
.
push_back
(
"cpu_quantize_placement_pass"
);
}
use_mkldnn_quantizer_
=
true
;
#else
use_mkldnn_quantizer_
=
false
;
#endif
}
}
// namespace paddle
paddle/fluid/inference/api/paddle_pass_builder.h
浏览文件 @
2e0b8713
...
...
@@ -109,43 +109,16 @@ class CpuPassStrategy : public PassStrategy {
CpuPassStrategy
();
explicit
CpuPassStrategy
(
const
CpuPassStrategy
&
other
)
:
PassStrategy
(
other
.
AllPasses
())
{}
:
PassStrategy
(
other
.
AllPasses
())
{
use_gpu_
=
other
.
use_gpu_
;
use_mkldnn_
=
other
.
use_mkldnn_
;
use_mkldnn_quantizer_
=
other
.
use_mkldnn_quantizer_
;
}
virtual
~
CpuPassStrategy
()
=
default
;
void
EnableMKLDNN
()
override
{
// TODO(Superjomn) Consider the way to mix CPU with GPU.
#ifdef PADDLE_WITH_MKLDNN
if
(
!
use_mkldnn_
)
{
passes_
.
insert
(
passes_
.
begin
(),
"mkldnn_placement_pass"
);
for
(
auto
&
pass
:
std
::
vector
<
std
::
string
>
(
{
"depthwise_conv_mkldnn_pass"
,
//
"conv_bn_fuse_pass"
,
// Execute BN passes again to
"conv_eltwiseadd_bn_fuse_pass"
,
// preserve correct pass order
"conv_bias_mkldnn_fuse_pass"
,
//
"conv3d_bias_mkldnn_fuse_pass"
,
//
"conv_relu_mkldnn_fuse_pass"
,
//
"conv_elementwise_add_mkldnn_fuse_pass"
}))
{
passes_
.
push_back
(
pass
);
}
}
use_mkldnn_
=
true
;
#else
use_mkldnn_
=
false
;
#endif
}
void
EnableMkldnnQuantizer
()
override
{
#ifdef PADDLE_WITH_MKLDNN
if
(
!
use_mkldnn_quantizer_
)
{
passes_
.
push_back
(
"cpu_quantize_placement_pass"
);
}
use_mkldnn_quantizer_
=
true
;
#else
use_mkldnn_quantizer_
=
false
;
#endif
}
void
EnableMKLDNN
()
override
;
void
EnableMkldnnQuantizer
()
override
;
protected:
bool
use_mkldnn_quantizer_
{
false
};
...
...
paddle/fluid/inference/tests/api/CMakeLists.txt
浏览文件 @
2e0b8713
...
...
@@ -26,7 +26,11 @@ endfunction()
function
(
inference_analysis_api_int8_test target model_dir data_dir filename
)
inference_analysis_test
(
${
target
}
SRCS
${
filename
}
EXTRA_DEPS
${
INFERENCE_EXTRA_DEPS
}
benchmark
ARGS --infer_model=
${
model_dir
}
/model --infer_data=
${
data_dir
}
/data.bin --batch_size=100
)
ARGS --infer_model=
${
model_dir
}
/model
--infer_data=
${
data_dir
}
/data.bin
--warmup_batch_size=100
--batch_size=50
--iterations=2
)
endfunction
()
function
(
inference_analysis_api_test_with_fake_data target install_dir filename model_name
)
...
...
@@ -146,22 +150,22 @@ inference_analysis_api_test_with_fake_data(test_analyzer_mobilenet_depthwise_con
# int8 image classification tests
if
(
WITH_MKLDNN
)
set
(
INT8_DATA_DIR
"
${
INFERENCE_DEMO_INSTALL_DIR
}
/int8"
)
set
(
INT8_DATA_DIR
"
${
INFERENCE_DEMO_INSTALL_DIR
}
/int8
v2
"
)
if
(
NOT EXISTS
${
INT8_DATA_DIR
}
)
inference_download_and_uncompress
(
${
INT8_DATA_DIR
}
${
INFERENCE_URL
}
"/int8"
"imagenet_val_100
.tar.gz"
)
inference_download_and_uncompress
(
${
INT8_DATA_DIR
}
"
${
INFERENCE_URL
}
/int8"
"imagenet_val_100_tail
.tar.gz"
)
endif
()
#resnet50 int8
set
(
INT8_RESNET50_MODEL_DIR
"
${
INT8_DATA_DIR
}
/resnet50"
)
if
(
NOT EXISTS
${
INT8_RESNET50_MODEL_DIR
}
)
inference_download_and_uncompress
(
${
INT8_RESNET50_MODEL_DIR
}
${
INFERENCE_URL
}
"
/int8"
"resnet50_int8_model.tar.gz"
)
inference_download_and_uncompress
(
${
INT8_RESNET50_MODEL_DIR
}
"
${
INFERENCE_URL
}
/int8"
"resnet50_int8_model.tar.gz"
)
endif
()
inference_analysis_api_int8_test
(
test_analyzer_int8_resnet50
${
INT8_RESNET50_MODEL_DIR
}
${
INT8_DATA_DIR
}
analyzer_int8_image_classification_tester.cc SERIAL
)
#mobilenet int8
set
(
INT8_MOBILENET_MODEL_DIR
"
${
INT8_DATA_DIR
}
/mobilenet"
)
if
(
NOT EXISTS
${
INT8_MOBILENET_MODEL_DIR
}
)
inference_download_and_uncompress
(
${
INT8_MOBILENET_MODEL_DIR
}
${
INFERENCE_URL
}
"
/int8"
"mobilenetv1_int8_model.tar.gz"
)
inference_download_and_uncompress
(
${
INT8_MOBILENET_MODEL_DIR
}
"
${
INFERENCE_URL
}
/int8"
"mobilenetv1_int8_model.tar.gz"
)
endif
()
inference_analysis_api_int8_test
(
test_analyzer_int8_mobilenet
${
INT8_MOBILENET_MODEL_DIR
}
${
INT8_DATA_DIR
}
analyzer_int8_image_classification_tester.cc SERIAL
)
endif
()
...
...
paddle/fluid/inference/tests/api/analyzer_bert_tester.cc
浏览文件 @
2e0b8713
...
...
@@ -154,7 +154,7 @@ void profile(bool use_mkldnn = false) {
config
.
EnableMKLDNN
();
}
std
::
vector
<
PaddleTensor
>
outputs
;
std
::
vector
<
std
::
vector
<
PaddleTensor
>
>
outputs
;
std
::
vector
<
std
::
vector
<
PaddleTensor
>>
inputs
;
LoadInputData
(
&
inputs
);
TestPrediction
(
reinterpret_cast
<
const
PaddlePredictor
::
Config
*>
(
&
config
),
...
...
paddle/fluid/inference/tests/api/analyzer_dam_tester.cc
浏览文件 @
2e0b8713
...
...
@@ -197,7 +197,7 @@ void profile(bool use_mkldnn = false) {
cfg
.
SetMKLDNNOp
(
op_list
);
}
std
::
vector
<
PaddleTensor
>
outputs
;
std
::
vector
<
std
::
vector
<
PaddleTensor
>
>
outputs
;
std
::
vector
<
std
::
vector
<
PaddleTensor
>>
input_slots_all
;
SetInput
(
&
input_slots_all
);
...
...
@@ -206,9 +206,11 @@ void profile(bool use_mkldnn = false) {
if
(
FLAGS_num_threads
==
1
&&
!
FLAGS_test_all_data
)
{
PADDLE_ENFORCE_GT
(
outputs
.
size
(),
0
);
size_t
size
=
GetSize
(
outputs
[
0
]);
auto
output
=
outputs
.
back
();
PADDLE_ENFORCE_GT
(
output
.
size
(),
0
);
size_t
size
=
GetSize
(
output
[
0
]);
PADDLE_ENFORCE_GT
(
size
,
0
);
float
*
result
=
static_cast
<
float
*>
(
output
s
[
0
].
data
.
data
());
float
*
result
=
static_cast
<
float
*>
(
output
[
0
].
data
.
data
());
for
(
size_t
i
=
0
;
i
<
size
;
i
++
)
{
EXPECT_NEAR
(
result
[
i
],
result_data
[
i
],
1e-3
);
}
...
...
paddle/fluid/inference/tests/api/analyzer_int8_image_classification_tester.cc
浏览文件 @
2e0b8713
...
...
@@ -17,8 +17,6 @@ limitations under the License. */
#include "paddle/fluid/inference/api/paddle_analysis_config.h"
#include "paddle/fluid/inference/tests/api/tester_helper.h"
DEFINE_int32
(
iterations
,
0
,
"Number of iterations"
);
namespace
paddle
{
namespace
inference
{
namespace
analysis
{
...
...
@@ -30,8 +28,13 @@ void SetConfig(AnalysisConfig *cfg) {
cfg
->
SwitchIrOptim
();
cfg
->
SwitchSpecifyInputNames
(
false
);
cfg
->
SetCpuMathLibraryNumThreads
(
FLAGS_paddle_num_threads
);
cfg
->
EnableMKLDNN
();
cfg
->
pass_builder
()
->
SetPasses
(
{
"infer_clean_graph_pass"
,
"mkldnn_placement_pass"
,
"depthwise_conv_mkldnn_pass"
,
"conv_bn_fuse_pass"
,
"conv_eltwiseadd_bn_fuse_pass"
,
"conv_bias_mkldnn_fuse_pass"
,
"conv_elementwise_add_mkldnn_fuse_pass"
,
"conv_relu_mkldnn_fuse_pass"
,
"fc_fuse_pass"
,
"is_test_pass"
});
}
template
<
typename
T
>
...
...
@@ -40,8 +43,8 @@ class TensorReader {
TensorReader
(
std
::
ifstream
&
file
,
size_t
beginning_offset
,
std
::
vector
<
int
>
shape
,
std
::
string
name
)
:
file_
(
file
),
position
(
beginning_offset
),
shape_
(
shape
),
name_
(
name
)
{
numel
=
std
::
accumulate
(
shape_
.
begin
(),
shape_
.
end
(),
1
,
std
::
multiplies
<
T
>
());
numel
=
std
::
accumulate
(
shape_
.
begin
(),
shape_
.
end
(),
size_t
{
1
},
std
::
multiplies
<
size_t
>
());
}
PaddleTensor
NextBatch
()
{
...
...
@@ -71,10 +74,14 @@ class TensorReader {
};
std
::
shared_ptr
<
std
::
vector
<
PaddleTensor
>>
GetWarmupData
(
const
std
::
vector
<
std
::
vector
<
PaddleTensor
>>
&
test_data
,
int
num_images
)
{
const
std
::
vector
<
std
::
vector
<
PaddleTensor
>>
&
test_data
,
int
num_images
=
FLAGS_warmup_batch_size
)
{
int
test_data_batch_size
=
test_data
[
0
][
0
].
shape
[
0
];
CHECK_LE
(
static_cast
<
size_t
>
(
num_images
),
test_data
.
size
()
*
test_data_batch_size
);
auto
iterations_max
=
test_data
.
size
();
PADDLE_ENFORCE
(
static_cast
<
size_t
>
(
num_images
)
<=
iterations_max
*
test_data_batch_size
,
"The requested quantization warmup data size "
+
std
::
to_string
(
num_images
)
+
" is bigger than all test data size."
);
PaddleTensor
images
;
images
.
name
=
"input"
;
...
...
@@ -120,20 +127,17 @@ void SetInput(std::vector<std::vector<PaddleTensor>> *inputs,
std
::
vector
<
int
>
image_batch_shape
{
batch_size
,
3
,
224
,
224
};
std
::
vector
<
int
>
label_batch_shape
{
batch_size
,
1
};
auto
images_offset_in_file
=
static_cast
<
size_t
>
(
file
.
tellg
());
auto
labels_offset_in_file
=
static_cast
<
size_t
>
(
file
.
tellg
())
+
sizeof
(
float
)
*
total_images
*
std
::
accumulate
(
image_batch_shape
.
begin
()
+
1
,
image_batch_shape
.
end
(),
1
,
std
::
multiplies
<
int
>
());
images_offset_in_file
+
sizeof
(
float
)
*
total_images
*
3
*
224
*
224
;
TensorReader
<
float
>
image_reader
(
file
,
0
,
image_batch_shape
,
"input"
);
TensorReader
<
float
>
image_reader
(
file
,
images_offset_in_file
,
image_batch_shape
,
"input"
);
TensorReader
<
int64_t
>
label_reader
(
file
,
labels_offset_in_file
,
label_batch_shape
,
"label"
);
auto
iterations
=
total_images
/
batch_size
;
if
(
FLAGS_iterations
>
0
&&
FLAGS_iterations
<
iterations
)
iterations
=
FLAGS_iterations
;
for
(
auto
i
=
0
;
i
<
iterations
;
i
++
)
{
auto
iterations_max
=
total_images
/
batch_size
;
for
(
auto
i
=
0
;
i
<
iterations_max
;
i
++
)
{
auto
images
=
image_reader
.
NextBatch
();
auto
labels
=
label_reader
.
NextBatch
();
inputs
->
emplace_back
(
...
...
@@ -148,20 +152,21 @@ TEST(Analyzer_int8_resnet50, quantization) {
AnalysisConfig
q_cfg
;
SetConfig
(
&
q_cfg
);
// read data from file and prepare batches with test data
std
::
vector
<
std
::
vector
<
PaddleTensor
>>
input_slots_all
;
SetInput
(
&
input_slots_all
,
100
);
SetInput
(
&
input_slots_all
);
// prepare warmup batch from input data read earlier
// warmup batch size can be different than batch size
std
::
shared_ptr
<
std
::
vector
<
PaddleTensor
>>
warmup_data
=
GetWarmupData
(
input_slots_all
,
100
);
GetWarmupData
(
input_slots_all
);
// configure quantizer
q_cfg
.
EnableMkldnnQuantizer
();
q_cfg
.
mkldnn_quantizer_config
()
->
SetWarmupData
(
warmup_data
);
q_cfg
.
mkldnn_quantizer_config
()
->
SetWarmupBatchSize
(
100
);
q_cfg
.
mkldnn_quantizer_config
()
->
SetWarmupBatchSize
(
FLAGS_warmup_batch_size
);
CompareQuantizedAndAnalysis
(
reinterpret_cast
<
const
PaddlePredictor
::
Config
*>
(
&
cfg
),
reinterpret_cast
<
const
PaddlePredictor
::
Config
*>
(
&
q_cfg
),
input_slots_all
);
CompareQuantizedAndAnalysis
(
&
cfg
,
&
q_cfg
,
input_slots_all
);
}
}
// namespace analysis
...
...
paddle/fluid/inference/tests/api/analyzer_lac_tester.cc
浏览文件 @
2e0b8713
...
...
@@ -124,7 +124,7 @@ void SetInput(std::vector<std::vector<PaddleTensor>> *inputs) {
TEST
(
Analyzer_LAC
,
profile
)
{
AnalysisConfig
cfg
;
SetConfig
(
&
cfg
);
std
::
vector
<
PaddleTensor
>
outputs
;
std
::
vector
<
std
::
vector
<
PaddleTensor
>
>
outputs
;
std
::
vector
<
std
::
vector
<
PaddleTensor
>>
input_slots_all
;
SetInput
(
&
input_slots_all
);
...
...
@@ -137,11 +137,13 @@ TEST(Analyzer_LAC, profile) {
24
,
25
,
25
,
25
,
38
,
30
,
31
,
14
,
15
,
44
,
24
,
25
,
25
,
25
,
25
,
25
,
44
,
24
,
25
,
25
,
25
,
36
,
42
,
43
,
44
,
14
,
15
,
44
,
14
,
15
,
44
,
14
,
15
,
44
,
38
,
39
,
14
,
15
,
44
,
22
,
23
,
23
,
23
,
23
,
23
,
23
,
23
};
PADDLE_ENFORCE_EQ
(
outputs
.
size
(),
1UL
);
size_t
size
=
GetSize
(
outputs
[
0
]);
PADDLE_ENFORCE_GT
(
outputs
.
size
(),
0
);
auto
output
=
outputs
.
back
();
PADDLE_ENFORCE_EQ
(
output
.
size
(),
1UL
);
size_t
size
=
GetSize
(
output
[
0
]);
size_t
batch1_size
=
sizeof
(
lac_ref_data
)
/
sizeof
(
int64_t
);
PADDLE_ENFORCE_GE
(
size
,
batch1_size
);
int64_t
*
pdata
=
static_cast
<
int64_t
*>
(
output
s
[
0
].
data
.
data
());
int64_t
*
pdata
=
static_cast
<
int64_t
*>
(
output
[
0
].
data
.
data
());
for
(
size_t
i
=
0
;
i
<
batch1_size
;
++
i
)
{
EXPECT_EQ
(
pdata
[
i
],
lac_ref_data
[
i
]);
}
...
...
paddle/fluid/inference/tests/api/analyzer_mm_dnn_tester.cc
浏览文件 @
2e0b8713
...
...
@@ -96,7 +96,7 @@ void SetInput(std::vector<std::vector<PaddleTensor>> *inputs) {
void
profile
(
bool
use_mkldnn
=
false
)
{
AnalysisConfig
cfg
;
SetConfig
(
&
cfg
);
std
::
vector
<
PaddleTensor
>
outputs
;
std
::
vector
<
std
::
vector
<
PaddleTensor
>
>
outputs
;
if
(
use_mkldnn
)
{
cfg
.
EnableMKLDNN
();
...
...
@@ -108,8 +108,9 @@ void profile(bool use_mkldnn = false) {
input_slots_all
,
&
outputs
,
FLAGS_num_threads
);
if
(
FLAGS_num_threads
==
1
&&
!
FLAGS_test_all_data
)
{
PADDLE_ENFORCE_EQ
(
outputs
.
size
(),
2UL
);
for
(
auto
&
output
:
outputs
)
{
PADDLE_ENFORCE_GT
(
outputs
.
size
(),
0
);
PADDLE_ENFORCE_EQ
(
outputs
.
back
().
size
(),
2UL
);
for
(
auto
&
output
:
outputs
.
back
())
{
size_t
size
=
GetSize
(
output
);
PADDLE_ENFORCE_GT
(
size
,
0
);
float
*
result
=
static_cast
<
float
*>
(
output
.
data
.
data
());
...
...
paddle/fluid/inference/tests/api/analyzer_ner_tester.cc
浏览文件 @
2e0b8713
...
...
@@ -106,7 +106,7 @@ void SetInput(std::vector<std::vector<PaddleTensor>> *inputs) {
void
profile
(
bool
memory_load
=
false
)
{
AnalysisConfig
cfg
;
SetConfig
(
&
cfg
,
memory_load
);
std
::
vector
<
PaddleTensor
>
outputs
;
std
::
vector
<
std
::
vector
<
PaddleTensor
>
>
outputs
;
std
::
vector
<
std
::
vector
<
PaddleTensor
>>
input_slots_all
;
SetInput
(
&
input_slots_all
);
...
...
@@ -117,10 +117,12 @@ void profile(bool memory_load = false) {
// the first inference result
const
int
chinese_ner_result_data
[]
=
{
30
,
45
,
41
,
48
,
17
,
26
,
48
,
39
,
38
,
16
,
25
};
PADDLE_ENFORCE_EQ
(
outputs
.
size
(),
1UL
);
size_t
size
=
GetSize
(
outputs
[
0
]);
PADDLE_ENFORCE_GT
(
outputs
.
size
(),
0
);
auto
output
=
outputs
.
back
();
PADDLE_ENFORCE_EQ
(
output
.
size
(),
1UL
);
size_t
size
=
GetSize
(
output
[
0
]);
PADDLE_ENFORCE_GT
(
size
,
0
);
int64_t
*
result
=
static_cast
<
int64_t
*>
(
output
s
[
0
].
data
.
data
());
int64_t
*
result
=
static_cast
<
int64_t
*>
(
output
[
0
].
data
.
data
());
for
(
size_t
i
=
0
;
i
<
std
::
min
(
11UL
,
size
);
i
++
)
{
EXPECT_EQ
(
result
[
i
],
chinese_ner_result_data
[
i
]);
}
...
...
paddle/fluid/inference/tests/api/analyzer_pyramid_dnn_tester.cc
浏览文件 @
2e0b8713
...
...
@@ -127,7 +127,7 @@ void SetInput(std::vector<std::vector<PaddleTensor>> *inputs) {
TEST
(
Analyzer_Pyramid_DNN
,
profile
)
{
AnalysisConfig
cfg
;
SetConfig
(
&
cfg
);
std
::
vector
<
PaddleTensor
>
outputs
;
std
::
vector
<
std
::
vector
<
PaddleTensor
>
>
outputs
;
std
::
vector
<
std
::
vector
<
PaddleTensor
>>
input_slots_all
;
SetInput
(
&
input_slots_all
);
...
...
@@ -135,10 +135,12 @@ TEST(Analyzer_Pyramid_DNN, profile) {
input_slots_all
,
&
outputs
,
FLAGS_num_threads
);
if
(
FLAGS_num_threads
==
1
&&
!
FLAGS_test_all_data
&&
!
FLAGS_zero_copy
)
{
PADDLE_ENFORCE_EQ
(
outputs
.
size
(),
1UL
);
size_t
size
=
GetSize
(
outputs
[
0
]);
PADDLE_ENFORCE_GT
(
outputs
.
size
(),
0
);
auto
output
=
outputs
.
back
();
PADDLE_ENFORCE_EQ
(
output
.
size
(),
1UL
);
size_t
size
=
GetSize
(
output
[
0
]);
PADDLE_ENFORCE_GT
(
size
,
0
);
float
*
result
=
static_cast
<
float
*>
(
output
s
[
0
].
data
.
data
());
float
*
result
=
static_cast
<
float
*>
(
output
[
0
].
data
.
data
());
// output is probability, which is in (0, 1).
for
(
size_t
i
=
0
;
i
<
size
;
i
++
)
{
EXPECT_GT
(
result
[
i
],
0
);
...
...
paddle/fluid/inference/tests/api/analyzer_resnet50_tester.cc
浏览文件 @
2e0b8713
...
...
@@ -40,7 +40,7 @@ void profile(bool use_mkldnn = false) {
if
(
use_mkldnn
)
{
cfg
.
EnableMKLDNN
();
}
std
::
vector
<
PaddleTensor
>
outputs
;
std
::
vector
<
std
::
vector
<
PaddleTensor
>
>
outputs
;
std
::
vector
<
std
::
vector
<
PaddleTensor
>>
input_slots_all
;
SetInput
(
&
input_slots_all
);
...
...
paddle/fluid/inference/tests/api/analyzer_rnn1_tester.cc
浏览文件 @
2e0b8713
...
...
@@ -229,7 +229,7 @@ TEST(Analyzer_rnn1, profile) {
SetConfig
(
&
cfg
);
cfg
.
DisableGpu
();
cfg
.
SwitchIrDebug
();
std
::
vector
<
PaddleTensor
>
outputs
;
std
::
vector
<
std
::
vector
<
PaddleTensor
>
>
outputs
;
std
::
vector
<
std
::
vector
<
PaddleTensor
>>
input_slots_all
;
SetInput
(
&
input_slots_all
);
...
...
@@ -280,7 +280,7 @@ TEST(Analyzer_rnn1, compare_determine) {
TEST
(
Analyzer_rnn1
,
multi_thread
)
{
AnalysisConfig
cfg
;
SetConfig
(
&
cfg
);
std
::
vector
<
PaddleTensor
>
outputs
;
std
::
vector
<
std
::
vector
<
PaddleTensor
>
>
outputs
;
std
::
vector
<
std
::
vector
<
PaddleTensor
>>
input_slots_all
;
SetInput
(
&
input_slots_all
);
...
...
paddle/fluid/inference/tests/api/analyzer_rnn2_tester.cc
浏览文件 @
2e0b8713
...
...
@@ -126,7 +126,7 @@ void SetInput(std::vector<std::vector<PaddleTensor>> *inputs) {
TEST
(
Analyzer_rnn2
,
profile
)
{
AnalysisConfig
cfg
;
SetConfig
(
&
cfg
);
std
::
vector
<
PaddleTensor
>
outputs
;
std
::
vector
<
std
::
vector
<
PaddleTensor
>
>
outputs
;
std
::
vector
<
std
::
vector
<
PaddleTensor
>>
input_slots_all
;
SetInput
(
&
input_slots_all
);
...
...
@@ -136,9 +136,11 @@ TEST(Analyzer_rnn2, profile) {
if
(
FLAGS_num_threads
==
1
&&
!
FLAGS_test_all_data
)
{
// the first inference result
PADDLE_ENFORCE_GT
(
outputs
.
size
(),
0
);
size_t
size
=
GetSize
(
outputs
[
0
]);
auto
output
=
outputs
.
back
();
PADDLE_ENFORCE_GT
(
output
.
size
(),
0
);
size_t
size
=
GetSize
(
output
[
0
]);
PADDLE_ENFORCE_GT
(
size
,
0
);
float
*
result
=
static_cast
<
float
*>
(
output
s
[
0
].
data
.
data
());
float
*
result
=
static_cast
<
float
*>
(
output
[
0
].
data
.
data
());
for
(
size_t
i
=
0
;
i
<
size
;
i
++
)
{
EXPECT_NEAR
(
result
[
i
],
result_data
[
i
],
1e-3
);
}
...
...
paddle/fluid/inference/tests/api/analyzer_seq_conv1_tester.cc
浏览文件 @
2e0b8713
...
...
@@ -110,7 +110,7 @@ void SetInput(std::vector<std::vector<PaddleTensor>> *inputs) {
TEST
(
Analyzer_seq_conv1
,
profile
)
{
AnalysisConfig
cfg
;
SetConfig
(
&
cfg
);
std
::
vector
<
PaddleTensor
>
outputs
;
std
::
vector
<
std
::
vector
<
PaddleTensor
>
>
outputs
;
std
::
vector
<
std
::
vector
<
PaddleTensor
>>
input_slots_all
;
SetInput
(
&
input_slots_all
);
...
...
@@ -119,10 +119,12 @@ TEST(Analyzer_seq_conv1, profile) {
if
(
FLAGS_num_threads
==
1
&&
!
FLAGS_test_all_data
)
{
// the first inference result
PADDLE_ENFORCE_EQ
(
outputs
.
size
(),
1UL
);
size_t
size
=
GetSize
(
outputs
[
0
]);
PADDLE_ENFORCE_GT
(
outputs
.
size
(),
0
);
auto
output
=
outputs
.
back
();
PADDLE_ENFORCE_EQ
(
output
.
size
(),
1UL
);
size_t
size
=
GetSize
(
output
[
0
]);
PADDLE_ENFORCE_GT
(
size
,
0
);
float
*
result
=
static_cast
<
float
*>
(
output
s
[
0
].
data
.
data
());
float
*
result
=
static_cast
<
float
*>
(
output
[
0
].
data
.
data
());
// output is probability, which is in (0, 1).
for
(
size_t
i
=
0
;
i
<
size
;
i
++
)
{
EXPECT_GT
(
result
[
i
],
0
);
...
...
paddle/fluid/inference/tests/api/analyzer_seq_pool1_tester.cc
浏览文件 @
2e0b8713
...
...
@@ -156,7 +156,7 @@ void profile(bool use_mkldnn = false) {
AnalysisConfig
cfg
;
SetConfig
(
&
cfg
,
use_mkldnn
);
std
::
vector
<
PaddleTensor
>
outputs
;
std
::
vector
<
std
::
vector
<
PaddleTensor
>
>
outputs
;
std
::
vector
<
std
::
vector
<
PaddleTensor
>>
input_slots_all
;
SetInput
(
&
input_slots_all
);
TestPrediction
(
reinterpret_cast
<
const
PaddlePredictor
::
Config
*>
(
&
cfg
),
...
...
paddle/fluid/inference/tests/api/analyzer_text_classification_tester.cc
浏览文件 @
2e0b8713
...
...
@@ -70,7 +70,7 @@ TEST(Analyzer_Text_Classification, profile) {
AnalysisConfig
cfg
;
SetConfig
(
&
cfg
);
cfg
.
SwitchIrDebug
();
std
::
vector
<
PaddleTensor
>
outputs
;
std
::
vector
<
std
::
vector
<
PaddleTensor
>
>
outputs
;
std
::
vector
<
std
::
vector
<
PaddleTensor
>>
input_slots_all
;
SetInput
(
&
input_slots_all
);
...
...
@@ -79,8 +79,9 @@ TEST(Analyzer_Text_Classification, profile) {
if
(
FLAGS_num_threads
==
1
)
{
// Get output
LOG
(
INFO
)
<<
"get outputs "
<<
outputs
.
size
();
for
(
auto
&
output
:
outputs
)
{
PADDLE_ENFORCE_GT
(
outputs
.
size
(),
0
);
LOG
(
INFO
)
<<
"get outputs "
<<
outputs
.
back
().
size
();
for
(
auto
&
output
:
outputs
.
back
())
{
LOG
(
INFO
)
<<
"output.shape: "
<<
to_string
(
output
.
shape
);
// no lod ?
CHECK_EQ
(
output
.
lod
.
size
(),
0UL
);
...
...
paddle/fluid/inference/tests/api/analyzer_transformer_tester.cc
浏览文件 @
2e0b8713
...
...
@@ -186,7 +186,7 @@ void SetInput(std::vector<std::vector<PaddleTensor>> *inputs) {
void
profile
(
bool
use_mkldnn
=
false
)
{
AnalysisConfig
cfg
;
SetConfig
(
&
cfg
);
std
::
vector
<
PaddleTensor
>
outputs
;
std
::
vector
<
std
::
vector
<
PaddleTensor
>
>
outputs
;
if
(
use_mkldnn
)
{
cfg
.
EnableMKLDNN
();
}
...
...
paddle/fluid/inference/tests/api/analyzer_vis_tester.cc
浏览文件 @
2e0b8713
...
...
@@ -87,7 +87,7 @@ void profile(bool use_mkldnn = false) {
cfg
.
EnableMKLDNN
();
}
// cfg.pass_builder()->TurnOnDebug();
std
::
vector
<
PaddleTensor
>
outputs
;
std
::
vector
<
std
::
vector
<
PaddleTensor
>
>
outputs
;
std
::
vector
<
std
::
vector
<
PaddleTensor
>>
input_slots_all
;
SetInput
(
&
input_slots_all
);
...
...
@@ -100,7 +100,8 @@ void profile(bool use_mkldnn = false) {
auto
refer
=
ProcessALine
(
line
);
file
.
close
();
auto
&
output
=
outputs
.
front
();
PADDLE_ENFORCE_GT
(
outputs
.
size
(),
0
);
auto
&
output
=
outputs
.
back
().
front
();
size_t
numel
=
output
.
data
.
length
()
/
PaddleDtypeSize
(
output
.
dtype
);
CHECK_EQ
(
numel
,
refer
.
data
.
size
());
for
(
size_t
i
=
0
;
i
<
numel
;
++
i
)
{
...
...
paddle/fluid/inference/tests/api/tester_helper.h
浏览文件 @
2e0b8713
...
...
@@ -41,7 +41,10 @@ DEFINE_string(model_name, "", "model name");
DEFINE_string
(
infer_model
,
""
,
"model path"
);
DEFINE_string
(
infer_data
,
""
,
"data file"
);
DEFINE_string
(
refer_result
,
""
,
"reference result for comparison"
);
DEFINE_int32
(
batch_size
,
1
,
"batch size."
);
DEFINE_int32
(
batch_size
,
1
,
"batch size"
);
DEFINE_int32
(
warmup_batch_size
,
100
,
"batch size for quantization warmup"
);
// setting iterations to 0 means processing the whole dataset
DEFINE_int32
(
iterations
,
0
,
"number of batches to process"
);
DEFINE_int32
(
repeat
,
1
,
"Running the inference program repeat times."
);
DEFINE_bool
(
test_all_data
,
false
,
"Test the all dataset in data file."
);
DEFINE_int32
(
num_threads
,
1
,
"Running the inference program in multi-threads."
);
...
...
@@ -239,7 +242,7 @@ void SetFakeImageInput(std::vector<std::vector<PaddleTensor>> *inputs,
}
input
.
shape
=
shape
;
input
.
dtype
=
PaddleDType
::
FLOAT32
;
size_t
len
=
std
::
accumulate
(
shape
.
begin
(),
shape
.
end
(),
1
,
size_t
len
=
std
::
accumulate
(
shape
.
begin
(),
shape
.
end
(),
size_t
{
1
}
,
[](
int
a
,
int
b
)
{
return
a
*
b
;
});
input
.
data
.
Resize
(
len
*
sizeof
(
float
));
input
.
lod
.
assign
({{
0
,
static_cast
<
size_t
>
(
FLAGS_batch_size
)}});
...
...
@@ -286,17 +289,18 @@ void ConvertPaddleTensorToZeroCopyTensor(
void
PredictionWarmUp
(
PaddlePredictor
*
predictor
,
const
std
::
vector
<
std
::
vector
<
PaddleTensor
>>
&
inputs
,
std
::
vector
<
PaddleTensor
>
*
outputs
,
int
num_thread
s
,
int
tid
)
{
std
::
vector
<
std
::
vector
<
PaddleTensor
>>
*
output
s
,
int
num_threads
,
int
tid
)
{
int
batch_size
=
FLAGS_batch_size
;
LOG
(
INFO
)
<<
"Running thread "
<<
tid
<<
", warm up run..."
;
if
(
FLAGS_zero_copy
)
{
ConvertPaddleTensorToZeroCopyTensor
(
predictor
,
inputs
[
0
]);
}
outputs
->
resize
(
1
);
Timer
warmup_timer
;
warmup_timer
.
tic
();
if
(
!
FLAGS_zero_copy
)
{
predictor
->
Run
(
inputs
[
0
],
outputs
,
batch_size
);
predictor
->
Run
(
inputs
[
0
],
&
(
*
outputs
)[
0
]
,
batch_size
);
}
else
{
predictor
->
ZeroCopyRun
();
}
...
...
@@ -308,11 +312,16 @@ void PredictionWarmUp(PaddlePredictor *predictor,
void
PredictionRun
(
PaddlePredictor
*
predictor
,
const
std
::
vector
<
std
::
vector
<
PaddleTensor
>>
&
inputs
,
std
::
vector
<
PaddleTensor
>
*
outputs
,
int
num_threads
,
int
tid
)
{
int
batch_size
=
FLAGS_batch_size
;
std
::
vector
<
std
::
vector
<
PaddleTensor
>>
*
outputs
,
int
num_threads
,
int
tid
)
{
int
num_times
=
FLAGS_repeat
;
LOG
(
INFO
)
<<
"Thread "
<<
tid
<<
" run "
<<
num_times
<<
" times..."
;
int
iterations
=
inputs
.
size
();
// process the whole dataset ...
if
(
FLAGS_iterations
>
0
&&
FLAGS_iterations
<
inputs
.
size
())
iterations
=
FLAGS_iterations
;
// ... unless the number of iterations is set
outputs
->
resize
(
iterations
);
LOG
(
INFO
)
<<
"Thread "
<<
tid
<<
", number of threads "
<<
num_threads
<<
", run "
<<
num_times
<<
" times..."
;
Timer
run_timer
;
double
elapsed_time
=
0
;
#ifdef WITH_GPERFTOOLS
...
...
@@ -320,14 +329,14 @@ void PredictionRun(PaddlePredictor *predictor,
#endif
if
(
!
FLAGS_zero_copy
)
{
run_timer
.
tic
();
for
(
size_t
i
=
0
;
i
<
i
nputs
.
size
()
;
i
++
)
{
for
(
size_t
i
=
0
;
i
<
i
terations
;
i
++
)
{
for
(
int
j
=
0
;
j
<
num_times
;
j
++
)
{
predictor
->
Run
(
inputs
[
i
],
outputs
,
batch_size
);
predictor
->
Run
(
inputs
[
i
],
&
(
*
outputs
)[
i
],
FLAGS_
batch_size
);
}
}
elapsed_time
=
run_timer
.
toc
();
}
else
{
for
(
size_t
i
=
0
;
i
<
i
nputs
.
size
()
;
i
++
)
{
for
(
size_t
i
=
0
;
i
<
i
terations
;
i
++
)
{
ConvertPaddleTensorToZeroCopyTensor
(
predictor
,
inputs
[
i
]);
run_timer
.
tic
();
for
(
int
j
=
0
;
j
<
num_times
;
j
++
)
{
...
...
@@ -340,13 +349,14 @@ void PredictionRun(PaddlePredictor *predictor,
ProfilerStop
();
#endif
PrintTime
(
batch_size
,
num_times
,
num_threads
,
tid
,
elapsed_time
/
num_times
,
inputs
.
size
());
auto
batch_latency
=
elapsed_time
/
(
iterations
*
num_times
);
PrintTime
(
FLAGS_batch_size
,
num_times
,
num_threads
,
tid
,
batch_latency
,
iterations
);
if
(
FLAGS_record_benchmark
)
{
Benchmark
benchmark
;
benchmark
.
SetName
(
FLAGS_model_name
);
benchmark
.
SetBatchSize
(
batch_size
);
benchmark
.
SetLatency
(
elapsed_time
/
num_times
);
benchmark
.
SetBatchSize
(
FLAGS_
batch_size
);
benchmark
.
SetLatency
(
batch_latency
);
benchmark
.
PersistToFile
(
"benchmark_record.txt"
);
}
}
...
...
@@ -354,16 +364,17 @@ void PredictionRun(PaddlePredictor *predictor,
void
TestOneThreadPrediction
(
const
PaddlePredictor
::
Config
*
config
,
const
std
::
vector
<
std
::
vector
<
PaddleTensor
>>
&
inputs
,
std
::
vector
<
PaddleTensor
>
*
outputs
,
bool
use_analysis
=
true
)
{
std
::
vector
<
std
::
vector
<
PaddleTensor
>
>
*
outputs
,
bool
use_analysis
=
true
)
{
auto
predictor
=
CreateTestPredictor
(
config
,
use_analysis
);
PredictionWarmUp
(
predictor
.
get
(),
inputs
,
outputs
,
1
,
0
);
PredictionRun
(
predictor
.
get
(),
inputs
,
outputs
,
1
,
0
);
PredictionWarmUp
(
predictor
.
get
(),
inputs
,
outputs
,
FLAGS_paddle_num_threads
,
0
);
PredictionRun
(
predictor
.
get
(),
inputs
,
outputs
,
FLAGS_paddle_num_threads
,
0
);
}
void
TestMultiThreadPrediction
(
const
PaddlePredictor
::
Config
*
config
,
const
std
::
vector
<
std
::
vector
<
PaddleTensor
>>
&
inputs
,
std
::
vector
<
PaddleTensor
>
*
outputs
,
int
num_threads
,
std
::
vector
<
std
::
vector
<
PaddleTensor
>
>
*
outputs
,
int
num_threads
,
bool
use_analysis
=
true
)
{
std
::
vector
<
std
::
thread
>
threads
;
std
::
vector
<
std
::
unique_ptr
<
PaddlePredictor
>>
predictors
;
...
...
@@ -376,7 +387,7 @@ void TestMultiThreadPrediction(
threads
.
emplace_back
([
&
,
tid
]()
{
// Each thread should have local inputs and outputs.
// The inputs of each thread are all the same.
std
::
vector
<
PaddleTensor
>
outputs_tid
;
std
::
vector
<
std
::
vector
<
PaddleTensor
>
>
outputs_tid
;
auto
&
predictor
=
predictors
[
tid
];
#ifdef PADDLE_WITH_MKLDNN
if
(
use_analysis
)
{
...
...
@@ -384,8 +395,8 @@ void TestMultiThreadPrediction(
->
SetMkldnnThreadID
(
static_cast
<
int
>
(
tid
)
+
1
);
}
#endif
PredictionWarmUp
(
predictor
.
get
(),
inputs
,
outputs
,
num_threads
,
tid
);
PredictionRun
(
predictor
.
get
(),
inputs
,
outputs
,
num_threads
,
tid
);
PredictionWarmUp
(
predictor
.
get
(),
inputs
,
&
outputs_tid
,
num_threads
,
tid
);
PredictionRun
(
predictor
.
get
(),
inputs
,
&
outputs_tid
,
num_threads
,
tid
);
});
}
for
(
int
i
=
0
;
i
<
num_threads
;
++
i
)
{
...
...
@@ -395,8 +406,8 @@ void TestMultiThreadPrediction(
void
TestPrediction
(
const
PaddlePredictor
::
Config
*
config
,
const
std
::
vector
<
std
::
vector
<
PaddleTensor
>>
&
inputs
,
std
::
vector
<
PaddleTensor
>
*
outputs
,
int
num_thread
s
,
bool
use_analysis
=
FLAGS_use_analysis
)
{
std
::
vector
<
std
::
vector
<
PaddleTensor
>>
*
output
s
,
int
num_threads
,
bool
use_analysis
=
FLAGS_use_analysis
)
{
PrintConfig
(
config
,
use_analysis
);
if
(
num_threads
==
1
)
{
TestOneThreadPrediction
(
config
,
inputs
,
outputs
,
use_analysis
);
...
...
@@ -406,30 +417,41 @@ void TestPrediction(const PaddlePredictor::Config *config,
}
}
void
CompareTopAccuracy
(
const
std
::
vector
<
PaddleTensor
>
&
output_slots1
,
const
std
::
vector
<
PaddleTensor
>
&
output_slots2
)
{
// first output: avg_cost
if
(
output_slots
1
.
size
()
==
0
||
output_slots2
.
size
()
==
0
)
void
CompareTopAccuracy
(
const
std
::
vector
<
std
::
vector
<
PaddleTensor
>>
&
output_slots_quant
,
const
std
::
vector
<
std
::
vector
<
PaddleTensor
>>
&
output_slots_ref
)
{
if
(
output_slots
_quant
.
size
()
==
0
||
output_slots_ref
.
size
()
==
0
)
throw
std
::
invalid_argument
(
"CompareTopAccuracy: output_slots vector is empty."
);
PADDLE_ENFORCE
(
output_slots1
.
size
()
>=
2UL
);
PADDLE_ENFORCE
(
output_slots2
.
size
()
>=
2UL
);
// second output: acc_top1
if
(
output_slots1
[
1
].
lod
.
size
()
>
0
||
output_slots2
[
1
].
lod
.
size
()
>
0
)
throw
std
::
invalid_argument
(
"CompareTopAccuracy: top1 accuracy output has nonempty LoD."
);
if
(
output_slots1
[
1
].
dtype
!=
paddle
::
PaddleDType
::
FLOAT32
||
output_slots2
[
1
].
dtype
!=
paddle
::
PaddleDType
::
FLOAT32
)
throw
std
::
invalid_argument
(
"CompareTopAccuracy: top1 accuracy output is of a wrong type."
);
float
*
top1_quantized
=
static_cast
<
float
*>
(
output_slots1
[
1
].
data
.
data
());
float
*
top1_reference
=
static_cast
<
float
*>
(
output_slots2
[
1
].
data
.
data
());
LOG
(
INFO
)
<<
"top1 INT8 accuracy: "
<<
*
top1_quantized
;
LOG
(
INFO
)
<<
"top1 FP32 accuracy: "
<<
*
top1_reference
;
float
total_accs1_quant
{
0
};
float
total_accs1_ref
{
0
};
for
(
size_t
i
=
0
;
i
<
output_slots_quant
.
size
();
++
i
)
{
PADDLE_ENFORCE
(
output_slots_quant
[
i
].
size
()
>=
2UL
);
PADDLE_ENFORCE
(
output_slots_ref
[
i
].
size
()
>=
2UL
);
// second output: acc_top1
if
(
output_slots_quant
[
i
][
1
].
lod
.
size
()
>
0
||
output_slots_ref
[
i
][
1
].
lod
.
size
()
>
0
)
throw
std
::
invalid_argument
(
"CompareTopAccuracy: top1 accuracy output has nonempty LoD."
);
if
(
output_slots_quant
[
i
][
1
].
dtype
!=
paddle
::
PaddleDType
::
FLOAT32
||
output_slots_ref
[
i
][
1
].
dtype
!=
paddle
::
PaddleDType
::
FLOAT32
)
throw
std
::
invalid_argument
(
"CompareTopAccuracy: top1 accuracy output is of a wrong type."
);
total_accs1_quant
+=
*
static_cast
<
float
*>
(
output_slots_quant
[
i
][
1
].
data
.
data
());
total_accs1_ref
+=
*
static_cast
<
float
*>
(
output_slots_ref
[
i
][
1
].
data
.
data
());
}
float
avg_acc1_quant
=
total_accs1_quant
/
output_slots_quant
.
size
();
float
avg_acc1_ref
=
total_accs1_ref
/
output_slots_ref
.
size
();
LOG
(
INFO
)
<<
"Avg top1 INT8 accuracy: "
<<
std
::
fixed
<<
std
::
setw
(
6
)
<<
std
::
setprecision
(
4
)
<<
avg_acc1_quant
;
LOG
(
INFO
)
<<
"Avg top1 FP32 accuracy: "
<<
std
::
fixed
<<
std
::
setw
(
6
)
<<
std
::
setprecision
(
4
)
<<
avg_acc1_ref
;
LOG
(
INFO
)
<<
"Accepted accuracy drop threshold: "
<<
FLAGS_quantized_accuracy
;
CHECK_LE
(
std
::
abs
(
*
top1_quantized
-
*
top1_reference
),
FLAGS_quantized_accuracy
);
CHECK_LE
(
std
::
abs
(
avg_acc1_quant
-
avg_acc1_ref
),
FLAGS_quantized_accuracy
);
}
void
CompareDeterministic
(
...
...
@@ -455,20 +477,35 @@ void CompareNativeAndAnalysis(
const
PaddlePredictor
::
Config
*
config
,
const
std
::
vector
<
std
::
vector
<
PaddleTensor
>>
&
inputs
)
{
PrintConfig
(
config
,
true
);
std
::
vector
<
PaddleTensor
>
native_outputs
,
analysis_outputs
;
std
::
vector
<
std
::
vector
<
PaddleTensor
>
>
native_outputs
,
analysis_outputs
;
TestOneThreadPrediction
(
config
,
inputs
,
&
native_outputs
,
false
);
TestOneThreadPrediction
(
config
,
inputs
,
&
analysis_outputs
,
true
);
CompareResult
(
analysis_outputs
,
native_outputs
);
PADDLE_ENFORCE
(
native_outputs
.
size
()
>
0
,
"Native output is empty."
);
PADDLE_ENFORCE
(
analysis_outputs
.
size
()
>
0
,
"Analysis output is empty."
);
CompareResult
(
analysis_outputs
.
back
(),
native_outputs
.
back
());
}
void
CompareQuantizedAndAnalysis
(
const
PaddlePredictor
::
Config
*
config
,
const
PaddlePredictor
::
Config
*
qconfig
,
const
AnalysisConfig
*
config
,
const
AnalysisConfig
*
qconfig
,
const
std
::
vector
<
std
::
vector
<
PaddleTensor
>>
&
inputs
)
{
PrintConfig
(
config
,
true
);
std
::
vector
<
PaddleTensor
>
analysis_outputs
,
quantized_outputs
;
TestOneThreadPrediction
(
config
,
inputs
,
&
analysis_outputs
,
true
);
TestOneThreadPrediction
(
qconfig
,
inputs
,
&
quantized_outputs
,
true
);
PADDLE_ENFORCE_EQ
(
inputs
[
0
][
0
].
shape
[
0
],
FLAGS_batch_size
,
"Input data has to be packed batch by batch."
);
LOG
(
INFO
)
<<
"FP32 & INT8 prediction run: batch_size "
<<
FLAGS_batch_size
<<
", warmup batch size "
<<
FLAGS_warmup_batch_size
<<
"."
;
LOG
(
INFO
)
<<
"--- FP32 prediction start ---"
;
auto
*
cfg
=
reinterpret_cast
<
const
PaddlePredictor
::
Config
*>
(
config
);
PrintConfig
(
cfg
,
true
);
std
::
vector
<
std
::
vector
<
PaddleTensor
>>
analysis_outputs
;
TestOneThreadPrediction
(
cfg
,
inputs
,
&
analysis_outputs
,
true
);
LOG
(
INFO
)
<<
"--- INT8 prediction start ---"
;
auto
*
qcfg
=
reinterpret_cast
<
const
PaddlePredictor
::
Config
*>
(
qconfig
);
PrintConfig
(
qcfg
,
true
);
std
::
vector
<
std
::
vector
<
PaddleTensor
>>
quantized_outputs
;
TestOneThreadPrediction
(
qcfg
,
inputs
,
&
quantized_outputs
,
true
);
LOG
(
INFO
)
<<
"--- comparing outputs --- "
;
CompareTopAccuracy
(
quantized_outputs
,
analysis_outputs
);
}
...
...
@@ -578,9 +615,9 @@ static bool CompareTensorData(const framework::LoDTensor &a,
const
framework
::
LoDTensor
&
b
)
{
auto
a_shape
=
framework
::
vectorize
(
a
.
dims
());
auto
b_shape
=
framework
::
vectorize
(
b
.
dims
());
size_t
a_size
=
std
::
accumulate
(
a_shape
.
begin
(),
a_shape
.
end
(),
1
,
size_t
a_size
=
std
::
accumulate
(
a_shape
.
begin
(),
a_shape
.
end
(),
size_t
{
1
}
,
[](
int
a
,
int
b
)
{
return
a
*
b
;
});
size_t
b_size
=
std
::
accumulate
(
b_shape
.
begin
(),
b_shape
.
end
(),
1
,
size_t
b_size
=
std
::
accumulate
(
b_shape
.
begin
(),
b_shape
.
end
(),
size_t
{
1
}
,
[](
int
a
,
int
b
)
{
return
a
*
b
;
});
if
(
a_size
!=
b_size
)
{
LOG
(
ERROR
)
<<
string
::
Sprintf
(
"tensor data size not match, %d != %d"
,
...
...
paddle/fluid/inference/tests/api/trt_models_tester.cc
浏览文件 @
2e0b8713
...
...
@@ -74,7 +74,7 @@ void profile(std::string model_dir, bool use_analysis, bool use_tensorrt) {
SetFakeImageInput
(
&
inputs_all
,
model_dir
,
false
,
"__model__"
,
""
);
}
std
::
vector
<
PaddleTensor
>
outputs
;
std
::
vector
<
std
::
vector
<
PaddleTensor
>
>
outputs
;
if
(
use_analysis
||
use_tensorrt
)
{
AnalysisConfig
config
;
config
.
EnableUseGpu
(
100
,
0
);
...
...
paddle/fluid/op_use_default_grad_op_maker.spec
浏览文件 @
2e0b8713
...
...
@@ -8,9 +8,6 @@ conv_shift
cos
cos_sim
dequantize
elementwise_div
elementwise_max
elementwise_min
elu
fc
flatten
...
...
@@ -28,8 +25,6 @@ gelu
gru
hard_shrink
hierarchical_sigmoid
hinge_loss
huber_loss
leaky_relu
log
logsigmoid
...
...
@@ -57,7 +52,6 @@ requantize
reshape
rnn_memory_helper
round
row_conv
sequence_softmax
sin
softplus
...
...
paddle/fluid/operators/batch_size_like.h
浏览文件 @
2e0b8713
...
...
@@ -74,5 +74,8 @@ class BatchSizeLikeOpMaker : public framework::OpProtoAndCheckerMaker {
virtual
void
Apply
()
=
0
;
};
DECLARE_NO_NEED_BUFFER_VARS_INFERENCE
(
BatchSizeLikeNoNeedBufferVarsInference
,
"Input"
);
}
// namespace operators
}
// namespace paddle
paddle/fluid/operators/controlflow/conditional_block_op.cc
浏览文件 @
2e0b8713
...
...
@@ -12,6 +12,9 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include <algorithm>
#include <memory>
#include <string>
#include <vector>
#include "paddle/fluid/framework/executor.h"
#include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/framework/var_type.h"
...
...
@@ -174,24 +177,41 @@ class ConditionalBlockGradOp : public ConditionalOp {
framework
::
Executor
exec
(
dev_place
);
auto
*
block
=
Attr
<
framework
::
BlockDesc
*>
(
"sub_block"
);
exec
.
Run
(
*
block
->
Program
(),
&
cur_scope
,
block
->
ID
(),
false
);
AssignLocalGradientToGlobal
(
dev_place
,
cur_scope
,
Inputs
(
"Input"
),
Outputs
(
framework
::
GradVarName
(
"Input"
)));
const
auto
&
ins
=
Inputs
(
"Input"
);
const
auto
&
d_ins
=
Outputs
(
framework
::
GradVarName
(
"Input"
));
const
auto
&
conds
=
Inputs
(
"Cond"
);
const
auto
&
d_conds
=
Outputs
(
framework
::
GradVarName
(
"Cond"
));
std
::
vector
<
std
::
string
>
ins_conds_grads
;
ins_conds_grads
.
reserve
(
ins
.
size
()
+
conds
.
size
());
for
(
auto
&
in
:
ins
)
{
ins_conds_grads
.
emplace_back
(
framework
::
GradVarName
(
in
));
}
for
(
auto
&
cond
:
conds
)
{
ins_conds_grads
.
emplace_back
(
framework
::
GradVarName
(
cond
));
}
exec
.
Run
(
*
block
->
Program
(),
&
cur_scope
,
block
->
ID
(),
false
,
true
,
ins_conds_grads
);
AssignLocalGradientToGlobal
(
dev_place
,
cur_scope
,
ins_conds_grads
.
data
(),
ins
.
size
(),
d_ins
);
AssignLocalGradientToGlobal
(
dev_place
,
cur_scope
,
Inputs
(
"Cond"
),
Outputs
(
framework
::
GradVarName
(
"Cond"
)));
AssignLocalGradientToGlobal
(
dev_place
,
cur_scope
,
ins_conds_grads
.
data
()
+
ins
.
size
(),
conds
.
size
(),
d_conds
);
}
}
private:
void
AssignLocalGradientToGlobal
(
const
platform
::
Place
&
place
,
const
framework
::
Scope
&
cur_scope
,
const
std
::
vector
<
std
::
string
>
&
p_names
,
const
std
::
string
*
p_grad_names
,
size_t
p_grad_names_num
,
const
std
::
vector
<
std
::
string
>
&
pg_names
)
const
{
for
(
size_t
i
=
0
;
i
<
p_
names
.
size
()
;
++
i
)
{
for
(
size_t
i
=
0
;
i
<
p_
grad_names_num
;
++
i
)
{
auto
out_grad_name
=
pg_names
[
i
];
auto
in_grad_name
=
framework
::
GradVarName
(
p_names
[
i
])
;
const
auto
&
in_grad_name
=
p_grad_names
[
i
]
;
auto
*
in_var
=
cur_scope
.
FindVar
(
in_grad_name
);
if
(
in_var
==
nullptr
)
{
continue
;
...
...
paddle/fluid/operators/elementwise/elementwise_div_op.cc
浏览文件 @
2e0b8713
...
...
@@ -13,10 +13,47 @@ See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/fluid/operators/elementwise/elementwise_div_op.h"
#include <memory>
#include <string>
#include "paddle/fluid/operators/elementwise/elementwise_op.h"
namespace
paddle
{
namespace
operators
{
class
ElementwiseDivOpMaker
:
public
ElementwiseOpMaker
{
protected:
std
::
string
GetName
()
const
override
{
return
"Div"
;
}
std
::
string
GetEquation
()
const
override
{
return
"Out = X / Y"
;
}
};
class
ElementwiseDivGradOpDescMaker
:
public
framework
::
SingleGradOpDescMaker
{
public:
using
framework
::
SingleGradOpDescMaker
::
SingleGradOpDescMaker
;
protected:
std
::
unique_ptr
<
framework
::
OpDesc
>
Apply
()
const
override
{
std
::
unique_ptr
<
framework
::
OpDesc
>
op
(
new
framework
::
OpDesc
());
op
->
SetType
(
"elementwise_div_grad"
);
op
->
SetInput
(
"Y"
,
Input
(
"Y"
));
op
->
SetInput
(
"Out"
,
Output
(
"Out"
));
op
->
SetInput
(
framework
::
GradVarName
(
"Out"
),
OutputGrad
(
"Out"
));
op
->
SetOutput
(
framework
::
GradVarName
(
"X"
),
InputGrad
(
"X"
));
op
->
SetOutput
(
framework
::
GradVarName
(
"Y"
),
InputGrad
(
"Y"
));
op
->
SetAttrMap
(
Attrs
());
return
op
;
}
};
}
// namespace operators
}
// namespace paddle
namespace
ops
=
paddle
::
operators
;
REGISTER_ELEMWISE_OP
(
elementwise_div
,
"Div"
,
"Out = X / Y"
);
REGISTER_OPERATOR
(
elementwise_div
,
ops
::
ElementwiseOp
,
ops
::
ElementwiseDivOpMaker
,
ops
::
ElementwiseOpInferVarType
,
ops
::
ElementwiseDivGradOpDescMaker
);
REGISTER_OPERATOR
(
elementwise_div_grad
,
ops
::
ElementwiseOpGrad
);
REGISTER_OP_CPU_KERNEL
(
elementwise_div
,
...
...
paddle/fluid/operators/elementwise/elementwise_div_op.h
浏览文件 @
2e0b8713
...
...
@@ -47,7 +47,7 @@ struct DivGradDX {
template
<
typename
T
>
struct
DivGradDY
{
HOSTDEVICE
T
operator
()(
T
x
,
T
y
,
T
out
,
T
dout
)
const
{
return
-
dout
*
x
/
(
y
*
y
)
;
return
-
dout
*
out
/
y
;
}
};
...
...
@@ -58,13 +58,15 @@ class ElementwiseDivGradKernel : public ElemwiseGradKernel<T> {
ElemwiseGradKernel
<
T
>::
Compute
(
ctx
);
using
Tensor
=
framework
::
Tensor
;
auto
*
x
=
ctx
.
Input
<
Tensor
>
(
"X"
);
auto
*
y
=
ctx
.
Input
<
Tensor
>
(
"Y"
);
auto
*
out
=
ctx
.
Input
<
Tensor
>
(
"Out"
);
auto
*
dout
=
ctx
.
Input
<
Tensor
>
(
framework
::
GradVarName
(
"Out"
));
auto
*
dx
=
ctx
.
Output
<
Tensor
>
(
framework
::
GradVarName
(
"X"
));
auto
*
dy
=
ctx
.
Output
<
Tensor
>
(
framework
::
GradVarName
(
"Y"
));
int
axis
=
ctx
.
Attr
<
int
>
(
"axis"
);
auto
*
x
=
dout
;
// Fake x, not used
ElemwiseGradCompute
<
DeviceContext
,
T
,
DivGradDX
<
T
>
,
DivGradDY
<
T
>>
(
ctx
,
*
x
,
*
y
,
*
out
,
*
dout
,
axis
,
dx
,
dy
,
DivGradDX
<
T
>
(),
DivGradDY
<
T
>
());
}
...
...
paddle/fluid/operators/elementwise/elementwise_max_op.cc
浏览文件 @
2e0b8713
...
...
@@ -13,9 +13,48 @@ See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/fluid/operators/elementwise/elementwise_max_op.h"
#include <memory>
#include <string>
#include "paddle/fluid/operators/elementwise/elementwise_op.h"
namespace
paddle
{
namespace
operators
{
class
ElementwiseMaxOpMaker
:
public
ElementwiseOpMaker
{
protected:
std
::
string
GetName
()
const
override
{
return
"Max"
;
}
std
::
string
GetEquation
()
const
override
{
return
"Out = max(X, Y)"
;
}
};
class
ElementwiseMaxGradOpDescMaker
:
public
framework
::
SingleGradOpDescMaker
{
public:
using
framework
::
SingleGradOpDescMaker
::
SingleGradOpDescMaker
;
protected:
std
::
unique_ptr
<
framework
::
OpDesc
>
Apply
()
const
override
{
std
::
unique_ptr
<
framework
::
OpDesc
>
op
(
new
framework
::
OpDesc
());
op
->
SetType
(
"elementwise_max_grad"
);
op
->
SetInput
(
"X"
,
Input
(
"X"
));
op
->
SetInput
(
"Y"
,
Input
(
"Y"
));
op
->
SetInput
(
framework
::
GradVarName
(
"Out"
),
OutputGrad
(
"Out"
));
op
->
SetOutput
(
framework
::
GradVarName
(
"X"
),
InputGrad
(
"X"
));
op
->
SetOutput
(
framework
::
GradVarName
(
"Y"
),
InputGrad
(
"Y"
));
op
->
SetAttrMap
(
Attrs
());
return
op
;
}
};
}
// namespace operators
}
// namespace paddle
namespace
ops
=
paddle
::
operators
;
REGISTER_ELEMWISE_OP
(
elementwise_max
,
"Max"
,
"Out = max(X, Y)"
);
REGISTER_OPERATOR
(
elementwise_max
,
ops
::
ElementwiseOp
,
ops
::
ElementwiseMaxOpMaker
,
ops
::
ElementwiseOpInferVarType
,
ops
::
ElementwiseMaxGradOpDescMaker
);
REGISTER_OPERATOR
(
elementwise_max_grad
,
ops
::
ElementwiseOpGrad
);
REGISTER_OP_CPU_KERNEL
(
elementwise_max
,
ops
::
ElementwiseMaxKernel
<
paddle
::
platform
::
CPUDeviceContext
,
float
>
,
...
...
paddle/fluid/operators/elementwise/elementwise_max_op.h
浏览文件 @
2e0b8713
...
...
@@ -63,10 +63,10 @@ class ElementwiseMaxGradKernel : public ElemwiseGradKernel<T> {
auto
*
x
=
ctx
.
Input
<
Tensor
>
(
"X"
);
auto
*
y
=
ctx
.
Input
<
Tensor
>
(
"Y"
);
auto
*
out
=
ctx
.
Input
<
Tensor
>
(
"Out"
);
auto
*
dout
=
ctx
.
Input
<
Tensor
>
(
framework
::
GradVarName
(
"Out"
));
auto
*
dx
=
ctx
.
Output
<
Tensor
>
(
framework
::
GradVarName
(
"X"
));
auto
*
dy
=
ctx
.
Output
<
Tensor
>
(
framework
::
GradVarName
(
"Y"
));
auto
*
out
=
dout
;
// Fake out, not used
int
axis
=
ctx
.
Attr
<
int
>
(
"axis"
);
ElemwiseGradCompute
<
DeviceContext
,
T
,
MaxGradDx
<
T
>
,
MaxGradDy
<
T
>>
(
ctx
,
*
x
,
*
y
,
*
out
,
*
dout
,
axis
,
dx
,
dy
,
MaxGradDx
<
T
>
(),
MaxGradDy
<
T
>
());
...
...
paddle/fluid/operators/elementwise/elementwise_min_op.cc
浏览文件 @
2e0b8713
...
...
@@ -13,9 +13,48 @@ See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/fluid/operators/elementwise/elementwise_min_op.h"
#include <memory>
#include <string>
#include "paddle/fluid/operators/elementwise/elementwise_op.h"
namespace
paddle
{
namespace
operators
{
class
ElementwiseMinOpMaker
:
public
ElementwiseOpMaker
{
protected:
std
::
string
GetName
()
const
override
{
return
"Min"
;
}
std
::
string
GetEquation
()
const
override
{
return
"Out = min(X, Y)"
;
}
};
class
ElementwiseMinGradOpDescMaker
:
public
framework
::
SingleGradOpDescMaker
{
public:
using
framework
::
SingleGradOpDescMaker
::
SingleGradOpDescMaker
;
protected:
std
::
unique_ptr
<
framework
::
OpDesc
>
Apply
()
const
override
{
std
::
unique_ptr
<
framework
::
OpDesc
>
op
(
new
framework
::
OpDesc
());
op
->
SetType
(
"elementwise_min_grad"
);
op
->
SetInput
(
"X"
,
Input
(
"X"
));
op
->
SetInput
(
"Y"
,
Input
(
"Y"
));
op
->
SetInput
(
framework
::
GradVarName
(
"Out"
),
OutputGrad
(
"Out"
));
op
->
SetOutput
(
framework
::
GradVarName
(
"X"
),
InputGrad
(
"X"
));
op
->
SetOutput
(
framework
::
GradVarName
(
"Y"
),
InputGrad
(
"Y"
));
op
->
SetAttrMap
(
Attrs
());
return
op
;
}
};
}
// namespace operators
}
// namespace paddle
namespace
ops
=
paddle
::
operators
;
REGISTER_ELEMWISE_OP
(
elementwise_min
,
"Min"
,
"Out = min(X, Y)"
);
REGISTER_OPERATOR
(
elementwise_min
,
ops
::
ElementwiseOp
,
ops
::
ElementwiseMinOpMaker
,
ops
::
ElementwiseOpInferVarType
,
ops
::
ElementwiseMinGradOpDescMaker
);
REGISTER_OPERATOR
(
elementwise_min_grad
,
ops
::
ElementwiseOpGrad
);
REGISTER_OP_CPU_KERNEL
(
elementwise_min
,
ops
::
ElementwiseMinKernel
<
paddle
::
platform
::
CPUDeviceContext
,
float
>
,
...
...
paddle/fluid/operators/elementwise/elementwise_min_op.h
浏览文件 @
2e0b8713
...
...
@@ -62,10 +62,10 @@ class ElementwiseMinGradKernel : public ElemwiseGradKernel<T> {
auto
*
x
=
ctx
.
Input
<
Tensor
>
(
"X"
);
auto
*
y
=
ctx
.
Input
<
Tensor
>
(
"Y"
);
auto
*
out
=
ctx
.
Input
<
Tensor
>
(
"Out"
);
auto
*
dout
=
ctx
.
Input
<
Tensor
>
(
framework
::
GradVarName
(
"Out"
));
auto
*
dx
=
ctx
.
Output
<
Tensor
>
(
framework
::
GradVarName
(
"X"
));
auto
*
dy
=
ctx
.
Output
<
Tensor
>
(
framework
::
GradVarName
(
"Y"
));
auto
*
out
=
dout
;
// Fake out, not used
int
axis
=
ctx
.
Attr
<
int
>
(
"axis"
);
ElemwiseGradCompute
<
DeviceContext
,
T
,
MinGradDx
<
T
>
,
MinGradDy
<
T
>>
(
ctx
,
*
x
,
*
y
,
*
out
,
*
dout
,
axis
,
dx
,
dy
,
MinGradDx
<
T
>
(),
MinGradDy
<
T
>
());
...
...
paddle/fluid/operators/elementwise/elementwise_op.h
浏览文件 @
2e0b8713
...
...
@@ -173,12 +173,12 @@ class ElementwiseOpGrad : public framework::OperatorWithKernel {
using
Tensor
=
framework
::
Tensor
;
void
InferShape
(
framework
::
InferShapeContext
*
ctx
)
const
override
{
PADDLE_ENFORCE
(
ctx
->
HasInput
(
"X"
),
"Input(X) should not be null
"
);
auto
out_grad_name
=
framework
::
GradVarName
(
"Out
"
);
PADDLE_ENFORCE
(
ctx
->
HasInput
(
"Y"
),
"Input(Y) should not be null"
);
PADDLE_ENFORCE
(
ctx
->
HasInput
(
framework
::
GradVarName
(
"Out"
)
),
PADDLE_ENFORCE
(
ctx
->
HasInput
(
out_grad_name
),
"Input(Out@GRAD) should not be null"
);
auto
x_dims
=
ctx
->
GetInputDim
(
"X"
);
auto
x_dims
=
ctx
->
GetInputDim
(
out_grad_name
);
auto
y_dims
=
ctx
->
GetInputDim
(
"Y"
);
PADDLE_ENFORCE_GE
(
x_dims
.
size
(),
y_dims
.
size
(),
...
...
@@ -187,8 +187,8 @@ class ElementwiseOpGrad : public framework::OperatorWithKernel {
auto
x_grad_name
=
framework
::
GradVarName
(
"X"
);
auto
y_grad_name
=
framework
::
GradVarName
(
"Y"
);
if
(
ctx
->
HasOutput
(
x_grad_name
))
{
ctx
->
ShareDim
(
"X"
,
/*->*/
x_grad_name
);
ctx
->
ShareLoD
(
"X"
,
/*->*/
x_grad_name
);
ctx
->
ShareDim
(
out_grad_name
,
/*->*/
x_grad_name
);
ctx
->
ShareLoD
(
out_grad_name
,
/*->*/
x_grad_name
);
}
if
(
ctx
->
HasOutput
(
y_grad_name
))
{
ctx
->
ShareDim
(
"Y"
,
/*->*/
y_grad_name
);
...
...
paddle/fluid/operators/fill_constant_batch_size_like_op.cc
浏览文件 @
2e0b8713
...
...
@@ -46,6 +46,7 @@ obtained from the `input` tensor.
)DOC"
);
}
};
}
// namespace operators
}
// namespace paddle
...
...
@@ -53,7 +54,8 @@ namespace ops = paddle::operators;
REGISTER_OPERATOR
(
fill_constant_batch_size_like
,
ops
::
FillConstantBatchSizeLikeOp
,
paddle
::
framework
::
EmptyGradOpMaker
,
ops
::
FillConstantBatchSizeLikeOpMaker
);
ops
::
FillConstantBatchSizeLikeOpMaker
,
ops
::
BatchSizeLikeNoNeedBufferVarsInference
);
REGISTER_OP_CPU_KERNEL
(
fill_constant_batch_size_like
,
ops
::
FillConstantBatchSizeLikeOpKernel
<
paddle
::
platform
::
CPUDeviceContext
,
...
...
paddle/fluid/operators/fill_zeros_like_op.cc
浏览文件 @
2e0b8713
...
...
@@ -36,6 +36,7 @@ class FillZerosLikeOpMaker : public framework::OpProtoAndCheckerMaker {
void
Make
()
override
{
AddInput
(
"X"
,
"The input of fill-zeros-like op."
);
AddOutput
(
"Out"
,
"The variable will be filled up with zeros."
);
ExtraMake
();
AddComment
(
R"DOC(
FillZerosLike Operator.
...
...
@@ -44,13 +45,49 @@ The output will have the same size as the input.
)DOC"
);
}
protected:
virtual
void
ExtraMake
()
{}
};
class
FillZerosLikeOp2
:
public
FillZerosLikeOp
{
public:
using
FillZerosLikeOp
::
FillZerosLikeOp
;
protected:
framework
::
OpKernelType
GetExpectedKernelType
(
const
framework
::
ExecutionContext
&
ctx
)
const
override
{
return
framework
::
OpKernelType
(
static_cast
<
framework
::
proto
::
VarType
::
Type
>
(
ctx
.
Attr
<
int
>
(
"dtype"
)),
ctx
.
GetPlace
());
}
};
class
FillZerosLikeOp2Maker
:
public
FillZerosLikeOpMaker
{
protected:
void
ExtraMake
()
override
{
this
->
AddAttr
<
int
>
(
"dtype"
,
"(int, default 5(FP32)) "
"Output data type."
)
.
SetDefault
(
framework
::
proto
::
VarType
::
FP32
);
}
};
DECLARE_NO_NEED_BUFFER_VARS_INFERENCE
(
FillZerosLikeOp2NoNeedBufferVarsInference
,
"X"
);
}
// namespace operators
}
// namespace paddle
namespace
ops
=
paddle
::
operators
;
REGISTER_OP_WITHOUT_GRADIENT
(
fill_zeros_like
,
ops
::
FillZerosLikeOp
,
ops
::
FillZerosLikeOpMaker
);
REGISTER_OPERATOR
(
fill_zeros_like2
,
ops
::
FillZerosLikeOp2
,
ops
::
FillZerosLikeOp2Maker
,
ops
::
FillZerosLikeOp2NoNeedBufferVarsInference
,
paddle
::
framework
::
EmptyGradOpMaker
);
REGISTER_OP_CPU_KERNEL
(
fill_zeros_like
,
ops
::
FillZerosLikeKernel
<
paddle
::
platform
::
CPUDeviceContext
,
int
>
,
...
...
@@ -58,3 +95,11 @@ REGISTER_OP_CPU_KERNEL(
ops
::
FillZerosLikeKernel
<
paddle
::
platform
::
CPUDeviceContext
,
float
>
,
ops
::
FillZerosLikeKernel
<
paddle
::
platform
::
CPUDeviceContext
,
double
>
,
ops
::
FillZerosLikeKernel
<
paddle
::
platform
::
CPUDeviceContext
,
bool
>
);
REGISTER_OP_CPU_KERNEL
(
fill_zeros_like2
,
ops
::
FillZerosLikeKernel
<
paddle
::
platform
::
CPUDeviceContext
,
int
>
,
ops
::
FillZerosLikeKernel
<
paddle
::
platform
::
CPUDeviceContext
,
int64_t
>
,
ops
::
FillZerosLikeKernel
<
paddle
::
platform
::
CPUDeviceContext
,
float
>
,
ops
::
FillZerosLikeKernel
<
paddle
::
platform
::
CPUDeviceContext
,
double
>
,
ops
::
FillZerosLikeKernel
<
paddle
::
platform
::
CPUDeviceContext
,
bool
>
);
paddle/fluid/operators/fill_zeros_like_op.cu.cc
浏览文件 @
2e0b8713
...
...
@@ -26,3 +26,13 @@ REGISTER_OP_CUDA_KERNEL(
ops
::
FillZerosLikeKernel
<
paddle
::
platform
::
CUDADeviceContext
,
paddle
::
platform
::
float16
>
,
ops
::
FillZerosLikeKernel
<
paddle
::
platform
::
CUDADeviceContext
,
bool
>
);
REGISTER_OP_CUDA_KERNEL
(
fill_zeros_like2
,
ops
::
FillZerosLikeKernel
<
paddle
::
platform
::
CUDADeviceContext
,
int
>
,
ops
::
FillZerosLikeKernel
<
paddle
::
platform
::
CUDADeviceContext
,
int64_t
>
,
ops
::
FillZerosLikeKernel
<
paddle
::
platform
::
CUDADeviceContext
,
float
>
,
ops
::
FillZerosLikeKernel
<
paddle
::
platform
::
CUDADeviceContext
,
double
>
,
ops
::
FillZerosLikeKernel
<
paddle
::
platform
::
CUDADeviceContext
,
paddle
::
platform
::
float16
>
,
ops
::
FillZerosLikeKernel
<
paddle
::
platform
::
CUDADeviceContext
,
bool
>
);
paddle/fluid/operators/gaussian_random_batch_size_like_op.cc
浏览文件 @
2e0b8713
...
...
@@ -65,17 +65,13 @@ by input arguments.
}
};
DECLARE_NO_NEED_BUFFER_VARS_INFERENCE
(
GaussianRandomBatchSizeLikeNoNeedBufferVarsInference
,
"Input"
);
}
// namespace operators
}
// namespace paddle
REGISTER_OPERATOR
(
gaussian_random_batch_size_like
,
paddle
::
operators
::
GaussianRandomBatchSizeLikeOp
,
paddle
::
operators
::
GaussianRandomBatchSizeLikeOpMaker
,
paddle
::
framework
::
EmptyGradOpMaker
,
paddle
::
operators
::
GaussianRandomBatchSizeLikeNoNeedBufferVarsInference
);
REGISTER_OPERATOR
(
gaussian_random_batch_size_like
,
paddle
::
operators
::
GaussianRandomBatchSizeLikeOp
,
paddle
::
operators
::
GaussianRandomBatchSizeLikeOpMaker
,
paddle
::
framework
::
EmptyGradOpMaker
,
paddle
::
operators
::
BatchSizeLikeNoNeedBufferVarsInference
);
// Kernels are registered in gaussian_random_op.cc and gaussian_random_op.cu
paddle/fluid/operators/hinge_loss_op.cc
浏览文件 @
2e0b8713
...
...
@@ -13,6 +13,9 @@ See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/fluid/operators/hinge_loss_op.h"
#include <memory>
#include <string>
#include <vector>
namespace
paddle
{
namespace
operators
{
...
...
@@ -97,12 +100,29 @@ class HingeLossGradOp : public framework::OperatorWithKernel {
}
};
class
HingeLossGradOpDescMaker
:
public
framework
::
SingleGradOpDescMaker
{
public:
using
framework
::
SingleGradOpDescMaker
::
SingleGradOpDescMaker
;
protected:
std
::
unique_ptr
<
framework
::
OpDesc
>
Apply
()
const
override
{
std
::
unique_ptr
<
framework
::
OpDesc
>
op
(
new
framework
::
OpDesc
());
op
->
SetType
(
"hinge_loss_grad"
);
op
->
SetInput
(
"Logits"
,
Input
(
"Logits"
));
op
->
SetInput
(
"Labels"
,
Input
(
"Labels"
));
op
->
SetInput
(
framework
::
GradVarName
(
"Loss"
),
OutputGrad
(
"Loss"
));
op
->
SetOutput
(
framework
::
GradVarName
(
"Logits"
),
InputGrad
(
"Logits"
));
op
->
SetAttrMap
(
Attrs
());
return
op
;
}
};
}
// namespace operators
}
// namespace paddle
namespace
ops
=
paddle
::
operators
;
REGISTER_OPERATOR
(
hinge_loss
,
ops
::
HingeLossOp
,
ops
::
HingeLossOpMaker
<
float
>
,
paddle
::
framework
::
DefaultGradOpDescMaker
<
true
>
);
ops
::
HingeLossGradOpDescMaker
);
REGISTER_OPERATOR
(
hinge_loss_grad
,
ops
::
HingeLossGradOp
);
REGISTER_OP_CPU_KERNEL
(
hinge_loss
,
...
...
paddle/fluid/operators/huber_loss_op.cc
浏览文件 @
2e0b8713
...
...
@@ -13,6 +13,9 @@ See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/fluid/operators/huber_loss_op.h"
#include <memory>
#include <string>
#include <vector>
namespace
paddle
{
namespace
operators
{
...
...
@@ -90,38 +93,45 @@ class HuberLossGradOp : public framework::OperatorWithKernel {
using
framework
::
OperatorWithKernel
::
OperatorWithKernel
;
void
InferShape
(
framework
::
InferShapeContext
*
ctx
)
const
override
{
PADDLE_ENFORCE
(
ctx
->
HasInput
(
"X"
),
"Input(X) should not be null."
);
PADDLE_ENFORCE
(
ctx
->
HasInput
(
"Y"
),
"Input(Y) should not be null."
);
PADDLE_ENFORCE
(
ctx
->
HasInput
(
"Residual"
),
"Input(Residual) should not be null."
);
PADDLE_ENFORCE
(
ctx
->
HasInput
(
framework
::
GradVarName
(
"Out"
)),
"Input(Out@GRAD) should not be null."
);
auto
x_dims
=
ctx
->
GetInputDim
(
"X"
);
auto
y_dims
=
ctx
->
GetInputDim
(
"Y"
);
auto
residual_dims
=
ctx
->
GetInputDim
(
"Residual"
);
auto
out_grad_dims
=
ctx
->
GetInputDim
(
framework
::
GradVarName
(
"Out"
));
PADDLE_ENFORCE_EQ
(
residual_dims
,
x_dims
);
PADDLE_ENFORCE_EQ
(
out_grad_dims
,
x_dims
);
auto
x_grad_name
=
framework
::
GradVarName
(
"X"
);
auto
y_grad_name
=
framework
::
GradVarName
(
"Y"
);
if
(
ctx
->
HasOutput
(
x_grad_name
))
{
ctx
->
SetOutputDim
(
x_grad_name
,
x
_dims
);
ctx
->
SetOutputDim
(
x_grad_name
,
residual
_dims
);
}
if
(
ctx
->
HasOutput
(
y_grad_name
))
{
ctx
->
SetOutputDim
(
y_grad_name
,
y
_dims
);
ctx
->
SetOutputDim
(
y_grad_name
,
residual
_dims
);
}
}
};
class
HuberLossGradOpDescMaker
:
public
framework
::
SingleGradOpDescMaker
{
public:
using
framework
::
SingleGradOpDescMaker
::
SingleGradOpDescMaker
;
protected:
std
::
unique_ptr
<
framework
::
OpDesc
>
Apply
()
const
override
{
std
::
unique_ptr
<
framework
::
OpDesc
>
op
(
new
framework
::
OpDesc
());
op
->
SetType
(
"huber_loss_grad"
);
op
->
SetInput
(
"Residual"
,
Output
(
"Residual"
));
op
->
SetInput
(
framework
::
GradVarName
(
"Out"
),
OutputGrad
(
"Out"
));
op
->
SetOutput
(
framework
::
GradVarName
(
"X"
),
InputGrad
(
"X"
));
op
->
SetOutput
(
framework
::
GradVarName
(
"Y"
),
InputGrad
(
"Y"
));
op
->
SetAttrMap
(
Attrs
());
return
op
;
}
};
}
// namespace operators
}
// namespace paddle
namespace
ops
=
paddle
::
operators
;
REGISTER_OPERATOR
(
huber_loss
,
ops
::
HuberLossOp
,
ops
::
HuberLossOpMaker
<
float
>
,
paddle
::
framework
::
DefaultGradOpDescMaker
<
true
>
);
ops
::
HuberLossGradOpDescMaker
);
REGISTER_OPERATOR
(
huber_loss_grad
,
ops
::
HuberLossGradOp
);
REGISTER_OP_CPU_KERNEL
(
huber_loss
,
ops
::
HuberLossKernel
<
paddle
::
platform
::
CPUDeviceContext
,
float
>
,
...
...
paddle/fluid/operators/load_op.cc
浏览文件 @
2e0b8713
...
...
@@ -29,7 +29,7 @@ class LoadOp : public framework::OperatorWithKernel {
framework
::
OpKernelType
GetExpectedKernelType
(
const
framework
::
ExecutionContext
&
ctx
)
const
override
{
framework
::
OpKernelType
kt
=
framework
::
OpKernelType
(
framework
::
proto
::
VarType
::
FP32
,
platform
::
CPU
Place
());
framework
::
proto
::
VarType
::
FP32
,
ctx
.
Get
Place
());
return
kt
;
}
};
...
...
paddle/fluid/operators/pixel_shuffle_op.cc
0 → 100644
浏览文件 @
2e0b8713
/*Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/fluid/operators/pixel_shuffle_op.h"
#include <memory>
namespace
paddle
{
namespace
operators
{
class
PixelShuffleOp
:
public
framework
::
OperatorWithKernel
{
public:
using
framework
::
OperatorWithKernel
::
OperatorWithKernel
;
void
InferShape
(
framework
::
InferShapeContext
*
ctx
)
const
override
{
PADDLE_ENFORCE
(
ctx
->
HasInput
(
"X"
),
"Input(X) of PixelShuffleOp should not be null."
);
PADDLE_ENFORCE
(
ctx
->
HasOutput
(
"Out"
),
"Output(Out) of PixelShuffleOp should not be null."
);
auto
input_dims
=
ctx
->
GetInputDim
(
"X"
);
PADDLE_ENFORCE
(
input_dims
.
size
()
==
4
,
"The layout of input is NCHW."
);
auto
upscale_factor
=
ctx
->
Attrs
().
Get
<
int
>
(
"upscale_factor"
);
PADDLE_ENFORCE
(
input_dims
[
1
]
%
(
upscale_factor
*
upscale_factor
)
==
0
,
"Upscale_factor should devide the number of channel"
);
auto
output_dims
=
input_dims
;
output_dims
[
0
]
=
input_dims
[
0
];
output_dims
[
1
]
=
input_dims
[
1
]
/
(
upscale_factor
*
upscale_factor
);
output_dims
[
2
]
=
input_dims
[
2
]
*
upscale_factor
;
output_dims
[
3
]
=
input_dims
[
3
]
*
upscale_factor
;
ctx
->
SetOutputDim
(
"Out"
,
output_dims
);
}
};
class
PixelShuffleOpMaker
:
public
framework
::
OpProtoAndCheckerMaker
{
public:
void
Make
()
override
{
AddInput
(
"X"
,
"(Tensor, default Tensor<float>), "
"the input feature data of PixelShuffleOp, the layout is [N C H W]."
);
AddOutput
(
"Out"
,
"(Tensor, default Tensor<float>), the output of "
"PixelShuffleOp. The layout is [N,C/factor^2,H*factor,W*factor]."
);
AddAttr
<
int
>
(
"upscale_factor"
,
"the factor to increase spatial resolution by."
)
.
SetDefault
(
1
)
.
AddCustomChecker
([](
const
int
&
upscale_factor
)
{
PADDLE_ENFORCE_GE
(
upscale_factor
,
1
,
"upscale_factor should be larger than 0."
);
});
AddComment
(
R"DOC(
Pixel Shuffle operator
This operator rearranges elements in a tensor of shape :math:`(*, C \times r^2, H, W)`
to a tensor of shape :math:`(C, H \times r, W \times r)`.
This is useful for implementing efficient sub-pixel convolution
with a stride of :math:`1/r`.
Please refer to the paper:
`Real-Time Single Image and Video Super-Resolution Using an Efficient
Sub-Pixel Convolutional Neural Network <https://arxiv.org/abs/1609.05158v2>`_
by Shi et. al (2016) for more details.
)DOC"
);
}
};
class
PixelShuffleGradMaker
:
public
framework
::
SingleGradOpDescMaker
{
public:
using
framework
::
SingleGradOpDescMaker
::
SingleGradOpDescMaker
;
std
::
unique_ptr
<
framework
::
OpDesc
>
Apply
()
const
override
{
auto
*
op
=
new
framework
::
OpDesc
();
op
->
SetType
(
"pixel_shuffle_grad"
);
op
->
SetInput
(
framework
::
GradVarName
(
"Out"
),
OutputGrad
(
"Out"
));
op
->
SetAttrMap
(
Attrs
());
op
->
SetOutput
(
framework
::
GradVarName
(
"X"
),
InputGrad
(
"X"
));
return
std
::
unique_ptr
<
framework
::
OpDesc
>
(
op
);
}
};
class
PixelShuffleGradOp
:
public
framework
::
OperatorWithKernel
{
public:
using
framework
::
OperatorWithKernel
::
OperatorWithKernel
;
void
InferShape
(
framework
::
InferShapeContext
*
ctx
)
const
override
{
PADDLE_ENFORCE
(
ctx
->
HasInput
(
framework
::
GradVarName
(
"Out"
)),
"Input(Out@Grad) should not be null"
);
PADDLE_ENFORCE
(
ctx
->
HasOutput
(
framework
::
GradVarName
(
"X"
)),
"Output(X@Grad) should not be null"
);
auto
do_dims
=
ctx
->
GetInputDim
(
framework
::
GradVarName
(
"Out"
));
PADDLE_ENFORCE
(
do_dims
.
size
()
==
4
,
"The layout of input is NCHW."
);
auto
upscale_factor
=
ctx
->
Attrs
().
Get
<
int
>
(
"upscale_factor"
);
auto
dx_dims
=
do_dims
;
dx_dims
[
0
]
=
do_dims
[
0
];
dx_dims
[
1
]
=
do_dims
[
1
]
*
(
upscale_factor
*
upscale_factor
);
dx_dims
[
2
]
=
do_dims
[
2
]
/
upscale_factor
;
dx_dims
[
3
]
=
do_dims
[
3
]
/
upscale_factor
;
ctx
->
SetOutputDim
(
framework
::
GradVarName
(
"X"
),
dx_dims
);
}
};
}
// namespace operators
}
// namespace paddle
namespace
ops
=
paddle
::
operators
;
REGISTER_OPERATOR
(
pixel_shuffle
,
ops
::
PixelShuffleOp
,
ops
::
PixelShuffleOpMaker
,
ops
::
PixelShuffleGradMaker
);
REGISTER_OPERATOR
(
pixel_shuffle_grad
,
ops
::
PixelShuffleGradOp
);
REGISTER_OP_CPU_KERNEL
(
pixel_shuffle
,
ops
::
PixelShuffleOpKernel
<
paddle
::
platform
::
CPUDeviceContext
,
float
>
,
ops
::
PixelShuffleOpKernel
<
paddle
::
platform
::
CPUDeviceContext
,
double
>
);
REGISTER_OP_CPU_KERNEL
(
pixel_shuffle_grad
,
ops
::
PixelShuffleGradOpKernel
<
paddle
::
platform
::
CPUDeviceContext
,
float
>
,
ops
::
PixelShuffleGradOpKernel
<
paddle
::
platform
::
CPUDeviceContext
,
double
>
);
paddle/fluid/operators/pixel_shuffle_op.cu
0 → 100644
浏览文件 @
2e0b8713
/* Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/fluid/operators/pixel_shuffle_op.h"
namespace
ops
=
paddle
::
operators
;
namespace
plat
=
paddle
::
platform
;
REGISTER_OP_CUDA_KERNEL
(
pixel_shuffle
,
ops
::
PixelShuffleOpKernel
<
plat
::
CUDADeviceContext
,
float
>
,
ops
::
PixelShuffleOpKernel
<
plat
::
CUDADeviceContext
,
double
>
);
REGISTER_OP_CUDA_KERNEL
(
pixel_shuffle_grad
,
ops
::
PixelShuffleGradOpKernel
<
plat
::
CUDADeviceContext
,
float
>
,
ops
::
PixelShuffleGradOpKernel
<
plat
::
CUDADeviceContext
,
double
>
);
paddle/fluid/operators/pixel_shuffle_op.h
0 → 100644
浏览文件 @
2e0b8713
/* Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include <algorithm>
#include <vector>
#include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/operators/math/math_function.h"
namespace
paddle
{
namespace
operators
{
template
<
typename
DeviceContext
,
typename
T
>
class
PixelShuffleOpKernel
:
public
framework
::
OpKernel
<
T
>
{
public:
void
Compute
(
const
framework
::
ExecutionContext
&
ctx
)
const
override
{
auto
*
in
=
ctx
.
Input
<
framework
::
Tensor
>
(
"X"
);
auto
*
out
=
ctx
.
Output
<
framework
::
Tensor
>
(
"Out"
);
out
->
mutable_data
<
T
>
(
ctx
.
GetPlace
());
int
factor
=
ctx
.
Attr
<
int
>
(
"upscale_factor"
);
auto
in_dims
=
in
->
dims
();
auto
o_dims
=
out
->
dims
();
framework
::
Tensor
t
;
t
.
ShareDataWith
(
*
in
);
t
.
Resize
({
in_dims
[
0
],
o_dims
[
1
],
factor
,
factor
,
in_dims
[
2
],
in_dims
[
3
]});
std
::
vector
<
int
>
axis
=
{
0
,
1
,
4
,
2
,
5
,
3
};
framework
::
Tensor
o
;
o
.
ShareDataWith
(
*
out
);
o
.
Resize
({
in_dims
[
0
],
o_dims
[
1
],
in_dims
[
2
],
factor
,
in_dims
[
3
],
factor
});
math
::
Transpose
<
DeviceContext
,
T
,
6
>
trans
;
auto
&
dev_ctx
=
ctx
.
template
device_context
<
DeviceContext
>();
trans
(
dev_ctx
,
t
,
&
o
,
axis
);
out
->
Resize
(
o_dims
);
}
};
template
<
typename
DeviceContext
,
typename
T
>
class
PixelShuffleGradOpKernel
:
public
framework
::
OpKernel
<
T
>
{
public:
void
Compute
(
const
framework
::
ExecutionContext
&
ctx
)
const
override
{
auto
*
dout
=
ctx
.
Input
<
framework
::
Tensor
>
(
framework
::
GradVarName
(
"Out"
));
auto
*
dx
=
ctx
.
Output
<
framework
::
Tensor
>
(
framework
::
GradVarName
(
"X"
));
dx
->
mutable_data
<
T
>
(
ctx
.
GetPlace
());
int
factor
=
ctx
.
Attr
<
int
>
(
"upscale_factor"
);
auto
do_dims
=
dout
->
dims
();
auto
dx_dims
=
dx
->
dims
();
framework
::
Tensor
t
;
t
.
ShareDataWith
(
*
dout
);
t
.
Resize
({
do_dims
[
0
],
do_dims
[
1
],
dx_dims
[
2
],
factor
,
dx_dims
[
3
],
factor
});
std
::
vector
<
int
>
axis
=
{
0
,
1
,
3
,
5
,
2
,
4
};
framework
::
Tensor
o
;
o
.
ShareDataWith
(
*
dx
);
o
.
Resize
({
do_dims
[
0
],
do_dims
[
1
],
factor
,
factor
,
dx_dims
[
2
],
dx_dims
[
3
]});
math
::
Transpose
<
DeviceContext
,
T
,
6
>
trans
;
auto
&
dev_ctx
=
ctx
.
template
device_context
<
DeviceContext
>();
trans
(
dev_ctx
,
t
,
&
o
,
axis
);
dx
->
Resize
(
dx_dims
);
}
};
}
// namespace operators
}
// namespace paddle
paddle/fluid/operators/row_conv_op.cc
浏览文件 @
2e0b8713
...
...
@@ -13,6 +13,10 @@ See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/fluid/operators/row_conv_op.h"
#include <memory>
#include <string>
#include <vector>
#include "paddle/fluid/framework/eigen.h"
namespace
paddle
{
...
...
@@ -54,7 +58,6 @@ class RowConvGradOp : public framework::OperatorWithKernel {
using
framework
::
OperatorWithKernel
::
OperatorWithKernel
;
void
InferShape
(
framework
::
InferShapeContext
*
ctx
)
const
override
{
PADDLE_ENFORCE
(
ctx
->
HasInput
(
"X"
),
"Input(X) should not be null."
);
PADDLE_ENFORCE
(
ctx
->
HasInput
(
"Filter"
),
"Input(Filter) should not be null."
);
PADDLE_ENFORCE
(
ctx
->
HasInput
(
framework
::
GradVarName
(
"Out"
)),
...
...
@@ -62,8 +65,8 @@ class RowConvGradOp : public framework::OperatorWithKernel {
auto
x_grad_name
=
framework
::
GradVarName
(
"X"
);
if
(
ctx
->
HasOutput
(
x_grad_name
))
{
auto
x_dims
=
ctx
->
GetInputDim
(
"X"
);
ctx
->
SetOutputDim
(
x_grad_name
,
x
_dims
);
auto
dout_dims
=
ctx
->
GetInputDim
(
framework
::
GradVarName
(
"Out"
)
);
ctx
->
SetOutputDim
(
x_grad_name
,
dout
_dims
);
}
auto
filter_grad_name
=
framework
::
GradVarName
(
"Filter"
);
...
...
@@ -259,12 +262,31 @@ class RowConvGradKernel<platform::CPUDeviceContext, T>
}
}
};
class
RowConvGradOpDescMaker
:
public
framework
::
SingleGradOpDescMaker
{
public:
using
framework
::
SingleGradOpDescMaker
::
SingleGradOpDescMaker
;
protected:
std
::
unique_ptr
<
framework
::
OpDesc
>
Apply
()
const
override
{
std
::
unique_ptr
<
framework
::
OpDesc
>
op
(
new
framework
::
OpDesc
());
op
->
SetType
(
"row_conv_grad"
);
op
->
SetAttrMap
(
Attrs
());
op
->
SetInput
(
"X"
,
Input
(
"X"
));
op
->
SetInput
(
"Filter"
,
Input
(
"Filter"
));
op
->
SetInput
(
framework
::
GradVarName
(
"Out"
),
OutputGrad
(
"Out"
));
op
->
SetOutput
(
framework
::
GradVarName
(
"X"
),
InputGrad
(
"X"
));
op
->
SetOutput
(
framework
::
GradVarName
(
"Filter"
),
InputGrad
(
"Filter"
));
return
op
;
}
};
}
// namespace operators
}
// namespace paddle
namespace
ops
=
paddle
::
operators
;
REGISTER_OPERATOR
(
row_conv
,
ops
::
RowConvOp
,
ops
::
RowConvOpMaker
,
paddle
::
framework
::
DefaultGradOpDescMaker
<
true
>
);
ops
::
RowConvGradOpDescMaker
);
REGISTER_OPERATOR
(
row_conv_grad
,
ops
::
RowConvGradOp
);
REGISTER_OP_CPU_KERNEL
(
row_conv
,
ops
::
RowConvKernel
<
paddle
::
platform
::
CPUDeviceContext
,
float
>
);
...
...
paddle/fluid/operators/uniform_random_batch_size_like_op.cc
浏览文件 @
2e0b8713
...
...
@@ -64,8 +64,9 @@ with random values sampled from a uniform distribution.
}
// namespace operators
}
// namespace paddle
REGISTER_OP_WITHOUT_GRADIENT
(
uniform_random_batch_size_like
,
paddle
::
operators
::
UniformRandomBatchSizeLikeOp
,
paddle
::
operators
::
UniformRandomBatchSizeLikeOpMaker
);
REGISTER_OPERATOR
(
uniform_random_batch_size_like
,
paddle
::
operators
::
UniformRandomBatchSizeLikeOp
,
paddle
::
operators
::
UniformRandomBatchSizeLikeOpMaker
,
paddle
::
framework
::
EmptyGradOpMaker
,
paddle
::
operators
::
BatchSizeLikeNoNeedBufferVarsInference
);
// Kernels are registered in uniform_random_op.cc and uniform_random_op.cu
paddle/fluid/pybind/CMakeLists.txt
浏览文件 @
2e0b8713
set
(
PYBIND_DEPS pybind python proto_desc memory executor async_executor fleet_wrapper prune
feed_fetch_method pass_builder parallel_executor profiler layer scope_pool
tracer analysis_predictor imperative_profiler
)
tracer analysis_predictor imperative_profiler
nccl_context
)
if
(
WITH_PYTHON
)
list
(
APPEND PYBIND_DEPS py_func_op
)
...
...
paddle/fluid/pybind/imperative.cc
浏览文件 @
2e0b8713
...
...
@@ -29,7 +29,7 @@ namespace paddle {
namespace
pybind
{
// Bind Methods
void
Bind
Tracer
(
pybind11
::
module
*
m
)
{
void
Bind
Imperative
(
pybind11
::
module
*
m
)
{
pybind11
::
class_
<
imperative
::
Tracer
>
(
*
m
,
"Tracer"
,
""
)
.
def
(
"__init__"
,
[](
imperative
::
Tracer
&
self
,
framework
::
BlockDesc
*
root_block
)
{
...
...
@@ -59,6 +59,47 @@ void BindTracer(pybind11::module* m) {
})
.
def
(
"py_trace"
,
&
imperative
::
Tracer
::
PyTrace
,
pybind11
::
return_value_policy
::
take_ownership
);
// define parallel context
pybind11
::
class_
<
imperative
::
ParallelStrategy
>
parallel_strategy
(
*
m
,
"ParallelStrategy"
,
""
);
parallel_strategy
.
def
(
pybind11
::
init
())
.
def_property
(
"nranks"
,
[](
const
imperative
::
ParallelStrategy
&
self
)
{
return
self
.
nranks_
;
},
[](
imperative
::
ParallelStrategy
&
self
,
int
nranks
)
{
self
.
nranks_
=
nranks
;
})
.
def_property
(
"local_rank"
,
[](
const
imperative
::
ParallelStrategy
&
self
)
{
return
self
.
local_rank_
;
},
[](
imperative
::
ParallelStrategy
&
self
,
int
local_rank
)
{
self
.
local_rank_
=
local_rank
;
})
.
def_property
(
"trainer_endpoints"
,
[](
const
imperative
::
ParallelStrategy
&
self
)
{
return
self
.
trainer_endpoints_
;
},
[](
imperative
::
ParallelStrategy
&
self
,
std
::
vector
<
std
::
string
>
eps
)
{
self
.
trainer_endpoints_
=
eps
;
})
.
def_property
(
"current_endpoint"
,
[](
const
imperative
::
ParallelStrategy
&
self
)
{
return
self
.
current_endpoint_
;
},
[](
imperative
::
ParallelStrategy
&
self
,
const
std
::
string
&
ep
)
{
self
.
current_endpoint_
=
ep
;
});
#if defined(PADDLE_WITH_CUDA) && !defined(_WIN32)
pybind11
::
class_
<
imperative
::
NCCLParallelContext
>
nccl_ctx
(
*
m
,
"NCCLParallelContext"
);
nccl_ctx
.
def
(
pybind11
::
init
<
const
imperative
::
ParallelStrategy
&
,
const
platform
::
CUDAPlace
&>
())
.
def
(
"init"
,
[](
imperative
::
NCCLParallelContext
&
self
)
{
self
.
Init
();
});
#endif
}
}
// namespace pybind
...
...
paddle/fluid/pybind/imperative.h
浏览文件 @
2e0b8713
...
...
@@ -17,6 +17,7 @@ limitations under the License. */
#include <string>
#include <vector>
#include "paddle/fluid/imperative/layer.h"
#include "paddle/fluid/imperative/nccl_context.h"
#include "pybind11/pybind11.h"
#include "pybind11/stl.h"
...
...
@@ -46,7 +47,7 @@ class PyVarBase : public imperative::VarBase {
using
imperative
::
VarBase
::
VarBase
;
// Inherit constructors
};
void
Bind
Tracer
(
pybind11
::
module
*
m
);
void
Bind
Imperative
(
pybind11
::
module
*
m
);
}
// namespace pybind
}
// namespace paddle
paddle/fluid/pybind/pybind.cc
浏览文件 @
2e0b8713
...
...
@@ -288,7 +288,7 @@ PYBIND11_MODULE(core, m) {
})
.
def_static
(
"num_funcs"
,
&
imperative
::
PyLayer
::
NumFuncs
);
Bind
Tracer
(
&
m
);
Bind
Imperative
(
&
m
);
py
::
class_
<
Tensor
>
(
m
,
"Tensor"
,
py
::
buffer_protocol
())
.
def_buffer
(
...
...
python/paddle/distributed/launch.py
浏览文件 @
2e0b8713
...
...
@@ -32,6 +32,7 @@ default_envs = {
"NCCL_SOCKET_IFNAME"
:
"eth0"
,
"NCCL_IB_GID_INDEX"
:
"3"
,
"NCCL_IB_RETRY_CNT"
:
"0"
,
"PYTHONPATH"
:
os
.
getenv
(
"PYTHONPATH"
,
""
),
}
GPUS
=
8
...
...
python/paddle/fluid/backward.py
浏览文件 @
2e0b8713
...
...
@@ -231,9 +231,16 @@ def _remove_no_grad_branch_(op_descs, no_grad_set):
for
idx
,
op_desc
in
enumerate
(
op_descs
):
for
arg
in
op_desc
.
input_arg_names
():
if
core
.
grad_var_suffix
()
in
arg
and
arg
in
no_grad_set
:
to_insert
.
append
((
_create_op_desc_
(
"fill_zeros_like"
,
{
"X"
:
[
_strip_grad_suffix_
(
arg
)]
},
{
"Out"
:
[
arg
]},
{}),
idx
))
x_in
=
_strip_grad_suffix_
(
arg
)
x_in_var_desc
=
op_desc
.
block
().
find_var_recursive
(
cpt
.
to_bytes
(
x_in
))
assert
x_in_var_desc
is
not
None
,
"Variable {} not found"
.
format
(
x_in
)
dtype
=
x_in_var_desc
.
dtype
()
to_insert
.
append
(
(
_create_op_desc_
(
"fill_zeros_like2"
,
{
"X"
:
[
x_in
]},
{
"Out"
:
[
arg
]},
{
"dtype"
:
dtype
}),
idx
))
list
([
op_descs
.
insert
(
p
[
1
],
p
[
0
])
for
p
in
reversed
(
to_insert
)])
...
...
python/paddle/fluid/dygraph/__init__.py
浏览文件 @
2e0b8713
...
...
@@ -29,6 +29,9 @@ from .tracer import *
from
.
import
profiler
from
.profiler
import
*
from
.
import
parallel
from
.parallel
import
*
from
.
import
checkpoint
from
.checkpoint
import
*
...
...
@@ -41,5 +44,6 @@ __all__ += base.__all__
__all__
+=
nn
.
__all__
__all__
+=
tracer
.
__all__
__all__
+=
profiler
.
__all__
__all__
+=
parallel
.
__all__
__all__
+=
checkpoint
.
__all__
__all__
+=
learning_rate_scheduler
.
__all__
python/paddle/fluid/dygraph/nn.py
浏览文件 @
2e0b8713
此差异已折叠。
点击以展开。
python/paddle/fluid/dygraph/parallel.py
0 → 100644
浏览文件 @
2e0b8713
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except jin compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import
os
from
..
import
core
__all__
=
[
"prepare_context"
]
ParallelStrategy
=
core
.
ParallelStrategy
__parallel_ctx__clz__
=
None
def
prepare_context
(
parallel_strategy
,
place
):
global
__parallel_ctx__clz__
assert
__parallel_ctx__clz__
is
None
,
"ParallelContext can only be initialized once."
if
isinstance
(
place
,
core
.
CUDAPlace
):
__parallel_ctx__clz__
=
core
.
NCCLParallelContext
(
parallel_strategy
,
place
)
else
:
# TODO(Yancey1989): add Gloo Parallel Context to support CPU parallel computation
assert
(
"Only support CUDAPlace for now."
)
__parallel_ctx__clz__
.
init
()
class
Env
(
object
):
def
__init__
(
self
):
self
.
_nranks
=
int
(
os
.
getenv
(
"PADDLE_TRAINERS_NUM"
,
"1"
))
self
.
_local_rank
=
int
(
os
.
getenv
(
"PADDLE_TRAINER_ID"
,
"0"
))
self
.
_dev_id
=
int
(
os
.
getenv
(
"FLAGS_selected_gpus"
,
"0"
))
self
.
_trainer_endpoints
=
os
.
getenv
(
"PADDLE_TRAINER_ENDPOINTS"
,
""
).
split
(
","
)
self
.
_current_endpoint
=
os
.
getenv
(
"PADDLE_CURRENT_ENDPOINT"
,
""
)
@
property
def
nranks
(
self
):
return
self
.
_nranks
@
property
def
local_rank
(
self
):
return
self
.
_local_rank
@
property
def
dev_id
(
self
):
return
self
.
_dev_id
@
property
def
current_endpoint
(
self
):
return
self
.
_current_endpoint
python/paddle/fluid/layers/nn.py
浏览文件 @
2e0b8713
...
...
@@ -191,6 +191,7 @@ __all__ = [
'kldiv_loss'
,
'tree_conv'
,
'npair_loss'
,
'pixel_shuffle'
,
'fsp_matrix'
,
]
...
...
@@ -10961,6 +10962,65 @@ def npair_loss(anchor, positive, labels, l2_reg=0.002):
return
l2loss
+
celoss
def
pixel_shuffle
(
x
,
upscale_factor
):
"""
**Pixel Shuffle Layer**
This layer rearranges elements in a tensor of shape [N, C, H, W]
to a tensor of shape [N, C/r**2, H*r, W*r].
This is useful for implementing efficient sub-pixel convolution
with a stride of 1/r.
Please refer to the paper: `Real-Time Single Image and Video Super-Resolution
Using an Efficient Sub-Pixel Convolutional Neural Network <https://arxiv.org/abs/1609.05158v2>`_ .
by Shi et. al (2016) for more details.
.. code-block:: text
Given a 4-D tensor with the shape:
x.shape = [1, 9, 4, 4]
Given upscale_factor:
upscale_factor= 3
output shape is:
[1, 1, 12, 12]
Args:
x(Variable): The input tensor variable.
upscale_factor(int): factor to increase spatial resolution
Returns:
Out(Variable): the pixel shuffle result is a tensor variable with the same shape and the same type as the input.
Raises:
ValueError: If the square of upscale_factor cannot divide the channels of input.
Examples:
.. code-block:: python
input = fluid.layers.data(shape=[9,4,4])
output = fluid.layers.pixel_shuffle(x=input, upscale_factor=3)
"""
helper
=
LayerHelper
(
"pixel_shuffle"
,
**
locals
())
out
=
helper
.
create_variable_for_type_inference
(
dtype
=
x
.
dtype
)
if
not
isinstance
(
upscale_factor
,
int
):
raise
TypeError
(
"upscale factor must be int type"
)
helper
.
append_op
(
type
=
"pixel_shuffle"
,
inputs
=
{
"X"
:
x
},
outputs
=
{
"Out"
:
out
},
attrs
=
{
"upscale_factor"
:
upscale_factor
})
return
out
def
fsp_matrix
(
x
,
y
):
"""
...
...
python/paddle/fluid/metrics.py
浏览文件 @
2e0b8713
...
...
@@ -227,7 +227,7 @@ class Precision(MetricBase):
metric.reset()
for data in train_reader():
loss, preds, labels = exe.run(fetch_list=[cost, preds, labels])
metric.update(preds=preds, labels=labels)
metric.update(preds=preds, labels=labels)
numpy_precision = metric.eval()
"""
...
...
@@ -241,9 +241,11 @@ class Precision(MetricBase):
raise
ValueError
(
"The 'preds' must be a numpy ndarray."
)
if
not
_is_numpy_
(
labels
):
raise
ValueError
(
"The 'labels' must be a numpy ndarray."
)
sample_num
=
labels
[
0
]
sample_num
=
labels
.
shape
[
0
]
preds
=
np
.
rint
(
preds
).
astype
(
"int32"
)
for
i
in
range
(
sample_num
):
pred
=
preds
[
i
]
.
astype
(
"int32"
)
pred
=
preds
[
i
]
label
=
labels
[
i
]
if
label
==
1
:
if
pred
==
label
:
...
...
python/paddle/fluid/tests/unittests/CMakeLists.txt
浏览文件 @
2e0b8713
...
...
@@ -81,6 +81,7 @@ list(REMOVE_ITEM TEST_OPS test_imperative_resnet)
list
(
REMOVE_ITEM TEST_OPS test_imperative_se_resnext
)
list
(
REMOVE_ITEM TEST_OPS test_imperative_mnist
)
list
(
REMOVE_ITEM TEST_OPS test_ir_memory_optimize_transformer
)
list
(
REMOVE_ITEM TEST_OPS test_layers
)
foreach
(
TEST_OP
${
TEST_OPS
}
)
py_test_modules
(
${
TEST_OP
}
MODULES
${
TEST_OP
}
)
endforeach
(
TEST_OP
)
...
...
@@ -118,7 +119,7 @@ py_test_modules(test_parallel_executor_crf MODULES test_parallel_executor_crf SE
py_test_modules
(
test_parallel_executor_fetch_feed MODULES test_parallel_executor_fetch_feed SERIAL
)
set_tests_properties
(
test_parallel_executor_fetch_feed PROPERTIES TIMEOUT 450
)
py_test_modules
(
test_parallel_executor_transformer MODULES test_parallel_executor_transformer SERIAL
)
py_test_modules
(
test_layers MODULES test_layers ENVS FLAGS_cudnn_deterministic=1
)
if
(
NOT WIN32
)
py_test_modules
(
test_ir_memory_optimize_transformer MODULES test_ir_memory_optimize_transformer SERIAL
)
endif
()
...
...
python/paddle/fluid/tests/unittests/test_eager_deletion_conditional_block.py
0 → 100644
浏览文件 @
2e0b8713
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import
paddle.fluid
as
fluid
import
unittest
fluid
.
core
.
_set_eager_deletion_mode
(
0.0
,
1.0
,
True
)
from
test_conditional_block
import
*
if
__name__
==
'__main__'
:
unittest
.
main
()
python/paddle/fluid/tests/unittests/test_eager_deletion_no_need_buffer_vars_inference.py
浏览文件 @
2e0b8713
...
...
@@ -23,6 +23,8 @@ from test_elementwise_sub_op import *
from
test_concat_op
import
*
from
test_gather_op
import
*
from
test_gaussian_random_batch_size_like_op
import
*
from
test_uniform_random_batch_size_like_op
import
*
from
test_fill_constant_batch_size_like_op
import
*
from
test_lod_reset_op
import
*
from
test_scatter_op
import
*
from
test_mean_op
import
*
...
...
@@ -40,6 +42,7 @@ from test_sequence_unpad_op import *
from
test_sequence_scatter_op
import
*
from
test_sequence_slice_op
import
*
from
test_pad2d_op
import
*
from
test_fill_zeros_like2_op
import
*
if
__name__
==
'__main__'
:
unittest
.
main
()
python/paddle/fluid/tests/unittests/test_fill_zeros_like2_op.py
0 → 100644
浏览文件 @
2e0b8713
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from
__future__
import
print_function
import
unittest
import
numpy
as
np
from
paddle.fluid.framework
import
convert_np_dtype_to_dtype_
from
op_test
import
OpTest
class
TestFillZerosLike2Op
(
OpTest
):
def
setUp
(
self
):
self
.
op_type
=
"fill_zeros_like2"
self
.
dtype
=
np
.
float32
self
.
init_dtype
()
self
.
inputs
=
{
'X'
:
np
.
random
.
random
((
219
,
232
)).
astype
(
self
.
dtype
)}
self
.
outputs
=
{
'Out'
:
np
.
zeros_like
(
self
.
inputs
[
"X"
])}
self
.
attrs
=
{
'dtype'
:
convert_np_dtype_to_dtype_
(
self
.
dtype
)}
def
init_dtype
(
self
):
pass
def
test_check_output
(
self
):
self
.
check_output
()
class
TestFillZerosLike2OpFp16
(
TestFillZerosLike2Op
):
def
init_dtype
(
self
):
self
.
dtype
=
np
.
float16
class
TestFillZerosLike2OpFp64
(
TestFillZerosLike2Op
):
def
init_dtype
(
self
):
self
.
dtype
=
np
.
float64
if
__name__
==
"__main__"
:
unittest
.
main
()
python/paddle/fluid/tests/unittests/test_imperative_basic.py
浏览文件 @
2e0b8713
...
...
@@ -348,6 +348,55 @@ class TestImperative(unittest.TestCase):
self
.
assertEqual
(
mlp
.
_fc2
,
sublayers
[
1
])
self
.
assertEqual
(
len
(
sublayers
),
2
)
def
test_dygraph_vs_static
(
self
):
inp1
=
np
.
random
.
rand
(
4
,
3
,
3
)
inp2
=
np
.
random
.
rand
(
4
,
3
,
3
)
# dynamic graph
with
fluid
.
dygraph
.
guard
():
if
np
.
sum
(
inp1
)
<
np
.
sum
(
inp2
):
x
=
fluid
.
layers
.
elementwise_add
(
inp1
,
inp2
)
else
:
x
=
fluid
.
layers
.
elementwise_sub
(
inp1
,
inp2
)
dygraph_result
=
x
.
_numpy
()
# static graph
with
new_program_scope
():
inp_data1
=
fluid
.
layers
.
data
(
name
=
'inp1'
,
shape
=
[
3
,
3
],
dtype
=
np
.
float32
)
inp_data2
=
fluid
.
layers
.
data
(
name
=
'inp2'
,
shape
=
[
3
,
3
],
dtype
=
np
.
float32
)
a
=
fluid
.
layers
.
expand
(
fluid
.
layers
.
reshape
(
fluid
.
layers
.
reduce_sum
(
inp_data1
),
[
1
,
1
]),
[
4
,
1
])
b
=
fluid
.
layers
.
expand
(
fluid
.
layers
.
reshape
(
fluid
.
layers
.
reduce_sum
(
inp_data2
),
[
1
,
1
]),
[
4
,
1
])
cond
=
fluid
.
layers
.
less_than
(
x
=
a
,
y
=
b
)
ie
=
fluid
.
layers
.
IfElse
(
cond
)
with
ie
.
true_block
():
d1
=
ie
.
input
(
inp_data1
)
d2
=
ie
.
input
(
inp_data2
)
d3
=
fluid
.
layers
.
elementwise_add
(
d1
,
d2
)
ie
.
output
(
d3
)
with
ie
.
false_block
():
d1
=
ie
.
input
(
inp_data1
)
d2
=
ie
.
input
(
inp_data2
)
d3
=
fluid
.
layers
.
elementwise_sub
(
d1
,
d2
)
ie
.
output
(
d3
)
out
=
ie
()
exe
=
fluid
.
Executor
(
fluid
.
CPUPlace
(
)
if
not
core
.
is_compiled_with_cuda
()
else
fluid
.
CUDAPlace
(
0
))
static_result
=
exe
.
run
(
fluid
.
default_main_program
(),
feed
=
{
'inp1'
:
inp1
,
'inp2'
:
inp2
},
fetch_list
=
out
)[
0
]
self
.
assertTrue
(
np
.
allclose
(
dygraph_result
,
static_result
))
def
test_rnn
(
self
):
np_inp
=
np
.
array
([[
1.0
,
2.0
,
3.0
],
[
4.0
,
5.0
,
6.0
],
[
7.0
,
8.0
,
9.0
],
[
10.0
,
11.0
,
12.0
]])
...
...
python/paddle/fluid/tests/unittests/test_imperative_transformer.py
浏览文件 @
2e0b8713
...
...
@@ -302,8 +302,11 @@ use_py_reader = False
# if we run sync mode
sync
=
False
# how many batches we use
batch_num
=
50
if
not
core
.
is_compiled_with_cuda
():
# how many batches we use
batch_num
=
50
else
:
batch_num
=
5
np
.
random
.
seed
=
1
src_word_np
=
np
.
random
.
randint
(
...
...
python/paddle/fluid/tests/unittests/test_layers.py
浏览文件 @
2e0b8713
...
...
@@ -600,6 +600,280 @@ class TestLayer(LayerTest):
self
.
assertTrue
(
np
.
allclose
(
static_rlt2
,
static_rlt
))
self
.
assertTrue
(
np
.
allclose
(
nce_loss3
.
_numpy
(),
static_rlt
))
def
test_conv3d
(
self
):
with
self
.
static_graph
():
images
=
layers
.
data
(
name
=
'pixel'
,
shape
=
[
3
,
6
,
6
,
6
],
dtype
=
'float32'
)
ret
=
layers
.
conv3d
(
input
=
images
,
num_filters
=
3
,
filter_size
=
2
)
static_ret
=
self
.
get_static_graph_result
(
feed
=
{
'pixel'
:
np
.
ones
(
[
2
,
3
,
6
,
6
,
6
],
dtype
=
'float32'
)},
fetch_list
=
[
ret
])[
0
]
with
self
.
static_graph
():
images
=
layers
.
data
(
name
=
'pixel'
,
shape
=
[
3
,
6
,
6
,
6
],
dtype
=
'float32'
)
conv3d
=
nn
.
Conv3D
(
'conv3d'
,
num_filters
=
3
,
filter_size
=
2
)
ret
=
conv3d
(
images
)
static_ret2
=
self
.
get_static_graph_result
(
feed
=
{
'pixel'
:
np
.
ones
(
[
2
,
3
,
6
,
6
,
6
],
dtype
=
'float32'
)},
fetch_list
=
[
ret
])[
0
]
with
self
.
dynamic_graph
():
images
=
np
.
ones
([
2
,
3
,
6
,
6
,
6
],
dtype
=
'float32'
)
conv3d
=
nn
.
Conv3D
(
'conv3d'
,
num_filters
=
3
,
filter_size
=
2
)
dy_ret
=
conv3d
(
base
.
to_variable
(
images
))
self
.
assertTrue
(
np
.
allclose
(
static_ret
,
dy_ret
.
_numpy
()))
self
.
assertTrue
(
np
.
allclose
(
static_ret
,
static_ret2
))
def
test_row_conv
(
self
):
input
=
np
.
arange
(
15
).
reshape
([
3
,
5
]).
astype
(
'float32'
)
if
core
.
is_compiled_with_cuda
():
place
=
core
.
CUDAPlace
(
0
)
else
:
place
=
core
.
CPUPlace
()
with
self
.
static_graph
():
x
=
layers
.
data
(
name
=
'X'
,
shape
=
[
3
,
5
],
dtype
=
'float32'
,
lod_level
=
1
,
append_batch_size
=
False
)
ret
=
layers
.
row_conv
(
input
=
x
,
future_context_size
=
2
)
static_ret
=
self
.
get_static_graph_result
(
feed
=
{
'X'
:
fluid
.
create_lod_tensor
(
data
=
input
,
recursive_seq_lens
=
[[
1
,
1
,
1
]],
place
=
place
)
},
fetch_list
=
[
ret
],
with_lod
=
True
)[
0
]
with
self
.
static_graph
():
x
=
layers
.
data
(
name
=
'X'
,
shape
=
[
3
,
5
],
dtype
=
'float32'
,
lod_level
=
1
,
append_batch_size
=
False
)
rowConv
=
nn
.
RowConv
(
'RowConv'
,
future_context_size
=
2
)
ret
=
rowConv
(
x
)
static_ret2
=
self
.
get_static_graph_result
(
feed
=
{
'X'
:
fluid
.
create_lod_tensor
(
data
=
input
,
recursive_seq_lens
=
[[
1
,
1
,
1
]],
place
=
place
)
},
fetch_list
=
[
ret
],
with_lod
=
True
)[
0
]
# TODO: dygraph can't support LODTensor
self
.
assertTrue
(
np
.
allclose
(
static_ret
,
static_ret2
))
def
test_group_norm
(
self
):
if
core
.
is_compiled_with_cuda
():
place
=
core
.
CUDAPlace
(
0
)
else
:
place
=
core
.
CPUPlace
()
shape
=
(
2
,
4
,
3
,
3
)
input
=
np
.
random
.
random
(
shape
).
astype
(
'float32'
)
with
self
.
static_graph
():
X
=
fluid
.
layers
.
data
(
name
=
'X'
,
shape
=
shape
,
dtype
=
'float32'
,
lod_level
=
1
,
append_batch_size
=
False
)
ret
=
layers
.
group_norm
(
input
=
X
,
groups
=
2
)
static_ret
=
self
.
get_static_graph_result
(
feed
=
{
'X'
:
fluid
.
create_lod_tensor
(
data
=
input
,
recursive_seq_lens
=
[[
1
,
1
]],
place
=
place
)
},
fetch_list
=
[
ret
],
with_lod
=
True
)[
0
]
with
self
.
static_graph
():
X
=
fluid
.
layers
.
data
(
name
=
'X'
,
shape
=
shape
,
dtype
=
'float32'
,
lod_level
=
1
,
append_batch_size
=
False
)
groupNorm
=
nn
.
GroupNorm
(
'GroupNorm'
,
groups
=
2
)
ret
=
groupNorm
(
X
)
static_ret2
=
self
.
get_static_graph_result
(
feed
=
{
'X'
:
fluid
.
create_lod_tensor
(
data
=
input
,
recursive_seq_lens
=
[[
1
,
1
]],
place
=
place
)
},
fetch_list
=
[
ret
],
with_lod
=
True
)[
0
]
with
self
.
dynamic_graph
():
groupNorm
=
nn
.
GroupNorm
(
'GroupNorm'
,
groups
=
2
)
dy_ret
=
groupNorm
(
base
.
to_variable
(
input
))
self
.
assertTrue
(
np
.
allclose
(
static_ret
,
dy_ret
.
_numpy
()))
self
.
assertTrue
(
np
.
allclose
(
static_ret
,
static_ret2
))
def
test_spectral_norm
(
self
):
if
core
.
is_compiled_with_cuda
():
place
=
core
.
CUDAPlace
(
0
)
else
:
place
=
core
.
CPUPlace
()
shape
=
(
2
,
4
,
3
,
3
)
input
=
np
.
random
.
random
(
shape
).
astype
(
'float32'
)
with
self
.
static_graph
():
Weight
=
fluid
.
layers
.
data
(
name
=
'Weight'
,
shape
=
shape
,
dtype
=
'float32'
,
lod_level
=
1
,
append_batch_size
=
False
)
ret
=
layers
.
spectral_norm
(
weight
=
Weight
,
dim
=
1
,
power_iters
=
2
)
static_ret
=
self
.
get_static_graph_result
(
feed
=
{
'Weight'
:
fluid
.
create_lod_tensor
(
data
=
input
,
recursive_seq_lens
=
[[
1
,
1
]],
place
=
place
),
},
fetch_list
=
[
ret
],
with_lod
=
True
)[
0
]
with
self
.
static_graph
():
Weight
=
fluid
.
layers
.
data
(
name
=
'Weight'
,
shape
=
shape
,
dtype
=
'float32'
,
lod_level
=
1
,
append_batch_size
=
False
)
spectralNorm
=
nn
.
SpectralNorm
(
'SpectralNorm'
,
dim
=
1
,
power_iters
=
2
)
ret
=
spectralNorm
(
Weight
)
static_ret2
=
self
.
get_static_graph_result
(
feed
=
{
'Weight'
:
fluid
.
create_lod_tensor
(
data
=
input
,
recursive_seq_lens
=
[[
1
,
1
]],
place
=
place
)
},
fetch_list
=
[
ret
],
with_lod
=
True
)[
0
]
with
self
.
dynamic_graph
():
spectralNorm
=
nn
.
SpectralNorm
(
'SpectralNorm'
,
dim
=
1
,
power_iters
=
2
)
dy_ret
=
spectralNorm
(
base
.
to_variable
(
input
))
self
.
assertTrue
(
np
.
allclose
(
static_ret
,
dy_ret
.
_numpy
()))
self
.
assertTrue
(
np
.
allclose
(
static_ret
,
static_ret2
))
def
test_tree_conv
(
self
):
if
core
.
is_compiled_with_cuda
():
place
=
core
.
CUDAPlace
(
0
)
else
:
place
=
core
.
CPUPlace
()
adj_array
=
[
1
,
2
,
1
,
3
,
1
,
4
,
1
,
5
,
2
,
6
,
2
,
7
,
2
,
8
,
4
,
9
,
4
,
10
]
adj
=
np
.
array
(
adj_array
).
reshape
((
1
,
9
,
2
)).
astype
(
'int32'
)
adj
=
np
.
tile
(
adj
,
(
1
,
1
,
1
))
vectors
=
np
.
random
.
random
((
1
,
10
,
5
)).
astype
(
'float32'
)
with
self
.
static_graph
():
NodesVector
=
fluid
.
layers
.
data
(
name
=
'NodesVector'
,
shape
=
(
1
,
10
,
5
),
dtype
=
'float32'
,
lod_level
=
1
,
append_batch_size
=
False
)
EdgeSet
=
fluid
.
layers
.
data
(
name
=
'EdgeSet'
,
shape
=
(
1
,
9
,
2
),
dtype
=
'int32'
,
lod_level
=
1
,
append_batch_size
=
False
)
ret
=
layers
.
tree_conv
(
nodes_vector
=
NodesVector
,
edge_set
=
EdgeSet
,
output_size
=
6
,
num_filters
=
1
,
max_depth
=
2
)
static_ret
=
self
.
get_static_graph_result
(
feed
=
{
'NodesVector'
:
fluid
.
create_lod_tensor
(
data
=
vectors
,
recursive_seq_lens
=
[[
1
]],
place
=
place
),
'EdgeSet'
:
fluid
.
create_lod_tensor
(
data
=
adj
,
recursive_seq_lens
=
[[
1
]],
place
=
place
)
},
fetch_list
=
[
ret
],
with_lod
=
False
)[
0
]
with
self
.
static_graph
():
NodesVector
=
fluid
.
layers
.
data
(
name
=
'NodesVector'
,
shape
=
(
1
,
10
,
5
),
dtype
=
'float32'
,
lod_level
=
1
,
append_batch_size
=
False
)
EdgeSet
=
fluid
.
layers
.
data
(
name
=
'EdgeSet'
,
shape
=
(
1
,
9
,
2
),
dtype
=
'int32'
,
lod_level
=
1
,
append_batch_size
=
False
)
treeConv
=
nn
.
TreeConv
(
'TreeConv'
,
output_size
=
6
,
num_filters
=
1
,
max_depth
=
2
)
ret
=
treeConv
(
NodesVector
,
EdgeSet
)
static_ret2
=
self
.
get_static_graph_result
(
feed
=
{
'NodesVector'
:
fluid
.
create_lod_tensor
(
data
=
vectors
,
recursive_seq_lens
=
[[
1
]],
place
=
place
),
'EdgeSet'
:
fluid
.
create_lod_tensor
(
data
=
adj
,
recursive_seq_lens
=
[[
1
]],
place
=
place
)
},
fetch_list
=
[
ret
],
with_lod
=
False
)[
0
]
with
self
.
dynamic_graph
():
treeConv
=
nn
.
TreeConv
(
'SpectralNorm'
,
output_size
=
6
,
num_filters
=
1
,
max_depth
=
2
)
dy_ret
=
treeConv
(
base
.
to_variable
(
vectors
),
base
.
to_variable
(
adj
))
self
.
assertTrue
(
np
.
allclose
(
static_ret
,
static_ret2
))
self
.
assertTrue
(
np
.
allclose
(
static_ret
,
dy_ret
.
_numpy
()))
def
test_conv3d_transpose
(
self
):
input_array
=
np
.
arange
(
0
,
48
).
reshape
(
[
2
,
3
,
2
,
2
,
2
]).
astype
(
'float32'
)
with
self
.
static_graph
():
img
=
layers
.
data
(
name
=
'pixel'
,
shape
=
[
3
,
2
,
2
,
2
],
dtype
=
'float32'
)
out
=
layers
.
conv3d_transpose
(
input
=
img
,
num_filters
=
12
,
filter_size
=
12
,
use_cudnn
=
False
)
static_rlt
=
self
.
get_static_graph_result
(
feed
=
{
'pixel'
:
input_array
},
fetch_list
=
[
out
])[
0
]
with
self
.
static_graph
():
img
=
layers
.
data
(
name
=
'pixel'
,
shape
=
[
3
,
2
,
2
,
2
],
dtype
=
'float32'
)
conv3d_transpose
=
nn
.
Conv3DTranspose
(
'Conv3DTranspose'
,
num_filters
=
12
,
filter_size
=
12
,
use_cudnn
=
False
)
out
=
conv3d_transpose
(
img
)
static_rlt2
=
self
.
get_static_graph_result
(
feed
=
{
'pixel'
:
input_array
},
fetch_list
=
[
out
])[
0
]
with
self
.
dynamic_graph
():
conv3d_transpose
=
nn
.
Conv3DTranspose
(
'Conv3DTranspose'
,
num_filters
=
12
,
filter_size
=
12
,
use_cudnn
=
False
)
dy_rlt
=
conv3d_transpose
(
base
.
to_variable
(
input_array
))
self
.
assertTrue
(
np
.
allclose
(
static_rlt2
,
static_rlt
))
self
.
assertTrue
(
np
.
allclose
(
dy_rlt
.
_numpy
(),
static_rlt
))
class
TestBook
(
LayerTest
):
def
test_all_layers
(
self
):
...
...
@@ -1634,6 +1908,41 @@ class TestBook(LayerTest):
out
=
layers
.
flatten
(
x
,
axis
=
1
,
name
=
"flatten"
)
return
(
out
)
def
test_kldiv_loss
(
self
):
program
=
Program
()
with
program_guard
(
program
):
x
=
layers
.
data
(
name
=
'x'
,
shape
=
[
32
,
128
,
128
],
dtype
=
"float32"
)
target
=
layers
.
data
(
name
=
'target'
,
shape
=
[
32
,
128
,
128
],
dtype
=
"float32"
)
loss
=
layers
.
kldiv_loss
(
x
=
x
,
target
=
target
,
reduction
=
'batchmean'
)
self
.
assertIsNotNone
(
loss
)
print
(
str
(
program
))
def
test_temporal_shift
(
self
):
program
=
Program
()
with
program_guard
(
program
):
x
=
layers
.
data
(
name
=
"X"
,
shape
=
[
16
,
4
,
4
],
dtype
=
"float32"
)
out
=
layers
.
temporal_shift
(
x
,
seg_num
=
4
,
shift_ratio
=
0.2
)
self
.
assertIsNotNone
(
out
)
print
(
str
(
program
))
def
test_shuffle_channel
(
self
):
program
=
Program
()
with
program_guard
(
program
):
x
=
layers
.
data
(
name
=
"X"
,
shape
=
[
16
,
4
,
4
],
dtype
=
"float32"
)
out
=
layers
.
shuffle_channel
(
x
,
group
=
4
)
self
.
assertIsNotNone
(
out
)
print
(
str
(
program
))
def
test_pixel_shuffle
(
self
):
program
=
Program
()
with
program_guard
(
program
):
x
=
layers
.
data
(
name
=
"X"
,
shape
=
[
9
,
4
,
4
],
dtype
=
"float32"
)
out
=
layers
.
pixel_shuffle
(
x
,
upscale_factor
=
3
)
self
.
assertIsNotNone
(
out
)
print
(
str
(
program
))
if
__name__
==
'__main__'
:
unittest
.
main
()
python/paddle/fluid/tests/unittests/test_pixel_shuffle.py
0 → 100644
浏览文件 @
2e0b8713
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from
__future__
import
print_function
import
unittest
import
numpy
as
np
from
op_test
import
OpTest
class
TestPixelShuffle
(
OpTest
):
def
setUp
(
self
):
self
.
op_type
=
"pixel_shuffle"
n
,
c
,
h
,
w
=
2
,
9
,
4
,
4
up_factor
=
3
shape
=
[
n
,
c
,
h
,
w
]
x
=
np
.
random
.
random
(
shape
).
astype
(
"float32"
)
new_shape
=
(
n
,
c
//
(
up_factor
*
up_factor
),
up_factor
,
up_factor
,
h
,
w
)
# reshape to (num,output_channel,upscale_factor,upscale_factor,h,w)
npresult
=
np
.
reshape
(
x
,
new_shape
)
# transpose to (num,output_channel,h,upscale_factor,w,upscale_factor)
npresult
=
npresult
.
transpose
(
0
,
1
,
4
,
2
,
5
,
3
)
oshape
=
[
n
,
c
//
(
up_factor
*
up_factor
),
h
*
up_factor
,
w
*
up_factor
]
npresult
=
np
.
reshape
(
npresult
,
oshape
)
self
.
inputs
=
{
'X'
:
x
}
self
.
outputs
=
{
'Out'
:
npresult
}
self
.
attrs
=
{
'upscale_factor'
:
up_factor
}
def
test_check_output
(
self
):
self
.
check_output
()
def
test_check_grad
(
self
):
self
.
check_grad
([
'X'
],
'Out'
)
if
__name__
==
'__main__'
:
unittest
.
main
()
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录