Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
机器未来
Paddle
提交
218e02ac
P
Paddle
项目概览
机器未来
/
Paddle
与 Fork 源项目一致
Fork自
PaddlePaddle / Paddle
通知
1
Star
1
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
1
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
1
Issue
1
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
未验证
提交
218e02ac
编写于
4月 03, 2019
作者:
L
lujun
提交者:
GitHub
4月 03, 2019
浏览文件
操作
浏览文件
下载
差异文件
Merge pull request #19 from PaddlePaddle/develop
merge to local
上级
67cddc25
cf307d0d
变更
55
展开全部
隐藏空白更改
内联
并排
Showing
55 changed file
with
1649 addition
and
397 deletion
+1649
-397
paddle/fluid/framework/details/all_reduce_deps_pass.cc
paddle/fluid/framework/details/all_reduce_deps_pass.cc
+151
-90
paddle/fluid/framework/details/all_reduce_deps_pass.h
paddle/fluid/framework/details/all_reduce_deps_pass.h
+0
-32
paddle/fluid/framework/details/all_reduce_op_handle.cc
paddle/fluid/framework/details/all_reduce_op_handle.cc
+1
-1
paddle/fluid/framework/details/build_strategy.cc
paddle/fluid/framework/details/build_strategy.cc
+8
-8
paddle/fluid/framework/details/op_handle_base.cc
paddle/fluid/framework/details/op_handle_base.cc
+1
-1
paddle/fluid/framework/op_desc.cc
paddle/fluid/framework/op_desc.cc
+26
-2
paddle/fluid/framework/parallel_executor.cc
paddle/fluid/framework/parallel_executor.cc
+2
-5
paddle/fluid/inference/api/analysis_config.cc
paddle/fluid/inference/api/analysis_config.cc
+3
-10
paddle/fluid/inference/api/helper.h
paddle/fluid/inference/api/helper.h
+11
-10
paddle/fluid/inference/api/paddle_pass_builder.cc
paddle/fluid/inference/api/paddle_pass_builder.cc
+44
-8
paddle/fluid/inference/api/paddle_pass_builder.h
paddle/fluid/inference/api/paddle_pass_builder.h
+7
-34
paddle/fluid/inference/tests/api/CMakeLists.txt
paddle/fluid/inference/tests/api/CMakeLists.txt
+9
-5
paddle/fluid/inference/tests/api/analyzer_bert_tester.cc
paddle/fluid/inference/tests/api/analyzer_bert_tester.cc
+1
-1
paddle/fluid/inference/tests/api/analyzer_dam_tester.cc
paddle/fluid/inference/tests/api/analyzer_dam_tester.cc
+5
-3
paddle/fluid/inference/tests/api/analyzer_int8_image_classification_tester.cc
...ce/tests/api/analyzer_int8_image_classification_tester.cc
+29
-24
paddle/fluid/inference/tests/api/analyzer_lac_tester.cc
paddle/fluid/inference/tests/api/analyzer_lac_tester.cc
+6
-4
paddle/fluid/inference/tests/api/analyzer_mm_dnn_tester.cc
paddle/fluid/inference/tests/api/analyzer_mm_dnn_tester.cc
+4
-3
paddle/fluid/inference/tests/api/analyzer_ner_tester.cc
paddle/fluid/inference/tests/api/analyzer_ner_tester.cc
+6
-4
paddle/fluid/inference/tests/api/analyzer_pyramid_dnn_tester.cc
.../fluid/inference/tests/api/analyzer_pyramid_dnn_tester.cc
+6
-4
paddle/fluid/inference/tests/api/analyzer_resnet50_tester.cc
paddle/fluid/inference/tests/api/analyzer_resnet50_tester.cc
+1
-1
paddle/fluid/inference/tests/api/analyzer_rnn1_tester.cc
paddle/fluid/inference/tests/api/analyzer_rnn1_tester.cc
+2
-2
paddle/fluid/inference/tests/api/analyzer_rnn2_tester.cc
paddle/fluid/inference/tests/api/analyzer_rnn2_tester.cc
+5
-3
paddle/fluid/inference/tests/api/analyzer_seq_conv1_tester.cc
...le/fluid/inference/tests/api/analyzer_seq_conv1_tester.cc
+6
-4
paddle/fluid/inference/tests/api/analyzer_seq_pool1_tester.cc
...le/fluid/inference/tests/api/analyzer_seq_pool1_tester.cc
+1
-1
paddle/fluid/inference/tests/api/analyzer_text_classification_tester.cc
...nference/tests/api/analyzer_text_classification_tester.cc
+4
-3
paddle/fluid/inference/tests/api/analyzer_transformer_tester.cc
.../fluid/inference/tests/api/analyzer_transformer_tester.cc
+1
-1
paddle/fluid/inference/tests/api/analyzer_vis_tester.cc
paddle/fluid/inference/tests/api/analyzer_vis_tester.cc
+3
-2
paddle/fluid/inference/tests/api/tester_helper.h
paddle/fluid/inference/tests/api/tester_helper.h
+92
-55
paddle/fluid/inference/tests/api/trt_models_tester.cc
paddle/fluid/inference/tests/api/trt_models_tester.cc
+1
-1
paddle/fluid/op_use_default_grad_op_maker.spec
paddle/fluid/op_use_default_grad_op_maker.spec
+0
-6
paddle/fluid/operators/batch_size_like.h
paddle/fluid/operators/batch_size_like.h
+3
-0
paddle/fluid/operators/elementwise/elementwise_div_op.cc
paddle/fluid/operators/elementwise/elementwise_div_op.cc
+38
-1
paddle/fluid/operators/elementwise/elementwise_div_op.h
paddle/fluid/operators/elementwise/elementwise_div_op.h
+4
-2
paddle/fluid/operators/elementwise/elementwise_max_op.cc
paddle/fluid/operators/elementwise/elementwise_max_op.cc
+40
-1
paddle/fluid/operators/elementwise/elementwise_max_op.h
paddle/fluid/operators/elementwise/elementwise_max_op.h
+1
-1
paddle/fluid/operators/elementwise/elementwise_min_op.cc
paddle/fluid/operators/elementwise/elementwise_min_op.cc
+40
-1
paddle/fluid/operators/elementwise/elementwise_min_op.h
paddle/fluid/operators/elementwise/elementwise_min_op.h
+1
-1
paddle/fluid/operators/elementwise/elementwise_op.h
paddle/fluid/operators/elementwise/elementwise_op.h
+5
-5
paddle/fluid/operators/fill_constant_batch_size_like_op.cc
paddle/fluid/operators/fill_constant_batch_size_like_op.cc
+3
-1
paddle/fluid/operators/fill_zeros_like_op.cc
paddle/fluid/operators/fill_zeros_like_op.cc
+45
-0
paddle/fluid/operators/fill_zeros_like_op.cu.cc
paddle/fluid/operators/fill_zeros_like_op.cu.cc
+10
-0
paddle/fluid/operators/gaussian_random_batch_size_like_op.cc
paddle/fluid/operators/gaussian_random_batch_size_like_op.cc
+5
-9
paddle/fluid/operators/hinge_loss_op.cc
paddle/fluid/operators/hinge_loss_op.cc
+21
-1
paddle/fluid/operators/huber_loss_op.cc
paddle/fluid/operators/huber_loss_op.cc
+23
-13
paddle/fluid/operators/row_conv_op.cc
paddle/fluid/operators/row_conv_op.cc
+26
-4
paddle/fluid/operators/uniform_random_batch_size_like_op.cc
paddle/fluid/operators/uniform_random_batch_size_like_op.cc
+5
-4
python/paddle/fluid/backward.py
python/paddle/fluid/backward.py
+10
-3
python/paddle/fluid/dygraph/nn.py
python/paddle/fluid/dygraph/nn.py
+540
-8
python/paddle/fluid/metrics.py
python/paddle/fluid/metrics.py
+5
-3
python/paddle/fluid/tests/unittests/CMakeLists.txt
python/paddle/fluid/tests/unittests/CMakeLists.txt
+2
-1
python/paddle/fluid/tests/unittests/test_eager_deletion_no_need_buffer_vars_inference.py
...ests/test_eager_deletion_no_need_buffer_vars_inference.py
+3
-0
python/paddle/fluid/tests/unittests/test_fill_zeros_like2_op.py
.../paddle/fluid/tests/unittests/test_fill_zeros_like2_op.py
+50
-0
python/paddle/fluid/tests/unittests/test_imperative_basic.py
python/paddle/fluid/tests/unittests/test_imperative_basic.py
+49
-0
python/paddle/fluid/tests/unittests/test_imperative_ptb_rnn.py
...n/paddle/fluid/tests/unittests/test_imperative_ptb_rnn.py
+10
-10
python/paddle/fluid/tests/unittests/test_layers.py
python/paddle/fluid/tests/unittests/test_layers.py
+274
-0
未找到文件。
paddle/fluid/framework/details/all_reduce_deps_pass.cc
浏览文件 @
218e02ac
...
...
@@ -13,125 +13,186 @@
// limitations under the License.
#include <algorithm>
#include <m
emory
>
#include <m
ap
>
#include <string>
#include <unordered_map>
#include <unordered_set>
#include <utility>
#include <vector>
#include "paddle/fluid/framework/details/all_reduce_deps_pass.h"
#include "paddle/fluid/framework/details/all_reduce_op_handle.h"
#include "paddle/fluid/framework/details/container_cast.h"
#include "paddle/fluid/framework/details/multi_devices_helper.h"
#include "paddle/fluid/framework/details/op_graph_view.h"
#include "paddle/fluid/framework/
details/var_handle
.h"
#include "paddle/fluid/framework/
ir/graph
.h"
#include "paddle/fluid/framework/ir/graph_helper.h"
#include "paddle/fluid/framework/ir/pass.h"
#include "paddle/fluid/framework/op_proto_maker.h"
namespace
paddle
{
namespace
framework
{
namespace
details
{
VarHandle
*
GetValidInput
(
const
OpHandleBase
*
a
)
{
for
(
auto
p
:
a
->
Inputs
())
{
VarHandle
*
b
=
dynamic_cast
<
VarHandle
*>
(
p
);
if
(
b
)
{
return
b
;
class
AllReduceDepsPass
:
public
ir
::
Pass
{
protected:
void
ApplyImpl
(
ir
::
Graph
*
graph
)
const
override
{
std
::
vector
<
AllReduceOpHandle
*>
all_reduce_op_handles
=
GetSortedAllReduceOps
(
*
graph
);
for
(
size_t
i
=
1
;
i
<
all_reduce_op_handles
.
size
();
++
i
)
{
auto
*
dep_var
=
new
DummyVarHandle
(
graph
->
CreateControlDepVar
());
graph
->
Get
<
GraphDepVars
>
(
kGraphDepVars
).
emplace
(
dep_var
);
all_reduce_op_handles
[
i
-
1
]
->
AddOutput
(
dep_var
);
all_reduce_op_handles
[
i
]
->
AddInput
(
dep_var
);
}
}
return
nullptr
;
}
void
AllReduceDepsPass
::
ApplyImpl
(
ir
::
Graph
*
graph
)
const
{
auto
graph_ops
=
ir
::
FilterByNodeWrapper
<
OpHandleBase
>
(
*
graph
);
// get vars order
int
order
=
0
;
std
::
unordered_map
<
std
::
string
,
int
>
vars
;
// TODO(gongwb): use graph topology sort to find the order of operators.
// Note that must assert topology sort is stable
auto
&
ops
=
graph
->
Get
<
const
std
::
vector
<
OpDesc
*>>
(
kStaleProgramOpDescs
);
for
(
auto
*
op_desc
:
ops
)
{
try
{
bool
is_bk_op
=
static_cast
<
bool
>
(
boost
::
get
<
int
>
(
op_desc
->
GetAttr
(
OpProtoAndCheckerMaker
::
OpRoleAttrName
()))
&
static_cast
<
int
>
(
OpRole
::
kBackward
));
if
(
!
is_bk_op
)
continue
;
auto
backward_vars
=
boost
::
get
<
std
::
vector
<
std
::
string
>>
(
op_desc
->
GetNullableAttr
(
OpProtoAndCheckerMaker
::
OpRoleVarAttrName
()));
PADDLE_ENFORCE_EQ
(
backward_vars
.
size
()
%
2
,
0
);
auto
outputs
=
op_desc
->
Outputs
();
for
(
auto
&
o_it
:
outputs
)
{
for
(
auto
&
v
:
o_it
.
second
)
{
// values
vars
[
v
]
=
order
;
VLOG
(
10
)
<<
"in all_reduce_deps_pass:"
<<
v
;
}
}
order
++
;
}
catch
(
boost
::
bad_get
e
)
{
if
(
VLOG_IS_ON
(
10
))
{
DebugString
(
*
graph
,
all_reduce_op_handles
);
}
}
std
::
vector
<
OpHandleBase
*>
dist_ops
;
// get allreduce ops.
for
(
auto
&
op
:
graph_ops
)
{
// FIXME(gongwb):add broad cast.
if
(
op
->
Name
()
==
"all_reduce"
||
op
->
Name
()
==
"reduce"
)
{
dist_ops
.
push_back
(
op
);
std
::
vector
<
AllReduceOpHandle
*>
GetSortedAllReduceOps
(
const
ir
::
Graph
&
graph
)
const
{
std
::
vector
<
AllReduceOpHandle
*>
all_reduce_op_handles
;
std
::
unordered_map
<
OpHandleBase
*
,
size_t
>
pending_ops
;
std
::
unordered_set
<
OpHandleBase
*>
ready_ops
;
std
::
unordered_set
<
OpHandleBase
*>
next_ready_ops
;
auto
op_handles
=
ir
::
FilterByNodeWrapper
<
OpHandleBase
>
(
graph
);
size_t
num_of_ops
=
op_handles
.
size
();
for
(
OpHandleBase
*
op
:
op_handles
)
{
size_t
not_ready_vars
=
op
->
NotReadyInputSize
();
if
(
not_ready_vars
)
{
pending_ops
.
insert
({
op
,
not_ready_vars
});
}
else
{
ready_ops
.
insert
(
op
);
}
}
}
VLOG
(
10
)
<<
"dist_ops size:"
<<
dist_ops
.
size
()
<<
", outputs size:"
<<
vars
.
size
()
<<
", ops size:"
<<
ops
.
size
();
std
::
sort
(
dist_ops
.
begin
(),
dist_ops
.
end
(),
[
&
](
OpHandleBase
*
op1
,
OpHandleBase
*
op2
)
{
VarHandle
*
i0
=
dynamic_cast
<
VarHandle
*>
(
GetValidInput
(
op1
));
VarHandle
*
i1
=
dynamic_cast
<
VarHandle
*>
(
GetValidInput
(
op2
));
PADDLE_ENFORCE
(
i0
!=
nullptr
&&
i1
!=
nullptr
,
"%s convert to %s error"
,
op1
->
DebugString
(),
op2
->
DebugString
());
auto
l_it
=
vars
.
find
(
i0
->
name
());
auto
r_it
=
vars
.
find
(
i1
->
name
());
PADDLE_ENFORCE
(
l_it
!=
vars
.
end
()
&&
r_it
!=
vars
.
end
(),
"can't find var's name %s and %s in opdesc"
,
i0
->
name
(),
i1
->
name
());
if
(
l_it
->
second
<
r_it
->
second
)
return
true
;
GetSortedAllReduceOps
(
ready_ops
,
&
all_reduce_op_handles
);
size_t
has_run_ops
=
ready_ops
.
size
();
while
(
has_run_ops
!=
num_of_ops
)
{
for
(
auto
*
op
:
ready_ops
)
{
for
(
auto
&
ready_var
:
op
->
Outputs
())
{
for
(
auto
*
pend_op
:
ready_var
->
PendingOps
())
{
auto
&
deps
=
--
pending_ops
[
pend_op
];
if
(
deps
==
0
)
{
next_ready_ops
.
insert
(
pend_op
);
}
}
}
}
if
(
l_it
->
second
==
r_it
->
second
)
{
return
i0
->
name
()
<
i1
->
name
();
PADDLE_ENFORCE_NE
(
next_ready_ops
.
size
(),
0
,
"There maybe have a cycle."
);
ready_ops
.
clear
();
std
::
swap
(
ready_ops
,
next_ready_ops
);
GetSortedAllReduceOps
(
ready_ops
,
&
all_reduce_op_handles
);
has_run_ops
+=
ready_ops
.
size
();
}
return
all_reduce_op_handles
;
}
return
false
;
});
// add dependency.
auto
&
sorted_ops
=
dist_ops
;
for
(
size_t
i
=
1
;
i
<
sorted_ops
.
size
();
++
i
)
{
auto
*
dep_var
=
new
DummyVarHandle
(
graph
->
CreateControlDepVar
());
auto
*
pre_op
=
sorted_ops
[
i
-
1
];
auto
*
op
=
sorted_ops
[
i
];
pre_op
->
AddOutput
(
dep_var
);
op
->
AddInput
(
dep_var
);
graph
->
Get
<
GraphDepVars
>
(
kGraphDepVars
).
emplace
(
dep_var
);
void
GetSortedAllReduceOps
(
const
std
::
unordered_set
<
OpHandleBase
*>&
ready_ops
,
std
::
vector
<
AllReduceOpHandle
*>*
all_reduce_op_handles
)
const
{
std
::
vector
<
AllReduceOpHandle
*>
current_all_reduce_op_handles
;
for
(
auto
&
op_handle
:
ready_ops
)
{
auto
all_reduce_op_handle
=
dynamic_cast
<
AllReduceOpHandle
*>
(
op_handle
);
if
(
all_reduce_op_handle
)
{
current_all_reduce_op_handles
.
emplace_back
(
all_reduce_op_handle
);
}
}
VLOG
(
10
)
<<
"add all_reduce sequential dependencies between "
<<
pre_op
<<
" and "
<<
op
;
// NOTE(zcd): For distributed training, it is important to keep the order of
// allReduce on each node consistent. Otherwise, hang may occur.
// Sort the current_all_reduce_op_handles according to the name of input.
sort
(
current_all_reduce_op_handles
.
begin
(),
current_all_reduce_op_handles
.
end
(),
[](
const
AllReduceOpHandle
*
left
,
const
AllReduceOpHandle
*
right
)
->
bool
{
auto
left_in_vars
=
DynamicCast
<
VarHandle
>
(
left
->
Inputs
());
auto
right_in_vars
=
DynamicCast
<
VarHandle
>
(
right
->
Inputs
());
PADDLE_ENFORCE_GT
(
left_in_vars
.
size
(),
0
);
PADDLE_ENFORCE_EQ
(
left_in_vars
.
size
(),
right_in_vars
.
size
());
return
left_in_vars
[
0
]
->
Name
()
>
right_in_vars
[
0
]
->
Name
();
});
all_reduce_op_handles
->
insert
(
all_reduce_op_handles
->
end
(),
current_all_reduce_op_handles
.
begin
(),
current_all_reduce_op_handles
.
end
());
}
VLOG
(
10
)
<<
"pre_op:"
<<
pre_op
->
DebugString
()
<<
", op:"
<<
op
->
DebugString
();
void
DebugString
(
const
ir
::
Graph
&
graph
,
const
std
::
vector
<
AllReduceOpHandle
*>&
all_reduce_op_handles
)
const
{
// get vars order
std
::
map
<
int
,
std
::
vector
<
std
::
string
>>
vars
=
GetSoredGradientsFromStaleProgram
(
graph
);
std
::
stringstream
out
;
size_t
grads_of_stale_program
=
0
;
out
<<
"Get Order From kStaleProgramOpDescs: "
;
for
(
auto
&
var
:
vars
)
{
out
<<
"Order "
<<
var
.
first
<<
" ["
;
for
(
auto
&
var_name
:
var
.
second
)
{
out
<<
var_name
<<
", "
;
++
grads_of_stale_program
;
}
out
<<
"], "
;
}
VLOG
(
10
)
<<
out
.
str
();
std
::
stringstream
out2
;
out2
<<
"Get Order From Topological order: "
;
for
(
auto
&
op
:
all_reduce_op_handles
)
{
bool
find_valid_input
=
false
;
for
(
auto
&
in_var
:
op
->
Inputs
())
{
if
(
dynamic_cast
<
VarHandle
*>
(
in_var
))
{
out2
<<
in_var
->
Name
()
<<
", "
;
find_valid_input
=
true
;
break
;
}
}
PADDLE_ENFORCE
(
find_valid_input
,
"Doesn't find valid input."
);
}
VLOG
(
10
)
<<
out2
.
str
();
if
(
grads_of_stale_program
!=
all_reduce_op_handles
.
size
())
{
VLOG
(
10
)
<<
"The gradients number of stale program and graph is not equal."
;
}
}
}
std
::
map
<
int
,
std
::
vector
<
std
::
string
>>
GetSoredGradientsFromStaleProgram
(
const
ir
::
Graph
&
graph
)
const
{
std
::
map
<
int
,
std
::
vector
<
std
::
string
>>
vars
;
auto
ops
=
graph
.
Get
<
const
std
::
vector
<
OpDesc
*>>
(
kStaleProgramOpDescs
);
int
order
=
0
;
for
(
auto
*
op_desc
:
ops
)
{
try
{
bool
is_bk_op
=
static_cast
<
bool
>
(
boost
::
get
<
int
>
(
op_desc
->
GetAttr
(
OpProtoAndCheckerMaker
::
OpRoleAttrName
()))
&
static_cast
<
int
>
(
OpRole
::
kBackward
));
if
(
!
is_bk_op
)
continue
;
auto
backward_vars
=
boost
::
get
<
std
::
vector
<
std
::
string
>>
(
op_desc
->
GetNullableAttr
(
OpProtoAndCheckerMaker
::
OpRoleVarAttrName
()));
if
(
backward_vars
.
empty
())
continue
;
PADDLE_ENFORCE_EQ
(
backward_vars
.
size
()
%
2
,
0
);
for
(
size_t
i
=
1
;
i
<
backward_vars
.
size
();
i
+=
2
)
{
vars
[
order
].
emplace_back
(
backward_vars
[
i
]);
VLOG
(
1
)
<<
"get parameter and gradient: "
<<
backward_vars
[
i
-
1
]
<<
", "
<<
backward_vars
[
i
];
}
order
++
;
}
catch
(
boost
::
bad_get
e
)
{
}
}
return
vars
;
}
};
}
// namespace details
}
// namespace framework
}
// namespace paddle
...
...
paddle/fluid/framework/details/all_reduce_deps_pass.h
已删除
100644 → 0
浏览文件 @
67cddc25
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include "paddle/fluid/framework/ir/graph.h"
#include "paddle/fluid/framework/ir/pass.h"
namespace
paddle
{
namespace
framework
{
namespace
details
{
// TODO(gongwb): overlap allreduce with backward computation.
class
AllReduceDepsPass
:
public
ir
::
Pass
{
protected:
void
ApplyImpl
(
ir
::
Graph
*
graph
)
const
override
;
};
}
// namespace details
}
// namespace framework
}
// namespace paddle
paddle/fluid/framework/details/all_reduce_op_handle.cc
浏览文件 @
218e02ac
...
...
@@ -28,7 +28,7 @@
// asynchronous nccl allreduce or synchronous issue:
// https://github.com/PaddlePaddle/Paddle/issues/15049
DEFINE_bool
(
sync_nccl_allreduce
,
fals
e
,
sync_nccl_allreduce
,
tru
e
,
"If set true, will call `cudaStreamSynchronize(nccl_stream)`"
"after allreduce, this mode can get better performance in some scenarios."
);
...
...
paddle/fluid/framework/details/build_strategy.cc
浏览文件 @
218e02ac
...
...
@@ -163,14 +163,11 @@ class ParallelExecutorPassBuilder : public ir::PassBuilder {
"graph_printer"
,
new
details
::
GraphvizSSAGraphPrinter
);
}
// Verify that the graph is correct for multi-device executor.
AppendPass
(
"multi_devices_check_pass"
);
if
(
VLOG_IS_ON
(
2
))
{
AppendPass
(
"all_reduce_deps_pass"
);
}
if
(
SeqOnlyAllReduceOps
(
strategy_
))
{
// experimental shows that the program will be faster if append
// all_reduce_deps_pass here.
if
(
!
strategy_
.
enable_parallel_graph_
&&
(
SeqOnlyAllReduceOps
(
strategy_
)
||
strategy
.
reduce_
==
BuildStrategy
::
ReduceStrategy
::
kAllReduce
))
{
VLOG
(
10
)
<<
"Add all_reduce_deps_pass"
;
AppendPass
(
"all_reduce_deps_pass"
);
}
...
...
@@ -179,6 +176,9 @@ class ParallelExecutorPassBuilder : public ir::PassBuilder {
VLOG
(
10
)
<<
"Add modify_op_lock_and_record_event_pass"
;
AppendPass
(
"modify_op_lock_and_record_event_pass"
);
}
// Verify that the graph is correct for multi-device executor.
AppendPass
(
"multi_devices_check_pass"
);
}
// Convert graph to run on multi-devices.
...
...
paddle/fluid/framework/details/op_handle_base.cc
浏览文件 @
218e02ac
...
...
@@ -68,7 +68,7 @@ void OpHandleBase::Run(bool use_cuda) {
if
(
out_var_handle
)
{
PADDLE_ENFORCE
(
platform
::
is_same_place
(
place
,
out_var_handle
->
place
()),
"The place of
in
put(%s) is not consistent with the "
"The place of
out
put(%s) is not consistent with the "
"place of current op(%s)."
,
out_var_handle
->
Name
(),
Name
());
out_var_handle
->
SetGenerateEvent
(
events_
.
at
(
dev_id
));
...
...
paddle/fluid/framework/op_desc.cc
浏览文件 @
218e02ac
...
...
@@ -617,6 +617,25 @@ void OpDesc::Flush() {
static
std
::
once_flag
init_infer_shape_funcs
;
/**
* NOTE(paddle-dev): Very tricky code here. Maybe we should find a
* better way to register compile-time infershape method gentlely.
*
* Normally, we can register a class derived from InferShapeBase, so that
* we can set the field of `infer_shape_` inside OpInfo when registering op.
*
* However, there is another way we can set the field of `infer_shape_` inside
* OpInfo. Usually, we overload InferShape method of OperatorWithKernel. After
* running the following method InitInferShapeFuncs, `infer_shape_` would be set
* to be the InferShape method of OperatorWithKernel. That is to say, we borrow
* the run-time InferShape method of OperatorWithKernel to be the compile-time
* InferShape method.
*
* However, during compiling time, we may not know inputs, outputs and attrs of
* run-time OperatorWithKernel. So the following code creates a fake
* OperatorWithKernel object. That is why the field info_ of OperatorBase
* would be null.
*/
static
void
InitInferShapeFuncs
()
{
std
::
call_once
(
init_infer_shape_funcs
,
[]
{
auto
&
map
=
OpInfoMap
::
Instance
();
...
...
@@ -628,11 +647,16 @@ static void InitInferShapeFuncs() {
PADDLE_ENFORCE
(
it
!=
info_map
.
end
(),
"%s has not been registered"
,
op_type
);
auto
&
op_info
=
it
->
second
;
auto
op
=
static_cast
<
OperatorWithKernel
*>
(
op_info
.
Creator
()(
""
,
VariableNameMap
{},
VariableNameMap
{},
AttributeMap
{}));
if
(
op_info
.
infer_shape_
)
{
// infer_shape has been registered.
continue
;
}
auto
op
=
dynamic_cast
<
OperatorWithKernel
*>
(
op_info
.
Creator
()(
""
,
VariableNameMap
{},
VariableNameMap
{},
AttributeMap
{}));
PADDLE_ENFORCE_NOT_NULL
(
op
,
"InferShapeBase is not registered to Operator %s"
,
op_type
);
op_info
.
infer_shape_
=
[
op
](
InferShapeContext
*
ctx
)
{
op
->
InferShape
(
ctx
);
};
...
...
paddle/fluid/framework/parallel_executor.cc
浏览文件 @
218e02ac
...
...
@@ -19,11 +19,6 @@ limitations under the License. */
#include <tuple>
#include <utility>
#include <vector>
#include "paddle/fluid/framework/ir/graph_helper.h"
#include "paddle/fluid/framework/ir/graph.h"
#include "paddle/fluid/framework/details/all_reduce_deps_pass.h"
#include "paddle/fluid/framework/details/async_ssa_graph_executor.h"
#include "paddle/fluid/framework/details/fast_threaded_ssa_graph_executor.h"
#include "paddle/fluid/framework/details/multi_devices_helper.h"
...
...
@@ -31,6 +26,8 @@ limitations under the License. */
#include "paddle/fluid/framework/details/reference_count_pass_helper.h"
#include "paddle/fluid/framework/details/scope_buffered_ssa_graph_executor.h"
#include "paddle/fluid/framework/details/threaded_ssa_graph_executor.h"
#include "paddle/fluid/framework/ir/graph.h"
#include "paddle/fluid/framework/ir/graph_helper.h"
#include "paddle/fluid/platform/profiler.h"
#ifdef WITH_GPERFTOOLS
...
...
paddle/fluid/inference/api/analysis_config.cc
浏览文件 @
218e02ac
...
...
@@ -142,7 +142,6 @@ AnalysisConfig::AnalysisConfig(const AnalysisConfig &other) {
void
AnalysisConfig
::
EnableMKLDNN
()
{
#ifdef PADDLE_WITH_MKLDNN
pass_builder
()
->
EnableMKLDNN
();
use_mkldnn_
=
true
;
#else
LOG
(
ERROR
)
<<
"Please compile with MKLDNN first to use MKLDNN"
;
...
...
@@ -235,16 +234,13 @@ void AnalysisConfig::Update() {
}
if
(
use_mkldnn_
)
{
#ifdef PADDLE_WITH_MKLDNN
if
(
!
enable_ir_optim_
)
{
LOG
(
ERROR
)
<<
"EnableMKLDNN() only works when IR optimization is enabled."
;
}
else
{
pass_builder
()
->
EnableMKLDNN
();
}
#ifdef PADDLE_WITH_MKLDNN
pass_builder
()
->
EnableMKLDNN
();
use_mkldnn_
=
true
;
#else
LOG
(
ERROR
)
<<
"Please compile with MKLDNN first to use MKLDNN"
;
use_mkldnn_
=
false
;
#endif
}
...
...
@@ -256,9 +252,6 @@ void AnalysisConfig::Update() {
}
#ifdef PADDLE_WITH_MKLDNN
pass_builder
()
->
EnableMkldnnQuantizer
();
#else
LOG
(
ERROR
)
<<
"Please compile with MKLDNN first to use MkldnnQuantizer"
;
use_mkldnn_quantizer_
=
false
;
#endif
}
...
...
paddle/fluid/inference/api/helper.h
浏览文件 @
218e02ac
...
...
@@ -27,6 +27,7 @@
#include <string>
#include <vector>
#include "paddle/fluid/inference/api/paddle_inference_api.h"
#include "paddle/fluid/platform/enforce.h"
#include "paddle/fluid/platform/port.h"
#include "paddle/fluid/string/printf.h"
...
...
@@ -266,17 +267,17 @@ static std::string DescribeZeroCopyTensor(const ZeroCopyTensor &tensor) {
}
static
void
PrintTime
(
int
batch_size
,
int
repeat
,
int
num_threads
,
int
tid
,
double
latency
,
int
epoch
=
1
)
{
LOG
(
INFO
)
<<
"====== batch_size: "
<<
batch_size
<<
", repeat: "
<<
repeat
<<
", threads: "
<<
num_threads
<<
", thread id: "
<<
tid
<<
", latency: "
<<
latency
<<
"ms, fps: "
<<
1
/
(
latency
/
1000.
f
)
double
batch_
latency
,
int
epoch
=
1
)
{
PADDLE_ENFORCE
(
batch_size
>
0
,
"Non-positive batch size."
);
double
sample_latency
=
batch_latency
/
batch_size
;
LOG
(
INFO
)
<<
"====== threads: "
<<
num_threads
<<
", thread id: "
<<
tid
<<
" ======"
;
if
(
epoch
>
1
)
{
int
samples
=
batch_size
*
epoch
;
LOG
(
INFO
)
<<
"====== sample number: "
<<
samples
<<
", average latency of each sample: "
<<
latency
/
samples
<<
"ms ======"
;
}
LOG
(
INFO
)
<<
"====== batch_size: "
<<
batch_size
<<
", iterations: "
<<
epoch
<<
", repetitions: "
<<
repeat
<<
" ======"
;
LOG
(
INFO
)
<<
"====== batch latency: "
<<
batch_latency
<<
"ms, number of samples: "
<<
batch_size
*
epoch
<<
", sample latency: "
<<
sample_latency
<<
"ms, fps: "
<<
1000.
f
/
sample_latency
<<
" ======"
;
}
static
bool
IsFileExists
(
const
std
::
string
&
path
)
{
...
...
paddle/fluid/inference/api/paddle_pass_builder.cc
浏览文件 @
218e02ac
...
...
@@ -64,10 +64,12 @@ void PaddlePassBuilder::DeletePass(size_t idx) {
passes_
.
erase
(
std
::
begin
(
passes_
)
+
idx
);
}
void
GpuPassStrategy
::
EnableMKLDNN
(
)
{
LOG
(
ERROR
)
<<
"GPU not support MKLDNN yet"
;
void
PaddlePassBuilder
::
AppendAnalysisPass
(
const
std
::
string
&
pass
)
{
analysis_passes_
.
push_back
(
pass
)
;
}
void
PaddlePassBuilder
::
ClearPasses
()
{
passes_
.
clear
();
}
// The following passes works for Anakin sub-graph engine.
const
std
::
vector
<
std
::
string
>
kAnakinSubgraphPasses
({
"infer_clean_graph_pass"
,
//
...
...
@@ -102,12 +104,12 @@ GpuPassStrategy::GpuPassStrategy() : PassStrategy({}) {
use_gpu_
=
true
;
}
void
GpuPassStrategy
::
EnableM
kldnnQuantizer
()
{
LOG
(
ERROR
)
<<
"GPU not support MKL
-DNN quantization
"
;
void
GpuPassStrategy
::
EnableM
KLDNN
()
{
LOG
(
ERROR
)
<<
"GPU not support MKL
DNN yet
"
;
}
void
PaddlePassBuilder
::
AppendAnalysisPass
(
const
std
::
string
&
pass
)
{
analysis_passes_
.
push_back
(
pass
)
;
void
GpuPassStrategy
::
EnableMkldnnQuantizer
(
)
{
LOG
(
ERROR
)
<<
"GPU not support MKL-DNN quantization"
;
}
CpuPassStrategy
::
CpuPassStrategy
()
:
PassStrategy
({})
{
...
...
@@ -130,10 +132,44 @@ CpuPassStrategy::CpuPassStrategy() : PassStrategy({}) {
"conv_bn_fuse_pass"
,
//
"conv_eltwiseadd_bn_fuse_pass"
,
//
"is_test_pass"
,
//
"identity_scale_op_clean_pass"
,
//
"runtime_context_cache_pass"
,
//
});
use_gpu_
=
false
;
}
void
PaddlePassBuilder
::
ClearPasses
()
{
passes_
.
clear
();
}
void
CpuPassStrategy
::
EnableMKLDNN
()
{
// TODO(Superjomn) Consider the way to mix CPU with GPU.
#ifdef PADDLE_WITH_MKLDNN
if
(
!
use_mkldnn_
)
{
passes_
.
insert
(
passes_
.
begin
(),
"mkldnn_placement_pass"
);
for
(
auto
&
pass
:
std
::
vector
<
std
::
string
>
(
{
"depthwise_conv_mkldnn_pass"
,
//
"conv_bn_fuse_pass"
,
// Execute BN passes again to
"conv_eltwiseadd_bn_fuse_pass"
,
// preserve correct pass order
"conv_bias_mkldnn_fuse_pass"
,
//
"conv3d_bias_mkldnn_fuse_pass"
,
//
"conv_elementwise_add_mkldnn_fuse_pass"
,
"conv_relu_mkldnn_fuse_pass"
}))
{
passes_
.
push_back
(
pass
);
}
}
use_mkldnn_
=
true
;
#else
use_mkldnn_
=
false
;
#endif
}
void
CpuPassStrategy
::
EnableMkldnnQuantizer
()
{
#ifdef PADDLE_WITH_MKLDNN
if
(
!
use_mkldnn_quantizer_
)
{
passes_
.
push_back
(
"cpu_quantize_placement_pass"
);
}
use_mkldnn_quantizer_
=
true
;
#else
use_mkldnn_quantizer_
=
false
;
#endif
}
}
// namespace paddle
paddle/fluid/inference/api/paddle_pass_builder.h
浏览文件 @
218e02ac
...
...
@@ -109,43 +109,16 @@ class CpuPassStrategy : public PassStrategy {
CpuPassStrategy
();
explicit
CpuPassStrategy
(
const
CpuPassStrategy
&
other
)
:
PassStrategy
(
other
.
AllPasses
())
{}
:
PassStrategy
(
other
.
AllPasses
())
{
use_gpu_
=
other
.
use_gpu_
;
use_mkldnn_
=
other
.
use_mkldnn_
;
use_mkldnn_quantizer_
=
other
.
use_mkldnn_quantizer_
;
}
virtual
~
CpuPassStrategy
()
=
default
;
void
EnableMKLDNN
()
override
{
// TODO(Superjomn) Consider the way to mix CPU with GPU.
#ifdef PADDLE_WITH_MKLDNN
if
(
!
use_mkldnn_
)
{
passes_
.
insert
(
passes_
.
begin
(),
"mkldnn_placement_pass"
);
for
(
auto
&
pass
:
std
::
vector
<
std
::
string
>
(
{
"depthwise_conv_mkldnn_pass"
,
//
"conv_bn_fuse_pass"
,
// Execute BN passes again to
"conv_eltwiseadd_bn_fuse_pass"
,
// preserve correct pass order
"conv_bias_mkldnn_fuse_pass"
,
//
"conv3d_bias_mkldnn_fuse_pass"
,
//
"conv_relu_mkldnn_fuse_pass"
,
//
"conv_elementwise_add_mkldnn_fuse_pass"
}))
{
passes_
.
push_back
(
pass
);
}
}
use_mkldnn_
=
true
;
#else
use_mkldnn_
=
false
;
#endif
}
void
EnableMkldnnQuantizer
()
override
{
#ifdef PADDLE_WITH_MKLDNN
if
(
!
use_mkldnn_quantizer_
)
{
passes_
.
push_back
(
"cpu_quantize_placement_pass"
);
}
use_mkldnn_quantizer_
=
true
;
#else
use_mkldnn_quantizer_
=
false
;
#endif
}
void
EnableMKLDNN
()
override
;
void
EnableMkldnnQuantizer
()
override
;
protected:
bool
use_mkldnn_quantizer_
{
false
};
...
...
paddle/fluid/inference/tests/api/CMakeLists.txt
浏览文件 @
218e02ac
...
...
@@ -26,7 +26,11 @@ endfunction()
function
(
inference_analysis_api_int8_test target model_dir data_dir filename
)
inference_analysis_test
(
${
target
}
SRCS
${
filename
}
EXTRA_DEPS
${
INFERENCE_EXTRA_DEPS
}
benchmark
ARGS --infer_model=
${
model_dir
}
/model --infer_data=
${
data_dir
}
/data.bin --batch_size=100
)
ARGS --infer_model=
${
model_dir
}
/model
--infer_data=
${
data_dir
}
/data.bin
--warmup_batch_size=100
--batch_size=50
--iterations=2
)
endfunction
()
function
(
inference_analysis_api_test_with_fake_data target install_dir filename model_name
)
...
...
@@ -146,22 +150,22 @@ inference_analysis_api_test_with_fake_data(test_analyzer_mobilenet_depthwise_con
# int8 image classification tests
if
(
WITH_MKLDNN
)
set
(
INT8_DATA_DIR
"
${
INFERENCE_DEMO_INSTALL_DIR
}
/int8"
)
set
(
INT8_DATA_DIR
"
${
INFERENCE_DEMO_INSTALL_DIR
}
/int8
v2
"
)
if
(
NOT EXISTS
${
INT8_DATA_DIR
}
)
inference_download_and_uncompress
(
${
INT8_DATA_DIR
}
${
INFERENCE_URL
}
"/int8"
"imagenet_val_100
.tar.gz"
)
inference_download_and_uncompress
(
${
INT8_DATA_DIR
}
"
${
INFERENCE_URL
}
/int8"
"imagenet_val_100_tail
.tar.gz"
)
endif
()
#resnet50 int8
set
(
INT8_RESNET50_MODEL_DIR
"
${
INT8_DATA_DIR
}
/resnet50"
)
if
(
NOT EXISTS
${
INT8_RESNET50_MODEL_DIR
}
)
inference_download_and_uncompress
(
${
INT8_RESNET50_MODEL_DIR
}
${
INFERENCE_URL
}
"
/int8"
"resnet50_int8_model.tar.gz"
)
inference_download_and_uncompress
(
${
INT8_RESNET50_MODEL_DIR
}
"
${
INFERENCE_URL
}
/int8"
"resnet50_int8_model.tar.gz"
)
endif
()
inference_analysis_api_int8_test
(
test_analyzer_int8_resnet50
${
INT8_RESNET50_MODEL_DIR
}
${
INT8_DATA_DIR
}
analyzer_int8_image_classification_tester.cc SERIAL
)
#mobilenet int8
set
(
INT8_MOBILENET_MODEL_DIR
"
${
INT8_DATA_DIR
}
/mobilenet"
)
if
(
NOT EXISTS
${
INT8_MOBILENET_MODEL_DIR
}
)
inference_download_and_uncompress
(
${
INT8_MOBILENET_MODEL_DIR
}
${
INFERENCE_URL
}
"
/int8"
"mobilenetv1_int8_model.tar.gz"
)
inference_download_and_uncompress
(
${
INT8_MOBILENET_MODEL_DIR
}
"
${
INFERENCE_URL
}
/int8"
"mobilenetv1_int8_model.tar.gz"
)
endif
()
inference_analysis_api_int8_test
(
test_analyzer_int8_mobilenet
${
INT8_MOBILENET_MODEL_DIR
}
${
INT8_DATA_DIR
}
analyzer_int8_image_classification_tester.cc SERIAL
)
endif
()
...
...
paddle/fluid/inference/tests/api/analyzer_bert_tester.cc
浏览文件 @
218e02ac
...
...
@@ -154,7 +154,7 @@ void profile(bool use_mkldnn = false) {
config
.
EnableMKLDNN
();
}
std
::
vector
<
PaddleTensor
>
outputs
;
std
::
vector
<
std
::
vector
<
PaddleTensor
>
>
outputs
;
std
::
vector
<
std
::
vector
<
PaddleTensor
>>
inputs
;
LoadInputData
(
&
inputs
);
TestPrediction
(
reinterpret_cast
<
const
PaddlePredictor
::
Config
*>
(
&
config
),
...
...
paddle/fluid/inference/tests/api/analyzer_dam_tester.cc
浏览文件 @
218e02ac
...
...
@@ -197,7 +197,7 @@ void profile(bool use_mkldnn = false) {
cfg
.
SetMKLDNNOp
(
op_list
);
}
std
::
vector
<
PaddleTensor
>
outputs
;
std
::
vector
<
std
::
vector
<
PaddleTensor
>
>
outputs
;
std
::
vector
<
std
::
vector
<
PaddleTensor
>>
input_slots_all
;
SetInput
(
&
input_slots_all
);
...
...
@@ -206,9 +206,11 @@ void profile(bool use_mkldnn = false) {
if
(
FLAGS_num_threads
==
1
&&
!
FLAGS_test_all_data
)
{
PADDLE_ENFORCE_GT
(
outputs
.
size
(),
0
);
size_t
size
=
GetSize
(
outputs
[
0
]);
auto
output
=
outputs
.
back
();
PADDLE_ENFORCE_GT
(
output
.
size
(),
0
);
size_t
size
=
GetSize
(
output
[
0
]);
PADDLE_ENFORCE_GT
(
size
,
0
);
float
*
result
=
static_cast
<
float
*>
(
output
s
[
0
].
data
.
data
());
float
*
result
=
static_cast
<
float
*>
(
output
[
0
].
data
.
data
());
for
(
size_t
i
=
0
;
i
<
size
;
i
++
)
{
EXPECT_NEAR
(
result
[
i
],
result_data
[
i
],
1e-3
);
}
...
...
paddle/fluid/inference/tests/api/analyzer_int8_image_classification_tester.cc
浏览文件 @
218e02ac
...
...
@@ -17,8 +17,6 @@ limitations under the License. */
#include "paddle/fluid/inference/api/paddle_analysis_config.h"
#include "paddle/fluid/inference/tests/api/tester_helper.h"
DEFINE_int32
(
iterations
,
0
,
"Number of iterations"
);
namespace
paddle
{
namespace
inference
{
namespace
analysis
{
...
...
@@ -30,8 +28,13 @@ void SetConfig(AnalysisConfig *cfg) {
cfg
->
SwitchIrOptim
();
cfg
->
SwitchSpecifyInputNames
(
false
);
cfg
->
SetCpuMathLibraryNumThreads
(
FLAGS_paddle_num_threads
);
cfg
->
EnableMKLDNN
();
cfg
->
pass_builder
()
->
SetPasses
(
{
"infer_clean_graph_pass"
,
"mkldnn_placement_pass"
,
"depthwise_conv_mkldnn_pass"
,
"conv_bn_fuse_pass"
,
"conv_eltwiseadd_bn_fuse_pass"
,
"conv_bias_mkldnn_fuse_pass"
,
"conv_elementwise_add_mkldnn_fuse_pass"
,
"conv_relu_mkldnn_fuse_pass"
,
"fc_fuse_pass"
,
"is_test_pass"
});
}
template
<
typename
T
>
...
...
@@ -40,8 +43,8 @@ class TensorReader {
TensorReader
(
std
::
ifstream
&
file
,
size_t
beginning_offset
,
std
::
vector
<
int
>
shape
,
std
::
string
name
)
:
file_
(
file
),
position
(
beginning_offset
),
shape_
(
shape
),
name_
(
name
)
{
numel
=
std
::
accumulate
(
shape_
.
begin
(),
shape_
.
end
(),
1
,
std
::
multiplies
<
T
>
());
numel
=
std
::
accumulate
(
shape_
.
begin
(),
shape_
.
end
(),
size_t
{
1
},
std
::
multiplies
<
size_t
>
());
}
PaddleTensor
NextBatch
()
{
...
...
@@ -71,10 +74,14 @@ class TensorReader {
};
std
::
shared_ptr
<
std
::
vector
<
PaddleTensor
>>
GetWarmupData
(
const
std
::
vector
<
std
::
vector
<
PaddleTensor
>>
&
test_data
,
int
num_images
)
{
const
std
::
vector
<
std
::
vector
<
PaddleTensor
>>
&
test_data
,
int
num_images
=
FLAGS_warmup_batch_size
)
{
int
test_data_batch_size
=
test_data
[
0
][
0
].
shape
[
0
];
CHECK_LE
(
static_cast
<
size_t
>
(
num_images
),
test_data
.
size
()
*
test_data_batch_size
);
auto
iterations_max
=
test_data
.
size
();
PADDLE_ENFORCE
(
static_cast
<
size_t
>
(
num_images
)
<=
iterations_max
*
test_data_batch_size
,
"The requested quantization warmup data size "
+
std
::
to_string
(
num_images
)
+
" is bigger than all test data size."
);
PaddleTensor
images
;
images
.
name
=
"input"
;
...
...
@@ -120,20 +127,17 @@ void SetInput(std::vector<std::vector<PaddleTensor>> *inputs,
std
::
vector
<
int
>
image_batch_shape
{
batch_size
,
3
,
224
,
224
};
std
::
vector
<
int
>
label_batch_shape
{
batch_size
,
1
};
auto
images_offset_in_file
=
static_cast
<
size_t
>
(
file
.
tellg
());
auto
labels_offset_in_file
=
static_cast
<
size_t
>
(
file
.
tellg
())
+
sizeof
(
float
)
*
total_images
*
std
::
accumulate
(
image_batch_shape
.
begin
()
+
1
,
image_batch_shape
.
end
(),
1
,
std
::
multiplies
<
int
>
());
images_offset_in_file
+
sizeof
(
float
)
*
total_images
*
3
*
224
*
224
;
TensorReader
<
float
>
image_reader
(
file
,
0
,
image_batch_shape
,
"input"
);
TensorReader
<
float
>
image_reader
(
file
,
images_offset_in_file
,
image_batch_shape
,
"input"
);
TensorReader
<
int64_t
>
label_reader
(
file
,
labels_offset_in_file
,
label_batch_shape
,
"label"
);
auto
iterations
=
total_images
/
batch_size
;
if
(
FLAGS_iterations
>
0
&&
FLAGS_iterations
<
iterations
)
iterations
=
FLAGS_iterations
;
for
(
auto
i
=
0
;
i
<
iterations
;
i
++
)
{
auto
iterations_max
=
total_images
/
batch_size
;
for
(
auto
i
=
0
;
i
<
iterations_max
;
i
++
)
{
auto
images
=
image_reader
.
NextBatch
();
auto
labels
=
label_reader
.
NextBatch
();
inputs
->
emplace_back
(
...
...
@@ -148,20 +152,21 @@ TEST(Analyzer_int8_resnet50, quantization) {
AnalysisConfig
q_cfg
;
SetConfig
(
&
q_cfg
);
// read data from file and prepare batches with test data
std
::
vector
<
std
::
vector
<
PaddleTensor
>>
input_slots_all
;
SetInput
(
&
input_slots_all
,
100
);
SetInput
(
&
input_slots_all
);
// prepare warmup batch from input data read earlier
// warmup batch size can be different than batch size
std
::
shared_ptr
<
std
::
vector
<
PaddleTensor
>>
warmup_data
=
GetWarmupData
(
input_slots_all
,
100
);
GetWarmupData
(
input_slots_all
);
// configure quantizer
q_cfg
.
EnableMkldnnQuantizer
();
q_cfg
.
mkldnn_quantizer_config
()
->
SetWarmupData
(
warmup_data
);
q_cfg
.
mkldnn_quantizer_config
()
->
SetWarmupBatchSize
(
100
);
q_cfg
.
mkldnn_quantizer_config
()
->
SetWarmupBatchSize
(
FLAGS_warmup_batch_size
);
CompareQuantizedAndAnalysis
(
reinterpret_cast
<
const
PaddlePredictor
::
Config
*>
(
&
cfg
),
reinterpret_cast
<
const
PaddlePredictor
::
Config
*>
(
&
q_cfg
),
input_slots_all
);
CompareQuantizedAndAnalysis
(
&
cfg
,
&
q_cfg
,
input_slots_all
);
}
}
// namespace analysis
...
...
paddle/fluid/inference/tests/api/analyzer_lac_tester.cc
浏览文件 @
218e02ac
...
...
@@ -124,7 +124,7 @@ void SetInput(std::vector<std::vector<PaddleTensor>> *inputs) {
TEST
(
Analyzer_LAC
,
profile
)
{
AnalysisConfig
cfg
;
SetConfig
(
&
cfg
);
std
::
vector
<
PaddleTensor
>
outputs
;
std
::
vector
<
std
::
vector
<
PaddleTensor
>
>
outputs
;
std
::
vector
<
std
::
vector
<
PaddleTensor
>>
input_slots_all
;
SetInput
(
&
input_slots_all
);
...
...
@@ -137,11 +137,13 @@ TEST(Analyzer_LAC, profile) {
24
,
25
,
25
,
25
,
38
,
30
,
31
,
14
,
15
,
44
,
24
,
25
,
25
,
25
,
25
,
25
,
44
,
24
,
25
,
25
,
25
,
36
,
42
,
43
,
44
,
14
,
15
,
44
,
14
,
15
,
44
,
14
,
15
,
44
,
38
,
39
,
14
,
15
,
44
,
22
,
23
,
23
,
23
,
23
,
23
,
23
,
23
};
PADDLE_ENFORCE_EQ
(
outputs
.
size
(),
1UL
);
size_t
size
=
GetSize
(
outputs
[
0
]);
PADDLE_ENFORCE_GT
(
outputs
.
size
(),
0
);
auto
output
=
outputs
.
back
();
PADDLE_ENFORCE_EQ
(
output
.
size
(),
1UL
);
size_t
size
=
GetSize
(
output
[
0
]);
size_t
batch1_size
=
sizeof
(
lac_ref_data
)
/
sizeof
(
int64_t
);
PADDLE_ENFORCE_GE
(
size
,
batch1_size
);
int64_t
*
pdata
=
static_cast
<
int64_t
*>
(
output
s
[
0
].
data
.
data
());
int64_t
*
pdata
=
static_cast
<
int64_t
*>
(
output
[
0
].
data
.
data
());
for
(
size_t
i
=
0
;
i
<
batch1_size
;
++
i
)
{
EXPECT_EQ
(
pdata
[
i
],
lac_ref_data
[
i
]);
}
...
...
paddle/fluid/inference/tests/api/analyzer_mm_dnn_tester.cc
浏览文件 @
218e02ac
...
...
@@ -96,7 +96,7 @@ void SetInput(std::vector<std::vector<PaddleTensor>> *inputs) {
void
profile
(
bool
use_mkldnn
=
false
)
{
AnalysisConfig
cfg
;
SetConfig
(
&
cfg
);
std
::
vector
<
PaddleTensor
>
outputs
;
std
::
vector
<
std
::
vector
<
PaddleTensor
>
>
outputs
;
if
(
use_mkldnn
)
{
cfg
.
EnableMKLDNN
();
...
...
@@ -108,8 +108,9 @@ void profile(bool use_mkldnn = false) {
input_slots_all
,
&
outputs
,
FLAGS_num_threads
);
if
(
FLAGS_num_threads
==
1
&&
!
FLAGS_test_all_data
)
{
PADDLE_ENFORCE_EQ
(
outputs
.
size
(),
2UL
);
for
(
auto
&
output
:
outputs
)
{
PADDLE_ENFORCE_GT
(
outputs
.
size
(),
0
);
PADDLE_ENFORCE_EQ
(
outputs
.
back
().
size
(),
2UL
);
for
(
auto
&
output
:
outputs
.
back
())
{
size_t
size
=
GetSize
(
output
);
PADDLE_ENFORCE_GT
(
size
,
0
);
float
*
result
=
static_cast
<
float
*>
(
output
.
data
.
data
());
...
...
paddle/fluid/inference/tests/api/analyzer_ner_tester.cc
浏览文件 @
218e02ac
...
...
@@ -106,7 +106,7 @@ void SetInput(std::vector<std::vector<PaddleTensor>> *inputs) {
void
profile
(
bool
memory_load
=
false
)
{
AnalysisConfig
cfg
;
SetConfig
(
&
cfg
,
memory_load
);
std
::
vector
<
PaddleTensor
>
outputs
;
std
::
vector
<
std
::
vector
<
PaddleTensor
>
>
outputs
;
std
::
vector
<
std
::
vector
<
PaddleTensor
>>
input_slots_all
;
SetInput
(
&
input_slots_all
);
...
...
@@ -117,10 +117,12 @@ void profile(bool memory_load = false) {
// the first inference result
const
int
chinese_ner_result_data
[]
=
{
30
,
45
,
41
,
48
,
17
,
26
,
48
,
39
,
38
,
16
,
25
};
PADDLE_ENFORCE_EQ
(
outputs
.
size
(),
1UL
);
size_t
size
=
GetSize
(
outputs
[
0
]);
PADDLE_ENFORCE_GT
(
outputs
.
size
(),
0
);
auto
output
=
outputs
.
back
();
PADDLE_ENFORCE_EQ
(
output
.
size
(),
1UL
);
size_t
size
=
GetSize
(
output
[
0
]);
PADDLE_ENFORCE_GT
(
size
,
0
);
int64_t
*
result
=
static_cast
<
int64_t
*>
(
output
s
[
0
].
data
.
data
());
int64_t
*
result
=
static_cast
<
int64_t
*>
(
output
[
0
].
data
.
data
());
for
(
size_t
i
=
0
;
i
<
std
::
min
(
11UL
,
size
);
i
++
)
{
EXPECT_EQ
(
result
[
i
],
chinese_ner_result_data
[
i
]);
}
...
...
paddle/fluid/inference/tests/api/analyzer_pyramid_dnn_tester.cc
浏览文件 @
218e02ac
...
...
@@ -127,7 +127,7 @@ void SetInput(std::vector<std::vector<PaddleTensor>> *inputs) {
TEST
(
Analyzer_Pyramid_DNN
,
profile
)
{
AnalysisConfig
cfg
;
SetConfig
(
&
cfg
);
std
::
vector
<
PaddleTensor
>
outputs
;
std
::
vector
<
std
::
vector
<
PaddleTensor
>
>
outputs
;
std
::
vector
<
std
::
vector
<
PaddleTensor
>>
input_slots_all
;
SetInput
(
&
input_slots_all
);
...
...
@@ -135,10 +135,12 @@ TEST(Analyzer_Pyramid_DNN, profile) {
input_slots_all
,
&
outputs
,
FLAGS_num_threads
);
if
(
FLAGS_num_threads
==
1
&&
!
FLAGS_test_all_data
&&
!
FLAGS_zero_copy
)
{
PADDLE_ENFORCE_EQ
(
outputs
.
size
(),
1UL
);
size_t
size
=
GetSize
(
outputs
[
0
]);
PADDLE_ENFORCE_GT
(
outputs
.
size
(),
0
);
auto
output
=
outputs
.
back
();
PADDLE_ENFORCE_EQ
(
output
.
size
(),
1UL
);
size_t
size
=
GetSize
(
output
[
0
]);
PADDLE_ENFORCE_GT
(
size
,
0
);
float
*
result
=
static_cast
<
float
*>
(
output
s
[
0
].
data
.
data
());
float
*
result
=
static_cast
<
float
*>
(
output
[
0
].
data
.
data
());
// output is probability, which is in (0, 1).
for
(
size_t
i
=
0
;
i
<
size
;
i
++
)
{
EXPECT_GT
(
result
[
i
],
0
);
...
...
paddle/fluid/inference/tests/api/analyzer_resnet50_tester.cc
浏览文件 @
218e02ac
...
...
@@ -40,7 +40,7 @@ void profile(bool use_mkldnn = false) {
if
(
use_mkldnn
)
{
cfg
.
EnableMKLDNN
();
}
std
::
vector
<
PaddleTensor
>
outputs
;
std
::
vector
<
std
::
vector
<
PaddleTensor
>
>
outputs
;
std
::
vector
<
std
::
vector
<
PaddleTensor
>>
input_slots_all
;
SetInput
(
&
input_slots_all
);
...
...
paddle/fluid/inference/tests/api/analyzer_rnn1_tester.cc
浏览文件 @
218e02ac
...
...
@@ -229,7 +229,7 @@ TEST(Analyzer_rnn1, profile) {
SetConfig
(
&
cfg
);
cfg
.
DisableGpu
();
cfg
.
SwitchIrDebug
();
std
::
vector
<
PaddleTensor
>
outputs
;
std
::
vector
<
std
::
vector
<
PaddleTensor
>
>
outputs
;
std
::
vector
<
std
::
vector
<
PaddleTensor
>>
input_slots_all
;
SetInput
(
&
input_slots_all
);
...
...
@@ -280,7 +280,7 @@ TEST(Analyzer_rnn1, compare_determine) {
TEST
(
Analyzer_rnn1
,
multi_thread
)
{
AnalysisConfig
cfg
;
SetConfig
(
&
cfg
);
std
::
vector
<
PaddleTensor
>
outputs
;
std
::
vector
<
std
::
vector
<
PaddleTensor
>
>
outputs
;
std
::
vector
<
std
::
vector
<
PaddleTensor
>>
input_slots_all
;
SetInput
(
&
input_slots_all
);
...
...
paddle/fluid/inference/tests/api/analyzer_rnn2_tester.cc
浏览文件 @
218e02ac
...
...
@@ -126,7 +126,7 @@ void SetInput(std::vector<std::vector<PaddleTensor>> *inputs) {
TEST
(
Analyzer_rnn2
,
profile
)
{
AnalysisConfig
cfg
;
SetConfig
(
&
cfg
);
std
::
vector
<
PaddleTensor
>
outputs
;
std
::
vector
<
std
::
vector
<
PaddleTensor
>
>
outputs
;
std
::
vector
<
std
::
vector
<
PaddleTensor
>>
input_slots_all
;
SetInput
(
&
input_slots_all
);
...
...
@@ -136,9 +136,11 @@ TEST(Analyzer_rnn2, profile) {
if
(
FLAGS_num_threads
==
1
&&
!
FLAGS_test_all_data
)
{
// the first inference result
PADDLE_ENFORCE_GT
(
outputs
.
size
(),
0
);
size_t
size
=
GetSize
(
outputs
[
0
]);
auto
output
=
outputs
.
back
();
PADDLE_ENFORCE_GT
(
output
.
size
(),
0
);
size_t
size
=
GetSize
(
output
[
0
]);
PADDLE_ENFORCE_GT
(
size
,
0
);
float
*
result
=
static_cast
<
float
*>
(
output
s
[
0
].
data
.
data
());
float
*
result
=
static_cast
<
float
*>
(
output
[
0
].
data
.
data
());
for
(
size_t
i
=
0
;
i
<
size
;
i
++
)
{
EXPECT_NEAR
(
result
[
i
],
result_data
[
i
],
1e-3
);
}
...
...
paddle/fluid/inference/tests/api/analyzer_seq_conv1_tester.cc
浏览文件 @
218e02ac
...
...
@@ -110,7 +110,7 @@ void SetInput(std::vector<std::vector<PaddleTensor>> *inputs) {
TEST
(
Analyzer_seq_conv1
,
profile
)
{
AnalysisConfig
cfg
;
SetConfig
(
&
cfg
);
std
::
vector
<
PaddleTensor
>
outputs
;
std
::
vector
<
std
::
vector
<
PaddleTensor
>
>
outputs
;
std
::
vector
<
std
::
vector
<
PaddleTensor
>>
input_slots_all
;
SetInput
(
&
input_slots_all
);
...
...
@@ -119,10 +119,12 @@ TEST(Analyzer_seq_conv1, profile) {
if
(
FLAGS_num_threads
==
1
&&
!
FLAGS_test_all_data
)
{
// the first inference result
PADDLE_ENFORCE_EQ
(
outputs
.
size
(),
1UL
);
size_t
size
=
GetSize
(
outputs
[
0
]);
PADDLE_ENFORCE_GT
(
outputs
.
size
(),
0
);
auto
output
=
outputs
.
back
();
PADDLE_ENFORCE_EQ
(
output
.
size
(),
1UL
);
size_t
size
=
GetSize
(
output
[
0
]);
PADDLE_ENFORCE_GT
(
size
,
0
);
float
*
result
=
static_cast
<
float
*>
(
output
s
[
0
].
data
.
data
());
float
*
result
=
static_cast
<
float
*>
(
output
[
0
].
data
.
data
());
// output is probability, which is in (0, 1).
for
(
size_t
i
=
0
;
i
<
size
;
i
++
)
{
EXPECT_GT
(
result
[
i
],
0
);
...
...
paddle/fluid/inference/tests/api/analyzer_seq_pool1_tester.cc
浏览文件 @
218e02ac
...
...
@@ -156,7 +156,7 @@ void profile(bool use_mkldnn = false) {
AnalysisConfig
cfg
;
SetConfig
(
&
cfg
,
use_mkldnn
);
std
::
vector
<
PaddleTensor
>
outputs
;
std
::
vector
<
std
::
vector
<
PaddleTensor
>
>
outputs
;
std
::
vector
<
std
::
vector
<
PaddleTensor
>>
input_slots_all
;
SetInput
(
&
input_slots_all
);
TestPrediction
(
reinterpret_cast
<
const
PaddlePredictor
::
Config
*>
(
&
cfg
),
...
...
paddle/fluid/inference/tests/api/analyzer_text_classification_tester.cc
浏览文件 @
218e02ac
...
...
@@ -70,7 +70,7 @@ TEST(Analyzer_Text_Classification, profile) {
AnalysisConfig
cfg
;
SetConfig
(
&
cfg
);
cfg
.
SwitchIrDebug
();
std
::
vector
<
PaddleTensor
>
outputs
;
std
::
vector
<
std
::
vector
<
PaddleTensor
>
>
outputs
;
std
::
vector
<
std
::
vector
<
PaddleTensor
>>
input_slots_all
;
SetInput
(
&
input_slots_all
);
...
...
@@ -79,8 +79,9 @@ TEST(Analyzer_Text_Classification, profile) {
if
(
FLAGS_num_threads
==
1
)
{
// Get output
LOG
(
INFO
)
<<
"get outputs "
<<
outputs
.
size
();
for
(
auto
&
output
:
outputs
)
{
PADDLE_ENFORCE_GT
(
outputs
.
size
(),
0
);
LOG
(
INFO
)
<<
"get outputs "
<<
outputs
.
back
().
size
();
for
(
auto
&
output
:
outputs
.
back
())
{
LOG
(
INFO
)
<<
"output.shape: "
<<
to_string
(
output
.
shape
);
// no lod ?
CHECK_EQ
(
output
.
lod
.
size
(),
0UL
);
...
...
paddle/fluid/inference/tests/api/analyzer_transformer_tester.cc
浏览文件 @
218e02ac
...
...
@@ -186,7 +186,7 @@ void SetInput(std::vector<std::vector<PaddleTensor>> *inputs) {
void
profile
(
bool
use_mkldnn
=
false
)
{
AnalysisConfig
cfg
;
SetConfig
(
&
cfg
);
std
::
vector
<
PaddleTensor
>
outputs
;
std
::
vector
<
std
::
vector
<
PaddleTensor
>
>
outputs
;
if
(
use_mkldnn
)
{
cfg
.
EnableMKLDNN
();
}
...
...
paddle/fluid/inference/tests/api/analyzer_vis_tester.cc
浏览文件 @
218e02ac
...
...
@@ -87,7 +87,7 @@ void profile(bool use_mkldnn = false) {
cfg
.
EnableMKLDNN
();
}
// cfg.pass_builder()->TurnOnDebug();
std
::
vector
<
PaddleTensor
>
outputs
;
std
::
vector
<
std
::
vector
<
PaddleTensor
>
>
outputs
;
std
::
vector
<
std
::
vector
<
PaddleTensor
>>
input_slots_all
;
SetInput
(
&
input_slots_all
);
...
...
@@ -100,7 +100,8 @@ void profile(bool use_mkldnn = false) {
auto
refer
=
ProcessALine
(
line
);
file
.
close
();
auto
&
output
=
outputs
.
front
();
PADDLE_ENFORCE_GT
(
outputs
.
size
(),
0
);
auto
&
output
=
outputs
.
back
().
front
();
size_t
numel
=
output
.
data
.
length
()
/
PaddleDtypeSize
(
output
.
dtype
);
CHECK_EQ
(
numel
,
refer
.
data
.
size
());
for
(
size_t
i
=
0
;
i
<
numel
;
++
i
)
{
...
...
paddle/fluid/inference/tests/api/tester_helper.h
浏览文件 @
218e02ac
...
...
@@ -41,7 +41,10 @@ DEFINE_string(model_name, "", "model name");
DEFINE_string
(
infer_model
,
""
,
"model path"
);
DEFINE_string
(
infer_data
,
""
,
"data file"
);
DEFINE_string
(
refer_result
,
""
,
"reference result for comparison"
);
DEFINE_int32
(
batch_size
,
1
,
"batch size."
);
DEFINE_int32
(
batch_size
,
1
,
"batch size"
);
DEFINE_int32
(
warmup_batch_size
,
100
,
"batch size for quantization warmup"
);
// setting iterations to 0 means processing the whole dataset
DEFINE_int32
(
iterations
,
0
,
"number of batches to process"
);
DEFINE_int32
(
repeat
,
1
,
"Running the inference program repeat times."
);
DEFINE_bool
(
test_all_data
,
false
,
"Test the all dataset in data file."
);
DEFINE_int32
(
num_threads
,
1
,
"Running the inference program in multi-threads."
);
...
...
@@ -239,7 +242,7 @@ void SetFakeImageInput(std::vector<std::vector<PaddleTensor>> *inputs,
}
input
.
shape
=
shape
;
input
.
dtype
=
PaddleDType
::
FLOAT32
;
size_t
len
=
std
::
accumulate
(
shape
.
begin
(),
shape
.
end
(),
1
,
size_t
len
=
std
::
accumulate
(
shape
.
begin
(),
shape
.
end
(),
size_t
{
1
}
,
[](
int
a
,
int
b
)
{
return
a
*
b
;
});
input
.
data
.
Resize
(
len
*
sizeof
(
float
));
input
.
lod
.
assign
({{
0
,
static_cast
<
size_t
>
(
FLAGS_batch_size
)}});
...
...
@@ -286,17 +289,18 @@ void ConvertPaddleTensorToZeroCopyTensor(
void
PredictionWarmUp
(
PaddlePredictor
*
predictor
,
const
std
::
vector
<
std
::
vector
<
PaddleTensor
>>
&
inputs
,
std
::
vector
<
PaddleTensor
>
*
outputs
,
int
num_thread
s
,
int
tid
)
{
std
::
vector
<
std
::
vector
<
PaddleTensor
>>
*
output
s
,
int
num_threads
,
int
tid
)
{
int
batch_size
=
FLAGS_batch_size
;
LOG
(
INFO
)
<<
"Running thread "
<<
tid
<<
", warm up run..."
;
if
(
FLAGS_zero_copy
)
{
ConvertPaddleTensorToZeroCopyTensor
(
predictor
,
inputs
[
0
]);
}
outputs
->
resize
(
1
);
Timer
warmup_timer
;
warmup_timer
.
tic
();
if
(
!
FLAGS_zero_copy
)
{
predictor
->
Run
(
inputs
[
0
],
outputs
,
batch_size
);
predictor
->
Run
(
inputs
[
0
],
&
(
*
outputs
)[
0
]
,
batch_size
);
}
else
{
predictor
->
ZeroCopyRun
();
}
...
...
@@ -308,11 +312,16 @@ void PredictionWarmUp(PaddlePredictor *predictor,
void
PredictionRun
(
PaddlePredictor
*
predictor
,
const
std
::
vector
<
std
::
vector
<
PaddleTensor
>>
&
inputs
,
std
::
vector
<
PaddleTensor
>
*
outputs
,
int
num_threads
,
int
tid
)
{
int
batch_size
=
FLAGS_batch_size
;
std
::
vector
<
std
::
vector
<
PaddleTensor
>>
*
outputs
,
int
num_threads
,
int
tid
)
{
int
num_times
=
FLAGS_repeat
;
LOG
(
INFO
)
<<
"Thread "
<<
tid
<<
" run "
<<
num_times
<<
" times..."
;
int
iterations
=
inputs
.
size
();
// process the whole dataset ...
if
(
FLAGS_iterations
>
0
&&
FLAGS_iterations
<
inputs
.
size
())
iterations
=
FLAGS_iterations
;
// ... unless the number of iterations is set
outputs
->
resize
(
iterations
);
LOG
(
INFO
)
<<
"Thread "
<<
tid
<<
", number of threads "
<<
num_threads
<<
", run "
<<
num_times
<<
" times..."
;
Timer
run_timer
;
double
elapsed_time
=
0
;
#ifdef WITH_GPERFTOOLS
...
...
@@ -320,14 +329,14 @@ void PredictionRun(PaddlePredictor *predictor,
#endif
if
(
!
FLAGS_zero_copy
)
{
run_timer
.
tic
();
for
(
size_t
i
=
0
;
i
<
i
nputs
.
size
()
;
i
++
)
{
for
(
size_t
i
=
0
;
i
<
i
terations
;
i
++
)
{
for
(
int
j
=
0
;
j
<
num_times
;
j
++
)
{
predictor
->
Run
(
inputs
[
i
],
outputs
,
batch_size
);
predictor
->
Run
(
inputs
[
i
],
&
(
*
outputs
)[
i
],
FLAGS_
batch_size
);
}
}
elapsed_time
=
run_timer
.
toc
();
}
else
{
for
(
size_t
i
=
0
;
i
<
i
nputs
.
size
()
;
i
++
)
{
for
(
size_t
i
=
0
;
i
<
i
terations
;
i
++
)
{
ConvertPaddleTensorToZeroCopyTensor
(
predictor
,
inputs
[
i
]);
run_timer
.
tic
();
for
(
int
j
=
0
;
j
<
num_times
;
j
++
)
{
...
...
@@ -340,13 +349,14 @@ void PredictionRun(PaddlePredictor *predictor,
ProfilerStop
();
#endif
PrintTime
(
batch_size
,
num_times
,
num_threads
,
tid
,
elapsed_time
/
num_times
,
inputs
.
size
());
auto
batch_latency
=
elapsed_time
/
(
iterations
*
num_times
);
PrintTime
(
FLAGS_batch_size
,
num_times
,
num_threads
,
tid
,
batch_latency
,
iterations
);
if
(
FLAGS_record_benchmark
)
{
Benchmark
benchmark
;
benchmark
.
SetName
(
FLAGS_model_name
);
benchmark
.
SetBatchSize
(
batch_size
);
benchmark
.
SetLatency
(
elapsed_time
/
num_times
);
benchmark
.
SetBatchSize
(
FLAGS_
batch_size
);
benchmark
.
SetLatency
(
batch_latency
);
benchmark
.
PersistToFile
(
"benchmark_record.txt"
);
}
}
...
...
@@ -354,16 +364,17 @@ void PredictionRun(PaddlePredictor *predictor,
void
TestOneThreadPrediction
(
const
PaddlePredictor
::
Config
*
config
,
const
std
::
vector
<
std
::
vector
<
PaddleTensor
>>
&
inputs
,
std
::
vector
<
PaddleTensor
>
*
outputs
,
bool
use_analysis
=
true
)
{
std
::
vector
<
std
::
vector
<
PaddleTensor
>
>
*
outputs
,
bool
use_analysis
=
true
)
{
auto
predictor
=
CreateTestPredictor
(
config
,
use_analysis
);
PredictionWarmUp
(
predictor
.
get
(),
inputs
,
outputs
,
1
,
0
);
PredictionRun
(
predictor
.
get
(),
inputs
,
outputs
,
1
,
0
);
PredictionWarmUp
(
predictor
.
get
(),
inputs
,
outputs
,
FLAGS_paddle_num_threads
,
0
);
PredictionRun
(
predictor
.
get
(),
inputs
,
outputs
,
FLAGS_paddle_num_threads
,
0
);
}
void
TestMultiThreadPrediction
(
const
PaddlePredictor
::
Config
*
config
,
const
std
::
vector
<
std
::
vector
<
PaddleTensor
>>
&
inputs
,
std
::
vector
<
PaddleTensor
>
*
outputs
,
int
num_threads
,
std
::
vector
<
std
::
vector
<
PaddleTensor
>
>
*
outputs
,
int
num_threads
,
bool
use_analysis
=
true
)
{
std
::
vector
<
std
::
thread
>
threads
;
std
::
vector
<
std
::
unique_ptr
<
PaddlePredictor
>>
predictors
;
...
...
@@ -376,7 +387,7 @@ void TestMultiThreadPrediction(
threads
.
emplace_back
([
&
,
tid
]()
{
// Each thread should have local inputs and outputs.
// The inputs of each thread are all the same.
std
::
vector
<
PaddleTensor
>
outputs_tid
;
std
::
vector
<
std
::
vector
<
PaddleTensor
>
>
outputs_tid
;
auto
&
predictor
=
predictors
[
tid
];
#ifdef PADDLE_WITH_MKLDNN
if
(
use_analysis
)
{
...
...
@@ -384,8 +395,8 @@ void TestMultiThreadPrediction(
->
SetMkldnnThreadID
(
static_cast
<
int
>
(
tid
)
+
1
);
}
#endif
PredictionWarmUp
(
predictor
.
get
(),
inputs
,
outputs
,
num_threads
,
tid
);
PredictionRun
(
predictor
.
get
(),
inputs
,
outputs
,
num_threads
,
tid
);
PredictionWarmUp
(
predictor
.
get
(),
inputs
,
&
outputs_tid
,
num_threads
,
tid
);
PredictionRun
(
predictor
.
get
(),
inputs
,
&
outputs_tid
,
num_threads
,
tid
);
});
}
for
(
int
i
=
0
;
i
<
num_threads
;
++
i
)
{
...
...
@@ -395,8 +406,8 @@ void TestMultiThreadPrediction(
void
TestPrediction
(
const
PaddlePredictor
::
Config
*
config
,
const
std
::
vector
<
std
::
vector
<
PaddleTensor
>>
&
inputs
,
std
::
vector
<
PaddleTensor
>
*
outputs
,
int
num_thread
s
,
bool
use_analysis
=
FLAGS_use_analysis
)
{
std
::
vector
<
std
::
vector
<
PaddleTensor
>>
*
output
s
,
int
num_threads
,
bool
use_analysis
=
FLAGS_use_analysis
)
{
PrintConfig
(
config
,
use_analysis
);
if
(
num_threads
==
1
)
{
TestOneThreadPrediction
(
config
,
inputs
,
outputs
,
use_analysis
);
...
...
@@ -406,30 +417,41 @@ void TestPrediction(const PaddlePredictor::Config *config,
}
}
void
CompareTopAccuracy
(
const
std
::
vector
<
PaddleTensor
>
&
output_slots1
,
const
std
::
vector
<
PaddleTensor
>
&
output_slots2
)
{
// first output: avg_cost
if
(
output_slots
1
.
size
()
==
0
||
output_slots2
.
size
()
==
0
)
void
CompareTopAccuracy
(
const
std
::
vector
<
std
::
vector
<
PaddleTensor
>>
&
output_slots_quant
,
const
std
::
vector
<
std
::
vector
<
PaddleTensor
>>
&
output_slots_ref
)
{
if
(
output_slots
_quant
.
size
()
==
0
||
output_slots_ref
.
size
()
==
0
)
throw
std
::
invalid_argument
(
"CompareTopAccuracy: output_slots vector is empty."
);
PADDLE_ENFORCE
(
output_slots1
.
size
()
>=
2UL
);
PADDLE_ENFORCE
(
output_slots2
.
size
()
>=
2UL
);
// second output: acc_top1
if
(
output_slots1
[
1
].
lod
.
size
()
>
0
||
output_slots2
[
1
].
lod
.
size
()
>
0
)
throw
std
::
invalid_argument
(
"CompareTopAccuracy: top1 accuracy output has nonempty LoD."
);
if
(
output_slots1
[
1
].
dtype
!=
paddle
::
PaddleDType
::
FLOAT32
||
output_slots2
[
1
].
dtype
!=
paddle
::
PaddleDType
::
FLOAT32
)
throw
std
::
invalid_argument
(
"CompareTopAccuracy: top1 accuracy output is of a wrong type."
);
float
*
top1_quantized
=
static_cast
<
float
*>
(
output_slots1
[
1
].
data
.
data
());
float
*
top1_reference
=
static_cast
<
float
*>
(
output_slots2
[
1
].
data
.
data
());
LOG
(
INFO
)
<<
"top1 INT8 accuracy: "
<<
*
top1_quantized
;
LOG
(
INFO
)
<<
"top1 FP32 accuracy: "
<<
*
top1_reference
;
float
total_accs1_quant
{
0
};
float
total_accs1_ref
{
0
};
for
(
size_t
i
=
0
;
i
<
output_slots_quant
.
size
();
++
i
)
{
PADDLE_ENFORCE
(
output_slots_quant
[
i
].
size
()
>=
2UL
);
PADDLE_ENFORCE
(
output_slots_ref
[
i
].
size
()
>=
2UL
);
// second output: acc_top1
if
(
output_slots_quant
[
i
][
1
].
lod
.
size
()
>
0
||
output_slots_ref
[
i
][
1
].
lod
.
size
()
>
0
)
throw
std
::
invalid_argument
(
"CompareTopAccuracy: top1 accuracy output has nonempty LoD."
);
if
(
output_slots_quant
[
i
][
1
].
dtype
!=
paddle
::
PaddleDType
::
FLOAT32
||
output_slots_ref
[
i
][
1
].
dtype
!=
paddle
::
PaddleDType
::
FLOAT32
)
throw
std
::
invalid_argument
(
"CompareTopAccuracy: top1 accuracy output is of a wrong type."
);
total_accs1_quant
+=
*
static_cast
<
float
*>
(
output_slots_quant
[
i
][
1
].
data
.
data
());
total_accs1_ref
+=
*
static_cast
<
float
*>
(
output_slots_ref
[
i
][
1
].
data
.
data
());
}
float
avg_acc1_quant
=
total_accs1_quant
/
output_slots_quant
.
size
();
float
avg_acc1_ref
=
total_accs1_ref
/
output_slots_ref
.
size
();
LOG
(
INFO
)
<<
"Avg top1 INT8 accuracy: "
<<
std
::
fixed
<<
std
::
setw
(
6
)
<<
std
::
setprecision
(
4
)
<<
avg_acc1_quant
;
LOG
(
INFO
)
<<
"Avg top1 FP32 accuracy: "
<<
std
::
fixed
<<
std
::
setw
(
6
)
<<
std
::
setprecision
(
4
)
<<
avg_acc1_ref
;
LOG
(
INFO
)
<<
"Accepted accuracy drop threshold: "
<<
FLAGS_quantized_accuracy
;
CHECK_LE
(
std
::
abs
(
*
top1_quantized
-
*
top1_reference
),
FLAGS_quantized_accuracy
);
CHECK_LE
(
std
::
abs
(
avg_acc1_quant
-
avg_acc1_ref
),
FLAGS_quantized_accuracy
);
}
void
CompareDeterministic
(
...
...
@@ -455,20 +477,35 @@ void CompareNativeAndAnalysis(
const
PaddlePredictor
::
Config
*
config
,
const
std
::
vector
<
std
::
vector
<
PaddleTensor
>>
&
inputs
)
{
PrintConfig
(
config
,
true
);
std
::
vector
<
PaddleTensor
>
native_outputs
,
analysis_outputs
;
std
::
vector
<
std
::
vector
<
PaddleTensor
>
>
native_outputs
,
analysis_outputs
;
TestOneThreadPrediction
(
config
,
inputs
,
&
native_outputs
,
false
);
TestOneThreadPrediction
(
config
,
inputs
,
&
analysis_outputs
,
true
);
CompareResult
(
analysis_outputs
,
native_outputs
);
PADDLE_ENFORCE
(
native_outputs
.
size
()
>
0
,
"Native output is empty."
);
PADDLE_ENFORCE
(
analysis_outputs
.
size
()
>
0
,
"Analysis output is empty."
);
CompareResult
(
analysis_outputs
.
back
(),
native_outputs
.
back
());
}
void
CompareQuantizedAndAnalysis
(
const
PaddlePredictor
::
Config
*
config
,
const
PaddlePredictor
::
Config
*
qconfig
,
const
AnalysisConfig
*
config
,
const
AnalysisConfig
*
qconfig
,
const
std
::
vector
<
std
::
vector
<
PaddleTensor
>>
&
inputs
)
{
PrintConfig
(
config
,
true
);
std
::
vector
<
PaddleTensor
>
analysis_outputs
,
quantized_outputs
;
TestOneThreadPrediction
(
config
,
inputs
,
&
analysis_outputs
,
true
);
TestOneThreadPrediction
(
qconfig
,
inputs
,
&
quantized_outputs
,
true
);
PADDLE_ENFORCE_EQ
(
inputs
[
0
][
0
].
shape
[
0
],
FLAGS_batch_size
,
"Input data has to be packed batch by batch."
);
LOG
(
INFO
)
<<
"FP32 & INT8 prediction run: batch_size "
<<
FLAGS_batch_size
<<
", warmup batch size "
<<
FLAGS_warmup_batch_size
<<
"."
;
LOG
(
INFO
)
<<
"--- FP32 prediction start ---"
;
auto
*
cfg
=
reinterpret_cast
<
const
PaddlePredictor
::
Config
*>
(
config
);
PrintConfig
(
cfg
,
true
);
std
::
vector
<
std
::
vector
<
PaddleTensor
>>
analysis_outputs
;
TestOneThreadPrediction
(
cfg
,
inputs
,
&
analysis_outputs
,
true
);
LOG
(
INFO
)
<<
"--- INT8 prediction start ---"
;
auto
*
qcfg
=
reinterpret_cast
<
const
PaddlePredictor
::
Config
*>
(
qconfig
);
PrintConfig
(
qcfg
,
true
);
std
::
vector
<
std
::
vector
<
PaddleTensor
>>
quantized_outputs
;
TestOneThreadPrediction
(
qcfg
,
inputs
,
&
quantized_outputs
,
true
);
LOG
(
INFO
)
<<
"--- comparing outputs --- "
;
CompareTopAccuracy
(
quantized_outputs
,
analysis_outputs
);
}
...
...
@@ -578,9 +615,9 @@ static bool CompareTensorData(const framework::LoDTensor &a,
const
framework
::
LoDTensor
&
b
)
{
auto
a_shape
=
framework
::
vectorize
(
a
.
dims
());
auto
b_shape
=
framework
::
vectorize
(
b
.
dims
());
size_t
a_size
=
std
::
accumulate
(
a_shape
.
begin
(),
a_shape
.
end
(),
1
,
size_t
a_size
=
std
::
accumulate
(
a_shape
.
begin
(),
a_shape
.
end
(),
size_t
{
1
}
,
[](
int
a
,
int
b
)
{
return
a
*
b
;
});
size_t
b_size
=
std
::
accumulate
(
b_shape
.
begin
(),
b_shape
.
end
(),
1
,
size_t
b_size
=
std
::
accumulate
(
b_shape
.
begin
(),
b_shape
.
end
(),
size_t
{
1
}
,
[](
int
a
,
int
b
)
{
return
a
*
b
;
});
if
(
a_size
!=
b_size
)
{
LOG
(
ERROR
)
<<
string
::
Sprintf
(
"tensor data size not match, %d != %d"
,
...
...
paddle/fluid/inference/tests/api/trt_models_tester.cc
浏览文件 @
218e02ac
...
...
@@ -74,7 +74,7 @@ void profile(std::string model_dir, bool use_analysis, bool use_tensorrt) {
SetFakeImageInput
(
&
inputs_all
,
model_dir
,
false
,
"__model__"
,
""
);
}
std
::
vector
<
PaddleTensor
>
outputs
;
std
::
vector
<
std
::
vector
<
PaddleTensor
>
>
outputs
;
if
(
use_analysis
||
use_tensorrt
)
{
AnalysisConfig
config
;
config
.
EnableUseGpu
(
100
,
0
);
...
...
paddle/fluid/op_use_default_grad_op_maker.spec
浏览文件 @
218e02ac
...
...
@@ -8,9 +8,6 @@ conv_shift
cos
cos_sim
dequantize
elementwise_div
elementwise_max
elementwise_min
elu
fc
flatten
...
...
@@ -28,8 +25,6 @@ gelu
gru
hard_shrink
hierarchical_sigmoid
hinge_loss
huber_loss
leaky_relu
log
logsigmoid
...
...
@@ -57,7 +52,6 @@ requantize
reshape
rnn_memory_helper
round
row_conv
sequence_softmax
sin
softplus
...
...
paddle/fluid/operators/batch_size_like.h
浏览文件 @
218e02ac
...
...
@@ -74,5 +74,8 @@ class BatchSizeLikeOpMaker : public framework::OpProtoAndCheckerMaker {
virtual
void
Apply
()
=
0
;
};
DECLARE_NO_NEED_BUFFER_VARS_INFERENCE
(
BatchSizeLikeNoNeedBufferVarsInference
,
"Input"
);
}
// namespace operators
}
// namespace paddle
paddle/fluid/operators/elementwise/elementwise_div_op.cc
浏览文件 @
218e02ac
...
...
@@ -13,10 +13,47 @@ See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/fluid/operators/elementwise/elementwise_div_op.h"
#include <memory>
#include <string>
#include "paddle/fluid/operators/elementwise/elementwise_op.h"
namespace
paddle
{
namespace
operators
{
class
ElementwiseDivOpMaker
:
public
ElementwiseOpMaker
{
protected:
std
::
string
GetName
()
const
override
{
return
"Div"
;
}
std
::
string
GetEquation
()
const
override
{
return
"Out = X / Y"
;
}
};
class
ElementwiseDivGradOpDescMaker
:
public
framework
::
SingleGradOpDescMaker
{
public:
using
framework
::
SingleGradOpDescMaker
::
SingleGradOpDescMaker
;
protected:
std
::
unique_ptr
<
framework
::
OpDesc
>
Apply
()
const
override
{
std
::
unique_ptr
<
framework
::
OpDesc
>
op
(
new
framework
::
OpDesc
());
op
->
SetType
(
"elementwise_div_grad"
);
op
->
SetInput
(
"Y"
,
Input
(
"Y"
));
op
->
SetInput
(
"Out"
,
Output
(
"Out"
));
op
->
SetInput
(
framework
::
GradVarName
(
"Out"
),
OutputGrad
(
"Out"
));
op
->
SetOutput
(
framework
::
GradVarName
(
"X"
),
InputGrad
(
"X"
));
op
->
SetOutput
(
framework
::
GradVarName
(
"Y"
),
InputGrad
(
"Y"
));
op
->
SetAttrMap
(
Attrs
());
return
op
;
}
};
}
// namespace operators
}
// namespace paddle
namespace
ops
=
paddle
::
operators
;
REGISTER_ELEMWISE_OP
(
elementwise_div
,
"Div"
,
"Out = X / Y"
);
REGISTER_OPERATOR
(
elementwise_div
,
ops
::
ElementwiseOp
,
ops
::
ElementwiseDivOpMaker
,
ops
::
ElementwiseOpInferVarType
,
ops
::
ElementwiseDivGradOpDescMaker
);
REGISTER_OPERATOR
(
elementwise_div_grad
,
ops
::
ElementwiseOpGrad
);
REGISTER_OP_CPU_KERNEL
(
elementwise_div
,
...
...
paddle/fluid/operators/elementwise/elementwise_div_op.h
浏览文件 @
218e02ac
...
...
@@ -47,7 +47,7 @@ struct DivGradDX {
template
<
typename
T
>
struct
DivGradDY
{
HOSTDEVICE
T
operator
()(
T
x
,
T
y
,
T
out
,
T
dout
)
const
{
return
-
dout
*
x
/
(
y
*
y
)
;
return
-
dout
*
out
/
y
;
}
};
...
...
@@ -58,13 +58,15 @@ class ElementwiseDivGradKernel : public ElemwiseGradKernel<T> {
ElemwiseGradKernel
<
T
>::
Compute
(
ctx
);
using
Tensor
=
framework
::
Tensor
;
auto
*
x
=
ctx
.
Input
<
Tensor
>
(
"X"
);
auto
*
y
=
ctx
.
Input
<
Tensor
>
(
"Y"
);
auto
*
out
=
ctx
.
Input
<
Tensor
>
(
"Out"
);
auto
*
dout
=
ctx
.
Input
<
Tensor
>
(
framework
::
GradVarName
(
"Out"
));
auto
*
dx
=
ctx
.
Output
<
Tensor
>
(
framework
::
GradVarName
(
"X"
));
auto
*
dy
=
ctx
.
Output
<
Tensor
>
(
framework
::
GradVarName
(
"Y"
));
int
axis
=
ctx
.
Attr
<
int
>
(
"axis"
);
auto
*
x
=
dout
;
// Fake x, not used
ElemwiseGradCompute
<
DeviceContext
,
T
,
DivGradDX
<
T
>
,
DivGradDY
<
T
>>
(
ctx
,
*
x
,
*
y
,
*
out
,
*
dout
,
axis
,
dx
,
dy
,
DivGradDX
<
T
>
(),
DivGradDY
<
T
>
());
}
...
...
paddle/fluid/operators/elementwise/elementwise_max_op.cc
浏览文件 @
218e02ac
...
...
@@ -13,9 +13,48 @@ See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/fluid/operators/elementwise/elementwise_max_op.h"
#include <memory>
#include <string>
#include "paddle/fluid/operators/elementwise/elementwise_op.h"
namespace
paddle
{
namespace
operators
{
class
ElementwiseMaxOpMaker
:
public
ElementwiseOpMaker
{
protected:
std
::
string
GetName
()
const
override
{
return
"Max"
;
}
std
::
string
GetEquation
()
const
override
{
return
"Out = max(X, Y)"
;
}
};
class
ElementwiseMaxGradOpDescMaker
:
public
framework
::
SingleGradOpDescMaker
{
public:
using
framework
::
SingleGradOpDescMaker
::
SingleGradOpDescMaker
;
protected:
std
::
unique_ptr
<
framework
::
OpDesc
>
Apply
()
const
override
{
std
::
unique_ptr
<
framework
::
OpDesc
>
op
(
new
framework
::
OpDesc
());
op
->
SetType
(
"elementwise_max_grad"
);
op
->
SetInput
(
"X"
,
Input
(
"X"
));
op
->
SetInput
(
"Y"
,
Input
(
"Y"
));
op
->
SetInput
(
framework
::
GradVarName
(
"Out"
),
OutputGrad
(
"Out"
));
op
->
SetOutput
(
framework
::
GradVarName
(
"X"
),
InputGrad
(
"X"
));
op
->
SetOutput
(
framework
::
GradVarName
(
"Y"
),
InputGrad
(
"Y"
));
op
->
SetAttrMap
(
Attrs
());
return
op
;
}
};
}
// namespace operators
}
// namespace paddle
namespace
ops
=
paddle
::
operators
;
REGISTER_ELEMWISE_OP
(
elementwise_max
,
"Max"
,
"Out = max(X, Y)"
);
REGISTER_OPERATOR
(
elementwise_max
,
ops
::
ElementwiseOp
,
ops
::
ElementwiseMaxOpMaker
,
ops
::
ElementwiseOpInferVarType
,
ops
::
ElementwiseMaxGradOpDescMaker
);
REGISTER_OPERATOR
(
elementwise_max_grad
,
ops
::
ElementwiseOpGrad
);
REGISTER_OP_CPU_KERNEL
(
elementwise_max
,
ops
::
ElementwiseMaxKernel
<
paddle
::
platform
::
CPUDeviceContext
,
float
>
,
...
...
paddle/fluid/operators/elementwise/elementwise_max_op.h
浏览文件 @
218e02ac
...
...
@@ -63,10 +63,10 @@ class ElementwiseMaxGradKernel : public ElemwiseGradKernel<T> {
auto
*
x
=
ctx
.
Input
<
Tensor
>
(
"X"
);
auto
*
y
=
ctx
.
Input
<
Tensor
>
(
"Y"
);
auto
*
out
=
ctx
.
Input
<
Tensor
>
(
"Out"
);
auto
*
dout
=
ctx
.
Input
<
Tensor
>
(
framework
::
GradVarName
(
"Out"
));
auto
*
dx
=
ctx
.
Output
<
Tensor
>
(
framework
::
GradVarName
(
"X"
));
auto
*
dy
=
ctx
.
Output
<
Tensor
>
(
framework
::
GradVarName
(
"Y"
));
auto
*
out
=
dout
;
// Fake out, not used
int
axis
=
ctx
.
Attr
<
int
>
(
"axis"
);
ElemwiseGradCompute
<
DeviceContext
,
T
,
MaxGradDx
<
T
>
,
MaxGradDy
<
T
>>
(
ctx
,
*
x
,
*
y
,
*
out
,
*
dout
,
axis
,
dx
,
dy
,
MaxGradDx
<
T
>
(),
MaxGradDy
<
T
>
());
...
...
paddle/fluid/operators/elementwise/elementwise_min_op.cc
浏览文件 @
218e02ac
...
...
@@ -13,9 +13,48 @@ See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/fluid/operators/elementwise/elementwise_min_op.h"
#include <memory>
#include <string>
#include "paddle/fluid/operators/elementwise/elementwise_op.h"
namespace
paddle
{
namespace
operators
{
class
ElementwiseMinOpMaker
:
public
ElementwiseOpMaker
{
protected:
std
::
string
GetName
()
const
override
{
return
"Min"
;
}
std
::
string
GetEquation
()
const
override
{
return
"Out = min(X, Y)"
;
}
};
class
ElementwiseMinGradOpDescMaker
:
public
framework
::
SingleGradOpDescMaker
{
public:
using
framework
::
SingleGradOpDescMaker
::
SingleGradOpDescMaker
;
protected:
std
::
unique_ptr
<
framework
::
OpDesc
>
Apply
()
const
override
{
std
::
unique_ptr
<
framework
::
OpDesc
>
op
(
new
framework
::
OpDesc
());
op
->
SetType
(
"elementwise_min_grad"
);
op
->
SetInput
(
"X"
,
Input
(
"X"
));
op
->
SetInput
(
"Y"
,
Input
(
"Y"
));
op
->
SetInput
(
framework
::
GradVarName
(
"Out"
),
OutputGrad
(
"Out"
));
op
->
SetOutput
(
framework
::
GradVarName
(
"X"
),
InputGrad
(
"X"
));
op
->
SetOutput
(
framework
::
GradVarName
(
"Y"
),
InputGrad
(
"Y"
));
op
->
SetAttrMap
(
Attrs
());
return
op
;
}
};
}
// namespace operators
}
// namespace paddle
namespace
ops
=
paddle
::
operators
;
REGISTER_ELEMWISE_OP
(
elementwise_min
,
"Min"
,
"Out = min(X, Y)"
);
REGISTER_OPERATOR
(
elementwise_min
,
ops
::
ElementwiseOp
,
ops
::
ElementwiseMinOpMaker
,
ops
::
ElementwiseOpInferVarType
,
ops
::
ElementwiseMinGradOpDescMaker
);
REGISTER_OPERATOR
(
elementwise_min_grad
,
ops
::
ElementwiseOpGrad
);
REGISTER_OP_CPU_KERNEL
(
elementwise_min
,
ops
::
ElementwiseMinKernel
<
paddle
::
platform
::
CPUDeviceContext
,
float
>
,
...
...
paddle/fluid/operators/elementwise/elementwise_min_op.h
浏览文件 @
218e02ac
...
...
@@ -62,10 +62,10 @@ class ElementwiseMinGradKernel : public ElemwiseGradKernel<T> {
auto
*
x
=
ctx
.
Input
<
Tensor
>
(
"X"
);
auto
*
y
=
ctx
.
Input
<
Tensor
>
(
"Y"
);
auto
*
out
=
ctx
.
Input
<
Tensor
>
(
"Out"
);
auto
*
dout
=
ctx
.
Input
<
Tensor
>
(
framework
::
GradVarName
(
"Out"
));
auto
*
dx
=
ctx
.
Output
<
Tensor
>
(
framework
::
GradVarName
(
"X"
));
auto
*
dy
=
ctx
.
Output
<
Tensor
>
(
framework
::
GradVarName
(
"Y"
));
auto
*
out
=
dout
;
// Fake out, not used
int
axis
=
ctx
.
Attr
<
int
>
(
"axis"
);
ElemwiseGradCompute
<
DeviceContext
,
T
,
MinGradDx
<
T
>
,
MinGradDy
<
T
>>
(
ctx
,
*
x
,
*
y
,
*
out
,
*
dout
,
axis
,
dx
,
dy
,
MinGradDx
<
T
>
(),
MinGradDy
<
T
>
());
...
...
paddle/fluid/operators/elementwise/elementwise_op.h
浏览文件 @
218e02ac
...
...
@@ -173,12 +173,12 @@ class ElementwiseOpGrad : public framework::OperatorWithKernel {
using
Tensor
=
framework
::
Tensor
;
void
InferShape
(
framework
::
InferShapeContext
*
ctx
)
const
override
{
PADDLE_ENFORCE
(
ctx
->
HasInput
(
"X"
),
"Input(X) should not be null
"
);
auto
out_grad_name
=
framework
::
GradVarName
(
"Out
"
);
PADDLE_ENFORCE
(
ctx
->
HasInput
(
"Y"
),
"Input(Y) should not be null"
);
PADDLE_ENFORCE
(
ctx
->
HasInput
(
framework
::
GradVarName
(
"Out"
)
),
PADDLE_ENFORCE
(
ctx
->
HasInput
(
out_grad_name
),
"Input(Out@GRAD) should not be null"
);
auto
x_dims
=
ctx
->
GetInputDim
(
"X"
);
auto
x_dims
=
ctx
->
GetInputDim
(
out_grad_name
);
auto
y_dims
=
ctx
->
GetInputDim
(
"Y"
);
PADDLE_ENFORCE_GE
(
x_dims
.
size
(),
y_dims
.
size
(),
...
...
@@ -187,8 +187,8 @@ class ElementwiseOpGrad : public framework::OperatorWithKernel {
auto
x_grad_name
=
framework
::
GradVarName
(
"X"
);
auto
y_grad_name
=
framework
::
GradVarName
(
"Y"
);
if
(
ctx
->
HasOutput
(
x_grad_name
))
{
ctx
->
ShareDim
(
"X"
,
/*->*/
x_grad_name
);
ctx
->
ShareLoD
(
"X"
,
/*->*/
x_grad_name
);
ctx
->
ShareDim
(
out_grad_name
,
/*->*/
x_grad_name
);
ctx
->
ShareLoD
(
out_grad_name
,
/*->*/
x_grad_name
);
}
if
(
ctx
->
HasOutput
(
y_grad_name
))
{
ctx
->
ShareDim
(
"Y"
,
/*->*/
y_grad_name
);
...
...
paddle/fluid/operators/fill_constant_batch_size_like_op.cc
浏览文件 @
218e02ac
...
...
@@ -46,6 +46,7 @@ obtained from the `input` tensor.
)DOC"
);
}
};
}
// namespace operators
}
// namespace paddle
...
...
@@ -53,7 +54,8 @@ namespace ops = paddle::operators;
REGISTER_OPERATOR
(
fill_constant_batch_size_like
,
ops
::
FillConstantBatchSizeLikeOp
,
paddle
::
framework
::
EmptyGradOpMaker
,
ops
::
FillConstantBatchSizeLikeOpMaker
);
ops
::
FillConstantBatchSizeLikeOpMaker
,
ops
::
BatchSizeLikeNoNeedBufferVarsInference
);
REGISTER_OP_CPU_KERNEL
(
fill_constant_batch_size_like
,
ops
::
FillConstantBatchSizeLikeOpKernel
<
paddle
::
platform
::
CPUDeviceContext
,
...
...
paddle/fluid/operators/fill_zeros_like_op.cc
浏览文件 @
218e02ac
...
...
@@ -36,6 +36,7 @@ class FillZerosLikeOpMaker : public framework::OpProtoAndCheckerMaker {
void
Make
()
override
{
AddInput
(
"X"
,
"The input of fill-zeros-like op."
);
AddOutput
(
"Out"
,
"The variable will be filled up with zeros."
);
ExtraMake
();
AddComment
(
R"DOC(
FillZerosLike Operator.
...
...
@@ -44,13 +45,49 @@ The output will have the same size as the input.
)DOC"
);
}
protected:
virtual
void
ExtraMake
()
{}
};
class
FillZerosLikeOp2
:
public
FillZerosLikeOp
{
public:
using
FillZerosLikeOp
::
FillZerosLikeOp
;
protected:
framework
::
OpKernelType
GetExpectedKernelType
(
const
framework
::
ExecutionContext
&
ctx
)
const
override
{
return
framework
::
OpKernelType
(
static_cast
<
framework
::
proto
::
VarType
::
Type
>
(
ctx
.
Attr
<
int
>
(
"dtype"
)),
ctx
.
GetPlace
());
}
};
class
FillZerosLikeOp2Maker
:
public
FillZerosLikeOpMaker
{
protected:
void
ExtraMake
()
override
{
this
->
AddAttr
<
int
>
(
"dtype"
,
"(int, default 5(FP32)) "
"Output data type."
)
.
SetDefault
(
framework
::
proto
::
VarType
::
FP32
);
}
};
DECLARE_NO_NEED_BUFFER_VARS_INFERENCE
(
FillZerosLikeOp2NoNeedBufferVarsInference
,
"X"
);
}
// namespace operators
}
// namespace paddle
namespace
ops
=
paddle
::
operators
;
REGISTER_OP_WITHOUT_GRADIENT
(
fill_zeros_like
,
ops
::
FillZerosLikeOp
,
ops
::
FillZerosLikeOpMaker
);
REGISTER_OPERATOR
(
fill_zeros_like2
,
ops
::
FillZerosLikeOp2
,
ops
::
FillZerosLikeOp2Maker
,
ops
::
FillZerosLikeOp2NoNeedBufferVarsInference
,
paddle
::
framework
::
EmptyGradOpMaker
);
REGISTER_OP_CPU_KERNEL
(
fill_zeros_like
,
ops
::
FillZerosLikeKernel
<
paddle
::
platform
::
CPUDeviceContext
,
int
>
,
...
...
@@ -58,3 +95,11 @@ REGISTER_OP_CPU_KERNEL(
ops
::
FillZerosLikeKernel
<
paddle
::
platform
::
CPUDeviceContext
,
float
>
,
ops
::
FillZerosLikeKernel
<
paddle
::
platform
::
CPUDeviceContext
,
double
>
,
ops
::
FillZerosLikeKernel
<
paddle
::
platform
::
CPUDeviceContext
,
bool
>
);
REGISTER_OP_CPU_KERNEL
(
fill_zeros_like2
,
ops
::
FillZerosLikeKernel
<
paddle
::
platform
::
CPUDeviceContext
,
int
>
,
ops
::
FillZerosLikeKernel
<
paddle
::
platform
::
CPUDeviceContext
,
int64_t
>
,
ops
::
FillZerosLikeKernel
<
paddle
::
platform
::
CPUDeviceContext
,
float
>
,
ops
::
FillZerosLikeKernel
<
paddle
::
platform
::
CPUDeviceContext
,
double
>
,
ops
::
FillZerosLikeKernel
<
paddle
::
platform
::
CPUDeviceContext
,
bool
>
);
paddle/fluid/operators/fill_zeros_like_op.cu.cc
浏览文件 @
218e02ac
...
...
@@ -26,3 +26,13 @@ REGISTER_OP_CUDA_KERNEL(
ops
::
FillZerosLikeKernel
<
paddle
::
platform
::
CUDADeviceContext
,
paddle
::
platform
::
float16
>
,
ops
::
FillZerosLikeKernel
<
paddle
::
platform
::
CUDADeviceContext
,
bool
>
);
REGISTER_OP_CUDA_KERNEL
(
fill_zeros_like2
,
ops
::
FillZerosLikeKernel
<
paddle
::
platform
::
CUDADeviceContext
,
int
>
,
ops
::
FillZerosLikeKernel
<
paddle
::
platform
::
CUDADeviceContext
,
int64_t
>
,
ops
::
FillZerosLikeKernel
<
paddle
::
platform
::
CUDADeviceContext
,
float
>
,
ops
::
FillZerosLikeKernel
<
paddle
::
platform
::
CUDADeviceContext
,
double
>
,
ops
::
FillZerosLikeKernel
<
paddle
::
platform
::
CUDADeviceContext
,
paddle
::
platform
::
float16
>
,
ops
::
FillZerosLikeKernel
<
paddle
::
platform
::
CUDADeviceContext
,
bool
>
);
paddle/fluid/operators/gaussian_random_batch_size_like_op.cc
浏览文件 @
218e02ac
...
...
@@ -65,17 +65,13 @@ by input arguments.
}
};
DECLARE_NO_NEED_BUFFER_VARS_INFERENCE
(
GaussianRandomBatchSizeLikeNoNeedBufferVarsInference
,
"Input"
);
}
// namespace operators
}
// namespace paddle
REGISTER_OPERATOR
(
gaussian_random_batch_size_like
,
paddle
::
operators
::
GaussianRandomBatchSizeLikeOp
,
paddle
::
operators
::
GaussianRandomBatchSizeLikeOpMaker
,
paddle
::
framework
::
EmptyGradOpMaker
,
paddle
::
operators
::
GaussianRandomBatchSizeLikeNoNeedBufferVarsInference
);
REGISTER_OPERATOR
(
gaussian_random_batch_size_like
,
paddle
::
operators
::
GaussianRandomBatchSizeLikeOp
,
paddle
::
operators
::
GaussianRandomBatchSizeLikeOpMaker
,
paddle
::
framework
::
EmptyGradOpMaker
,
paddle
::
operators
::
BatchSizeLikeNoNeedBufferVarsInference
);
// Kernels are registered in gaussian_random_op.cc and gaussian_random_op.cu
paddle/fluid/operators/hinge_loss_op.cc
浏览文件 @
218e02ac
...
...
@@ -13,6 +13,9 @@ See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/fluid/operators/hinge_loss_op.h"
#include <memory>
#include <string>
#include <vector>
namespace
paddle
{
namespace
operators
{
...
...
@@ -97,12 +100,29 @@ class HingeLossGradOp : public framework::OperatorWithKernel {
}
};
class
HingeLossGradOpDescMaker
:
public
framework
::
SingleGradOpDescMaker
{
public:
using
framework
::
SingleGradOpDescMaker
::
SingleGradOpDescMaker
;
protected:
std
::
unique_ptr
<
framework
::
OpDesc
>
Apply
()
const
override
{
std
::
unique_ptr
<
framework
::
OpDesc
>
op
(
new
framework
::
OpDesc
());
op
->
SetType
(
"hinge_loss_grad"
);
op
->
SetInput
(
"Logits"
,
Input
(
"Logits"
));
op
->
SetInput
(
"Labels"
,
Input
(
"Labels"
));
op
->
SetInput
(
framework
::
GradVarName
(
"Loss"
),
OutputGrad
(
"Loss"
));
op
->
SetOutput
(
framework
::
GradVarName
(
"Logits"
),
InputGrad
(
"Logits"
));
op
->
SetAttrMap
(
Attrs
());
return
op
;
}
};
}
// namespace operators
}
// namespace paddle
namespace
ops
=
paddle
::
operators
;
REGISTER_OPERATOR
(
hinge_loss
,
ops
::
HingeLossOp
,
ops
::
HingeLossOpMaker
<
float
>
,
paddle
::
framework
::
DefaultGradOpDescMaker
<
true
>
);
ops
::
HingeLossGradOpDescMaker
);
REGISTER_OPERATOR
(
hinge_loss_grad
,
ops
::
HingeLossGradOp
);
REGISTER_OP_CPU_KERNEL
(
hinge_loss
,
...
...
paddle/fluid/operators/huber_loss_op.cc
浏览文件 @
218e02ac
...
...
@@ -13,6 +13,9 @@ See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/fluid/operators/huber_loss_op.h"
#include <memory>
#include <string>
#include <vector>
namespace
paddle
{
namespace
operators
{
...
...
@@ -90,38 +93,45 @@ class HuberLossGradOp : public framework::OperatorWithKernel {
using
framework
::
OperatorWithKernel
::
OperatorWithKernel
;
void
InferShape
(
framework
::
InferShapeContext
*
ctx
)
const
override
{
PADDLE_ENFORCE
(
ctx
->
HasInput
(
"X"
),
"Input(X) should not be null."
);
PADDLE_ENFORCE
(
ctx
->
HasInput
(
"Y"
),
"Input(Y) should not be null."
);
PADDLE_ENFORCE
(
ctx
->
HasInput
(
"Residual"
),
"Input(Residual) should not be null."
);
PADDLE_ENFORCE
(
ctx
->
HasInput
(
framework
::
GradVarName
(
"Out"
)),
"Input(Out@GRAD) should not be null."
);
auto
x_dims
=
ctx
->
GetInputDim
(
"X"
);
auto
y_dims
=
ctx
->
GetInputDim
(
"Y"
);
auto
residual_dims
=
ctx
->
GetInputDim
(
"Residual"
);
auto
out_grad_dims
=
ctx
->
GetInputDim
(
framework
::
GradVarName
(
"Out"
));
PADDLE_ENFORCE_EQ
(
residual_dims
,
x_dims
);
PADDLE_ENFORCE_EQ
(
out_grad_dims
,
x_dims
);
auto
x_grad_name
=
framework
::
GradVarName
(
"X"
);
auto
y_grad_name
=
framework
::
GradVarName
(
"Y"
);
if
(
ctx
->
HasOutput
(
x_grad_name
))
{
ctx
->
SetOutputDim
(
x_grad_name
,
x
_dims
);
ctx
->
SetOutputDim
(
x_grad_name
,
residual
_dims
);
}
if
(
ctx
->
HasOutput
(
y_grad_name
))
{
ctx
->
SetOutputDim
(
y_grad_name
,
y
_dims
);
ctx
->
SetOutputDim
(
y_grad_name
,
residual
_dims
);
}
}
};
class
HuberLossGradOpDescMaker
:
public
framework
::
SingleGradOpDescMaker
{
public:
using
framework
::
SingleGradOpDescMaker
::
SingleGradOpDescMaker
;
protected:
std
::
unique_ptr
<
framework
::
OpDesc
>
Apply
()
const
override
{
std
::
unique_ptr
<
framework
::
OpDesc
>
op
(
new
framework
::
OpDesc
());
op
->
SetType
(
"huber_loss_grad"
);
op
->
SetInput
(
"Residual"
,
Output
(
"Residual"
));
op
->
SetInput
(
framework
::
GradVarName
(
"Out"
),
OutputGrad
(
"Out"
));
op
->
SetOutput
(
framework
::
GradVarName
(
"X"
),
InputGrad
(
"X"
));
op
->
SetOutput
(
framework
::
GradVarName
(
"Y"
),
InputGrad
(
"Y"
));
op
->
SetAttrMap
(
Attrs
());
return
op
;
}
};
}
// namespace operators
}
// namespace paddle
namespace
ops
=
paddle
::
operators
;
REGISTER_OPERATOR
(
huber_loss
,
ops
::
HuberLossOp
,
ops
::
HuberLossOpMaker
<
float
>
,
paddle
::
framework
::
DefaultGradOpDescMaker
<
true
>
);
ops
::
HuberLossGradOpDescMaker
);
REGISTER_OPERATOR
(
huber_loss_grad
,
ops
::
HuberLossGradOp
);
REGISTER_OP_CPU_KERNEL
(
huber_loss
,
ops
::
HuberLossKernel
<
paddle
::
platform
::
CPUDeviceContext
,
float
>
,
...
...
paddle/fluid/operators/row_conv_op.cc
浏览文件 @
218e02ac
...
...
@@ -13,6 +13,10 @@ See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/fluid/operators/row_conv_op.h"
#include <memory>
#include <string>
#include <vector>
#include "paddle/fluid/framework/eigen.h"
namespace
paddle
{
...
...
@@ -54,7 +58,6 @@ class RowConvGradOp : public framework::OperatorWithKernel {
using
framework
::
OperatorWithKernel
::
OperatorWithKernel
;
void
InferShape
(
framework
::
InferShapeContext
*
ctx
)
const
override
{
PADDLE_ENFORCE
(
ctx
->
HasInput
(
"X"
),
"Input(X) should not be null."
);
PADDLE_ENFORCE
(
ctx
->
HasInput
(
"Filter"
),
"Input(Filter) should not be null."
);
PADDLE_ENFORCE
(
ctx
->
HasInput
(
framework
::
GradVarName
(
"Out"
)),
...
...
@@ -62,8 +65,8 @@ class RowConvGradOp : public framework::OperatorWithKernel {
auto
x_grad_name
=
framework
::
GradVarName
(
"X"
);
if
(
ctx
->
HasOutput
(
x_grad_name
))
{
auto
x_dims
=
ctx
->
GetInputDim
(
"X"
);
ctx
->
SetOutputDim
(
x_grad_name
,
x
_dims
);
auto
dout_dims
=
ctx
->
GetInputDim
(
framework
::
GradVarName
(
"Out"
)
);
ctx
->
SetOutputDim
(
x_grad_name
,
dout
_dims
);
}
auto
filter_grad_name
=
framework
::
GradVarName
(
"Filter"
);
...
...
@@ -259,12 +262,31 @@ class RowConvGradKernel<platform::CPUDeviceContext, T>
}
}
};
class
RowConvGradOpDescMaker
:
public
framework
::
SingleGradOpDescMaker
{
public:
using
framework
::
SingleGradOpDescMaker
::
SingleGradOpDescMaker
;
protected:
std
::
unique_ptr
<
framework
::
OpDesc
>
Apply
()
const
override
{
std
::
unique_ptr
<
framework
::
OpDesc
>
op
(
new
framework
::
OpDesc
());
op
->
SetType
(
"row_conv_grad"
);
op
->
SetAttrMap
(
Attrs
());
op
->
SetInput
(
"X"
,
Input
(
"X"
));
op
->
SetInput
(
"Filter"
,
Input
(
"Filter"
));
op
->
SetInput
(
framework
::
GradVarName
(
"Out"
),
OutputGrad
(
"Out"
));
op
->
SetOutput
(
framework
::
GradVarName
(
"X"
),
InputGrad
(
"X"
));
op
->
SetOutput
(
framework
::
GradVarName
(
"Filter"
),
InputGrad
(
"Filter"
));
return
op
;
}
};
}
// namespace operators
}
// namespace paddle
namespace
ops
=
paddle
::
operators
;
REGISTER_OPERATOR
(
row_conv
,
ops
::
RowConvOp
,
ops
::
RowConvOpMaker
,
paddle
::
framework
::
DefaultGradOpDescMaker
<
true
>
);
ops
::
RowConvGradOpDescMaker
);
REGISTER_OPERATOR
(
row_conv_grad
,
ops
::
RowConvGradOp
);
REGISTER_OP_CPU_KERNEL
(
row_conv
,
ops
::
RowConvKernel
<
paddle
::
platform
::
CPUDeviceContext
,
float
>
);
...
...
paddle/fluid/operators/uniform_random_batch_size_like_op.cc
浏览文件 @
218e02ac
...
...
@@ -64,8 +64,9 @@ with random values sampled from a uniform distribution.
}
// namespace operators
}
// namespace paddle
REGISTER_OP_WITHOUT_GRADIENT
(
uniform_random_batch_size_like
,
paddle
::
operators
::
UniformRandomBatchSizeLikeOp
,
paddle
::
operators
::
UniformRandomBatchSizeLikeOpMaker
);
REGISTER_OPERATOR
(
uniform_random_batch_size_like
,
paddle
::
operators
::
UniformRandomBatchSizeLikeOp
,
paddle
::
operators
::
UniformRandomBatchSizeLikeOpMaker
,
paddle
::
framework
::
EmptyGradOpMaker
,
paddle
::
operators
::
BatchSizeLikeNoNeedBufferVarsInference
);
// Kernels are registered in uniform_random_op.cc and uniform_random_op.cu
python/paddle/fluid/backward.py
浏览文件 @
218e02ac
...
...
@@ -231,9 +231,16 @@ def _remove_no_grad_branch_(op_descs, no_grad_set):
for
idx
,
op_desc
in
enumerate
(
op_descs
):
for
arg
in
op_desc
.
input_arg_names
():
if
core
.
grad_var_suffix
()
in
arg
and
arg
in
no_grad_set
:
to_insert
.
append
((
_create_op_desc_
(
"fill_zeros_like"
,
{
"X"
:
[
_strip_grad_suffix_
(
arg
)]
},
{
"Out"
:
[
arg
]},
{}),
idx
))
x_in
=
_strip_grad_suffix_
(
arg
)
x_in_var_desc
=
op_desc
.
block
().
find_var_recursive
(
cpt
.
to_bytes
(
x_in
))
assert
x_in_var_desc
is
not
None
,
"Variable {} not found"
.
format
(
x_in
)
dtype
=
x_in_var_desc
.
dtype
()
to_insert
.
append
(
(
_create_op_desc_
(
"fill_zeros_like2"
,
{
"X"
:
[
x_in
]},
{
"Out"
:
[
arg
]},
{
"dtype"
:
dtype
}),
idx
))
list
([
op_descs
.
insert
(
p
[
1
],
p
[
0
])
for
p
in
reversed
(
to_insert
)])
...
...
python/paddle/fluid/dygraph/nn.py
浏览文件 @
218e02ac
此差异已折叠。
点击以展开。
python/paddle/fluid/metrics.py
浏览文件 @
218e02ac
...
...
@@ -227,7 +227,7 @@ class Precision(MetricBase):
metric.reset()
for data in train_reader():
loss, preds, labels = exe.run(fetch_list=[cost, preds, labels])
metric.update(preds=preds, labels=labels)
metric.update(preds=preds, labels=labels)
numpy_precision = metric.eval()
"""
...
...
@@ -241,9 +241,11 @@ class Precision(MetricBase):
raise
ValueError
(
"The 'preds' must be a numpy ndarray."
)
if
not
_is_numpy_
(
labels
):
raise
ValueError
(
"The 'labels' must be a numpy ndarray."
)
sample_num
=
labels
[
0
]
sample_num
=
labels
.
shape
[
0
]
preds
=
np
.
rint
(
preds
).
astype
(
"int32"
)
for
i
in
range
(
sample_num
):
pred
=
preds
[
i
]
.
astype
(
"int32"
)
pred
=
preds
[
i
]
label
=
labels
[
i
]
if
label
==
1
:
if
pred
==
label
:
...
...
python/paddle/fluid/tests/unittests/CMakeLists.txt
浏览文件 @
218e02ac
...
...
@@ -81,6 +81,7 @@ list(REMOVE_ITEM TEST_OPS test_imperative_resnet)
list
(
REMOVE_ITEM TEST_OPS test_imperative_se_resnext
)
list
(
REMOVE_ITEM TEST_OPS test_imperative_mnist
)
list
(
REMOVE_ITEM TEST_OPS test_ir_memory_optimize_transformer
)
list
(
REMOVE_ITEM TEST_OPS test_layers
)
foreach
(
TEST_OP
${
TEST_OPS
}
)
py_test_modules
(
${
TEST_OP
}
MODULES
${
TEST_OP
}
)
endforeach
(
TEST_OP
)
...
...
@@ -118,7 +119,7 @@ py_test_modules(test_parallel_executor_crf MODULES test_parallel_executor_crf SE
py_test_modules
(
test_parallel_executor_fetch_feed MODULES test_parallel_executor_fetch_feed SERIAL
)
set_tests_properties
(
test_parallel_executor_fetch_feed PROPERTIES TIMEOUT 450
)
py_test_modules
(
test_parallel_executor_transformer MODULES test_parallel_executor_transformer SERIAL
)
py_test_modules
(
test_layers MODULES test_layers ENVS FLAGS_cudnn_deterministic=1
)
if
(
NOT WIN32
)
py_test_modules
(
test_ir_memory_optimize_transformer MODULES test_ir_memory_optimize_transformer SERIAL
)
endif
()
...
...
python/paddle/fluid/tests/unittests/test_eager_deletion_no_need_buffer_vars_inference.py
浏览文件 @
218e02ac
...
...
@@ -23,6 +23,8 @@ from test_elementwise_sub_op import *
from
test_concat_op
import
*
from
test_gather_op
import
*
from
test_gaussian_random_batch_size_like_op
import
*
from
test_uniform_random_batch_size_like_op
import
*
from
test_fill_constant_batch_size_like_op
import
*
from
test_lod_reset_op
import
*
from
test_scatter_op
import
*
from
test_mean_op
import
*
...
...
@@ -40,6 +42,7 @@ from test_sequence_unpad_op import *
from
test_sequence_scatter_op
import
*
from
test_sequence_slice_op
import
*
from
test_pad2d_op
import
*
from
test_fill_zeros_like2_op
import
*
if
__name__
==
'__main__'
:
unittest
.
main
()
python/paddle/fluid/tests/unittests/test_fill_zeros_like2_op.py
0 → 100644
浏览文件 @
218e02ac
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from
__future__
import
print_function
import
unittest
import
numpy
as
np
from
paddle.fluid.framework
import
convert_np_dtype_to_dtype_
from
op_test
import
OpTest
class
TestFillZerosLike2Op
(
OpTest
):
def
setUp
(
self
):
self
.
op_type
=
"fill_zeros_like2"
self
.
dtype
=
np
.
float32
self
.
init_dtype
()
self
.
inputs
=
{
'X'
:
np
.
random
.
random
((
219
,
232
)).
astype
(
self
.
dtype
)}
self
.
outputs
=
{
'Out'
:
np
.
zeros_like
(
self
.
inputs
[
"X"
])}
self
.
attrs
=
{
'dtype'
:
convert_np_dtype_to_dtype_
(
self
.
dtype
)}
def
init_dtype
(
self
):
pass
def
test_check_output
(
self
):
self
.
check_output
()
class
TestFillZerosLike2OpFp16
(
TestFillZerosLike2Op
):
def
init_dtype
(
self
):
self
.
dtype
=
np
.
float16
class
TestFillZerosLike2OpFp64
(
TestFillZerosLike2Op
):
def
init_dtype
(
self
):
self
.
dtype
=
np
.
float64
if
__name__
==
"__main__"
:
unittest
.
main
()
python/paddle/fluid/tests/unittests/test_imperative_basic.py
浏览文件 @
218e02ac
...
...
@@ -348,6 +348,55 @@ class TestImperative(unittest.TestCase):
self
.
assertEqual
(
mlp
.
_fc2
,
sublayers
[
1
])
self
.
assertEqual
(
len
(
sublayers
),
2
)
def
test_dygraph_vs_static
(
self
):
inp1
=
np
.
random
.
rand
(
4
,
3
,
3
)
inp2
=
np
.
random
.
rand
(
4
,
3
,
3
)
# dynamic graph
with
fluid
.
dygraph
.
guard
():
if
np
.
sum
(
inp1
)
<
np
.
sum
(
inp2
):
x
=
fluid
.
layers
.
elementwise_add
(
inp1
,
inp2
)
else
:
x
=
fluid
.
layers
.
elementwise_sub
(
inp1
,
inp2
)
dygraph_result
=
x
.
_numpy
()
# static graph
with
new_program_scope
():
inp_data1
=
fluid
.
layers
.
data
(
name
=
'inp1'
,
shape
=
[
3
,
3
],
dtype
=
np
.
float32
)
inp_data2
=
fluid
.
layers
.
data
(
name
=
'inp2'
,
shape
=
[
3
,
3
],
dtype
=
np
.
float32
)
a
=
fluid
.
layers
.
expand
(
fluid
.
layers
.
reshape
(
fluid
.
layers
.
reduce_sum
(
inp_data1
),
[
1
,
1
]),
[
4
,
1
])
b
=
fluid
.
layers
.
expand
(
fluid
.
layers
.
reshape
(
fluid
.
layers
.
reduce_sum
(
inp_data2
),
[
1
,
1
]),
[
4
,
1
])
cond
=
fluid
.
layers
.
less_than
(
x
=
a
,
y
=
b
)
ie
=
fluid
.
layers
.
IfElse
(
cond
)
with
ie
.
true_block
():
d1
=
ie
.
input
(
inp_data1
)
d2
=
ie
.
input
(
inp_data2
)
d3
=
fluid
.
layers
.
elementwise_add
(
d1
,
d2
)
ie
.
output
(
d3
)
with
ie
.
false_block
():
d1
=
ie
.
input
(
inp_data1
)
d2
=
ie
.
input
(
inp_data2
)
d3
=
fluid
.
layers
.
elementwise_sub
(
d1
,
d2
)
ie
.
output
(
d3
)
out
=
ie
()
exe
=
fluid
.
Executor
(
fluid
.
CPUPlace
(
)
if
not
core
.
is_compiled_with_cuda
()
else
fluid
.
CUDAPlace
(
0
))
static_result
=
exe
.
run
(
fluid
.
default_main_program
(),
feed
=
{
'inp1'
:
inp1
,
'inp2'
:
inp2
},
fetch_list
=
out
)[
0
]
self
.
assertTrue
(
np
.
allclose
(
dygraph_result
,
static_result
))
def
test_rnn
(
self
):
np_inp
=
np
.
array
([[
1.0
,
2.0
,
3.0
],
[
4.0
,
5.0
,
6.0
],
[
7.0
,
8.0
,
9.0
],
[
10.0
,
11.0
,
12.0
]])
...
...
python/paddle/fluid/tests/unittests/test_imperative_ptb_rnn.py
浏览文件 @
218e02ac
...
...
@@ -16,6 +16,7 @@ from __future__ import print_function
import
unittest
import
paddle.fluid
as
fluid
import
paddle.fluid.core
as
core
from
paddle.fluid.dygraph.nn
import
Embedding
import
paddle.fluid.framework
as
framework
from
paddle.fluid.optimizer
import
SGDOptimizer
...
...
@@ -278,7 +279,8 @@ class TestDygraphPtbRnn(unittest.TestCase):
num_steps
=
num_steps
,
init_scale
=
init_scale
)
exe
=
fluid
.
Executor
(
fluid
.
CPUPlace
())
exe
=
fluid
.
Executor
(
fluid
.
CPUPlace
(
)
if
not
core
.
is_compiled_with_cuda
()
else
fluid
.
CUDAPlace
(
0
))
sgd
=
SGDOptimizer
(
learning_rate
=
1e-3
)
x
=
fluid
.
layers
.
data
(
name
=
"x"
,
shape
=
[
-
1
,
num_steps
,
1
],
dtype
=
'int64'
)
...
...
@@ -331,18 +333,16 @@ class TestDygraphPtbRnn(unittest.TestCase):
for
k
in
range
(
3
,
len
(
out
)):
static_param_updated
[
static_param_name_list
[
k
-
3
]]
=
out
[
k
]
self
.
assertTrue
(
np
.
allclose
(
static_loss_value
,
dy_loss
.
_numpy
()))
self
.
assertTrue
(
np
.
allclose
(
static_last_cell_value
,
last_cell
.
_numpy
()))
self
.
assertTrue
(
np
.
array_equal
(
static_loss_value
,
dy_loss
.
_numpy
()))
self
.
assertTrue
(
np
.
array_equal
(
static_last_cell_value
,
last_cell
.
_numpy
()))
self
.
assertTrue
(
np
.
a
llclose
(
static_last_hidden_value
,
last_hidden
.
_numpy
()))
np
.
a
rray_equal
(
static_last_hidden_value
,
last_hidden
.
_numpy
()))
for
key
,
value
in
six
.
iteritems
(
static_param_init
):
# print("static_init name: {}, value {}".format(key, value))
# print("dy_init name: {}, value {}".format(key, dy_param_init[key]))
self
.
assertTrue
(
np
.
allclose
(
value
,
dy_param_init
[
key
]))
self
.
assertTrue
(
np
.
array_equal
(
value
,
dy_param_init
[
key
]))
for
key
,
value
in
six
.
iteritems
(
static_param_updated
):
# print("static name: {}, value {}".format(key, value))
# print("dy name: {}, value {}".format(key, dy_param_updated[key]))
self
.
assertTrue
(
np
.
allclose
(
value
,
dy_param_updated
[
key
]))
self
.
assertTrue
(
np
.
array_equal
(
value
,
dy_param_updated
[
key
]))
if
__name__
==
'__main__'
:
...
...
python/paddle/fluid/tests/unittests/test_layers.py
浏览文件 @
218e02ac
...
...
@@ -595,6 +595,280 @@ class TestLayer(LayerTest):
self
.
assertTrue
(
np
.
allclose
(
static_rlt2
,
static_rlt
))
self
.
assertTrue
(
np
.
allclose
(
nce_loss3
.
_numpy
(),
static_rlt
))
def
test_conv3d
(
self
):
with
self
.
static_graph
():
images
=
layers
.
data
(
name
=
'pixel'
,
shape
=
[
3
,
6
,
6
,
6
],
dtype
=
'float32'
)
ret
=
layers
.
conv3d
(
input
=
images
,
num_filters
=
3
,
filter_size
=
2
)
static_ret
=
self
.
get_static_graph_result
(
feed
=
{
'pixel'
:
np
.
ones
(
[
2
,
3
,
6
,
6
,
6
],
dtype
=
'float32'
)},
fetch_list
=
[
ret
])[
0
]
with
self
.
static_graph
():
images
=
layers
.
data
(
name
=
'pixel'
,
shape
=
[
3
,
6
,
6
,
6
],
dtype
=
'float32'
)
conv3d
=
nn
.
Conv3D
(
'conv3d'
,
num_filters
=
3
,
filter_size
=
2
)
ret
=
conv3d
(
images
)
static_ret2
=
self
.
get_static_graph_result
(
feed
=
{
'pixel'
:
np
.
ones
(
[
2
,
3
,
6
,
6
,
6
],
dtype
=
'float32'
)},
fetch_list
=
[
ret
])[
0
]
with
self
.
dynamic_graph
():
images
=
np
.
ones
([
2
,
3
,
6
,
6
,
6
],
dtype
=
'float32'
)
conv3d
=
nn
.
Conv3D
(
'conv3d'
,
num_filters
=
3
,
filter_size
=
2
)
dy_ret
=
conv3d
(
base
.
to_variable
(
images
))
self
.
assertTrue
(
np
.
allclose
(
static_ret
,
dy_ret
.
_numpy
()))
self
.
assertTrue
(
np
.
allclose
(
static_ret
,
static_ret2
))
def
test_row_conv
(
self
):
input
=
np
.
arange
(
15
).
reshape
([
3
,
5
]).
astype
(
'float32'
)
if
core
.
is_compiled_with_cuda
():
place
=
core
.
CUDAPlace
(
0
)
else
:
place
=
core
.
CPUPlace
()
with
self
.
static_graph
():
x
=
layers
.
data
(
name
=
'X'
,
shape
=
[
3
,
5
],
dtype
=
'float32'
,
lod_level
=
1
,
append_batch_size
=
False
)
ret
=
layers
.
row_conv
(
input
=
x
,
future_context_size
=
2
)
static_ret
=
self
.
get_static_graph_result
(
feed
=
{
'X'
:
fluid
.
create_lod_tensor
(
data
=
input
,
recursive_seq_lens
=
[[
1
,
1
,
1
]],
place
=
place
)
},
fetch_list
=
[
ret
],
with_lod
=
True
)[
0
]
with
self
.
static_graph
():
x
=
layers
.
data
(
name
=
'X'
,
shape
=
[
3
,
5
],
dtype
=
'float32'
,
lod_level
=
1
,
append_batch_size
=
False
)
rowConv
=
nn
.
RowConv
(
'RowConv'
,
future_context_size
=
2
)
ret
=
rowConv
(
x
)
static_ret2
=
self
.
get_static_graph_result
(
feed
=
{
'X'
:
fluid
.
create_lod_tensor
(
data
=
input
,
recursive_seq_lens
=
[[
1
,
1
,
1
]],
place
=
place
)
},
fetch_list
=
[
ret
],
with_lod
=
True
)[
0
]
# TODO: dygraph can't support LODTensor
self
.
assertTrue
(
np
.
allclose
(
static_ret
,
static_ret2
))
def
test_group_norm
(
self
):
if
core
.
is_compiled_with_cuda
():
place
=
core
.
CUDAPlace
(
0
)
else
:
place
=
core
.
CPUPlace
()
shape
=
(
2
,
4
,
3
,
3
)
input
=
np
.
random
.
random
(
shape
).
astype
(
'float32'
)
with
self
.
static_graph
():
X
=
fluid
.
layers
.
data
(
name
=
'X'
,
shape
=
shape
,
dtype
=
'float32'
,
lod_level
=
1
,
append_batch_size
=
False
)
ret
=
layers
.
group_norm
(
input
=
X
,
groups
=
2
)
static_ret
=
self
.
get_static_graph_result
(
feed
=
{
'X'
:
fluid
.
create_lod_tensor
(
data
=
input
,
recursive_seq_lens
=
[[
1
,
1
]],
place
=
place
)
},
fetch_list
=
[
ret
],
with_lod
=
True
)[
0
]
with
self
.
static_graph
():
X
=
fluid
.
layers
.
data
(
name
=
'X'
,
shape
=
shape
,
dtype
=
'float32'
,
lod_level
=
1
,
append_batch_size
=
False
)
groupNorm
=
nn
.
GroupNorm
(
'GroupNorm'
,
groups
=
2
)
ret
=
groupNorm
(
X
)
static_ret2
=
self
.
get_static_graph_result
(
feed
=
{
'X'
:
fluid
.
create_lod_tensor
(
data
=
input
,
recursive_seq_lens
=
[[
1
,
1
]],
place
=
place
)
},
fetch_list
=
[
ret
],
with_lod
=
True
)[
0
]
with
self
.
dynamic_graph
():
groupNorm
=
nn
.
GroupNorm
(
'GroupNorm'
,
groups
=
2
)
dy_ret
=
groupNorm
(
base
.
to_variable
(
input
))
self
.
assertTrue
(
np
.
allclose
(
static_ret
,
dy_ret
.
_numpy
()))
self
.
assertTrue
(
np
.
allclose
(
static_ret
,
static_ret2
))
def
test_spectral_norm
(
self
):
if
core
.
is_compiled_with_cuda
():
place
=
core
.
CUDAPlace
(
0
)
else
:
place
=
core
.
CPUPlace
()
shape
=
(
2
,
4
,
3
,
3
)
input
=
np
.
random
.
random
(
shape
).
astype
(
'float32'
)
with
self
.
static_graph
():
Weight
=
fluid
.
layers
.
data
(
name
=
'Weight'
,
shape
=
shape
,
dtype
=
'float32'
,
lod_level
=
1
,
append_batch_size
=
False
)
ret
=
layers
.
spectral_norm
(
weight
=
Weight
,
dim
=
1
,
power_iters
=
2
)
static_ret
=
self
.
get_static_graph_result
(
feed
=
{
'Weight'
:
fluid
.
create_lod_tensor
(
data
=
input
,
recursive_seq_lens
=
[[
1
,
1
]],
place
=
place
),
},
fetch_list
=
[
ret
],
with_lod
=
True
)[
0
]
with
self
.
static_graph
():
Weight
=
fluid
.
layers
.
data
(
name
=
'Weight'
,
shape
=
shape
,
dtype
=
'float32'
,
lod_level
=
1
,
append_batch_size
=
False
)
spectralNorm
=
nn
.
SpectralNorm
(
'SpectralNorm'
,
dim
=
1
,
power_iters
=
2
)
ret
=
spectralNorm
(
Weight
)
static_ret2
=
self
.
get_static_graph_result
(
feed
=
{
'Weight'
:
fluid
.
create_lod_tensor
(
data
=
input
,
recursive_seq_lens
=
[[
1
,
1
]],
place
=
place
)
},
fetch_list
=
[
ret
],
with_lod
=
True
)[
0
]
with
self
.
dynamic_graph
():
spectralNorm
=
nn
.
SpectralNorm
(
'SpectralNorm'
,
dim
=
1
,
power_iters
=
2
)
dy_ret
=
spectralNorm
(
base
.
to_variable
(
input
))
self
.
assertTrue
(
np
.
allclose
(
static_ret
,
dy_ret
.
_numpy
()))
self
.
assertTrue
(
np
.
allclose
(
static_ret
,
static_ret2
))
def
test_tree_conv
(
self
):
if
core
.
is_compiled_with_cuda
():
place
=
core
.
CUDAPlace
(
0
)
else
:
place
=
core
.
CPUPlace
()
adj_array
=
[
1
,
2
,
1
,
3
,
1
,
4
,
1
,
5
,
2
,
6
,
2
,
7
,
2
,
8
,
4
,
9
,
4
,
10
]
adj
=
np
.
array
(
adj_array
).
reshape
((
1
,
9
,
2
)).
astype
(
'int32'
)
adj
=
np
.
tile
(
adj
,
(
1
,
1
,
1
))
vectors
=
np
.
random
.
random
((
1
,
10
,
5
)).
astype
(
'float32'
)
with
self
.
static_graph
():
NodesVector
=
fluid
.
layers
.
data
(
name
=
'NodesVector'
,
shape
=
(
1
,
10
,
5
),
dtype
=
'float32'
,
lod_level
=
1
,
append_batch_size
=
False
)
EdgeSet
=
fluid
.
layers
.
data
(
name
=
'EdgeSet'
,
shape
=
(
1
,
9
,
2
),
dtype
=
'int32'
,
lod_level
=
1
,
append_batch_size
=
False
)
ret
=
layers
.
tree_conv
(
nodes_vector
=
NodesVector
,
edge_set
=
EdgeSet
,
output_size
=
6
,
num_filters
=
1
,
max_depth
=
2
)
static_ret
=
self
.
get_static_graph_result
(
feed
=
{
'NodesVector'
:
fluid
.
create_lod_tensor
(
data
=
vectors
,
recursive_seq_lens
=
[[
1
]],
place
=
place
),
'EdgeSet'
:
fluid
.
create_lod_tensor
(
data
=
adj
,
recursive_seq_lens
=
[[
1
]],
place
=
place
)
},
fetch_list
=
[
ret
],
with_lod
=
False
)[
0
]
with
self
.
static_graph
():
NodesVector
=
fluid
.
layers
.
data
(
name
=
'NodesVector'
,
shape
=
(
1
,
10
,
5
),
dtype
=
'float32'
,
lod_level
=
1
,
append_batch_size
=
False
)
EdgeSet
=
fluid
.
layers
.
data
(
name
=
'EdgeSet'
,
shape
=
(
1
,
9
,
2
),
dtype
=
'int32'
,
lod_level
=
1
,
append_batch_size
=
False
)
treeConv
=
nn
.
TreeConv
(
'TreeConv'
,
output_size
=
6
,
num_filters
=
1
,
max_depth
=
2
)
ret
=
treeConv
(
NodesVector
,
EdgeSet
)
static_ret2
=
self
.
get_static_graph_result
(
feed
=
{
'NodesVector'
:
fluid
.
create_lod_tensor
(
data
=
vectors
,
recursive_seq_lens
=
[[
1
]],
place
=
place
),
'EdgeSet'
:
fluid
.
create_lod_tensor
(
data
=
adj
,
recursive_seq_lens
=
[[
1
]],
place
=
place
)
},
fetch_list
=
[
ret
],
with_lod
=
False
)[
0
]
with
self
.
dynamic_graph
():
treeConv
=
nn
.
TreeConv
(
'SpectralNorm'
,
output_size
=
6
,
num_filters
=
1
,
max_depth
=
2
)
dy_ret
=
treeConv
(
base
.
to_variable
(
vectors
),
base
.
to_variable
(
adj
))
self
.
assertTrue
(
np
.
allclose
(
static_ret
,
static_ret2
))
self
.
assertTrue
(
np
.
allclose
(
static_ret
,
dy_ret
.
_numpy
()))
def
test_conv3d_transpose
(
self
):
input_array
=
np
.
arange
(
0
,
48
).
reshape
(
[
2
,
3
,
2
,
2
,
2
]).
astype
(
'float32'
)
with
self
.
static_graph
():
img
=
layers
.
data
(
name
=
'pixel'
,
shape
=
[
3
,
2
,
2
,
2
],
dtype
=
'float32'
)
out
=
layers
.
conv3d_transpose
(
input
=
img
,
num_filters
=
12
,
filter_size
=
12
,
use_cudnn
=
False
)
static_rlt
=
self
.
get_static_graph_result
(
feed
=
{
'pixel'
:
input_array
},
fetch_list
=
[
out
])[
0
]
with
self
.
static_graph
():
img
=
layers
.
data
(
name
=
'pixel'
,
shape
=
[
3
,
2
,
2
,
2
],
dtype
=
'float32'
)
conv3d_transpose
=
nn
.
Conv3DTranspose
(
'Conv3DTranspose'
,
num_filters
=
12
,
filter_size
=
12
,
use_cudnn
=
False
)
out
=
conv3d_transpose
(
img
)
static_rlt2
=
self
.
get_static_graph_result
(
feed
=
{
'pixel'
:
input_array
},
fetch_list
=
[
out
])[
0
]
with
self
.
dynamic_graph
():
conv3d_transpose
=
nn
.
Conv3DTranspose
(
'Conv3DTranspose'
,
num_filters
=
12
,
filter_size
=
12
,
use_cudnn
=
False
)
dy_rlt
=
conv3d_transpose
(
base
.
to_variable
(
input_array
))
self
.
assertTrue
(
np
.
allclose
(
static_rlt2
,
static_rlt
))
self
.
assertTrue
(
np
.
allclose
(
dy_rlt
.
_numpy
(),
static_rlt
))
class
TestBook
(
unittest
.
TestCase
):
def
test_fit_a_line
(
self
):
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录