Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
BaiXuePrincess
Paddle
提交
37d49b38
P
Paddle
项目概览
BaiXuePrincess
/
Paddle
与 Fork 源项目一致
Fork自
PaddlePaddle / Paddle
通知
1
Star
1
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
未验证
提交
37d49b38
编写于
11月 15, 2018
作者:
T
Tao Luo
提交者:
GitHub
11月 15, 2018
浏览文件
操作
浏览文件
下载
差异文件
Merge pull request #14409 from luotao1/dam_fc
Enhance fc_op for 3-D shape tensor
上级
7a64d48f
1d867805
变更
14
显示空白变更内容
内联
并排
Showing
14 changed file
with
60 addition
and
49 deletion
+60
-49
paddle/fluid/framework/ir/fc_fuse_pass.cc
paddle/fluid/framework/ir/fc_fuse_pass.cc
+1
-0
paddle/fluid/framework/ir/fc_fuse_pass_tester.cc
paddle/fluid/framework/ir/fc_fuse_pass_tester.cc
+1
-0
paddle/fluid/inference/analysis/ir_passes/subgraph_detector.cc
...e/fluid/inference/analysis/ir_passes/subgraph_detector.cc
+1
-1
paddle/fluid/inference/analysis/ir_passes/tensorrt_subgraph_pass.cc
...id/inference/analysis/ir_passes/tensorrt_subgraph_pass.cc
+1
-1
paddle/fluid/inference/tests/api/CMakeLists.txt
paddle/fluid/inference/tests/api/CMakeLists.txt
+1
-5
paddle/fluid/inference/tests/api/analyzer_dam_tester.cc
paddle/fluid/inference/tests/api/analyzer_dam_tester.cc
+10
-15
paddle/fluid/inference/tests/api/analyzer_vis_tester.cc
paddle/fluid/inference/tests/api/analyzer_vis_tester.cc
+0
-3
paddle/fluid/operators/fc_op.cc
paddle/fluid/operators/fc_op.cc
+33
-12
paddle/fluid/operators/hash_op.cc
paddle/fluid/operators/hash_op.cc
+1
-1
paddle/fluid/operators/math/selected_rows_functor.cc
paddle/fluid/operators/math/selected_rows_functor.cc
+1
-1
paddle/fluid/operators/math/sequence_pooling_test.cc
paddle/fluid/operators/math/sequence_pooling_test.cc
+4
-4
paddle/fluid/operators/merge_ids_op.h
paddle/fluid/operators/merge_ids_op.h
+4
-4
paddle/fluid/operators/ref_by_trainer_id_op.h
paddle/fluid/operators/ref_by_trainer_id_op.h
+1
-1
paddle/fluid/operators/split_ids_op.h
paddle/fluid/operators/split_ids_op.h
+1
-1
未找到文件。
paddle/fluid/framework/ir/fc_fuse_pass.cc
浏览文件 @
37d49b38
...
...
@@ -57,6 +57,7 @@ std::unique_ptr<ir::Graph> FCFusePass::ApplyImpl(
desc
.
SetInput
(
"W"
,
std
::
vector
<
std
::
string
>
({
fc_Y_in
}));
desc
.
SetInput
(
"Bias"
,
std
::
vector
<
std
::
string
>
({
fc_bias_in
}));
desc
.
SetOutput
(
"Out"
,
std
::
vector
<
std
::
string
>
({
fc_out_out
}));
desc
.
SetAttr
(
"in_num_col_dims"
,
mul
->
Op
()
->
GetAttr
(
"x_num_col_dims"
));
desc
.
SetType
(
"fc"
);
auto
fc_node
=
g
->
CreateOpNode
(
&
desc
);
// OpDesc will be copied.
GraphSafeRemoveNodes
(
graph
.
get
(),
{
mul
,
elementwise_add
,
mul_out
});
...
...
paddle/fluid/framework/ir/fc_fuse_pass_tester.cc
浏览文件 @
37d49b38
...
...
@@ -29,6 +29,7 @@ void SetOp(ProgramDesc* prog, const std::string& type,
if
(
type
==
"mul"
)
{
op
->
SetInput
(
"X"
,
{
inputs
[
0
]});
op
->
SetInput
(
"Y"
,
{
inputs
[
1
]});
op
->
SetAttr
(
"x_num_col_dims"
,
{
1
});
}
else
if
(
type
==
"elementwise_add"
)
{
op
->
SetInput
(
"X"
,
inputs
);
}
...
...
paddle/fluid/inference/analysis/ir_passes/subgraph_detector.cc
浏览文件 @
37d49b38
...
...
@@ -412,7 +412,7 @@ void DetachDeletedNodes(framework::ir::Graph *graph) {
void
SubGraphFuser
::
ReplaceNodesWithSubGraphs
()
{
auto
subgraphs
=
SubgraphDetector
(
graph_
,
node_inside_subgraph_teller_
)();
for
(
auto
&
subgraph
:
subgraphs
)
{
if
(
subgraph
.
size
()
<=
min_subgraph_size_
)
continue
;
if
(
subgraph
.
size
()
<=
(
size_t
)
min_subgraph_size_
)
continue
;
LOG
(
INFO
)
<<
"detect a subgraph size "
<<
subgraph
.
size
();
std
::
unordered_set
<
Node
*>
subgraph_uniq
(
subgraph
.
begin
(),
subgraph
.
end
());
// replace this sub-graph with the first node. Two steps: 1. Create a Block
...
...
paddle/fluid/inference/analysis/ir_passes/tensorrt_subgraph_pass.cc
浏览文件 @
37d49b38
...
...
@@ -114,7 +114,7 @@ void TensorRtSubgraphPass::CreateTensorRTOp(framework::ir::Node *node,
// it is either an OP's input or an OP's output.
auto
&
subgraph_nodes
=
*
Agent
(
node
).
subgraph
();
for
(
in
t
index
=
0
;
index
<
block_desc
.
OpSize
();
index
++
)
{
for
(
size_
t
index
=
0
;
index
<
block_desc
.
OpSize
();
index
++
)
{
framework
::
proto
::
OpDesc
*
op
=
block_desc
.
Op
(
index
)
->
Proto
();
auto
correspond_node
=
subgraph_nodes
[
index
];
PADDLE_ENFORCE_EQ
(
correspond_node
->
Name
(),
op
->
type
());
...
...
paddle/fluid/inference/tests/api/CMakeLists.txt
浏览文件 @
37d49b38
...
...
@@ -45,11 +45,7 @@ inference_analysis_api_test(test_analyzer_rnn2 ${RNN2_INSTALL_DIR} analyzer_rnn2
# DAM
set
(
DAM_INSTALL_DIR
"
${
INFERENCE_DEMO_INSTALL_DIR
}
/dam"
)
download_model_and_data
(
${
DAM_INSTALL_DIR
}
"DAM_model.tar.gz"
"DAM_data.txt.tar.gz"
)
inference_analysis_test
(
test_analyzer_dam SRCS analyzer_dam_tester.cc
EXTRA_DEPS
${
INFERENCE_EXTRA_DEPS
}
ARGS
--infer_model=
${
DAM_INSTALL_DIR
}
/model
--infer_data=
${
DAM_INSTALL_DIR
}
/data.txt
--use_analysis=0
)
inference_analysis_api_test
(
test_analyzer_dam
${
DAM_INSTALL_DIR
}
analyzer_dam_tester.cc
)
# chinese_ner
set
(
CHINESE_NER_INSTALL_DIR
"
${
INFERENCE_DEMO_INSTALL_DIR
}
/chinese_ner"
)
...
...
paddle/fluid/inference/tests/api/analyzer_dam_tester.cc
浏览文件 @
37d49b38
...
...
@@ -69,7 +69,7 @@ struct DataRecord {
num_lines
++
;
std
::
vector
<
std
::
string
>
data
;
split
(
line
,
','
,
&
data
);
CHECK_EQ
(
data
.
size
(),
2
*
MAX_TURN_NUM
+
3
);
CHECK_EQ
(
data
.
size
(),
(
size_t
)(
2
*
MAX_TURN_NUM
+
3
)
);
// load turn data
std
::
vector
<
int64_t
>
turns_tmp
[
MAX_TURN_NUM
];
for
(
int
i
=
0
;
i
<
MAX_TURN_NUM
;
++
i
)
{
...
...
@@ -197,7 +197,6 @@ TEST(Analyzer_dam, fuse_statis) {
contrib
::
AnalysisConfig
cfg
;
SetConfig
(
&
cfg
);
if
(
FLAGS_use_analysis
)
{
int
num_ops
;
auto
predictor
=
CreatePaddlePredictor
<
AnalysisConfig
>
(
cfg
);
auto
fuse_statis
=
GetFuseStatis
(
...
...
@@ -205,7 +204,6 @@ TEST(Analyzer_dam, fuse_statis) {
ASSERT_TRUE
(
fuse_statis
.
count
(
"fc_fuse"
));
EXPECT_EQ
(
fuse_statis
.
at
(
"fc_fuse"
),
317
);
EXPECT_EQ
(
num_ops
,
2020
);
}
}
// Compare result of NativeConfig and AnalysisConfig
...
...
@@ -216,11 +214,8 @@ TEST(Analyzer_dam, compare) {
std
::
vector
<
std
::
vector
<
PaddleTensor
>>
input_slots_all
;
SetInput
(
&
input_slots_all
);
if
(
FLAGS_use_analysis
)
{
CompareNativeAndAnalysis
(
reinterpret_cast
<
const
PaddlePredictor
::
Config
*>
(
&
cfg
),
input_slots_all
);
}
reinterpret_cast
<
const
PaddlePredictor
::
Config
*>
(
&
cfg
),
input_slots_all
);
}
}
// namespace inference
...
...
paddle/fluid/inference/tests/api/analyzer_vis_tester.cc
浏览文件 @
37d49b38
...
...
@@ -59,9 +59,6 @@ void SetConfig(AnalysisConfig *cfg) {
cfg
->
specify_input_name
=
true
;
// TODO(TJ): fix fusion gru
cfg
->
pass_builder
()
->
DeletePass
(
"fc_gru_fuse_pass"
);
#ifdef PADDLE_WITH_MKLDNN
cfg
->
EnableMKLDNN
();
#endif
}
void
SetInput
(
std
::
vector
<
std
::
vector
<
PaddleTensor
>>
*
inputs
)
{
...
...
paddle/fluid/operators/fc_op.cc
浏览文件 @
37d49b38
...
...
@@ -27,11 +27,9 @@ void FCOp::InferShape(framework::InferShapeContext* ctx) const {
"Out(Output) of Fully Connected should not be null."
);
PADDLE_ENFORCE
(
ctx
->
HasInput
(
"W"
),
"W(Input) of Fully Connected should not be null."
);
// NCHW
auto
in_dims
=
ctx
->
GetInputDim
(
"Input"
);
// IO, I=C*H*W
auto
w_dims
=
ctx
->
GetInputDim
(
"W"
);
std
::
vector
<
int64_t
>
output_shape
({
in_dims
[
0
],
w_dims
[
1
]});
if
(
ctx
->
HasInput
(
"Bias"
))
{
auto
bias_dims
=
ctx
->
GetInputDim
(
"Bias"
);
...
...
@@ -44,14 +42,32 @@ void FCOp::InferShape(framework::InferShapeContext* ctx) const {
"The shape of Bias must be [1, dim]."
);
}
}
if
(
ctx
->
Attrs
().
Get
<
bool
>
(
"use_mkldnn"
))
{
PADDLE_ENFORCE
(
in_dims
.
size
()
==
2
||
in_dims
.
size
()
==
4
,
"Fully Connected input should be 2-D or 4-D tensor."
);
}
PADDLE_ENFORCE_EQ
(
w_dims
.
size
(),
2UL
,
"Fully Connected input should be 2-D tensor."
);
PADDLE_ENFORCE_EQ
(
framework
::
product
(
in_dims
)
/
in_dims
[
0
],
w_dims
[
0
],
"Fully Connected input and weigth size do not match."
);
int
in_num_col_dims
=
ctx
->
Attrs
().
Get
<
int
>
(
"in_num_col_dims"
);
PADDLE_ENFORCE_GT
(
in_dims
.
size
(),
in_num_col_dims
,
"The input tensor Input's rank of FCOp should be larger than "
"in_num_col_dims."
);
auto
in_mat_dims
=
framework
::
flatten_to_2d
(
in_dims
,
in_num_col_dims
);
PADDLE_ENFORCE_EQ
(
in_mat_dims
[
1
],
w_dims
[
0
],
"Fully Connected input and weigth size do not match. %s, %s"
);
std
::
vector
<
int64_t
>
output_dims
;
output_dims
.
reserve
(
static_cast
<
size_t
>
(
in_num_col_dims
+
1
));
for
(
int
i
=
0
;
i
<
in_num_col_dims
;
++
i
)
{
output_dims
.
push_back
(
in_dims
[
i
]);
}
output_dims
.
push_back
(
w_dims
[
1
]);
ctx
->
SetOutputDim
(
"Out"
,
framework
::
make_ddim
(
output_
shape
));
ctx
->
SetOutputDim
(
"Out"
,
framework
::
make_ddim
(
output_
dims
));
ctx
->
ShareLoD
(
"Input"
,
"Out"
);
}
...
...
@@ -101,12 +117,15 @@ framework::OpKernelType FCOpGrad::GetExpectedKernelType(
}
void
FCOpMaker
::
Make
()
{
AddInput
(
"Input"
,
"(Tensor), The input tensor of fully connected operator with format "
"(NCHW). "
);
AddInput
(
"Input"
,
"(Tensor), The input tensor of fully connected operator."
);
AddInput
(
"W"
,
"(Tensor), The weight fc op with shape (I, O)."
);
AddInput
(
"Bias"
,
"(Tensor, optional) Bias vector with shape (1 x O"
)
.
AsDispensable
();
AddAttr
<
int
>
(
"in_num_col_dims"
,
"(int, default 1), The fc op can take tensors with more than "
"two dimensions as its inputs."
)
.
SetDefault
(
1
)
.
EqualGreaterThan
(
1
);
AddOutput
(
"Out"
,
"(Tensor) The output tensor of fully connected operator. "
);
AddAttr
<
bool
>
(
"use_mkldnn"
,
"(bool, default false) Only used in mkldnn kernel"
)
...
...
@@ -131,13 +150,15 @@ class FCOpKernel : public framework::OpKernel<T> {
auto
output
=
ctx
.
Output
<
Tensor
>
(
"Out"
);
auto
in_dims
=
input
->
dims
();
auto
w_dims
=
w
->
dims
();
auto
out_dims
=
output
->
dims
();
int
M
=
framework
::
product
(
out_dims
)
/
out_dims
[
out_dims
.
size
()
-
1
];
const
T
*
input_data
=
input
->
data
<
T
>
();
const
T
*
w_data
=
w
->
data
<
T
>
();
T
*
output_data
=
output
->
mutable_data
<
T
>
(
ctx
.
GetPlace
());
auto
blas
=
math
::
GetBlas
<
platform
::
CPUDeviceContext
,
T
>
(
ctx
);
math
::
FCCompute
<
platform
::
CPUDeviceContext
,
T
>
(
blas
,
in_dims
[
0
]
,
w_dims
[
1
],
w_dims
[
0
],
input_data
,
w_data
,
output_data
,
blas
,
M
,
w_dims
[
1
],
w_dims
[
0
],
input_data
,
w_data
,
output_data
,
bias
?
bias
->
data
<
T
>
()
:
NULL
);
// TODO(TJ): fuse act
...
...
paddle/fluid/operators/hash_op.cc
浏览文件 @
37d49b38
...
...
@@ -38,7 +38,7 @@ class HashOp : public framework::OperatorWithKernel {
std
::
vector
<
int64_t
>
out_dims
;
out_dims
.
reserve
(
dims
.
size
()
+
1
);
// copy all dims except the last one
for
(
size_
t
i
=
0u
;
i
!=
dims
.
size
()
-
1
;
++
i
)
{
for
(
in
t
i
=
0u
;
i
!=
dims
.
size
()
-
1
;
++
i
)
{
out_dims
.
emplace_back
(
dims
[
i
]);
}
int
num_hash
=
ctx
->
Attrs
().
Get
<
int
>
(
"num_hash"
);
...
...
paddle/fluid/operators/math/selected_rows_functor.cc
浏览文件 @
37d49b38
...
...
@@ -244,7 +244,7 @@ typename std::enable_if<
std
::
is_same
<
DeviceContext
,
platform
::
CPUDeviceContext
>::
value
>::
type
elementwise_add_to
(
const
DeviceContext
&
ctx
,
BlasT
<
DeviceContext
,
T
>*
blas
,
size_t
data_len
,
const
T
*
in
,
T
*
out
)
{
for
(
int64
_t
i
=
0
;
i
<
data_len
;
i
++
)
{
for
(
size
_t
i
=
0
;
i
<
data_len
;
i
++
)
{
out
[
i
]
+=
in
[
i
];
}
}
...
...
paddle/fluid/operators/math/sequence_pooling_test.cc
浏览文件 @
37d49b38
...
...
@@ -70,11 +70,11 @@ void TestSequencePoolingSum(const paddle::framework::LoD& lod) {
EXPECT_EQ
(
in_grad
.
lod
(),
lod
);
if
(
paddle
::
platform
::
is_cpu_place
(
*
place
))
{
for
(
int64
_t
i
=
0
;
i
<
in_grad
.
lod
()[
0
].
size
()
-
1
;
++
i
)
{
for
(
size
_t
i
=
0
;
i
<
in_grad
.
lod
()[
0
].
size
()
-
1
;
++
i
)
{
int64_t
begin
=
in_grad
.
lod
()[
0
][
i
];
int64_t
end
=
in_grad
.
lod
()[
0
][
i
+
1
];
paddle
::
framework
::
Tensor
tmp
=
in_grad
.
Slice
(
begin
,
end
);
for
(
int64
_t
j
=
0
;
j
!=
tmp
.
numel
()
/
second_dim
;
++
j
)
{
for
(
size
_t
j
=
0
;
j
!=
tmp
.
numel
()
/
second_dim
;
++
j
)
{
for
(
int64_t
m
=
0
;
m
!=
second_dim
;
++
m
)
{
EXPECT_EQ
(
tmp
.
data
<
T
>
()[
m
+
j
*
second_dim
],
out_grad
.
data
<
T
>
()[
m
+
i
*
second_dim
]);
...
...
@@ -82,11 +82,11 @@ void TestSequencePoolingSum(const paddle::framework::LoD& lod) {
}
}
}
else
{
for
(
int64
_t
i
=
0
;
i
<
cpu_in_grad
.
lod
()[
0
].
size
()
-
1
;
++
i
)
{
for
(
size
_t
i
=
0
;
i
<
cpu_in_grad
.
lod
()[
0
].
size
()
-
1
;
++
i
)
{
int64_t
begin
=
cpu_in_grad
.
lod
()[
0
][
i
];
int64_t
end
=
cpu_in_grad
.
lod
()[
0
][
i
+
1
];
paddle
::
framework
::
Tensor
tmp
=
cpu_in_grad
.
Slice
(
begin
,
end
);
for
(
int64
_t
j
=
0
;
j
!=
tmp
.
numel
()
/
second_dim
;
++
j
)
{
for
(
size
_t
j
=
0
;
j
!=
tmp
.
numel
()
/
second_dim
;
++
j
)
{
for
(
int64_t
m
=
0
;
m
!=
second_dim
;
++
m
)
{
EXPECT_EQ
(
tmp
.
data
<
T
>
()[
m
+
j
*
second_dim
],
cpu_out_grad
.
data
<
T
>
()[
m
+
i
*
second_dim
]);
...
...
paddle/fluid/operators/merge_ids_op.h
浏览文件 @
37d49b38
...
...
@@ -43,11 +43,11 @@ class MergeIdsOpKernel : public framework::OpKernel<T> {
PADDLE_ENFORCE_EQ
(
ids
.
size
(),
outs
.
size
(),
"the number of Ids and Out should be the same"
);
in
t
row_ids_size
=
0
;
size_
t
row_ids_size
=
0
;
int
row_size
=
0
;
int
embedding_size
=
0
;
for
(
in
t
i
=
0
;
i
<
x_tensors
.
size
();
++
i
)
{
for
(
size_
t
i
=
0
;
i
<
x_tensors
.
size
();
++
i
)
{
const
auto
*
x_tensor
=
x_tensors
[
i
];
const
auto
*
row_id
=
row_ids
[
i
];
...
...
@@ -66,7 +66,7 @@ class MergeIdsOpKernel : public framework::OpKernel<T> {
std
::
unordered_map
<
int64_t
,
std
::
tuple
<
int64_t
,
int64_t
>>
selected_rows_idx_map
;
for
(
in
t
i
=
0
;
i
<
x_tensors
.
size
();
++
i
)
{
for
(
size_
t
i
=
0
;
i
<
x_tensors
.
size
();
++
i
)
{
const
auto
*
row_id
=
row_ids
[
i
];
for
(
int
j
=
0
;
j
<
row_id
->
numel
();
++
j
)
{
...
...
@@ -78,7 +78,7 @@ class MergeIdsOpKernel : public framework::OpKernel<T> {
PADDLE_ENFORCE_EQ
(
row_ids_size
,
selected_rows_idx_map
.
size
(),
"the rows and tensor map size should be the same"
);
for
(
in
t
i
=
0
;
i
<
outs
.
size
();
++
i
)
{
for
(
size_
t
i
=
0
;
i
<
outs
.
size
();
++
i
)
{
auto
*
out_ids
=
ids
[
i
];
auto
*
out
=
outs
[
i
];
...
...
paddle/fluid/operators/ref_by_trainer_id_op.h
浏览文件 @
37d49b38
...
...
@@ -38,7 +38,7 @@ class RefByTrainerIdKernel : public framework::OpKernel<T> {
}
else
{
trainer_id
=
*
trainer_id_data
;
}
PADDLE_ENFORCE_LT
(
trainer_id
,
in_list
.
size
());
PADDLE_ENFORCE_LT
(
(
size_t
)
trainer_id
,
in_list
.
size
());
out
->
mutable_data
<
T
>
(
context
.
GetPlace
());
out
->
ShareDataWith
(
*
(
in_list
[
trainer_id
]));
}
...
...
paddle/fluid/operators/split_ids_op.h
浏览文件 @
37d49b38
...
...
@@ -64,7 +64,7 @@ class SplitIdsOpKernel : public framework::OpKernel<T> {
out_ids
.
resize
(
outs
.
size
());
// split id by their shard_num.
for
(
in
t
i
=
0
;
i
<
all_ids
.
size
();
++
i
)
{
for
(
size_
t
i
=
0
;
i
<
all_ids
.
size
();
++
i
)
{
T
id
=
all_ids
[
i
];
size_t
shard_id
=
static_cast
<
size_t
>
(
id
)
%
shard_num
;
out_ids
[
shard_id
].
push_back
(
id
);
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录