PaddlePaddle / Paddle-Lite
Commit 5a9fe9f4
Authored May 31, 2018 by wangliu
Merge remote-tracking branch 'upstream/develop' into develop
Parents: 0d53906f 8a6f5a88
Showing 9 changed files with 230 additions and 89 deletions (+230 -89)
src/framework/operator.h                                      +1   -1
src/framework/program/block_desc.cpp                          +1   -7
src/framework/program/block_desc.h                            +4   -0
src/framework/program/program-optimize/node.cpp               +51  -56
src/framework/program/program-optimize/node.h                 +4   -2
src/framework/program/program-optimize/program_optimize.cpp   +143 -7
src/framework/program/program-optimize/program_optimize.h     +9   -4
src/framework/program/program_desc.cpp                        +7   -2
src/operators/kernel/arm/depthwise_conv_kernel.cpp            +10  -10
src/framework/operator.h

@@ -151,7 +151,7 @@ class FusionOpMatcher : PaddleMobileObject {
   virtual Node &BeginNode() { return node_; }
 
-  std::string BeginType() { return node_.BeginType(); }
+  std::string BeginType() { return node_.Type(); }
 
  protected:
   Node node_;
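The change keeps FusionOpMatcher::BeginType() in step with the Node API rename that lands in node.h below (Node::BeginType() becomes Node::Type()). For context on why a matcher exposes the type of its first node at all: fusion matchers are typically looked up by the type of the op that starts the pattern they fuse. The sketch below illustrates that idiom only; FakeMatcher and the registry map are hypothetical, not Paddle-Lite code.

#include <map>
#include <string>

// Hypothetical stand-in for FusionOpMatcher: matchers are commonly registered
// under the type of the first op of the pattern they fuse.
struct FakeMatcher {
  std::string begin_type;
  std::string BeginType() const { return begin_type; }
};

int main() {
  std::map<std::string, FakeMatcher> registry;
  FakeMatcher conv_add{"conv2d"};
  registry[conv_add.BeginType()] = conv_add;  // keyed by the pattern's first op
  return registry.count("conv2d") == 1 ? 0 : 1;
}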
src/framework/program/block_desc.cpp

@@ -25,13 +25,7 @@ std::vector<std::shared_ptr<VarDesc>> BlockDesc::Vars() const {
   return res;
 }
 
-std::vector<std::shared_ptr<OpDesc>> BlockDesc::Ops() const {
-  std::vector<std::shared_ptr<OpDesc>> res;
-  for (const auto &op : ops_) {
-    res.push_back(op);
-  }
-  return res;
-}
+std::vector<std::shared_ptr<OpDesc>> BlockDesc::Ops() const { return ops_; }
 
 BlockDesc::BlockDesc(PaddleMobile__Framework__Proto__BlockDesc *desc)
     : index_(desc->idx), parent_index_(desc->idx) {
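The rewritten Ops() is behavior-preserving: returning the ops_ member by value already copies the vector of shared_ptrs, which is exactly what the removed push_back loop built by hand. A minimal sketch of that equivalence, using hypothetical names (Holder, items_) rather than the real BlockDesc:

#include <memory>
#include <vector>

// Hypothetical sketch: returning the member by value copies the vector of
// shared_ptrs, so the element-by-element push_back loop was redundant.
struct Holder {
  std::vector<std::shared_ptr<int>> items_;
  std::vector<std::shared_ptr<int>> ItemsCopyLoop() const {
    std::vector<std::shared_ptr<int>> res;
    for (const auto &item : items_) res.push_back(item);
    return res;
  }
  std::vector<std::shared_ptr<int>> Items() const { return items_; }  // same result
};

int main() {
  Holder h;
  h.items_.push_back(std::make_shared<int>(7));
  auto a = h.ItemsCopyLoop();
  auto b = h.Items();
  return (a[0] == b[0]) ? 0 : 1;  // both copies share ownership of the element
}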
src/framework/program/block_desc.h

@@ -26,6 +26,7 @@ class BlockDesc : PaddleMobileObject {
  public:
   friend class Node;
+  friend class ProgramOptimize;
   BlockDesc() {}
   BlockDesc(PaddleMobile__Framework__Proto__BlockDesc *desc);
   BlockDesc(const BlockDesc &block_desc)
       : index_(block_desc.index_), parent_index_(block_desc.parent_index_) {
@@ -43,6 +44,8 @@ class BlockDesc : PaddleMobileObject {
   const int &ID() const { return index_; }
 
+  const bool &MultiThread() const { return multi_thread_; }
+
   const int &Parent() const { return parent_index_; }
 
   bool operator==(const paddle_mobile::framework::BlockDesc &in_block) const {
@@ -58,6 +61,7 @@ class BlockDesc : PaddleMobileObject {
  private:
   int index_;
+  bool multi_thread_;
   int parent_index_;
   std::vector<std::shared_ptr<OpDesc>> ops_;
   std::unordered_map<std::string, std::shared_ptr<VarDesc>> vars_;
src/framework/program/program-optimize/node.cpp

@@ -45,17 +45,6 @@ bool Node::operator==(const Node &in) {
   return true;
 }
 
-// std::shared_ptr<Node> Node::MatchTheFirstNode(std::string type){
-//
-//  for (const auto &node : outputs_){
-//    if (node->type_ == type){
-//      return node;
-//    }else{
-//
-//    }
-//  }
-//}
-
 std::vector<std::shared_ptr<framework::OpDesc>> Node::OpDescs(uint size) {
   std::vector<std::shared_ptr<framework::OpDesc>> op_descs;
   OpDescs(size - 1, &op_descs);
@@ -75,21 +64,40 @@ void Node::OpDescs(uint index,
 void Node::OpDescs(std::vector<std::shared_ptr<framework::OpDesc>> *op_desc,
                    Node *node, bool adding_thread, int thread_num) {
-  bool can_add_split = false;
+  if (outputs_.size() > 1) {
+    adding_thread = false;
+  }
+
+  bool can_add_split = false;
+  // Supported only when the current node has multiple outputs and its
+  // op_desc_ has exactly one output
   if (outputs_.size() > 1 &&
       op_input_output_key[op_desc_->type_].second.size() == 1) {
     can_add_split = true;
-    if (op_input_output_key[op_desc_->type_].second.size() != 1) {
-      DLOG << "the current op desc does not have exactly one output";
-      can_add_split = false;
-    }
+    // Walk the current node's output nodes
     for (const auto &output : outputs_) {
-      if (op_input_output_key.find(output->op_desc_->type_) !=
-          op_input_output_key.end()) {
-        auto inputs_and_outputs = op_input_output_key[output->op_desc_->type_];
-        auto outputs_of_output =
-            output->op_desc_->Output(inputs_and_outputs.second[0]);
-        auto inputs_of_output =
-            output->op_desc_->Input(inputs_and_outputs.first[0]);
+      // An output that itself has outputs is not supported
+      if (output->outputs_.size() > 0) {
+        can_add_split = false;
+        break;
+      }
+
+      // The OpDesc attached to this output node
+      std::shared_ptr<framework::OpDesc> &op_desc = output->op_desc_;
+      // Fetch this op's input keys and output keys
+      auto inputs_and_outputs = op_input_output_key[op_desc->type_];
+      // Check that this op type is known and that both its input key list
+      // and output key list have size 1
+      if (op_input_output_key.find(op_desc->type_) !=
+              op_input_output_key.end() &&
+          inputs_and_outputs.first.size() == 1 &&
+          inputs_and_outputs.second.size() == 1) {
+        auto inputs_of_output = op_desc->Input(inputs_and_outputs.first[0]);
+        auto outputs_of_output = op_desc->Output(inputs_and_outputs.second[0]);
+
+        // Supported as long as no input shares a name with an output
         for (int i = 0; i < inputs_of_output.size(); ++i) {
           std::string input_of_output = inputs_of_output[i];
           for (int j = 0; j < outputs_of_output.size(); ++j) {
@@ -101,7 +109,7 @@ void Node::OpDescs(std::vector<std::shared_ptr<framework::OpDesc>> *op_desc,
             }
           }
         }
-        } else {
+      } else {
         // If the model contains an op we do not know about, adding a split
         // is not supported
         DLOG << "can not find this op type: " << output->op_desc_->type_;
         can_add_split = false;
       }
@@ -124,12 +132,10 @@ void Node::OpDescs(std::vector<std::shared_ptr<framework::OpDesc>> *op_desc,
   if (can_add_split) {
     adding_thread = true;
-    std::shared_ptr<class OpDesc> split_op_desc =
-        std::make_shared<class OpDesc>();
+    std::shared_ptr<OpDesc> split_op_desc = std::make_shared<OpDesc>();
     split_op_desc->type_ = G_OP_TYPE_SPLIT;
     auto outputs = this->op_desc_->Output(
         op_input_output_key[this->op_desc_->Type()].second[0]);
     split_op_desc->inputs_ = {
         {op_input_output_key[G_OP_TYPE_SPLIT].first[0], outputs}};
     auto &split_outputs =
@@ -157,41 +163,12 @@ std::vector<std::shared_ptr<framework::OpDesc>> Node::OpDescs() {
   return op_descs;
 }
 
-std::string Node::ToString(std::string blank, const Node *node) const {
-  std::stringstream ss;
-  ss << type_ << "->\n";
-
-  if (inputs_.size() > 1 && node != inputs_.back()) {
-    return ss.str();
-  } else if (inputs_.size() > 1 && node == inputs_.back()) {
-    ss << "\n" << blank << type_ << "\n";
-  }
-
-  for (int i = 0; i < outputs_.size(); ++i) {
-    ss << blank << outputs_[i]->ToString(blank + " ", this) << "";
-  }
-
-  return ss.str();
-}
-
-std::string Node::ToString() const { return this->ToString(" ", this); }
-
 std::shared_ptr<Node> Node::To(int size) {
   std::shared_ptr<Node> node = std::make_shared<Node>();
   this->To(size - 1, node);
   return node;
 }
 
-// Node &Node::To(int size) {
-//  if (size == 1) {
-//    this->outputs_.clear();
-//  }
-//
-//  for (int j = 0; j < this->outputs_.size(); ++j) {
-//    outputs_[j]->To(size - 1);
-//  }
-//  return *this;
-//}
-
 void Node::To(int index, std::shared_ptr<Node> node) {
   node->type_ = this->type_;
   if (index != 0) {
@@ -268,6 +245,24 @@ void Node::Folder(
   }
 }
 
+std::string Node::ToString(std::string blank, const Node *node) const {
+  std::stringstream ss;
+  ss << type_ << "->\n";
+
+  if (inputs_.size() > 1 && node != inputs_.back()) {
+    return ss.str();
+  } else if (inputs_.size() > 1 && node == inputs_.back()) {
+    ss << "\n" << blank << type_ << "\n";
+  }
+
+  for (int i = 0; i < outputs_.size(); ++i) {
+    ss << blank << outputs_[i]->ToString(blank + " ", this) << "";
+  }
+
+  return ss.str();
+}
+
+std::string Node::ToString() const { return this->ToString(" ", this); }
+
 void Node::Description() {
   if (op_desc_.get()) {
     DLOG << *op_desc_;
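Taken together, the new checks in OpDescs decide whether a split op may be inserted after a node whose output feeds several consumers: each consumer must be a known op with exactly one input key and one output key, must have no outputs of its own at this point in the walk, and none of its input names may reappear among its output names (that is, it must not operate in place). The helper below is a hypothetical distillation of the last rule only, not repository code:

#include <iostream>
#include <string>
#include <vector>

// Hypothetical sketch of the in-place check: a split can only be inserted
// when none of a consumer's input names reappear among its output names.
bool NoInPlaceNames(const std::vector<std::string> &inputs,
                    const std::vector<std::string> &outputs) {
  for (const auto &in : inputs) {
    for (const auto &out : outputs) {
      if (in == out) return false;  // input and output share a name
    }
  }
  return true;
}

int main() {
  std::cout << NoInPlaceNames({"x"}, {"y"}) << "\n";  // 1: split is allowed
  std::cout << NoInPlaceNames({"x"}, {"x"}) << "\n";  // 0: in-place, rejected
  return 0;
}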
src/framework/program/program-optimize/node.h

@@ -27,6 +27,8 @@ namespace paddle_mobile {
 namespace framework {
 
 class Node : PaddleMobileObject {
+  friend class ProgramOptimize;
+
  public:
   Node() {}
   explicit Node(const std::string &type) : type_(type) {}
@@ -42,8 +44,8 @@ class Node : PaddleMobileObject {
       std::map<std::string, std::pair<std::string, std::string>> change_map);
   std::vector<std::shared_ptr<framework::OpDesc>> OpDescs(uint size);
   std::vector<std::shared_ptr<framework::OpDesc>> OpDescs();
-  std::shared_ptr<framework::OpDesc> OpDesc() { return op_desc_; }
-  std::string BeginType() { return type_; }
+  std::shared_ptr<framework::OpDesc> OpDescOfNode() { return op_desc_; }
+  std::string Type() { return type_; }
   void Description();
 
  private:
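The rename of OpDesc() to OpDescOfNode() is more than cosmetic. A member function named OpDesc hides the class name OpDesc everywhere inside Node's scope, which is what forced the old node.cpp to write std::make_shared<class OpDesc>() with an elaborated type specifier; the node.cpp hunk above drops that workaround in the same commit. A minimal sketch of the name-hiding effect, with hypothetical NodeBefore/NodeAfter types:

#include <memory>

struct OpDesc {};

// Before the rename: a member function named OpDesc hides the class name
// OpDesc inside this scope, so the type must be spelled "class OpDesc".
struct NodeBefore {
  std::shared_ptr<class OpDesc> OpDesc() { return op_desc_; }
  std::shared_ptr<class OpDesc> op_desc_;
};

// After the rename there is no hiding, and plain OpDesc works again.
struct NodeAfter {
  std::shared_ptr<OpDesc> OpDescOfNode() { return op_desc_; }
  std::shared_ptr<OpDesc> op_desc_;
};

int main() {
  NodeAfter node;
  return node.OpDescOfNode() == nullptr ? 0 : 1;
}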
src/framework/program/program-optimize/program_optimize.cpp

@@ -19,11 +19,12 @@ namespace paddle_mobile {
 namespace framework {
 
-// std::shared_ptr<ProgramDesc> ProgramOptimize::Optimize() {}
-
 std::shared_ptr<ProgramDesc> ProgramOptimize::FushionOptimize(
-    std::shared_ptr<ProgramDesc> ori_des) {
-  ProgramDesc *optimize_program = new ProgramDesc(*ori_des);
-
+    std::shared_ptr<ProgramDesc> ori_des, bool add_split) {
+  //  ProgramDesc *optimize_program = new ProgramDesc(*ori_des);
+  std::shared_ptr<ProgramDesc> optimize_program =
+      std::make_shared<ProgramDesc>(*ori_des);
+  current_block_ = optimize_program->Blocks().size();
 
   for (int i = 0; i < optimize_program->Blocks().size(); ++i) {
     std::unordered_map<std::string, std::shared_ptr<Node>> output_nodes;
@@ -96,10 +97,145 @@ std::shared_ptr<ProgramDesc> ProgramOptimize::FushionOptimize(
     }
 
     //    DLOG << "node: \n" << *begin_node;
-    block->ops_ = begin_node->OpDescs();
+    std::vector<std::shared_ptr<framework::OpDesc>> op_descs;
+    GenerateOps(&op_descs, begin_node.get());
+    block->ops_ = op_descs;
   }
 
-  std::shared_ptr<ProgramDesc> shared_optimzie(optimize_program);
-  return shared_optimzie;
+  for (int m = 0; m < new_blocks_.size(); ++m) {
+    std::shared_ptr<BlockDesc> new_block = new_blocks_[m];
+    new_block->index_ = m + ori_des->blocks_.size();
+    optimize_program->blocks_.push_back(new_block);
+  }
+  return optimize_program;
+}
+
+void ProgramOptimize::GenerateOps(
+    std::vector<std::shared_ptr<framework::OpDesc>> *op_desc, Node *input_node,
+    Node *current_node, bool adding_thread, int thread_num,
+    std::shared_ptr<BlockDesc> new_block) {
+  if (current_node->outputs_.size() > 1) {
+    adding_thread = false;
+  }
+
+  bool can_add_split = false;
+  // Supported only when the current node has multiple outputs and its
+  // op_desc_ has exactly one output
+  if (current_node->outputs_.size() > 1 &&
+      op_input_output_key[current_node->op_desc_->type_].second.size() == 1) {
+    can_add_split = true;
+
+    // Walk the current node's output nodes
+    for (const auto &output : current_node->outputs_) {
+      // An output that itself has multiple outputs is not supported
+      if (output->outputs_.size() > 1) {
+        DLOG << "don't support multi output of output";
+        can_add_split = false;
+        break;
+      }
+
+      // The OpDesc attached to this output node
+      std::shared_ptr<framework::OpDesc> &op_desc = output->op_desc_;
+      // Fetch this op's input keys and output keys
+      auto inputs_and_outputs = op_input_output_key[op_desc->type_];
+      // Check that this op type is known and that both its input key list
+      // and output key list have size 1
+      if (op_input_output_key.find(op_desc->type_) !=
+              op_input_output_key.end() &&
+          inputs_and_outputs.first.size() == 1 &&
+          inputs_and_outputs.second.size() == 1) {
+        auto inputs_of_output = op_desc->Input(inputs_and_outputs.first[0]);
+        auto outputs_of_output = op_desc->Output(inputs_and_outputs.second[0]);
+
+        // Supported as long as no input shares a name with an output
+        for (int i = 0; i < inputs_of_output.size(); ++i) {
+          std::string input_of_output = inputs_of_output[i];
+          for (int j = 0; j < outputs_of_output.size(); ++j) {
+            std::string output_of_output = outputs_of_output[j];
+            if (input_of_output == output_of_output) {
+              DLOG << "output's output contains input " << input_of_output;
+              can_add_split = false;
+              break;
+            }
+          }
+        }
+      } else {
+        // If the model contains an op we do not know about, adding a split
+        // is not supported
+        DLOG << "can not find this op type: " << output->op_desc_->type_;
+        can_add_split = false;
+      }
+    }
+  }
+
+  if (current_node->inputs_.size() > 1 &&
+      input_node != current_node->inputs_.back()) {
+    return;
+  } else if (current_node->inputs_.size() > 1 &&
+             input_node == current_node->inputs_.back()) {
+    new_block.reset();
+    adding_thread = false;
+    op_desc->push_back(current_node->op_desc_);
+  } else {
+    if (new_block.get() && adding_thread) {
+      new_block->ops_.push_back(current_node->op_desc_);
+    } else {
+      op_desc->push_back(current_node->op_desc_);
+    }
+  }
+  if (adding_thread) {
+    Attribute attr;
+    attr.Set<int>(thread_num);
+    current_node->op_desc_->attrs_["thread"] = attr;
+  }
+
+  if (can_add_split) {
+    new_block = std::make_shared<BlockDesc>();
+    new_block->multi_thread_ = true;
+    new_block->index_ = current_block_;
+    new_blocks_.push_back(new_block);
+
+    adding_thread = true;
+    std::shared_ptr<OpDesc> split_op_desc = std::make_shared<OpDesc>();
+    split_op_desc->type_ = G_OP_TYPE_SPLIT;
+    auto outputs = current_node->op_desc_->Output(
+        op_input_output_key[current_node->op_desc_->Type()].second[0]);
+    split_op_desc->inputs_ = {
+        {op_input_output_key[G_OP_TYPE_SPLIT].first[0], outputs}};
+    auto &split_outputs =
+        split_op_desc
+            ->outputs_[op_input_output_key[G_OP_TYPE_SPLIT].second[0]];
+    for (const auto &output : current_node->outputs_) {
+      split_outputs.push_back(outputs[0]);
+    }
+    Attribute attr;
+    attr.Set<int>(current_block_);
+    split_op_desc->attrs_["block_id"] = attr;
+    op_desc->push_back(split_op_desc);
+    current_block_++;
+  }
+
+  for (int i = 0; i < current_node->outputs_.size(); ++i) {
+    auto &output = current_node->outputs_[i];
+    if (can_add_split) {
+      GenerateOps(op_desc, current_node, output.get(), adding_thread, i,
+                  new_block);
+    } else {
+      GenerateOps(op_desc, current_node, output.get(), adding_thread,
+                  thread_num, new_block);
+    }
+  }
+}
+
+void ProgramOptimize::GenerateOps(
+    std::vector<std::shared_ptr<framework::OpDesc>> *op_descs,
+    Node *begin_node) {
+  //  std::vector<std::shared_ptr<framework::OpDesc>> *op_desc,
+  //  Node *input_node, Node *current_node, bool adding_thread, int
+  //  thread_num
+  this->GenerateOps(op_descs, begin_node, begin_node, false, -1, nullptr);
 }
 
 }  // namespace framework
 }  // namespace paddle_mobile
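GenerateOps is a recursive walk over the fused graph. When a node's single output tensor feeds several consumers and the checks above pass, it synthesizes a split op whose output list repeats that tensor once per consumer, opens a new multi_thread_ block for the fan-out, and tags each branch's ops with a thread attribute. The fragment below mocks only the fan-out bookkeeping; MiniOpDesc and the "X"/"Out" keys are assumptions for illustration, not the real OpDesc API or the real op_input_output_key entries:

#include <cstddef>
#include <map>
#include <string>
#include <vector>

// Hypothetical mock of the split fan-out: one producer tensor repeated once
// per consumer in the split op's output list.
struct MiniOpDesc {
  std::string type;
  std::map<std::string, std::vector<std::string>> inputs, outputs;
};

int main() {
  const std::string producer_out = "conv_out";
  const std::size_t num_consumers = 3;

  MiniOpDesc split;
  split.type = "split";                // stands in for G_OP_TYPE_SPLIT
  split.inputs["X"] = {producer_out};  // "X" is an assumed input key
  for (std::size_t i = 0; i < num_consumers; ++i) {
    // mirrors split_outputs.push_back(outputs[0]) in the hunk above
    split.outputs["Out"].push_back(producer_out);
  }
  return split.outputs["Out"].size() == num_consumers ? 0 : 1;
}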
src/framework/program/program-optimize/program_optimize.h

@@ -28,12 +28,17 @@ class ProgramOptimize {
  public:
   ProgramOptimize() {}
   std::shared_ptr<ProgramDesc> FushionOptimize(
-      std::shared_ptr<ProgramDesc> ori_des);
+      std::shared_ptr<ProgramDesc> ori_des, bool add_split = false);
 
  private:
   //  std::shared_ptr<ProgramDesc> ori_desc_;
   std::vector<std::unordered_map<std::string, std::shared_ptr<Node>>>
       outputs_nodes_;
+  int current_block_;
+  std::vector<std::shared_ptr<BlockDesc>> new_blocks_;
+
+  void GenerateOps(std::vector<std::shared_ptr<framework::OpDesc>> *op_descs,
+                   Node *begin_node);
+  void GenerateOps(std::vector<std::shared_ptr<framework::OpDesc>> *op_desc,
+                   Node *input_node, Node *current_node, bool adding_thread,
+                   int thread_num, std::shared_ptr<BlockDesc> new_block);
 };
 
 }  // namespace framework
 }  // namespace paddle_mobile
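Giving the new add_split parameter a default of false keeps every existing FushionOptimize call site compiling unchanged while letting new callers opt in to split insertion. A tiny sketch of the idiom; Desc and the free function here are hypothetical:

#include <memory>

// Sketch: a defaulted trailing parameter keeps old call sites compiling.
struct Desc {};
std::shared_ptr<Desc> FushionOptimize(std::shared_ptr<Desc> ori_des,
                                      bool add_split = false) {
  (void)add_split;
  return ori_des;
}

int main() {
  auto d = std::make_shared<Desc>();
  FushionOptimize(d);        // old call site, add_split defaults to false
  FushionOptimize(d, true);  // new call site opting in to split insertion
  return 0;
}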
src/framework/program/program_desc.cpp

@@ -32,11 +32,13 @@ void ProgramDesc::Description(std::string header) {
   if (header.size()) {
     LOG(kLOG_INFO) << header;
   }
-  for (const auto &block : this->blocks_) {
+
+  for (int i = 0; i < this->blocks_.size(); ++i) {
+    auto block = this->blocks_[i];
     LOG(kLOG_DEBUG) << "block: " << block->ID();
+    LOG(kLOG_INFO) << "block ops size: " << block->Ops().size();
     for (int j = 0; j < block->Ops().size(); ++j) {
-      const auto &op = block->Ops()[j];
+      auto op = block->Ops()[j];
       LOG(kLOG_DEBUG1) << "op: " << op->Type();
       for (auto &input : op->GetInputs()) {
         LOG(kLOG_DEBUG2) << "input parameter: " << input.first;
@@ -71,6 +73,9 @@ void ProgramDesc::Description(std::string header) {
       }
     }
   }
+
+  for (const auto &block : this->blocks_) {
+  }
+
 #endif
 }
src/operators/kernel/arm/depthwise_conv_kernel.cpp

@@ -32,7 +32,7 @@ void DepthwiseConvKernel<CPU, float>::Compute(const ConvParam &param) const {
   std::vector<int> paddings = param.Paddings();
   std::vector<int> dilations = param.Dilations();
 
-  DLOG << " compute end get Attrs " << strides[0];
+  //  DLOG << " compute end get Attrs " << strides[0];
 
   const int batch_size = static_cast<int>(input->dims()[0]);
@@ -59,17 +59,17 @@ void DepthwiseConvKernel<CPU, float>::Compute(const ConvParam &param) const {
     col_matrix.ShareDataWith(col);
     col_matrix.Resize(col_matrix_shape);
   }
-  DLOG << " col_shape = " << col_shape;
-  DLOG << " col_matrix_shape = " << col_matrix_shape;
+  //  DLOG << " col_shape = " << col_shape;
+  //  DLOG << " col_matrix_shape = " << col_matrix_shape;
   framework::DDim input_shape = framework::slice_ddim(
       input->dims(), 1, static_cast<int>(input->dims().size()));
-  DLOG << " input_shape = " << input_shape;
+  //  DLOG << " input_shape = " << input_shape;
 
   framework::DDim filter_matrix_shape = {filter.dims()[0],
                                          filter.numel() / filter.dims()[0]};
   filter.Resize(filter_matrix_shape);
-  DLOG << " filter.dims() = " << filter.dims();
+  //  DLOG << " filter.dims() = " << filter.dims();
 
   framework::DDim output_matrix_shape = {output->dims()[1],
@@ -85,8 +85,8 @@ void DepthwiseConvKernel<CPU, float>::Compute(const ConvParam &param) const {
   for (int i = 0; i < batch_size; i++) {
     Tensor in_batch = input->Slice(i, i + 1).Resize(input_shape);
     Tensor out_batch = output->Slice(i, i + 1).Resize(output_matrix_shape);
-    DLOG << " in_batch.dims() = " << in_batch.dims();
-    DLOG << " out_batch.dims() = " << out_batch.dims();
+    //    DLOG << " in_batch.dims() = " << in_batch.dims();
+    //    DLOG << " out_batch.dims() = " << out_batch.dims();
 
     for (int g = 0; g < groups; g++) {
       Tensor in_slice = in_batch.Slice(g * in_step, (g + 1) * in_step);
@@ -109,9 +109,9 @@ void DepthwiseConvKernel<CPU, float>::Compute(const ConvParam &param) const {
       // gemm
       Tensor out_slice = out_batch.Slice(g * out_step, (g + 1) * out_step);
       Tensor filter_slice = filter.Slice(g * out_step, (g + 1) * out_step);
-      DLOG << " out_slice " << out_slice.dims();
-      DLOG << " filter_slice " << filter_slice.dims();
-      DLOG << " col_matrix " << col_matrix.dims();
+      //      DLOG << " out_slice " << out_slice.dims();
+      //      DLOG << " filter_slice " << filter_slice.dims();
+      //      DLOG << " col_matrix " << col_matrix.dims();
       math::matmul<float>(filter_slice, false, col_matrix, false,
                           static_cast<float>(1), &out_slice,
                           static_cast<float>(0));
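For orientation, the code around these silenced logs implements depthwise convolution as im2col followed by a per-group GEMM: with groups equal to the channel count, each iteration slices out one channel via Slice(g * in_step, (g + 1) * in_step). The standalone sketch below replays just that slicing arithmetic; the channel and group counts are made-up values for illustration:

#include <cstdio>

// Hypothetical walk-through of the per-group slicing above: for a depthwise
// convolution, groups == channels, so each group covers exactly one channel.
int main() {
  const int channels = 4;
  const int groups = 4;                   // depthwise: one group per channel
  const int in_step = channels / groups;  // mirrors the kernel's step width
  for (int g = 0; g < groups; ++g) {
    // mirrors in_batch.Slice(g * in_step, (g + 1) * in_step)
    std::printf("group %d -> input channels [%d, %d)\n", g, g * in_step,
                (g + 1) * in_step);
  }
  return 0;
}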