Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
PaddlePaddle
Paddle-Lite
提交
91de3b45
P
Paddle-Lite
项目概览
PaddlePaddle
/
Paddle-Lite
通知
331
Star
4
Fork
1
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
271
列表
看板
标记
里程碑
合并请求
78
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle-Lite
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
271
Issue
271
列表
看板
标记
里程碑
合并请求
78
合并请求
78
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
未验证
提交
91de3b45
编写于
4月 08, 2020
作者:
J
jackzhang235
提交者:
GitHub
4月 08, 2020
浏览文件
操作
浏览文件
下载
差异文件
Merge branch 'develop' into add_dropout
上级
77360ba7
d1b35283
变更
17
显示空白变更内容
内联
并排
Showing
17 changed file
with
44 addition
and
14 deletion
+44
-14
lite/api/cxx_api.cc
lite/api/cxx_api.cc
+0
-2
lite/core/mir/mlu_postprocess_pass.cc
lite/core/mir/mlu_postprocess_pass.cc
+23
-2
lite/kernels/host/multiclass_nms_compute.cc
lite/kernels/host/multiclass_nms_compute.cc
+1
-0
lite/kernels/mlu/bridges/CMakeLists.txt
lite/kernels/mlu/bridges/CMakeLists.txt
+2
-2
lite/kernels/mlu/bridges/act_op.cc
lite/kernels/mlu/bridges/act_op.cc
+1
-0
lite/kernels/mlu/bridges/batch_norm_op.cc
lite/kernels/mlu/bridges/batch_norm_op.cc
+2
-0
lite/kernels/mlu/bridges/concat_op.cc
lite/kernels/mlu/bridges/concat_op.cc
+1
-0
lite/kernels/mlu/bridges/conv_op.cc
lite/kernels/mlu/bridges/conv_op.cc
+1
-0
lite/kernels/mlu/bridges/elementwise_ops.cc
lite/kernels/mlu/bridges/elementwise_ops.cc
+2
-0
lite/kernels/mlu/bridges/fc_op.cc
lite/kernels/mlu/bridges/fc_op.cc
+1
-0
lite/kernels/mlu/bridges/graph.h
lite/kernels/mlu/bridges/graph.h
+0
-4
lite/kernels/mlu/bridges/interpolate_op.cc
lite/kernels/mlu/bridges/interpolate_op.cc
+1
-0
lite/kernels/mlu/bridges/pool_op.cc
lite/kernels/mlu/bridges/pool_op.cc
+1
-0
lite/kernels/mlu/bridges/scale_op.cc
lite/kernels/mlu/bridges/scale_op.cc
+1
-0
lite/kernels/mlu/bridges/softmax_op.cc
lite/kernels/mlu/bridges/softmax_op.cc
+1
-0
lite/kernels/mlu/bridges/test_helper.cc
lite/kernels/mlu/bridges/test_helper.cc
+2
-1
lite/kernels/mlu/bridges/transpose_op.cc
lite/kernels/mlu/bridges/transpose_op.cc
+4
-3
未找到文件。
lite/api/cxx_api.cc
浏览文件 @
91de3b45
...
...
@@ -316,11 +316,9 @@ void Predictor::Build(const cpp::ProgramDesc &desc,
}
}
}
#ifndef LITE_WITH_MLU
if
(
is_quantized_model
)
{
inner_places
.
emplace_back
(
Place
{
TARGET
(
kARM
),
PRECISION
(
kInt8
)});
}
#endif
Program
program
(
desc
,
scope_
,
inner_places
);
...
...
lite/core/mir/mlu_postprocess_pass.cc
浏览文件 @
91de3b45
...
...
@@ -60,8 +60,19 @@ Node* MLUPostprocessPass::InsertCastBefore(const std::string& op_type,
CHECK
(
0
)
<<
"Unsupport cast type"
;
}
cast_op
->
Attach
(
op_desc
,
inst_node
->
AsStmt
().
op
()
->
scope
());
auto
v_places
=
graph
->
valid_places
();
for
(
auto
it
=
v_places
.
begin
();
it
!=
v_places
.
end
();)
{
if
(
it
->
target
!=
TARGET
(
kMLU
)
&&
it
->
target
!=
TARGET
(
kHost
)
&&
it
->
target
!=
TARGET
(
kX86
))
{
it
=
v_places
.
erase
(
it
);
}
else
{
++
it
;
}
}
// create kernels
auto
kernels
=
cast_op
->
CreateKernels
(
graph
->
valid_places
()
);
auto
kernels
=
cast_op
->
CreateKernels
(
v_places
);
std
::
vector
<
std
::
unique_ptr
<
KernelBase
>>
selected_kernels
;
bool
is_found
=
false
;
for
(
auto
&
kernel
:
kernels
)
{
...
...
@@ -150,8 +161,18 @@ Node* MLUPostprocessPass::InsertCastAfter(const std::string& op_type,
cast_op
->
Attach
(
op_desc
,
inst_node
->
AsStmt
().
op
()
->
scope
());
auto
v_places
=
graph
->
valid_places
();
for
(
auto
it
=
v_places
.
begin
();
it
!=
v_places
.
end
();)
{
if
(
it
->
target
!=
TARGET
(
kMLU
)
&&
it
->
target
!=
TARGET
(
kHost
)
&&
it
->
target
!=
TARGET
(
kX86
))
{
it
=
v_places
.
erase
(
it
);
}
else
{
++
it
;
}
}
// create kernels
auto
kernels
=
cast_op
->
CreateKernels
(
graph
->
valid_places
()
);
auto
kernels
=
cast_op
->
CreateKernels
(
v_places
);
std
::
vector
<
std
::
unique_ptr
<
KernelBase
>>
selected_kernels
;
bool
is_found
=
false
;
for
(
auto
&
kernel
:
kernels
)
{
...
...
lite/kernels/host/multiclass_nms_compute.cc
浏览文件 @
91de3b45
...
...
@@ -369,6 +369,7 @@ void MulticlassNmsCompute::Run() {
}
}
else
{
outs
->
Resize
({
static_cast
<
int64_t
>
(
num_kept
),
out_dim
});
(
void
)
outs
->
mutable_data
<
float
>
();
int
offset
=
0
;
int
*
oindices
=
nullptr
;
for
(
int
i
=
0
;
i
<
n
;
++
i
)
{
...
...
lite/kernels/mlu/bridges/CMakeLists.txt
浏览文件 @
91de3b45
...
...
@@ -3,7 +3,7 @@ if(NOT LITE_WITH_MLU)
endif
()
lite_cc_library
(
subgraph_bridge_utility_mlu SRCS utility.cc DEPS
${
mlu_builder_libs
}
tensor
)
lite_cc_library
(
subgraph_bridge_tensor_mlu SRCS tensor.cc DEPS
${
mlu_builder_libs
}
)
lite_cc_library
(
subgraph_bridge_tensor_mlu SRCS tensor.cc DEPS
${
mlu_builder_libs
}
subgraph_bridge_utility_mlu
)
lite_cc_library
(
subgraph_bridge_graph_mlu SRCS graph.cc DEPS subgraph_bridge_utility_mlu subgraph_bridge_tensor_mlu
)
set
(
mlu_subgraph_bridge_deps subgraph_bridge_registry subgraph_bridge_engine subgraph_bridge_utility_mlu subgraph_bridge_graph_mlu
)
...
...
@@ -49,6 +49,6 @@ lite_cc_test(test_fc_converter_mlu SRCS fc_op_test.cc DEPS scope optimizer targe
lite_cc_test
(
test_scale_converter_mlu SRCS scale_op_test.cc DEPS scope optimizer target_wrapper_host model_parser program
${
mlu_subgraph_bridges
}
subgraph_compute_mlu subgraph_test_helper_mlu
)
lite_cc_test
(
test_interp_converter_mlu SRCS interpolate_op_test.cc DEPS scope optimizer target_wrapper_host model_parser program
${
mlu_subgraph_bridges
}
subgraph_compute_mlu subgraph_test_helper_mlu
)
lite_cc_test
(
test_concat_converter_mlu SRCS concat_op_test.cc DEPS scope optimizer target_wrapper_host model_parser program
${
mlu_subgraph_bridges
}
subgraph_compute_mlu subgraph_test_helper_mlu
)
lite_cc_test
(
test_transpose_converter_mlu SRCS transpose_op_test.cc DEPS scope optimizer target_wrapper_host model_parser program
${
mlu_subgraph_bridges
}
subgraph_compute_mlu subgraph_test_helper_mlu
)
#
lite_cc_test(test_transpose_converter_mlu SRCS transpose_op_test.cc DEPS scope optimizer target_wrapper_host model_parser program ${mlu_subgraph_bridges} subgraph_compute_mlu subgraph_test_helper_mlu)
lite_cc_test
(
test_dropout_converter_mlu SRCS dropout_op_test.cc DEPS scope optimizer target_wrapper_host model_parser program
${
mlu_subgraph_bridges
}
subgraph_compute_mlu subgraph_test_helper_mlu
)
message
(
STATUS
"+++++ mlu_subgraph_bridges:
${
mlu_subgraph_bridges
}
"
)
lite/kernels/mlu/bridges/act_op.cc
浏览文件 @
91de3b45
...
...
@@ -60,6 +60,7 @@ int ActConverter(void* ctx, OpLite* op, KernelBase* kernel) {
output_tensor
->
mlu_tensor
()));
}
graph
->
FuseOp
(
activation_op
);
CNML_CALL
(
cnmlDestroyBaseOp
(
&
activation_op
));
return
SUCCESS
;
}
...
...
lite/kernels/mlu/bridges/batch_norm_op.cc
浏览文件 @
91de3b45
...
...
@@ -81,6 +81,8 @@ int BatchNormConverter(void* ctx, OpLite* op, KernelBase* kernel) {
graph
->
BindConstData
(
mean_var_name
,
mean
);
graph
->
FuseOp
(
bn_op
);
CNML_CALL
(
cnmlDestroyBaseOp
(
&
bn_op
));
return
SUCCESS
;
}
...
...
lite/kernels/mlu/bridges/concat_op.cc
浏览文件 @
91de3b45
...
...
@@ -60,6 +60,7 @@ int ConcatConverter(void* ctx, OpLite* op, KernelBase* kernel) {
&
outputs
,
1
));
graph
->
FuseOp
(
concat_op
);
CNML_CALL
(
cnmlDestroyBaseOp
(
&
concat_op
));
return
SUCCESS
;
}
...
...
lite/kernels/mlu/bridges/conv_op.cc
浏览文件 @
91de3b45
...
...
@@ -278,6 +278,7 @@ int ConvConverter(void* ctx, OpLite* op, KernelBase* kernel) {
}
graph
->
BindConstData
(
filter_var_name
,
filter
);
graph
->
FuseOp
(
conv_op
);
CNML_CALL
(
cnmlDestroyBaseOp
(
&
conv_op
));
return
REBUILD_WHEN_SHAPE_CHANGED
;
}
...
...
lite/kernels/mlu/bridges/elementwise_ops.cc
浏览文件 @
91de3b45
...
...
@@ -117,6 +117,7 @@ int ElementwiseConverter(void* ctx, OpLite* op, KernelBase* kernel) {
}
graph
->
FuseOp
(
elementwise_op
);
CNML_CALL
(
cnmlDestroyBaseOp
(
&
elementwise_op
));
cnmlBaseOp_t
act_op
;
if
(
op_type
==
"fusion_elementwise_add_activation"
)
{
auto
mid_tensor
=
graph
->
GetNode
(
out_var_name
+
"_mid"
);
...
...
@@ -127,6 +128,7 @@ int ElementwiseConverter(void* ctx, OpLite* op, KernelBase* kernel) {
mid_tensor
->
mlu_tensor
(),
output_tensor
->
mlu_tensor
()));
graph
->
FuseOp
(
act_op
);
CNML_CALL
(
cnmlDestroyBaseOp
(
&
act_op
));
}
return
REBUILD_WHEN_SHAPE_CHANGED
;
}
...
...
lite/kernels/mlu/bridges/fc_op.cc
浏览文件 @
91de3b45
...
...
@@ -160,6 +160,7 @@ int FCConverter(void* ctx, OpLite* op, KernelBase* kernel) {
1
/
*
min_element
(
weight_scale
.
begin
(),
weight_scale
.
end
()));
graph
->
FuseOp
(
fc_op
);
CNML_CALL
(
cnmlDestroyBaseOp
(
&
fc_op
));
return
REBUILD_WHEN_SHAPE_CHANGED
;
}
...
...
lite/kernels/mlu/bridges/graph.h
浏览文件 @
91de3b45
...
...
@@ -49,9 +49,6 @@ class Graph {
~
Graph
()
{
FreeConstData
();
CNML_CALL
(
cnmlDestroyFusionOp
(
&
fusion_op_
));
for
(
auto
op
:
ops_
)
{
CNML_CALL
(
cnmlDestroyBaseOp
(
&
op
));
}
#if PRINT_HW_TIME
CNRT_CALL
(
cnrtDestroyNotifier
(
&
notifier_start_
));
CNRT_CALL
(
cnrtDestroyNotifier
(
&
notifier_end_
));
...
...
@@ -234,7 +231,6 @@ class Graph {
std
::
vector
<
void
*>
output_addrs_
;
std
::
vector
<
std
::
shared_ptr
<
MLUTensor
>>
input_tensors_
;
std
::
vector
<
std
::
shared_ptr
<
MLUTensor
>>
output_tensors_
;
std
::
vector
<
cnmlBaseOp_t
>
ops_
;
cnmlFusionOp_t
fusion_op_
;
std
::
vector
<
void
*>
const_data_storage_
;
#if PRINT_HW_TIME
...
...
lite/kernels/mlu/bridges/interpolate_op.cc
浏览文件 @
91de3b45
...
...
@@ -85,6 +85,7 @@ int InterpolateConverter(void* ctx, OpLite* op, KernelBase* kernel) {
nn_param
));
CNML_CALL
(
cnmlDestroyNearestNeighborOpParam
(
&
nn_param
));
graph
->
FuseOp
(
interp_op
);
CNML_CALL
(
cnmlDestroyBaseOp
(
&
interp_op
));
return
SUCCESS
;
}
...
...
lite/kernels/mlu/bridges/pool_op.cc
浏览文件 @
91de3b45
...
...
@@ -121,6 +121,7 @@ int PoolConverter(void* ctx, OpLite* op, KernelBase* kernel) {
output_tensor
->
mlu_tensor
()));
CNML_CALL
(
cnmlDestroyPoolOpParam
(
&
pool_param
));
graph
->
FuseOp
(
pool_op
);
CNML_CALL
(
cnmlDestroyBaseOp
(
&
pool_op
));
return
SUCCESS
;
}
...
...
lite/kernels/mlu/bridges/scale_op.cc
浏览文件 @
91de3b45
...
...
@@ -61,6 +61,7 @@ int ScaleConverter(void* ctx, OpLite* op, KernelBase* kernel) {
alpha_tensor
->
mlu_tensor
(),
beta_tensor
->
mlu_tensor
()));
graph
->
FuseOp
(
scale_op
);
CNML_CALL
(
cnmlDestroyBaseOp
(
&
scale_op
));
return
SUCCESS
;
}
...
...
lite/kernels/mlu/bridges/softmax_op.cc
浏览文件 @
91de3b45
...
...
@@ -55,6 +55,7 @@ int SoftmaxConverter(void* ctx, OpLite* op, KernelBase* kernel) {
graph
->
GetNode
(
x_var_name
)
->
mlu_tensor
(),
output_tensor
->
mlu_tensor
()));
graph
->
FuseOp
(
softmax_op
);
CNML_CALL
(
cnmlDestroyBaseOp
(
&
softmax_op
));
return
SUCCESS
;
}
...
...
lite/kernels/mlu/bridges/test_helper.cc
浏览文件 @
91de3b45
...
...
@@ -89,8 +89,9 @@ void LaunchOp(const std::shared_ptr<lite::OpLite> op,
}
graph
.
Compile
(
CNML_MLU270
,
1
);
graph
.
Compute
(
forward_param
,
queue_
);
CNRT_CALL
(
cnrtSyncQueue
(
queue_
));
for
(
auto
&
output_name
:
output_var_names
)
{
auto
output_tensor
=
scope
->
FindMutableTensor
(
output_name
);
Tensor
temp_out
;
...
...
lite/kernels/mlu/bridges/transpose_op.cc
浏览文件 @
91de3b45
...
...
@@ -61,7 +61,7 @@ int TransposeConverter(void* ctx, OpLite* op, KernelBase* kernel) {
CHECK
(
graph
->
HasNode
(
x_var_name
));
auto
input_tensor
=
graph
->
GetNode
(
x_var_name
);
cnmlBaseOp_t
transpose_op
_
{
nullptr
};
cnmlBaseOp_t
transpose_op
{
nullptr
};
cnmlNdTransposeOpParam_t
transpose_param
{
nullptr
};
...
...
@@ -69,12 +69,13 @@ int TransposeConverter(void* ctx, OpLite* op, KernelBase* kernel) {
&
transpose_param
,
axis_nhwc
.
data
(),
axis_nhwc
.
size
()));
// Use cnmlCreatexxxOpForward to create op.
CNML_CALL
(
cnmlCreateNdTransposeProOp
(
&
transpose_op
_
,
CNML_CALL
(
cnmlCreateNdTransposeProOp
(
&
transpose_op
,
input_tensor
->
mlu_tensor
(),
output_tensor
->
mlu_tensor
(),
transpose_param
));
graph
->
FuseOp
(
transpose_op_
);
graph
->
FuseOp
(
transpose_op
);
CNML_CALL
(
cnmlDestroyBaseOp
(
&
transpose_op
));
return
SUCCESS
;
}
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录