Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
PaddlePaddle
Paddle-Lite
提交
a5b73e42
P
Paddle-Lite
项目概览
PaddlePaddle
/
Paddle-Lite
通知
331
Star
4
Fork
1
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
271
列表
看板
标记
里程碑
合并请求
78
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle-Lite
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
271
Issue
271
列表
看板
标记
里程碑
合并请求
78
合并请求
78
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
a5b73e42
编写于
4月 10, 2020
作者:
B
baolei.an
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
[LITE][BM] fix reshape infer shape issue, test=develop
上级
40a31442
变更
20
隐藏空白更改
内联
并排
Showing
20 changed file
with
470 addition
and
34 deletion
+470
-34
lite/api/CMakeLists.txt
lite/api/CMakeLists.txt
+5
-1
lite/api/_paddle_use_ops.h
lite/api/_paddle_use_ops.h
+1
-0
lite/api/test_yolov3_lite_bm.cc
lite/api/test_yolov3_lite_bm.cc
+100
-0
lite/kernels/bm/bridges/CMakeLists.txt
lite/kernels/bm/bridges/CMakeLists.txt
+6
-0
lite/kernels/bm/bridges/assign_value_op.cc
lite/kernels/bm/bridges/assign_value_op.cc
+17
-3
lite/kernels/bm/bridges/conv_op.cc
lite/kernels/bm/bridges/conv_op.cc
+0
-1
lite/kernels/bm/bridges/conv_transpose_op.cc
lite/kernels/bm/bridges/conv_transpose_op.cc
+3
-0
lite/kernels/bm/bridges/elementwise_ops.cc
lite/kernels/bm/bridges/elementwise_ops.cc
+0
-1
lite/kernels/bm/bridges/interpolate_op.cc
lite/kernels/bm/bridges/interpolate_op.cc
+1
-0
lite/kernels/bm/bridges/matmul_op.cc
lite/kernels/bm/bridges/matmul_op.cc
+90
-0
lite/kernels/bm/bridges/multiclass_nms_op.cc
lite/kernels/bm/bridges/multiclass_nms_op.cc
+21
-12
lite/kernels/bm/bridges/paddle_use_bridges.h
lite/kernels/bm/bridges/paddle_use_bridges.h
+5
-0
lite/kernels/bm/bridges/reduce_full_op.cc
lite/kernels/bm/bridges/reduce_full_op.cc
+5
-0
lite/kernels/bm/bridges/shape_op.cc
lite/kernels/bm/bridges/shape_op.cc
+61
-0
lite/kernels/bm/bridges/split_op.cc
lite/kernels/bm/bridges/split_op.cc
+100
-0
lite/kernels/bm/bridges/transpose_op.cc
lite/kernels/bm/bridges/transpose_op.cc
+26
-3
lite/kernels/bm/subgraph_compute.cc
lite/kernels/bm/subgraph_compute.cc
+25
-12
lite/kernels/bm/subgraph_compute.h
lite/kernels/bm/subgraph_compute.h
+1
-0
lite/operators/conv_transpose_op.cc
lite/operators/conv_transpose_op.cc
+2
-0
lite/operators/reshape_op.cc
lite/operators/reshape_op.cc
+1
-1
未找到文件。
lite/api/CMakeLists.txt
浏览文件 @
a5b73e42
...
...
@@ -190,7 +190,11 @@ if(WITH_TESTING)
lite_cc_test
(
test_classify_lite_bm SRCS test_classify_lite_bm.cc
DEPS mir_passes lite_api_test_helper paddle_api_full paddle_api_light gflags utils
${
ops
}
${
host_kernels
}
${
bm_kernels
}
${
bm_bridges
}
ARGS --model_dir=
${
LITE_MODEL_DIR
}
/resnet50
)
ARGS --model_dir=
${
LITE_MODEL_DIR
}
/classify
)
lite_cc_test
(
test_yolov3_lite_bm SRCS test_yolov3_lite_bm.cc
DEPS mir_passes lite_api_test_helper paddle_api_full paddle_api_light gflags utils
${
ops
}
${
host_kernels
}
${
bm_kernels
}
${
bm_bridges
}
ARGS --model_dir=
${
LITE_MODEL_DIR
}
/yolov3
)
endif
()
endif
()
endif
()
...
...
lite/api/_paddle_use_ops.h
浏览文件 @
a5b73e42
...
...
@@ -63,6 +63,7 @@ USE_LITE_OP(swish)
USE_LITE_OP
(
log
)
USE_LITE_OP
(
exp
)
USE_LITE_OP
(
conv2d_transpose
)
USE_LITE_OP
(
depthwise_conv2d_transpose
)
USE_LITE_OP
(
negative
)
USE_LITE_OP
(
pad2d
)
USE_LITE_OP
(
power
)
...
...
lite/api/test_yolov3_lite_bm.cc
0 → 100644
浏览文件 @
a5b73e42
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include <gflags/gflags.h>
#include <gtest/gtest.h>
#include <fstream>
#include <vector>
#include "lite/api/cxx_api.h"
#include "lite/api/paddle_use_kernels.h"
#include "lite/api/paddle_use_ops.h"
#include "lite/api/paddle_use_passes.h"
#include "lite/api/test_helper.h"
#include "lite/core/op_registry.h"
// gflags string flag `input_img_txt_path` (default: empty string).
// TestModel reads the input tensor values from this text file when the flag
// is non-empty; otherwise it fills the input tensor with ones.
DEFINE_string
(
input_img_txt_path
,
""
,
"if set input_img_txt_path, read the img filename as input."
);
namespace
paddle
{
namespace
lite
{
void
TestModel
(
const
std
::
vector
<
Place
>&
valid_places
)
{
lite
::
Predictor
predictor
;
std
::
vector
<
std
::
string
>
passes
;
predictor
.
Build
(
FLAGS_model_dir
,
FLAGS_model_dir
+
"/model"
,
FLAGS_model_dir
+
"/params"
,
valid_places
,
passes
);
auto
*
input_tensor
=
predictor
.
GetInput
(
0
);
input_tensor
->
Resize
(
DDim
(
std
::
vector
<
DDim
::
value_type
>
({
1
,
3
,
FLAGS_im_height
,
FLAGS_im_width
})));
auto
*
data
=
input_tensor
->
mutable_data
<
float
>
();
auto
item_size
=
input_tensor
->
dims
().
production
();
if
(
FLAGS_input_img_txt_path
.
empty
())
{
for
(
int
i
=
0
;
i
<
item_size
;
i
++
)
{
data
[
i
]
=
1
;
}
}
else
{
std
::
fstream
fs
(
FLAGS_input_img_txt_path
,
std
::
ios
::
in
);
if
(
!
fs
.
is_open
())
{
LOG
(
FATAL
)
<<
"open input_img_txt error."
;
}
for
(
int
i
=
0
;
i
<
item_size
;
i
++
)
{
fs
>>
data
[
i
];
}
}
auto
*
image_tensor
=
predictor
.
GetInput
(
1
);
image_tensor
->
Resize
(
DDim
(
std
::
vector
<
DDim
::
value_type
>
({
1
,
2
})));
data
=
image_tensor
->
mutable_data
<
float
>
();
data
[
0
]
=
FLAGS_im_height
;
data
[
1
]
=
FLAGS_im_width
;
for
(
int
i
=
0
;
i
<
FLAGS_warmup
;
++
i
)
{
predictor
.
Run
();
}
auto
start
=
GetCurrentUS
();
for
(
int
i
=
0
;
i
<
FLAGS_repeats
;
++
i
)
{
predictor
.
Run
();
}
LOG
(
INFO
)
<<
"================== Speed Report ==================="
;
LOG
(
INFO
)
<<
"Model: "
<<
FLAGS_model_dir
<<
", threads num "
<<
FLAGS_threads
<<
", warmup: "
<<
FLAGS_warmup
<<
", repeats: "
<<
FLAGS_repeats
<<
", spend "
<<
(
GetCurrentUS
()
-
start
)
/
FLAGS_repeats
/
1000.0
<<
" ms in average."
;
auto
out
=
predictor
.
GetOutputs
();
FILE
*
fp
=
fopen
(
"result.txt"
,
"wb"
);
for
(
int
i
=
0
;
i
<
out
.
size
();
i
++
)
{
auto
*
out_data
=
out
[
i
]
->
data
<
float
>
();
for
(
int
j
=
0
;
j
<
out
[
i
]
->
numel
();
j
++
)
{
fprintf
(
fp
,
"%f
\n
"
,
out_data
[
j
]);
}
}
fclose
(
fp
);
}
// Runs TestModel with two candidate places, listed BM first and X86 second,
// both with float precision.
TEST(Yolov3, test_bm) {
  const Place bm_place{TARGET(kBM), PRECISION(kFloat)};
  const Place x86_place{TARGET(kX86), PRECISION(kFloat)};

  std::vector<Place> places;
  places.push_back(bm_place);
  places.push_back(x86_place);

  TestModel(places);
}
}
// namespace lite
}
// namespace paddle
lite/kernels/bm/bridges/CMakeLists.txt
浏览文件 @
a5b73e42
...
...
@@ -32,6 +32,9 @@ lite_cc_library(subgraph_bridge_squeeze_op_bm SRCS squeeze_op.cc DEPS ${bm_subgr
lite_cc_library
(
subgraph_bridge_cast_op_bm SRCS cast_op.cc DEPS
${
bm_subgraph_bridge_deps
}
)
lite_cc_library
(
subgraph_bridge_fill_constant_op_bm SRCS fill_constant_op.cc DEPS
${
bm_subgraph_bridge_deps
}
)
lite_cc_library
(
subgraph_bridge_assign_value_op_bm SRCS assign_value_op.cc DEPS
${
bm_subgraph_bridge_deps
}
)
lite_cc_library
(
subgraph_bridge_shape_op_bm SRCS shape_op.cc DEPS
${
bm_subgraph_bridge_deps
}
)
lite_cc_library
(
subgraph_bridge_split_op_bm SRCS split_op.cc DEPS
${
bm_subgraph_bridge_deps
}
)
lite_cc_library
(
subgraph_bridge_matmul_op_bm SRCS matmul_op.cc DEPS
${
bm_subgraph_bridge_deps
}
)
set
(
bm_subgraph_bridges
subgraph_bridge_registry
...
...
@@ -62,4 +65,7 @@ set(bm_subgraph_bridges
subgraph_bridge_cast_op_bm
subgraph_bridge_fill_constant_op_bm
subgraph_bridge_assign_value_op_bm
subgraph_bridge_shape_op_bm
subgraph_bridge_split_op_bm
subgraph_bridge_matmul_op_bm
CACHE INTERNAL
"bm_subgraph_bridges"
)
lite/kernels/bm/bridges/assign_value_op.cc
浏览文件 @
a5b73e42
...
...
@@ -40,17 +40,31 @@ int AssignValueConverter(void* ctx, OpLite* op, KernelBase* kernel) {
i_output_shape_data
[
i
]
=
static_cast
<
int
>
(
output_dims
[
i
]);
buffer_size
*=
i_output_shape_data
[
i
];
}
auto
fp32_values
=
op_info
->
GetAttr
<
std
::
vector
<
float
>>
(
"fp32_values"
);
std
::
vector
<
float
>
fp32_values
;
std
::
vector
<
int
>
int32_values
;
float
*
assign_data
=
reinterpret_cast
<
float
*>
(
malloc
(
buffer_size
*
sizeof
(
float
)));
CHECK
(
assign_data
!=
nullptr
);
CHECK_EQ
(
buffer_size
,
fp32_values
.
size
());
bm_data_type_t
data_type
=
static_cast
<
bm_data_type_t
>
(
DTYPE_FP32
);
fp32_values
=
op_info
->
GetAttr
<
std
::
vector
<
float
>>
(
"fp32_values"
);
if
(
0
!=
fp32_values
.
size
())
{
for
(
int
i
=
0
;
i
<
fp32_values
.
size
();
i
++
)
{
assign_data
[
i
]
=
fp32_values
[
i
];
}
}
else
{
int32_values
=
op_info
->
GetAttr
<
std
::
vector
<
int
>>
(
"int32_values"
);
data_type
=
static_cast
<
bm_data_type_t
>
(
DTYPE_INT32
);
CHECK_EQ
(
buffer_size
,
int32_values
.
size
());
for
(
int
i
=
0
;
i
<
int32_values
.
size
();
i
++
)
{
assign_data
[
i
]
=
int32_values
[
i
];
}
}
bm_add_const_tensor
(
graph
->
GetCompilerHandle
(),
static_cast
<
const
char
*>
(
output_var_name
.
c_str
()),
const_cast
<
const
int
*>
(
i_output_shape_data
.
data
()),
output_dims
.
size
(),
static_cast
<
bm_data_type_t
>
(
DTYPE_FP32
)
,
data_type
,
reinterpret_cast
<
const
void
*>
(
assign_data
));
graph
->
AddNode
(
output_var_name
);
return
SUCCESS
;
...
...
lite/kernels/bm/bridges/conv_op.cc
浏览文件 @
a5b73e42
...
...
@@ -91,7 +91,6 @@ int ConvConverter(void* ctx, OpLite* op, KernelBase* kernel) {
dilations
[
1
],
static_cast
<
int
>
(
has_bias
));
graph
->
AddNode
(
output_var_name
);
LOG
(
INFO
)
<<
output_var_name
<<
input_dims
<<
" "
<<
output_dims
;
return
SUCCESS
;
}
...
...
lite/kernels/bm/bridges/conv_transpose_op.cc
浏览文件 @
a5b73e42
...
...
@@ -108,3 +108,6 @@ int ConvTransposeConverter(void* ctx, OpLite* op, KernelBase* kernel) {
REGISTER_SUBGRAPH_BRIDGE
(
conv2d_transpose
,
kBM
,
paddle
::
lite
::
subgraph
::
bm
::
ConvTransposeConverter
);
REGISTER_SUBGRAPH_BRIDGE
(
depthwise_conv2d_transpose
,
kBM
,
paddle
::
lite
::
subgraph
::
bm
::
ConvTransposeConverter
);
lite/kernels/bm/bridges/elementwise_ops.cc
浏览文件 @
a5b73e42
...
...
@@ -65,7 +65,6 @@ int ElementwiseConverter(void* ctx, OpLite* op, KernelBase* kernel) {
auto
output_dims
=
output
->
dims
();
const
int64_t
*
output_shape_data
=
const_cast
<
const
int64_t
*>
(
&
output_dims
.
data
()[
0
]);
LOG
(
INFO
)
<<
x_dims
<<
" "
<<
output_dims
;
std
::
vector
<
int32_t
>
i_output_shape_data
(
output_dims
.
size
());
for
(
size_t
i
=
0
;
i
<
output_dims
.
size
();
i
++
)
{
i_output_shape_data
[
i
]
=
static_cast
<
int
>
(
output_shape_data
[
i
]);
...
...
lite/kernels/bm/bridges/interpolate_op.cc
浏览文件 @
a5b73e42
...
...
@@ -54,6 +54,7 @@ int InterpolateConverter(void* ctx, OpLite* op, KernelBase* kernel) {
}
else
{
type
=
0
;
}
is_int
=
false
;
if
(
type
==
2
&&
is_int
)
{
add_upsample_layer
(
graph
->
GetCompilerHandle
(),
const_cast
<
const
int
*>
(
&
i_x_shape_data
[
0
]),
...
...
lite/kernels/bm/bridges/matmul_op.cc
0 → 100644
浏览文件 @
a5b73e42
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include <bmcompiler_if.h>
#include <bmcompiler_op_code.h>
#include "lite/kernels/bm/bridges/graph.h"
#include "lite/kernels/bm/bridges/utility.h"
#include "lite/kernels/npu/bridges/registry.h"
namespace
paddle
{
namespace
lite
{
namespace
subgraph
{
namespace
bm
{
// Subgraph bridge entry point for the "matmul" op on the BM target.
//
// Looks up the X and Y input tensors in the op's scope, converts their
// dimensions to 32-bit ints, reads the "transpose_X", "transpose_Y" and
// "alpha" attributes, logs them, and registers the output variable name on
// the graph. Returns SUCCESS.
//
// NOTE(review): no BM compiler layer is added here -- the only layer calls
// sit inside the `#if 0` region below, and that region refers to `scale`,
// `bias` and `unique_op_scale_name`, none of which are declared in this
// function. The output node is therefore registered without a producing
// layer; confirm whether this is an intentional placeholder.
int MatMulConverter(void* ctx, OpLite* op, KernelBase* kernel) {
  CHECK(ctx != nullptr);
  CHECK(op != nullptr);

  auto graph = static_cast<Graph*>(ctx);
  auto scope = op->scope();
  auto op_info = op->op_info();
  auto op_type = op_info->Type();
  auto unique_op_name = lite::subgraph::bm::UniqueName(op_type);

  // First operand: tensor and its dims narrowed to int32.
  auto x_var_name = op_info->Input("X").front();
  auto x = scope->FindVar(x_var_name)->GetMutable<lite::Tensor>();
  auto x_dims = x->dims();
  std::vector<int32_t> i_x_shape_data(x_dims.size());
  for (size_t d = 0; d < x_dims.size(); ++d) {
    i_x_shape_data[d] = static_cast<int>(x_dims[d]);
  }

  // Second operand: tensor and its dims narrowed to int32.
  auto y_var_name = op_info->Input("Y").front();
  auto y = scope->FindVar(y_var_name)->GetMutable<lite::Tensor>();
  auto y_dims = y->dims();
  std::vector<int32_t> i_y_shape_data(y_dims.size());
  for (size_t d = 0; d < y_dims.size(); ++d) {
    i_y_shape_data[d] = static_cast<int>(y_dims[d]);
  }

  // Result variable and op attributes.
  auto output_var_name = op_info->Output("Out").front();
  const bool transpose_x = op_info->GetAttr<bool>("transpose_X");
  const bool transpose_y = op_info->GetAttr<bool>("transpose_Y");
  const float alpha = op_info->GetAttr<float>("alpha");

  LOG(INFO) << x_dims << " " << y_dims << " " << alpha << " " << transpose_x
            << " " << transpose_y;

#if 0
  add_const_binary_layer(graph->GetCompilerHandle(),
                         static_cast<const char*>(x_var_name.c_str()),
                         const_cast<const int*>(&i_x_shape_data[0]),
                         x_dims.size(),
                         scale,
                         static_cast<const char*>(unique_op_scale_name.c_str()),
                         BINARY_MUL,
                         0);
  add_const_binary_layer(graph->GetCompilerHandle(),
                         static_cast<const char*>(unique_op_scale_name.c_str()),
                         const_cast<const int*>(&i_x_shape_data[0]),
                         x_dims.size(),
                         bias,
                         static_cast<const char*>(output_var_name.c_str()),
                         BINARY_ADD,
                         0);
#endif

  graph->AddNode(output_var_name);
  return SUCCESS;
}
}
// namespace bm
}
// namespace subgraph
}
// namespace lite
}
// namespace paddle
// Registers MatMulConverter as the subgraph bridge for op type `matmul` on
// the kBM target.
REGISTER_SUBGRAPH_BRIDGE
(
matmul
,
kBM
,
paddle
::
lite
::
subgraph
::
bm
::
MatMulConverter
);
lite/kernels/bm/bridges/multiclass_nms_op.cc
浏览文件 @
a5b73e42
...
...
@@ -45,14 +45,6 @@ int MultiClassNMSConverter(void* ctx, OpLite* op, KernelBase* kernel) {
i_score_shape_data
[
i
]
=
static_cast
<
int32_t
>
(
score_dims
[
i
]);
}
auto
out_var_name
=
op_info
->
Output
(
"Out"
).
front
();
auto
out
=
scope
->
FindVar
(
out_var_name
)
->
GetMutable
<
lite
::
Tensor
>
();
auto
out_dims
=
out
->
dims
();
std
::
vector
<
int32_t
>
i_out_shape_data
(
out_dims
.
size
());
for
(
size_t
i
=
0
;
i
<
out_dims
.
size
();
i
++
)
{
i_out_shape_data
[
i
]
=
static_cast
<
int32_t
>
(
out_dims
[
i
]);
}
auto
background_label
=
op_info
->
GetAttr
<
int
>
(
"background_label"
);
auto
keep_top_k
=
op_info
->
GetAttr
<
int
>
(
"keep_top_k"
);
auto
nms_top_k
=
op_info
->
GetAttr
<
int
>
(
"nms_top_k"
);
...
...
@@ -64,6 +56,26 @@ int MultiClassNMSConverter(void* ctx, OpLite* op, KernelBase* kernel) {
normalized
=
op_info
->
GetAttr
<
bool
>
(
"normalized"
);
}
auto
out_var_name
=
op_info
->
Output
(
"Out"
).
front
();
auto
out
=
scope
->
FindVar
(
out_var_name
)
->
GetMutable
<
lite
::
Tensor
>
();
std
::
vector
<
int64_t
>
vec_out_dim
(
score_dims
.
size
());
if
(
3
==
score_dims
.
size
())
{
vec_out_dim
[
0
]
=
score_dims
[
0
];
// batch_size
vec_out_dim
[
1
]
=
keep_top_k
;
vec_out_dim
[
2
]
=
6
;
}
else
{
vec_out_dim
[
0
]
=
keep_top_k
;
vec_out_dim
[
1
]
=
6
;
}
DDimLite
out_dims
(
vec_out_dim
);
out
->
Resize
(
out_dims
);
out
->
mutable_data
<
float
>
();
std
::
vector
<
int32_t
>
i_out_shape_data
(
out_dims
.
size
());
for
(
size_t
i
=
0
;
i
<
out_dims
.
size
();
i
++
)
{
i_out_shape_data
[
i
]
=
static_cast
<
int32_t
>
(
out_dims
[
i
]);
}
user_cpu_param_t
bm_param
;
bm_param
.
op_type
=
USER_PADDLE_MULTICLASS_NMS
;
bm_param
.
u
.
multiclass_nms_param
.
background_label
=
background_label
;
...
...
@@ -88,12 +100,9 @@ int MultiClassNMSConverter(void* ctx, OpLite* op, KernelBase* kernel) {
int32_t
*
out_shape
[
1
];
int32_t
out_dim
[
1
];
const
char
*
out_name
[
1
];
i_out_shape_data
[
0
]
=
keep_top_k
;
i_out_shape_data
[
1
]
=
6
;
out_shape
[
0
]
=
&
i_out_shape_data
[
0
];
out_dim
[
0
]
=
2
;
out_dim
[
0
]
=
out_dims
.
size
()
;
out_name
[
0
]
=
static_cast
<
const
char
*>
(
out_var_name
.
c_str
());
add_user_cpu_layer
(
graph
->
GetCompilerHandle
(),
input_num
,
in_shape
,
...
...
lite/kernels/bm/bridges/paddle_use_bridges.h
浏览文件 @
a5b73e42
...
...
@@ -48,8 +48,13 @@ USE_SUBGRAPH_BRIDGE(slice, kBM);
USE_SUBGRAPH_BRIDGE
(
conv2d_transpose
,
kBM
);
USE_SUBGRAPH_BRIDGE
(
reduce_sum
,
kBM
);
USE_SUBGRAPH_BRIDGE
(
reduce_mean
,
kBM
);
USE_SUBGRAPH_BRIDGE
(
reduce_max
,
kBM
);
USE_SUBGRAPH_BRIDGE
(
squeeze
,
kBM
);
USE_SUBGRAPH_BRIDGE
(
squeeze2
,
kBM
);
USE_SUBGRAPH_BRIDGE
(
cast
,
kBM
);
USE_SUBGRAPH_BRIDGE
(
fill_constant
,
kBM
);
USE_SUBGRAPH_BRIDGE
(
assign_value
,
kBM
);
USE_SUBGRAPH_BRIDGE
(
depthwise_conv2d_transpose
,
kBM
);
USE_SUBGRAPH_BRIDGE
(
shape
,
kBM
);
USE_SUBGRAPH_BRIDGE
(
split
,
kBM
);
USE_SUBGRAPH_BRIDGE
(
matmul
,
kBM
);
lite/kernels/bm/bridges/reduce_full_op.cc
浏览文件 @
a5b73e42
...
...
@@ -49,6 +49,8 @@ int ReduceFullConverter(void* ctx, OpLite* op, KernelBase* kernel) {
op_code
=
REDUCE_SUM
;
}
else
if
(
op_type
==
"reduce_mean"
)
{
op_code
=
REDUCE_MEAN
;
}
else
if
(
op_type
==
"reduce_max"
)
{
op_code
=
REDUCE_MAX
;
}
add_reduce_full_layer
(
graph
->
GetCompilerHandle
(),
...
...
@@ -75,3 +77,6 @@ REGISTER_SUBGRAPH_BRIDGE(reduce_sum,
REGISTER_SUBGRAPH_BRIDGE
(
reduce_mean
,
kBM
,
paddle
::
lite
::
subgraph
::
bm
::
ReduceFullConverter
);
REGISTER_SUBGRAPH_BRIDGE
(
reduce_max
,
kBM
,
paddle
::
lite
::
subgraph
::
bm
::
ReduceFullConverter
);
lite/kernels/bm/bridges/shape_op.cc
0 → 100644
浏览文件 @
a5b73e42
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include <bmcompiler_defs.h>
#include <bmcompiler_if.h>
#include <bmcompiler_if_lite.h>
#include "lite/kernels/bm/bridges/graph.h"
#include "lite/kernels/bm/bridges/utility.h"
#include "lite/kernels/npu/bridges/registry.h"
namespace
paddle
{
namespace
lite
{
namespace
subgraph
{
namespace
bm
{
// Subgraph bridge entry point for the "shape" op on the BM target.
//
// Reads the "Input" tensor's dimensions from the op's scope, narrows them to
// int32, passes them together with the input and output variable names to
// add_shape_ref_layer, then registers the output variable name on the graph.
// Returns SUCCESS.
int ShapeConverter(void* ctx, OpLite* op, KernelBase* kernel) {
  CHECK(ctx != nullptr);
  CHECK(op != nullptr);

  auto graph = static_cast<Graph*>(ctx);
  auto scope = op->scope();
  auto op_info = op->op_info();
  auto op_type = op_info->Type();

  // Source tensor and its dimensions.
  auto x_var_name = op_info->Input("Input").front();
  auto x = scope->FindVar(x_var_name)->GetMutable<lite::Tensor>();
  auto x_dims = x->dims();

  // Destination variable name.
  auto output_var_name = op_info->Output("Out").front();

  // The layer call below takes the shape as a plain int array.
  std::vector<int32_t> i_x_shape_data;
  i_x_shape_data.reserve(x_dims.size());
  for (size_t d = 0; d < x_dims.size(); ++d) {
    i_x_shape_data.push_back(static_cast<int32_t>(x_dims[d]));
  }

  const char* in_name = x_var_name.c_str();
  const char* out_name = output_var_name.c_str();
  const int* in_shape = i_x_shape_data.data();
  add_shape_ref_layer(
      graph->GetCompilerHandle(), in_name, in_shape, x_dims.size(), out_name);

  graph->AddNode(output_var_name);
  return SUCCESS;
}
}
// namespace bm
}
// namespace subgraph
}
// namespace lite
}
// namespace paddle
// Registers ShapeConverter as the subgraph bridge for op type `shape` on the
// kBM target.
REGISTER_SUBGRAPH_BRIDGE
(
shape
,
kBM
,
paddle
::
lite
::
subgraph
::
bm
::
ShapeConverter
);
lite/kernels/bm/bridges/split_op.cc
0 → 100755
浏览文件 @
a5b73e42
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include <bmcompiler_if.h>
#include <bmcompiler_op_code.h>
#include "lite/kernels/bm/bridges/graph.h"
#include "lite/kernels/bm/bridges/utility.h"
#include "lite/kernels/npu/bridges/registry.h"
namespace
paddle
{
namespace
lite
{
namespace
subgraph
{
namespace
bm
{
// Subgraph bridge entry point for the "split" op on the BM target.
//
// Reads the "X" input tensor and the "axis", "num" and "sections" attributes,
// collects the shape, rank and name of each "Out" tensor, and forwards
// everything to add_tf_split_layer. Each output variable name is then
// registered on the graph. Returns SUCCESS.
//
// Attribute handling:
//   * num == 0        -> the number of outputs is taken from sections.size().
//   * sections empty  -> the split axis is divided into `num` equal parts.
// The function CHECK-fails when the resulting output count is not positive,
// exceeds the number of "Out" names, or exceeds sections.size(), because each
// of those would make the code below (or the layer call) read out of bounds.
int SplitConverter(void* ctx, OpLite* op, KernelBase* kernel) {
  CHECK(ctx != nullptr);
  CHECK(op != nullptr);
  auto graph = static_cast<Graph*>(ctx);
  auto scope = op->scope();
  auto op_info = op->op_info();

  // input
  auto x_var_name = op_info->Input("X").front();
  auto x = scope->FindVar(x_var_name)->GetMutable<lite::Tensor>();
  auto x_dims = x->dims();
  std::vector<int32_t> i_x_shape_data(x_dims.size());
  for (size_t i = 0; i < x_dims.size(); i++) {
    i_x_shape_data[i] = static_cast<int>(x_dims[i]);
  }

  // output
  auto output_names = op_info->Output("Out");
  auto axis = op_info->GetAttr<int>("axis");
  auto num = op_info->GetAttr<int>("num");
  auto sections = op_info->GetAttr<std::vector<int>>("sections");
  if (0 == num) {
    num = static_cast<int>(sections.size());
  }
  CHECK(num > 0);
  CHECK(static_cast<size_t>(num) <= output_names.size());

  if (sections.empty()) {
    // Only the local dimension lookup uses the wrapped index; the layer call
    // below still receives the attribute value unchanged.
    const int rank = static_cast<int>(x_dims.size());
    const int axis_index = axis < 0 ? axis + rank : axis;
    CHECK(axis_index >= 0 && axis_index < rank);
    sections.assign(num, static_cast<int>(x_dims[axis_index] / num));
  }
  CHECK(sections.size() >= static_cast<size_t>(num));

  // Vectors own the per-output shape storage, so nothing has to be released
  // by hand. `shape` holds the raw row pointers the layer call expects.
  std::vector<std::vector<int>> out_shapes(num);
  std::vector<int*> shape(num);
  std::vector<int> dim(num);
  std::vector<const char*> name(num);
  for (int i = 0; i < num; i++) {
    auto out = scope->FindVar(output_names[i])->GetMutable<lite::Tensor>();
    // output_names outlives the layer call, so the c_str() pointers stay
    // valid.
    name[i] = output_names[i].c_str();
    auto out_dims = out->dims();
    out_shapes[i].resize(out_dims.size());
    for (size_t j = 0; j < out_dims.size(); j++) {
      out_shapes[i][j] = static_cast<int>(out_dims[j]);
    }
    shape[i] = out_shapes[i].data();
    dim[i] = static_cast<int>(out_dims.size());
  }

  add_tf_split_layer(graph->GetCompilerHandle(),
                     const_cast<const int*>(i_x_shape_data.data()),
                     x_dims.size(),
                     static_cast<const char*>(x_var_name.c_str()),
                     num,
                     shape.data(),
                     dim.data(),
                     name.data(),
                     x_dims.size(),
                     axis,
                     const_cast<const int*>(sections.data()),
                     num);

  for (int i = 0; i < num; i++) {
    graph->AddNode(output_names[i]);
  }
  return SUCCESS;
}
}
// namespace bm
}
// namespace subgraph
}
// namespace lite
}
// namespace paddle
// Registers SplitConverter as the subgraph bridge for op type `split` on the
// kBM target.
REGISTER_SUBGRAPH_BRIDGE
(
split
,
kBM
,
paddle
::
lite
::
subgraph
::
bm
::
SplitConverter
);
lite/kernels/bm/bridges/transpose_op.cc
浏览文件 @
a5b73e42
...
...
@@ -15,6 +15,7 @@
#include <bmcompiler_defs.h>
#include <bmcompiler_if.h>
#include "lite/kernels/bm/bridges/graph.h"
#include "lite/kernels/bm/bridges/utility.h"
#include "lite/kernels/npu/bridges/registry.h"
namespace
paddle
{
...
...
@@ -39,11 +40,20 @@ int TransposeConverter(void* ctx, OpLite* op, KernelBase* kernel) {
const
int64_t
*
output_shape_data
=
const_cast
<
const
int64_t
*>
(
&
output_dims
.
data
()[
0
]);
std
::
vector
<
int32_t
>
i_x_shape_data
(
x_dims
.
size
());
std
::
vector
<
int32_t
>
i_output_shape_data
(
output
_dims
.
size
());
std
::
vector
<
int32_t
>
i_output_shape_data
(
x
_dims
.
size
());
for
(
size_t
i
=
0
;
i
<
x_dims
.
size
();
i
++
)
{
i_x_shape_data
[
i
]
=
static_cast
<
int
>
(
x_shape_data
[
i
]);
}
for
(
size_t
i
=
0
;
i
<
output_dims
.
size
();
i
++
)
{
auto
out_name
=
output_var_name
;
if
(
x_dims
.
size
()
>
output_dims
.
size
())
{
for
(
size_t
i
=
0
;
i
<
(
x_dims
.
size
()
-
output_dims
.
size
());
i
++
)
{
i_output_shape_data
[
i
]
=
1
;
}
out_name
=
lite
::
subgraph
::
bm
::
UniqueName
(
op_type
);
}
for
(
size_t
i
=
(
x_dims
.
size
()
-
output_dims
.
size
());
i
<
output_dims
.
size
();
i
++
)
{
i_output_shape_data
[
i
]
=
static_cast
<
int
>
(
output_shape_data
[
i
]);
}
auto
axis
=
op_info
->
GetAttr
<
std
::
vector
<
int
>>
(
"axis"
);
...
...
@@ -53,9 +63,22 @@ int TransposeConverter(void* ctx, OpLite* op, KernelBase* kernel) {
const_cast
<
const
int
*>
(
&
i_x_shape_data
[
0
]),
x_dims
.
size
(),
DTYPE_FP32
,
static_cast
<
const
char
*>
(
out
put_var
_name
.
c_str
()),
static_cast
<
const
char
*>
(
out_name
.
c_str
()),
NULL
,
const_cast
<
const
int
*>
(
&
axis
[
0
]));
if
(
x_dims
.
size
()
>
output_dims
.
size
())
{
std
::
vector
<
int32_t
>
i_real_output_shape_data
(
output_dims
.
size
());
for
(
size_t
i
=
0
;
i
<
output_dims
.
size
();
i
++
)
{
i_real_output_shape_data
[
i
]
=
static_cast
<
int
>
(
output_shape_data
[
i
]);
}
add_reshape_layer_v2
(
graph
->
GetCompilerHandle
(),
static_cast
<
const
char
*>
(
out_name
.
c_str
()),
const_cast
<
const
int
*>
(
&
i_output_shape_data
[
0
]),
i_output_shape_data
.
size
(),
static_cast
<
const
char
*>
(
output_var_name
.
c_str
()),
const_cast
<
const
int
*>
(
&
i_real_output_shape_data
[
0
]),
output_dims
.
size
());
}
graph
->
AddNode
(
output_var_name
);
return
SUCCESS
;
}
...
...
lite/kernels/bm/subgraph_compute.cc
浏览文件 @
a5b73e42
...
...
@@ -88,18 +88,27 @@ int SubgraphEngine::BuildDeviceProgram() {
// output
origin_odims_
.
resize
(
output_names_
.
size
());
origin_otensors_
.
resize
(
output_names_
.
size
());
device_outputs_
.
resize
(
output_names_
.
size
());
for
(
size_t
i
=
0
;
i
<
output_names_
.
size
();
i
++
)
{
origin_otensors_
[
i
]
=
scope_
->
FindMutableTensor
(
net_info_
->
output_names
[
i
]);
CHECK
(
origin_otensors_
[
i
]);
origin_odims_
[
i
]
=
origin_otensors_
[
i
]
->
dims
();
origin_otensors_
[
i
]
->
mutable_data
<
float
>
();
device_outputs_
.
resize
(
net_info_
->
output_num
);
int
out_index
=
0
;
for
(
int
i
=
0
;
i
<
output_names_
.
size
();
i
++
)
{
outname_map_
.
insert
(
std
::
pair
<
std
::
string
,
int
>
(
output_names_
[
i
],
i
));
}
for
(
int
i
=
0
;
i
<
net_info_
->
output_num
;
i
++
)
{
Tensor
*
t_cur
=
scope_
->
FindMutableTensor
(
net_info_
->
output_names
[
i
]);
CHECK
(
t_cur
!=
nullptr
);
bm_device_mem_t
*
p_mem
=
static_cast
<
bm_device_mem_t
*>
(
malloc
(
sizeof
(
bm_device_mem_t
)));
CHECK
(
p_mem
!=
nullptr
);
CHECK_EQ
(
bm_malloc_device_byte
(
bm_hd_
,
p_mem
,
origin_otensors_
[
i
]
->
memory_size
()),
BM_SUCCESS
);
if
(
outname_map_
.
find
(
net_info_
->
output_names
[
i
])
!=
outname_map_
.
end
())
{
origin_otensors_
[
out_index
]
=
t_cur
;
origin_odims_
[
out_index
]
=
origin_otensors_
[
out_index
]
->
dims
();
origin_otensors_
[
out_index
]
->
mutable_data
<
float
>
();
out_index
+=
1
;
}
CHECK_EQ
(
bm_malloc_device_byte
(
bm_hd_
,
p_mem
,
net_info_
->
max_output_bytes
[
i
]),
BM_SUCCESS
);
bmrt_tensor_with_device
(
&
device_outputs_
[
i
],
*
p_mem
,
net_info_
->
output_dtypes
[
i
],
...
...
@@ -123,10 +132,14 @@ int SubgraphEngine::LaunchDeviceProgram() {
true
,
false
);
bm_thread_sync
(
bm_hd_
);
int
out_index
=
0
;
for
(
size_t
i
=
0
;
i
<
device_outputs_
.
size
();
i
++
)
{
bm_memcpy_d2s
(
bm_hd_
,
const_cast
<
void
*>
(
origin_otensors_
[
i
]
->
raw_data
()),
device_outputs_
[
i
].
device_mem
);
if
(
outname_map_
.
find
(
net_info_
->
output_names
[
i
])
!=
outname_map_
.
end
())
{
bm_memcpy_d2s
(
bm_hd_
,
const_cast
<
void
*>
(
origin_otensors_
[
out_index
]
->
raw_data
()),
device_outputs_
[
i
].
device_mem
);
out_index
++
;
}
}
return
0
;
}
...
...
lite/kernels/bm/subgraph_compute.h
浏览文件 @
a5b73e42
...
...
@@ -51,6 +51,7 @@ class SubgraphEngine : public subgraph::Engine {
void
*
bmrt_hd_
;
std
::
vector
<
bm_tensor_t
>
device_inputs_
;
std
::
vector
<
bm_tensor_t
>
device_outputs_
;
std
::
map
<
std
::
string
,
int
>
outname_map_
;
const
char
**
net_names_
;
const
bm_net_info_t
*
net_info_
;
bm_handle_t
bm_hd_
;
...
...
lite/operators/conv_transpose_op.cc
浏览文件 @
a5b73e42
...
...
@@ -157,3 +157,5 @@ bool ConvTransposeOpLite::AttachImpl(const cpp::OpDesc& op_desc,
REGISTER_LITE_OP
(
conv2d_transpose
,
paddle
::
lite
::
operators
::
ConvTransposeOpLite
);
REGISTER_LITE_OP
(
depthwise_conv2d_transpose
,
paddle
::
lite
::
operators
::
ConvTransposeOpLite
);
lite/operators/reshape_op.cc
浏览文件 @
a5b73e42
...
...
@@ -37,7 +37,7 @@ bool ReshapeOp::InferShapeImpl() const {
for
(
size_t
i
=
0
;
i
<
shape_tensor_vct
.
size
();
i
++
)
{
final_shape
[
i
]
=
shape_tensor_vct
[
i
]
->
data
<
int
>
()[
0
];
}
}
else
if
(
shape_tensor
!=
nullptr
)
{
}
else
if
(
shape_tensor
!=
nullptr
&&
shape_tensor
->
data
<
int
>
()
!=
nullptr
)
{
auto
*
shape_tensor_data
=
shape_tensor
->
data
<
int
>
();
final_shape
=
std
::
vector
<
int
>
(
shape_tensor_data
,
shape_tensor_data
+
shape_tensor
->
numel
());
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录