PaddlePaddle / Paddle-Lite
Commit a30b9743
Authored Jul 16, 2020 by dingweihao

Merge branch 'develop' of https://github.com/PaddlePaddle/Paddle-Lite into dwh_dev

Parents: 6c9d5b4a, 00b0344b

Showing 56 changed files with 530 additions and 230 deletions (+530 -230)
Changed files:

cmake/external/flatbuffers.cmake  +1 -1
lite/api/test_yolov3_lite_bm.cc  +3 -3
lite/backends/arm/math/activation.cc  +0 -18
lite/backends/arm/math/activation.h  +0 -6
lite/core/mir/fusion/conv_bn_fuser.cc  +2 -1
lite/core/mir/fusion/transpose_softmax_transpose_fuser.cc  +6 -5
lite/core/mir/graph_visualize_pass.cc  +5 -3
lite/core/program.cc  +2 -2
lite/core/program.h  +2 -1
lite/kernels/arm/CMakeLists.txt  +0 -1
lite/kernels/bm/bridges/batch_norm_op.cc  +13 -4
lite/kernels/bm/bridges/density_prior_box_op.cc  +1 -0
lite/kernels/bm/bridges/interpolate_op.cc  +2 -0
lite/kernels/bm/subgraph_compute.cc  +3 -2
lite/kernels/host/CMakeLists.txt  +1 -0
lite/kernels/host/activation_grad_compute.cc  +98 -0
lite/kernels/host/activation_grad_compute.h  +21 -3
lite/kernels/mlu/bridges/conv_op.cc  +2 -3
lite/kernels/mlu/bridges/conv_op_test.cc  +6 -4
lite/kernels/mlu/bridges/fc_op.cc  +2 -2
lite/kernels/mlu/bridges/fc_op_test.cc  +6 -5
lite/kernels/mlu/bridges/lrn_op.cc  +1 -2
lite/kernels/mlu/bridges/lrn_op_test.cc  +4 -3
lite/kernels/mlu/subgraph_compute.h  +3 -2
lite/model_parser/base/block_desc.h  +8 -2
lite/model_parser/base/op_desc.h  +3 -1
lite/model_parser/base/program_desc.h  +4 -1
lite/model_parser/base/vector_view.h  +1 -0
lite/model_parser/compatible_pb.cc  +1 -1
lite/model_parser/flatbuffers/CMakeLists.txt  +3 -6
lite/model_parser/flatbuffers/block_desc.cc  +16 -4
lite/model_parser/flatbuffers/block_desc.h  +34 -9
lite/model_parser/flatbuffers/io.cc  +37 -0
lite/model_parser/flatbuffers/io.h  +28 -0
lite/model_parser/flatbuffers/op_desc.h  +4 -5
lite/model_parser/flatbuffers/program_desc.cc  +9 -2
lite/model_parser/flatbuffers/program_desc.h  +44 -5
lite/model_parser/flatbuffers/var_desc.h  +8 -8
lite/model_parser/flatbuffers/vector_view.h  +17 -5
lite/model_parser/general/block_desc.cc  +12 -0
lite/model_parser/general/block_desc.h  +4 -8
lite/model_parser/general/program_desc.cc  +6 -0
lite/model_parser/general/program_desc.h  +10 -5
lite/model_parser/model_parser.cc  +4 -4
lite/model_parser/naive_buffer/block_desc.h  +0 -10
lite/model_parser/naive_buffer/program_desc.h  +0 -5
lite/operators/activation_grad_ops.cc  +2 -4
lite/operators/deformable_conv_op.h  +1 -1
lite/operators/max_pool_with_index_op.h  +1 -1
lite/tests/kernels/activation_grad_compute_test.cc  +51 -17
lite/tests/kernels/elementwise_grad_compute_test.cc  +0 -40
lite/tests/kernels/sequence_conv_compute_test.cc  +26 -3
lite/tools/build_android.sh  +2 -0
lite/tools/build_bm.sh  +8 -11
lite/tools/build_ios.sh  +1 -0
lite/tools/check_api_approvals.sh  +1 -1
cmake/external/flatbuffers.cmake
@@ -97,7 +97,7 @@ function(compile_flatbuffers_schema_to_cpp_opt TARGET SRC_FBS OPT)
     OUTPUT ${GEN_HEADER}
     COMMAND "${FLATBUFFERS_FLATC_EXECUTABLE}"
             --cpp --gen-mutable --gen-object-api --reflect-names
-            --cpp-ptr-type flatbuffers::unique_ptr # Used to test with C++98 STLs
+            --cpp-ptr-type flatbuffers::unique_ptr --force-empty --force-empty-vectors
             ${OPT}
             -I "${CMAKE_CURRENT_SOURCE_DIR}/tests/include_test"
             -o "${CMAKE_CURRENT_SOURCE_DIR}/${SRC_FBS_DIR}"
lite/api/test_yolov3_lite_bm.cc
@@ -59,9 +59,9 @@ void TestModel(const std::vector<Place>& valid_places) {
   }
   auto* image_tensor = predictor.GetInput(1);
   image_tensor->Resize(DDim(std::vector<DDim::value_type>({1, 2})));
-  data = image_tensor->mutable_data<float>();
-  data[0] = FLAGS_im_height;
-  data[1] = FLAGS_im_width;
+  auto* data_1 = image_tensor->mutable_data<int>();
+  data_1[0] = FLAGS_im_height;
+  data_1[1] = FLAGS_im_width;
   for (int i = 0; i < FLAGS_warmup; ++i) {
     predictor.Run();
lite/backends/arm/math/activation.cc
@@ -763,24 +763,6 @@ void act_thresholded_relu<float>(
   }
 }
 
-#ifdef LITE_WITH_TRAIN
-template <>
-void act_square_grad(const float* din,
-                     const float* dout_grad,
-                     float* din_grad,
-                     int size,
-                     int threads) {
-  const float* ptr_out_grad = dout_grad;
-  float* ptr_in_grad = din_grad;
-  for (int i = 0; i < size; ++i) {
-    ptr_in_grad[0] = ptr_out_grad[0] * 2.0 * din[0];
-    ptr_out_grad++;
-    ptr_in_grad++;
-    din++;
-  }
-}
-#endif
-
 }  // namespace math
 }  // namespace arm
 }  // namespace lite
lite/backends/arm/math/activation.h
@@ -90,12 +90,6 @@ template <typename T>
 void act_thresholded_relu(
     const T* din, T* dout, int size, float threshold, int threads);
 
-#ifdef LITE_WITH_TRAIN
-template <typename T>
-void act_square_grad(
-    const T* din, const T* dout_grad, T* din_grad, int size, int threads);
-#endif
-
 }  // namespace math
 }  // namespace arm
 }  // namespace lite
lite/core/mir/fusion/conv_bn_fuser.cc
@@ -192,7 +192,8 @@ void ConvBNFuser::InsertNewNode(SSAGraph* graph, const key2nodes_t& matched) {
   } else if (is_weight_quantization) {
     std::string scale_name = conv_weight_name + "_quant_scale";
     if (conv_op_desc->HasAttr(scale_name)) {
-      auto scale = conv_op_desc->GetAttr<std::vector<float>>(scale_name);
+      std::vector<float> scale =
+          conv_op_desc->GetAttr<std::vector<float>>(scale_name);
       CHECK_EQ(scale.size(), alpha_tensor.numel());
       for (size_t i = 0; i < scale.size(); i++) {
         scale[i] *= alpha_data[i];
lite/core/mir/fusion/transpose_softmax_transpose_fuser.cc
@@ -84,11 +84,12 @@ cpp::OpDesc TransposeSoftmaxTransposeFuser::GenOpDesc(
   op_desc.SetInput("X", {matched.at("x1")->arg()->name});
   op_desc.SetOutput("Out", {matched.at("out")->arg()->name});
   op_desc.SetAttr("axis",
-                  matched.at("transpose1")
-                      ->stmt()
-                      ->op_info()
-                      ->GetAttr<std::vector<int>>("axis")
-                      .back());
+                  *(matched.at("transpose1")
+                        ->stmt()
+                        ->op_info()
+                        ->GetAttr<std::vector<int>>("axis")
+                        .end() -
+                    1));
   return op_desc;
 }
lite/core/mir/graph_visualize_pass.cc
@@ -62,15 +62,17 @@ std::string Visualize(mir::SSAGraph* graph) {
            << string_trunc(op_info->GetAttr<std::string>(attr_name)) << "\"";
         break;
       case AttrType::FLOATS: {
-        auto vals = op_info->GetAttr<std::vector<float>>(attr_name);
+        std::vector<float> vals =
+            op_info->GetAttr<std::vector<float>>(attr_name);
         os << ":floats: {" + Join(vals, ",") << "}";
       } break;
       case AttrType::INTS: {
-        auto vals = op_info->GetAttr<std::vector<int>>(attr_name);
+        std::vector<int> vals = op_info->GetAttr<std::vector<int>>(attr_name);
         os << ":ints: {" + Join(vals, ",") + "}";
       } break;
       case AttrType::STRINGS: {
-        auto vals = op_info->GetAttr<std::vector<std::string>>(attr_name);
+        std::vector<std::string> vals =
+            op_info->GetAttr<std::vector<std::string>>(attr_name);
         os << ":strings: {" + string_trunc(Join(vals, ",")) << "}";
       } break;
       default:
lite/core/program.cc
@@ -195,7 +195,7 @@ void Program::Build(const cpp::ProgramDesc& prog) {
   CHECK(ops_.empty()) << "Executor duplicate Build found";
 
   // Create operators.
-  auto program = prog;
+  auto& program = prog;
   CHECK(program.BlocksSize());
   auto& main_block = *program.GetBlock<cpp::BlockDesc>(0);
   for (size_t i = 0; i < main_block.OpsSize(); ++i) {
@@ -262,7 +262,7 @@ void Program::PrepareWorkspace(const cpp::ProgramDesc& prog,
     }
   };
 
-  auto program = prog;
+  auto& program = prog;
   CHECK(program.BlocksSize());
   for (size_t b = 0; b < program.BlocksSize(); ++b) {
     auto& main_block = *program.GetBlock<cpp::BlockDesc>(b);
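The one-character change above (and the matching edits in compatible_pb.cc and model_parser.cc further down) is easy to miss, so here is a standalone sketch of what it buys. This is not Paddle-Lite code; `Desc` is a stand-in for `cpp::ProgramDesc`. With plain `auto` the deduced type drops the reference and the whole descriptor is deep-copied on every call, while `auto&` only binds a reference.

#include <cassert>
#include <vector>

struct Desc {
  std::vector<int> blocks;  // stand-in for the block/op tables of a ProgramDesc
};

void Build(const Desc& prog) {
  auto copy = prog;  // deduces Desc: deep-copies every block
  auto& ref = prog;  // deduces const Desc&: no copy at all
  assert(copy.blocks.data() != prog.blocks.data());
  assert(&ref == &prog);
}

int main() {
  Build(Desc{{1, 2, 3}});
  return 0;
}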
lite/core/program.h
@@ -46,7 +46,8 @@ struct Program {
           const std::shared_ptr<Scope>& root,
           const std::vector<Place>& valid_places,
           const std::vector<std::string>& var_names = {})
-      : scope_(root), valid_places_(valid_places), desc_(desc) {
+      : scope_(root), valid_places_(valid_places) {
+    desc_.CopyFrom(desc);
     CHECK(scope_) << "scope should be init first";
     VLOG(4) << "prepare work";
     PrepareWorkspace(desc, var_names);
lite/kernels/arm/CMakeLists.txt
@@ -103,7 +103,6 @@ add_kernel(deformable_conv_compute_arm ARM extra SRCS deformable_conv_compute.cc
 add_kernel(mean_compute_arm ARM extra SRCS mean_compute.cc DEPS ${lite_kernel_deps} math_arm)
 add_kernel(mean_grad_compute_arm ARM train SRCS mean_grad_compute.cc DEPS ${lite_kernel_deps} math_arm)
-add_kernel(activation_grad_compute_arm ARM train SRCS activation_grad_compute.cc DEPS ${lite_kernel_deps} math_arm)
 add_kernel(elementwise_grad_compute_arm ARM train SRCS elementwise_grad_compute.cc DEPS ${lite_kernel_deps} math_arm)
 add_kernel(mul_grad_compute_arm ARM train SRCS mul_grad_compute.cc DEPS ${lite_kernel_deps} math_arm)
 add_kernel(sgd_compute_arm ARM train SRCS sgd_compute.cc DEPS ${lite_kernel_deps} math_arm)
lite/kernels/bm/bridges/batch_norm_op.cc
@@ -13,6 +13,7 @@
 // limitations under the License.
 
 #include <bmcompiler_if.h>
+#include <math.h>
 #include "lite/kernels/bm/bridges/graph.h"
 #include "lite/kernels/bm/bridges/utility.h"
 #include "lite/kernels/npu/bridges/registry.h"
@@ -64,10 +65,16 @@ int BatchNormConverter(void* ctx, OpLite* op, KernelBase* kernel) {
   auto* bias_data = bias->mutable_data<float>();
   auto* mean_data = mean->mutable_data<float>();
   auto* variance_data = variance->mutable_data<float>();
+  float* new_bias = static_cast<float*>(malloc(bias->memory_size()));
+  float* new_scale = static_cast<float*>(malloc(scale->memory_size()));
+  CHECK(new_bias != nullptr);
+  CHECK(new_scale != nullptr);
   for (int c = 0; c < channel_size; c++) {
     float inv_scale = 1.f / (std::sqrt(variance_data[c] + epsilon));
-    bias_data[c] = bias_data[c] - inv_scale * scale_data[c] * mean_data[c];
-    scale_data[c] = inv_scale * scale_data[c];
+    new_bias[c] = bias_data[c] - inv_scale * scale_data[c] * mean_data[c];
+    new_scale[c] = inv_scale * scale_data[c];
   }
 
   const int input_num = 1;
@@ -86,11 +93,13 @@ int BatchNormConverter(void* ctx, OpLite* op, KernelBase* kernel) {
       output_dims.size(),
       static_cast<const char*>(output_var_name.c_str()),
       static_cast<const char*>(unique_op_name.c_str()),
-      static_cast<const float*>(scale->mutable_data<float>()),
-      static_cast<const float*>(bias->mutable_data<float>()),
+      static_cast<const float*>(new_scale),
+      static_cast<const float*>(new_bias),
       1,
       1,
       1);
+  free(new_scale);
+  free(new_bias);
   delete[] shape;
   delete[] name;
   delete[] dim;
lite/kernels/bm/bridges/density_prior_box_op.cc
@@ -13,6 +13,7 @@
 // limitations under the License.
 
 #include <bmcompiler_if.h>
+#include <math.h>
 #include "lite/kernels/bm/bridges/graph.h"
 #include "lite/kernels/bm/bridges/utility.h"
 #include "lite/kernels/npu/bridges/registry.h"
lite/kernels/bm/bridges/interpolate_op.cc
@@ -76,6 +76,8 @@ int InterpolateConverter(void* ctx, OpLite* op, KernelBase* kernel) {
         static_cast<const char*>(output_var_name.c_str()),
         0,
         0,
         0,
         0,
         type);
   }
+
+  graph->AddNode(output_var_name);
lite/kernels/bm/subgraph_compute.cc
@@ -76,9 +76,10 @@ bool SubgraphEngine::BuildDeviceProgram() {
       return false;
     }
   }
-  std::string net_name = "bmnetc_f32umodel";
+  std::string net_name = "bmnet_f32bmodel";
+  auto unique_net_name = lite::subgraph::bm::UniqueName(net_name);
   __bmcompile_opt(
-      graph.GetCompilerHandle(), const_cast<char*>(net_name.c_str()), 1);
+      graph.GetCompilerHandle(), const_cast<char*>(unique_net_name.c_str()), 2);
   void* bmodel_data = nullptr;
   unsigned int data_size = 0;
   bm_hd_ = static_cast<bm_handle_t>(ctx.GetHandle());
lite/kernels/host/CMakeLists.txt
@@ -19,6 +19,7 @@ add_kernel(read_from_array_compute_host Host extra SRCS read_from_array_compute.
 add_kernel(assign_compute_host Host extra SRCS assign_compute.cc DEPS ${lite_kernel_deps})
 add_kernel(retinanet_detection_output_compute_host Host extra SRCS retinanet_detection_output_compute.cc DEPS ${lite_kernel_deps})
 add_kernel(where_index_compute_host Host extra SRCS where_index_compute.cc DEPS ${lite_kernel_deps})
+add_kernel(activation_grad_compute_host Host train SRCS activation_grad_compute.cc DEPS ${lite_kernel_deps})
 
 if(LITE_BUILD_EXTRA)
     lite_cc_test(test_where_index_compute_host SRCS where_index_compute.cc DEPS where_index_compute_host)
lite/kernels/host/activation_grad_compute.cc  (new file mode 100644)

// Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "lite/kernels/host/activation_grad_compute.h"

namespace paddle {
namespace lite {
namespace kernels {
namespace host {

void SquareGradCompute::Run() {
  auto& param = this->Param<param_t>();
  CHECK(param.X);
  auto out_grad_dims = param.Out_grad->dims();
  auto out_grad_data = param.Out_grad->data<float>();

  auto x_data = param.X->data<float>();
  auto x_grad_data = param.X_grad->mutable_data<float>();
  for (int i = 0; i < out_grad_dims.production(); i++) {
    x_grad_data[i] = out_grad_data[i] * 2.0 * x_data[i];
  }
}

void ReluGradCompute::Run() {
  auto& param = this->Param<param_t>();
  CHECK(param.X);
  auto out_grad_dims = param.Out_grad->dims();
  auto out_grad_data = param.Out_grad->data<float>();

  auto x_data = param.X->data<float>();
  auto x_grad_data = param.X_grad->mutable_data<float>();
  for (int i = 0; i < out_grad_dims.production(); i++) {
    x_grad_data[i] = x_data[i] > 0 ? out_grad_data[i] : 0.0;
  }
}

void TanhGradCompute::Run() {
  auto& param = this->Param<param_t>();
  CHECK(param.Out);
  auto out_grad_dims = param.Out_grad->dims();
  auto out_grad_data = param.Out_grad->data<float>();

  auto out_data = param.Out->data<float>();
  auto x_grad_data = param.X_grad->mutable_data<float>();
  for (int i = 0; i < out_grad_dims.production(); i++) {
    x_grad_data[i] = out_grad_data[i] *
                     (static_cast<float>(1.0) - out_data[i] * out_data[i]);
  }
}

}  // namespace host
}  // namespace kernels
}  // namespace lite
}  // namespace paddle

REGISTER_LITE_KERNEL(square_grad,
                     kHost,
                     kFloat,
                     kNCHW,
                     paddle::lite::kernels::host::SquareGradCompute,
                     def)
    .BindInput("X", {LiteType::GetTensorTy(TARGET(kHost))})
    .BindInput("Out@GRAD", {LiteType::GetTensorTy(TARGET(kHost))})
    .BindOutput("X@GRAD", {LiteType::GetTensorTy(TARGET(kHost))})
    .Finalize();

REGISTER_LITE_KERNEL(relu_grad,
                     kHost,
                     kFloat,
                     kNCHW,
                     paddle::lite::kernels::host::SquareGradCompute,
                     def)
    .BindInput("X", {LiteType::GetTensorTy(TARGET(kHost))})
    .BindInput("Out@GRAD", {LiteType::GetTensorTy(TARGET(kHost))})
    .BindOutput("X@GRAD", {LiteType::GetTensorTy(TARGET(kHost))})
    .Finalize();

REGISTER_LITE_KERNEL(tanh_grad,
                     kHost,
                     kFloat,
                     kNCHW,
                     paddle::lite::kernels::host::SquareGradCompute,
                     def)
    .BindInput("Out", {LiteType::GetTensorTy(TARGET(kHost))})
    .BindInput("Out@GRAD", {LiteType::GetTensorTy(TARGET(kHost))})
    .BindOutput("X@GRAD", {LiteType::GetTensorTy(TARGET(kHost))})
    .Finalize();
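For reference, the three Run() bodies above are direct applications of the chain rule. Writing out the output of the forward kernel and delta = dL/dout for the incoming gradient, the kernels compute:

% square: out = x^2        relu: out = max(x, 0)        tanh: out = tanh(x)
\frac{\partial L}{\partial x} = \delta \cdot 2x
\qquad
\frac{\partial L}{\partial x} = \delta \cdot \mathbf{1}[x > 0]
\qquad
\frac{\partial L}{\partial x} = \delta \cdot \left(1 - \mathrm{out}^2\right)

Square and relu only need the forward input X, while tanh only needs the forward output Out, which matches the CHECK(param.X) / CHECK(param.Out) guards and the differing BindInput lists in the registrations.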
lite/kernels/arm/activation_grad_compute.h → lite/kernels/host/activation_grad_compute.h
@@ -20,9 +20,9 @@
 namespace paddle {
 namespace lite {
 namespace kernels {
-namespace arm {
+namespace host {
 
-class SquareGradCompute : public KernelLite<TARGET(kARM), PRECISION(kFloat)> {
+class SquareGradCompute : public KernelLite<TARGET(kHost), PRECISION(kFloat)> {
  public:
   using param_t = operators::ActivationGradParam;
@@ -31,7 +31,25 @@ class SquareGradCompute : public KernelLite<TARGET(kARM), PRECISION(kFloat)> {
   virtual ~SquareGradCompute() = default;
 };
 
-}  // namespace arm
+class ReluGradCompute : public KernelLite<TARGET(kHost), PRECISION(kFloat)> {
+ public:
+  using param_t = operators::ActivationGradParam;
+
+  void Run() override;
+
+  virtual ~ReluGradCompute() = default;
+};
+
+class TanhGradCompute : public KernelLite<TARGET(kHost), PRECISION(kFloat)> {
+ public:
+  using param_t = operators::ActivationGradParam;
+
+  void Run() override;
+
+  virtual ~TanhGradCompute() = default;
+};
+
+}  // namespace host
 }  // namespace kernels
 }  // namespace lite
 }  // namespace paddle
lite/kernels/mlu/bridges/conv_op.cc
@@ -107,8 +107,7 @@ int ConvConverter(void* ctx, OpLite* op, KernelBase* kernel) {
       CNML_FILTER,
       CNML_NCHW,
       graph->FPType());
-  const auto weight_scale =
-      op_info->GetAttr<std::vector<float>>("weight_scale");
+  const auto weight_scale = op_info->GetInputScale(filter_var_name);
 
   if (filter->precision() == PrecisionType::kUnk ||
       filter->precision() == PrecisionType::kInt8) {
@@ -162,7 +161,7 @@ int ConvConverter(void* ctx, OpLite* op, KernelBase* kernel) {
     graph->BindConstData(bias_var_name, bias);
   }
 
-  const auto input_scale = op_info->GetAttr<float>("input_scale");
+  const auto input_scale = op_info->GetInputScale(input_var_name)[0];
 
   bool use_first_conv = false;
   if (lite::TargetWrapperMlu::UseFirstConv() && input_dims[1] == 3) {
lite/kernels/mlu/bridges/conv_op_test.cc
@@ -224,8 +224,10 @@ void test_conv(int bs,
   opdesc_mlu.SetAttr("groups", groups);
   opdesc_mlu.SetAttr("fuse_relu", static_cast<bool>(fuse_relu));
-  opdesc_mlu.SetAttr("weight_scale", std::vector<float>(oc, filter_scale));
-  opdesc_mlu.SetAttr("input_scale", input_scale);
+  OpInfo op_info(opdesc_mlu);
+  op_info.SetInputScale(filter_int_var_name,
+                        std::vector<float>(oc, filter_scale));
+  op_info.SetInputScale(input_var_name, {input_scale});
 
   if (has_bias) {
     if (is_channel_bias) {
@@ -234,7 +236,7 @@ void test_conv(int bs,
       bias->Resize({output_shape});
     }
     FillTensor<float>(bias);
-    opdesc_mlu.SetInput("Bias", {bias_var_name});
+    op_info.SetInput("Bias", {bias_var_name});
   }
 
   for (int i = 0; i < bs; i++) {
@@ -248,7 +250,7 @@ void test_conv(int bs,
   }
 
   // create and convert op to MLU model, then run it on MLU
-  auto op = CreateOp<operators::ConvOpLite>(opdesc_mlu, &scope);
+  auto op = CreateOp<operators::ConvOpLite>(op_info, &scope);
   LaunchOp(op, {input_var_name}, {output_var_name});
 
   // compare results
   auto* output_data = output->mutable_data<float>();
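The pattern in the two MLU changes above (and in the fc/lrn bridges below) is the same: quantization scales move off ad-hoc "weight_scale" / "input_scale" op attributes and onto the OpInfo, keyed by the input variable's name, so the bridge can look them up per input. The following is a self-contained stand-in, not the real OpInfo class; OpInfoLike and the variable names are made up for illustration.

#include <map>
#include <string>
#include <utility>
#include <vector>

// Minimal stand-in for the OpInfo scale bookkeeping used above: scales are
// stored per input variable name instead of as loose op attributes.
class OpInfoLike {
 public:
  void SetInputScale(const std::string& var, std::vector<float> scales) {
    input_scales_[var] = std::move(scales);
  }
  const std::vector<float>& GetInputScale(const std::string& var) const {
    return input_scales_.at(var);
  }

 private:
  std::map<std::string, std::vector<float>> input_scales_;
};

int main() {
  OpInfoLike info;
  info.SetInputScale("conv_filter_int8", std::vector<float>(16, 0.02f));  // per-channel
  info.SetInputScale("conv_input", {0.5f});                               // single scale
  const float in_scale = info.GetInputScale("conv_input")[0];
  return in_scale > 0 ? 0 : 1;
}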
lite/kernels/mlu/bridges/fc_op.cc
@@ -68,7 +68,7 @@ int FCConverter(void* ctx, OpLite* op, KernelBase* kernel) {
   auto w_tensor = graph->AddNode(
       w_var_name, cnml_w_shape, CNML_FILTER, CNML_NCHW, graph->FPType());
 
-  auto input_scale = op_info->GetAttr<float>("input_scale");
+  auto input_scale = op_info->GetInputScale(x_var_name)[0];
 
   auto output_tensor = graph->AddNode(output_var_name,
                                       output->dims().Vectorize(),
@@ -101,7 +101,7 @@ int FCConverter(void* ctx, OpLite* op, KernelBase* kernel) {
       bias_tensor ? bias_tensor->mlu_tensor() : nullptr));
   graph->SetComputingDataType(
       fc_op, graph->GetNode(x_var_name)->mlu_tensor(), 1 / input_scale);
-  auto weight_scale = op_info->GetAttr<std::vector<float>>("weight_scale");
+  auto weight_scale = op_info->GetInputScale(w_var_name);
 
   // LOG(INFO) << "W precision " << int(w->precision());
   if (w->precision() == PrecisionType::kUnk ||
lite/kernels/mlu/bridges/fc_op_test.cc
@@ -131,14 +131,15 @@ void test_fc(const std::vector<int64_t>& input_shape,
   fc_op_desc_mlu.SetOutput("Out", {out_var_name});
   fc_op_desc_mlu.SetAttr("in_num_col_dims", static_cast<int>(in_num_col_dims));
-  fc_op_desc_mlu.SetAttr("weight_scale",
-                         std::vector<float>(w_shape[1], w_scale));
-  fc_op_desc_mlu.SetAttr("input_scale", input_scale);
+  OpInfo op_info(fc_op_desc_mlu);
+  op_info.SetInputScale(w_int_var_name, std::vector<float>(w_shape[1], w_scale));
+  op_info.SetInputScale(input_var_name, {input_scale});
 
   if (has_bias) {
-    fc_op_desc_mlu.SetInput("Bias", {bias_var_name});
+    op_info.SetInput("Bias", {bias_var_name});
   }
-  auto fc_op_mlu = CreateOp<operators::FcOpLite>(fc_op_desc_mlu, &scope);
+  auto fc_op_mlu = CreateOp<operators::FcOpLite>(op_info, &scope);
 
   Tensor input_tmp, out_tmp;
   input_tmp.Resize(input_shape);
lite/kernels/mlu/bridges/lrn_op.cc
@@ -49,8 +49,7 @@ int LrnConverter(void* ctx, OpLite* op, KernelBase* kernel) {
         << "Unsuport WithinChannel";
   }
   auto local_size = op_info->GetAttr<int>("n");
-  CHECK(op_info->HasAttr("input_scale"));
-  auto input_scale = op_info->GetAttr<float>("input_scale");
+  auto input_scale = op_info->GetInputScale(x_var_name)[0];
   VLOG(5) << "lrn input scale: " << input_scale;
 
   cnmlLrnOpParam_t param;
lite/kernels/mlu/bridges/lrn_op_test.cc
@@ -178,9 +178,10 @@ void test_lrn(float alpha,
   opdesc.SetAttr("k", k);
   opdesc.SetAttr("n", local_size);
   opdesc.SetAttr("norm_region", norm_region);
-  opdesc.SetAttr<float>("input_scale", (*dmax - *dmin) / 255.f);
+  OpInfo op_info(opdesc);
+  op_info.SetInputScale(x_var_name, {(*dmax - *dmin) / 255.f});
 
-  auto op = CreateOp<operators::LrnOpLite>(opdesc, &scope);
+  auto op = CreateOp<operators::LrnOpLite>(op_info, &scope);
 
   // baseline
   lrn_compute_ref(op);
@@ -213,7 +214,7 @@ void test_lrn(float alpha,
   auto output_data = output_trans.mutable_data<float>();
   auto* output_ref_data = out_ref->mutable_data<float>();
   for (size_t i = 0; i < out->data_size(); i++) {
-    EXPECT_NEAR(output_data[i], output_ref_data[i], 1e-4);
+    EXPECT_NEAR(output_data[i], output_ref_data[i], 5e-4);
   }
 }
lite/kernels/mlu/subgraph_compute.h
@@ -54,10 +54,11 @@ class SubgraphEngine : public subgraph::Engine {
     VLOG(4) << "[MLU] PADDLE_LITE_MLU_SAVE_OFFLINE_MODEL is "
             << GetBoolFromEnv("PADDLE_LITE_MLU_SAVE_OFFLINE_MODEL");
     VLOG(4) << "[MLU] PADDLE_LITE_MLU_DISABLE_BATCH_SIZE_CHANGEABLE is "
-            << GetBoolFromEnv("PADDLE_LITE_MLU_DISABLE_BATCH_SIZE_CHANGEABLE");
+            << GetBoolFromEnv("PADDLE_LITE_MLU_DISABLE_BATCH_SIZE_CHANGEABLE",
+                              true);
     VLOG(4) << "[MLU] LITE_DISABLE_MLU_CAST is "
             << GetBoolFromEnv("LITE_DISABLE_MLU_CAST");
-    if (GetBoolFromEnv("PADDLE_LITE_MLU_DISABLE_BATCH_SIZE_CHANGEABLE")) {
+    if (GetBoolFromEnv("PADDLE_LITE_MLU_DISABLE_BATCH_SIZE_CHANGEABLE", true)) {
       disable_batch_size_changeable_ = true;
     }
lite/model_parser/base/block_desc.h
@@ -54,10 +54,16 @@ class BlockDescWriteAPI {
   virtual void SetForwardBlockIdx(int32_t idx) { NotImplemented(); }
 
   template <typename T>
-  T* AddVar();
+  T* AddVar() {
+    NotImplemented();
+    return nullptr;
+  }
 
   template <typename T>
-  T* AddOp();
+  T* AddOp() {
+    NotImplemented();
+    return nullptr;
+  }
 
   virtual ~BlockDescWriteAPI() = default;
lite/model_parser/base/op_desc.h
@@ -73,7 +73,9 @@ class OpDescWriteAPI {
   }
 
   template <typename T>
-  void SetAttr(const std::string& name, const T& v);
+  void SetAttr(const std::string& name, const T& v) {
+    NotImplemented();
+  }
 
   virtual ~OpDescWriteAPI() = default;
lite/model_parser/base/program_desc.h
@@ -40,7 +40,10 @@ class ProgramDescWriteAPI {
   virtual void SetVersion(int64_t version) { NotImplemented(); }
 
   template <typename T>
-  T* AddBlock();
+  T* AddBlock() {
+    NotImplemented();
+    return nullptr;
+  }
 
   virtual ~ProgramDescWriteAPI() = default;
lite/model_parser/base/vector_view.h
@@ -57,6 +57,7 @@ class VectorView {
  public:
   typedef vector_view::VectorTraits<T, U> Traits;
   explicit VectorView(typename Traits::vector_type const* cvec) {
+    CHECK(cvec);
     cvec_ = cvec;
   }
   typename Traits::subscript_return_type operator[](size_t i) const {
lite/model_parser/compatible_pb.cc
@@ -277,7 +277,7 @@ void OpAttrsCppToAny(const cpp::OpDesc &cpp_desc, OpDescType *any_desc) {
   template <>                                                       \
   void TransformProgramDescCppToAny<NT::T>(const cpp::T &cpp_desc,  \
                                            NT::T *any_desc) {       \
-    auto desc = cpp_desc;                                            \
+    auto &desc = cpp_desc;                                           \
     if (desc.HasVersion()) {                                         \
       any_desc->SetVersion(desc.Version());                          \
     }                                                                \
lite/model_parser/flatbuffers/CMakeLists.txt
@@ -8,9 +8,6 @@ endfunction()
 lite_fbs_library(fbs_op_desc SRCS op_desc.cc FBS_DEPS framework_fbs_header)
 lite_fbs_library(fbs_var_desc SRCS var_desc.cc FBS_DEPS framework_fbs_header)
 lite_fbs_library(fbs_block_desc SRCS block_desc.cc FBS_DEPS framework_fbs_header)
-lite_fbs_library(fbs_program_desc SRCS program_desc.cc FBS_DEPS framework_fbs_header)
-lite_cc_test(test_vector_view SRCS vector_view_test.cc)
-if(TARGET test_vector_view)
-  add_dependencies(test_vector_view framework_fbs_header)
-endif()
+lite_cc_library(fbs_program_desc SRCS program_desc.cc DEPS fbs_op_desc fbs_var_desc fbs_block_desc)
+lite_cc_library(fbs_io SRCS io.cc DEPS fbs_program_desc)
+lite_cc_test(test_vector_view SRCS vector_view_test.cc DEPS fbs_program_desc)
lite/model_parser/flatbuffers/block_desc.cc
@@ -19,15 +19,27 @@ namespace lite {
 namespace fbs {
 
 template <>
-proto::VarDesc* BlockDesc::GetVar<proto::VarDesc>(int32_t idx) {
+proto::VarDesc const* BlockDesc::GetVar<proto::VarDesc>(int32_t idx) const {
   CHECK_LT(idx, VarsSize()) << "idx >= vars.size()";
-  return const_cast<proto::VarDesc*>(desc_->vars()->Get(idx));
+  return desc_->vars()->Get(idx);
 }
 
 template <>
-proto::OpDesc* BlockDesc::GetOp<proto::OpDesc>(int32_t idx) {
+proto::OpDesc const* BlockDesc::GetOp<proto::OpDesc>(int32_t idx) const {
   CHECK_LT(idx, OpsSize()) << "idx >= ops.size()";
-  return const_cast<proto::OpDesc*>(desc_->ops()->Get(idx));
+  return desc_->ops()->Get(idx);
 }
 
+template <>
+VarDesc const* BlockDesc::GetVar<VarDesc>(int32_t idx) const {
+  CHECK_LT(idx, VarsSize()) << "idx >= vars.size()";
+  return &vars_[idx];
+}
+
+template <>
+OpDesc const* BlockDesc::GetOp<OpDesc>(int32_t idx) const {
+  CHECK_LT(idx, OpsSize()) << "idx >= ops.size()";
+  return &ops_[idx];
+}
+
 }  // namespace fbs
lite/model_parser/flatbuffers/block_desc.h
@@ -14,8 +14,11 @@
 #pragma once
 
+#include <vector>
 #include "lite/model_parser/base/block_desc.h"
 #include "lite/model_parser/flatbuffers/framework_generated.h"
+#include "lite/model_parser/flatbuffers/op_desc.h"
+#include "lite/model_parser/flatbuffers/var_desc.h"
 #include "lite/utils/all.h"
 
 namespace paddle {
@@ -24,7 +27,17 @@ namespace fbs {
 class BlockDesc : public BlockDescAPI {
  public:
-  explicit BlockDesc(proto::BlockDesc* desc) : desc_(desc) { CHECK(desc_); }
+  explicit BlockDesc(proto::BlockDesc const* desc) : desc_(desc) {
+    CHECK(desc_);
+    vars_.reserve(VarsSize());
+    ops_.reserve(OpsSize());
+    for (size_t idx = 0; idx < VarsSize(); ++idx) {
+      vars_.push_back(VarDesc(desc_->vars()->Get(idx)));
+    }
+    for (size_t idx = 0; idx < OpsSize(); ++idx) {
+      ops_.push_back(OpDesc(desc_->ops()->Get(idx)));
+    }
+  }
 
   int32_t Idx() const override { return desc_->idx(); }
@@ -33,11 +46,12 @@ class BlockDesc : public BlockDescAPI {
   size_t VarsSize() const override { return desc_->vars()->size(); }
 
   template <typename T>
-  T* GetVar(int32_t idx);
+  T const* GetVar(int32_t idx) const;
 
   template <typename T>
-  T const* GetVar(int32_t idx) const {
-    return GetVar<T>(idx);
+  T* GetVar(int32_t idx) {
+    NotImplemented();
+    return nullptr;
   }
 
   size_t OpsSize() const override {
@@ -47,21 +61,32 @@ class BlockDesc : public BlockDescAPI {
   }
 
   template <typename T>
-  T* GetOp(int32_t idx);
+  T const* GetOp(int32_t idx) const;
 
   template <typename T>
-  T const* GetOp(int32_t idx) const {
-    return GetOp<T>(idx);
+  T* GetOp(int32_t idx) {
+    NotImplemented();
+    return nullptr;
   }
 
+  const std::vector<VarDesc>& GetVars() const { return vars_; }
+
   int32_t ForwardBlockIdx() const override {
     return desc_->forward_block_idx();
   }
 
-  BlockDesc() = delete;
+  BlockDesc() { NotImplemented(); }
 
  private:
-  proto::BlockDesc* desc_;  // not_own
+  proto::BlockDesc const* desc_;  // not_own
+  std::vector<VarDesc> vars_;
+  std::vector<OpDesc> ops_;
+
+ private:
+  void NotImplemented() const {
+    LOG(FATAL) << "The additional interfaces of BlockDesc is temporarily "
+                  "unavailable in read-only mode.";
+  }
 };
 
 }  // namespace fbs
lite/kernels/arm/activation_grad_compute.cc → lite/model_parser/flatbuffers/io.cc
@@ -12,41 +12,26 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
 
-#include "lite/kernels/arm/activation_grad_compute.h"
-#include "lite/backends/arm/math/funcs.h"
+#include "lite/model_parser/flatbuffers/io.h"
+#include <memory>
+#include <utility>
 
 namespace paddle {
 namespace lite {
-namespace kernels {
-namespace arm {
+namespace fbs {
 
-void SquareGradCompute::Run() {
-  auto& param = this->Param<param_t>();
-  auto& ctx = this->ctx_->template As<ARMContext>();
-  auto out_grad_dims = param.Out_grad->dims();
-  auto out_grad_data = param.Out_grad->data<float>();
-
-  auto x_data = param.X->data<float>();
-  auto x_grad_data = param.X_grad->mutable_data<float>();
-  lite::arm::math::act_square_grad<float>(x_data,
-                                          out_grad_data,
-                                          x_grad_data,
-                                          out_grad_dims.production(),
-                                          ctx.threads());
+void LoadModel(const std::string& path, ProgramDesc* prog) {
+  FILE* file = fopen(path.c_str(), "rb");
+  fseek(file, 0, SEEK_END);
+  int64_t size = ftell(file);
+  rewind(file);
+  char* data = new char[size];
+  size = fread(data, 1, size, file);
+  fclose(file);
+  std::unique_ptr<char[]> buf(data);
+  prog->Init(std::move(buf));
 }
 
-}  // namespace arm
-}  // namespace kernels
+}  // namespace fbs
 }  // namespace lite
 }  // namespace paddle
-
-REGISTER_LITE_KERNEL(square_grad,
-                     kARM,
-                     kFloat,
-                     kNCHW,
-                     paddle::lite::kernels::arm::SquareGradCompute,
-                     def)
-    .BindInput("X", {LiteType::GetTensorTy(TARGET(kARM))})
-    .BindInput("Out@GRAD", {LiteType::GetTensorTy(TARGET(kARM))})
-    .BindOutput("X@GRAD", {LiteType::GetTensorTy(TARGET(kARM))})
-    .Finalize();
lite/model_parser/flatbuffers/io.h  (new file mode 100644)

// Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#pragma once

#include <string>
#include "lite/model_parser/flatbuffers/program_desc.h"

namespace paddle {
namespace lite {
namespace fbs {

void LoadModel(const std::string& path, ProgramDesc* prog);

}  // namespace fbs
}  // namespace lite
}  // namespace paddle
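Taken together, io.h and io.cc give the flatbuffers model parser a read-the-bytes-then-Init entry point: LoadModel slurps the whole file into a heap buffer and hands ownership to ProgramDesc::Init, after which the descriptor is a read-only view over that buffer. A minimal usage sketch under the APIs introduced in this commit (the file path is hypothetical, and this relies on the Paddle-Lite build for the types):

#include "lite/model_parser/flatbuffers/io.h"

void InspectModel() {
  paddle::lite::fbs::ProgramDesc program;
  // Reads the whole file and transfers the buffer into the descriptor.
  paddle::lite::fbs::LoadModel("/tmp/model.fbs", &program);  // hypothetical path
  // Traverse through the cached read-only view objects.
  for (const auto& block : program.GetBlocks()) {
    (void)block.OpsSize();  // e.g. walk ops/vars of each block
  }
}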
lite/model_parser/flatbuffers/op_desc.h
@@ -30,7 +30,7 @@ namespace fbs {
 class OpDesc : public OpDescAPI {
  public:
-  explicit OpDesc(proto::OpDesc* desc) : desc_(desc) { CHECK(desc_); }
+  explicit OpDesc(proto::OpDesc const* desc) : desc_(desc) { CHECK(desc_); }
 
   std::string Type() const override { return desc_->type()->str(); }
@@ -95,7 +95,7 @@ class OpDesc : public OpDescAPI {
   OpDescAPI::AttrType GetAttrType(const std::string& name) const override {
     const auto& attr = desc_->attrs()->LookupByKey(name.c_str());
-    CHECK(attr);
+    CHECK(attr) << "Can not find attr: " << name;
     return static_cast<OpDescAPI::AttrType>(attr->type());
   }
@@ -124,10 +124,8 @@ class OpDesc : public OpDescAPI {
   template <typename T>
   typename lite::OpDataTypeTrait<T, Flatbuffers>::RT GetAttr(size_t idx) const;
 
-  OpDesc() = delete;
-
  private:
-  proto::OpDesc* desc_;
+  proto::OpDesc const* desc_;
 
   // To reduce overhead, we expect to use namespace aliasing to make cpp::Desc
   // and flatbuffers::Desc replace each other. However, there is no direct
@@ -138,6 +136,7 @@ class OpDesc : public OpDescAPI {
   // caused by different building options.
 
  public:
+  OpDesc() { NotImplemented(); }
   bool HasInput(const std::string& param) const {
     return desc_->inputs()->LookupByKey(param.c_str()) != nullptr;
   }
lite/model_parser/flatbuffers/program_desc.cc
@@ -19,9 +19,16 @@ namespace lite {
 namespace fbs {
 
 template <>
-proto::BlockDesc* ProgramDesc::GetBlock<proto::BlockDesc>(int32_t idx) {
+proto::BlockDesc const* ProgramDesc::GetBlock<proto::BlockDesc>(
+    int32_t idx) const {
   CHECK_LT(idx, BlocksSize()) << "idx >= blocks.size()";
-  return const_cast<proto::BlockDesc*>(desc_->blocks()->Get(idx));
+  return desc_->blocks()->Get(idx);
+}
+
+template <>
+BlockDesc const* ProgramDesc::GetBlock<BlockDesc>(int32_t idx) const {
+  CHECK_LT(idx, BlocksSize()) << "idx >= blocks.size()";
+  return &blocks_[idx];
 }
 
 }  // namespace fbs
lite/model_parser/flatbuffers/program_desc.h
@@ -15,7 +15,10 @@
 #pragma once
 
+#include <memory>
+#include <utility>
 #include <vector>
 #include "lite/model_parser/base/program_desc.h"
+#include "lite/model_parser/flatbuffers/block_desc.h"
 #include "lite/model_parser/flatbuffers/framework_generated.h"
 #include "lite/utils/all.h"
@@ -26,18 +29,40 @@ namespace fbs {
 class ProgramDesc : public ProgramDescAPI {
  public:
   ProgramDesc() = default;
-  explicit ProgramDesc(proto::ProgramDesc* desc) : desc_(desc) { CHECK(desc); }
+  explicit ProgramDesc(std::unique_ptr<const char[]> buf) {
+    Init(std::move(buf));
+  }
 
   size_t BlocksSize() const override { return desc_->blocks()->size(); }
 
+  void Init(std::unique_ptr<const char[]> buf) {
+    CHECK(buf.get() != nullptr);
+    buf_ = std::move(buf);
+    desc_ = proto::GetProgramDesc(buf_.get());
+    blocks_.reserve(BlocksSize());
+    for (size_t idx = 0; idx < BlocksSize(); ++idx) {
+      blocks_.push_back(BlockDesc(desc_->blocks()->Get(idx)));
+    }
+  }
+
+  void CopyFrom(const ProgramDesc& other) {
+    size_t length = strlen(static_cast<const char*>(other.raw_buf()));
+    std::unique_ptr<char[]> buf(new char[length]);
+    memcpy(buf.get(), other.raw_buf(), length);
+    Init(std::move(buf));
+  }
+
   template <typename T>
-  T* GetBlock(int32_t idx);
+  T const* GetBlock(int32_t idx) const;
 
   template <typename T>
-  T const* GetBlock(int32_t idx) const {
-    return GetBlock<T>(idx);
+  T* GetBlock(int32_t idx) {
+    NotImplemented();
+    return nullptr;
   }
 
+  const std::vector<BlockDesc>& GetBlocks() const { return blocks_; }
+
   bool HasVersion() const override { return desc_->version() != nullptr; }
 
   int64_t Version() const override {
@@ -45,8 +70,22 @@ class ProgramDesc : public ProgramDescAPI {
     return desc_->version()->version();
   }
 
+  proto::ProgramDesc const* raw_desc() const { return desc_; }
+
+  const void* raw_buf() const { return buf_.get(); }
+
  private:
-  proto::ProgramDesc* desc_;  // not_own
+  proto::ProgramDesc const* desc_;
+  std::unique_ptr<const char[]> buf_;
+  std::vector<BlockDesc> blocks_;
+
+ private:
+  ProgramDesc& operator=(const ProgramDesc&) = delete;
+  ProgramDesc(const ProgramDesc&) = delete;
+  void NotImplemented() const {
+    LOG(FATAL) << "The additional interfaces of ProgramDesc is temporarily "
+                  "unavailable in read-only mode.";
+  }
 };
 
 }  // namespace fbs
lite/model_parser/flatbuffers/var_desc.h
@@ -27,7 +27,7 @@ namespace fbs {
 class VarDesc : public VarDescAPI {
  public:
-  explicit VarDesc(proto::VarDesc* desc) : desc_(desc) {}
+  explicit VarDesc(proto::VarDesc const* desc) : desc_(desc) {}
 
   std::string Name() const override { return desc_->name()->str(); }
@@ -48,10 +48,14 @@ class VarDesc : public VarDescAPI {
     return dims_vec;
   }
 
-  VarDesc() = delete;
+  VarDescAPI::Type GetDataType() const {
+    CHECK(GetType() == VarDescAPI::Type::LOD_TENSOR);
+    return static_cast<VarDescAPI::Type>(
+        desc_->type()->lod_tensor()->tensor()->data_type());
+  }
 
  private:
-  proto::VarDesc* desc_;
+  proto::VarDesc const* desc_;
 
   // To reduce overhead, we expect to use namespace aliasing to make cpp::Desc
   // and flatbuffers::Desc replace each other. However, there is no direct
@@ -62,10 +66,7 @@ class VarDesc : public VarDescAPI {
   // caused by different building options.
 
  public:
-  VarDescAPI::Type GetDataType() const {
-    NotImplemented();
-    return data_type_;
-  }
+  VarDesc() { NotImplemented(); }
   void SetDataType(Type data_type) { NotImplemented(); }
   void SetShape(const std::vector<int64_t>& dims) { NotImplemented(); }
@@ -74,7 +75,6 @@ class VarDesc : public VarDescAPI {
     LOG(FATAL) << "The additional interfaces of VarDesc is temporarily "
                   "unavailable in read-only mode.";
   }
-  Type data_type_;
   std::vector<int64_t> shape_;
 };
lite/model_parser/flatbuffers/vector_view.h
@@ -104,20 +104,32 @@ class VectorView<std::string, Flatbuffers> {
   explicit VectorView(typename Traits::vector_type const* cvec) {
     cvec_ = cvec;
   }
-  std::string operator[](size_t i) const { return cvec_->operator[](i)->str(); }
+  std::string operator[](size_t i) const {
+    CHECK(cvec_);
+    return cvec_->operator[](i)->str();
+  }
   vector_view::FBSStrIterator begin() const {
+    CHECK(cvec_);
     return vector_view::FBSStrIterator(cvec_->begin());
   }
   vector_view::FBSStrIterator end() const {
+    CHECK(cvec_);
     return vector_view::FBSStrIterator(cvec_->end());
   }
-  size_t size() const { return cvec_->size(); }
+  size_t size() const {
+    if (cvec_ == nullptr) {
+      return 0;
+    }
+    return cvec_->size();
+  }
   operator std::vector<std::string>() const {
     VLOG(5) << "Copying elements out of VectorView will damage performance.";
     std::vector<std::string> tmp;
-    tmp.reserve(cvec_->size());
-    for (auto val : *cvec_) {
-      tmp.push_back(val->str());
+    tmp.reserve(size());
+    if (cvec_ != nullptr) {
+      for (auto val : *cvec_) {
+        tmp.push_back(val->str());
+      }
     }
     return tmp;
   }
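The change above makes the string VectorView tolerate a missing (null) underlying flatbuffers vector: size() reads as 0 and the conversion yields an empty std::vector instead of dereferencing null. A self-contained stand-in of the same pattern (StringViewLike is made up for illustration; the real class wraps a flatbuffers vector, not std::vector):

#include <cstddef>
#include <string>
#include <vector>

// Stand-in for the null-tolerant behavior added to VectorView<std::string>.
class StringViewLike {
 public:
  explicit StringViewLike(const std::vector<std::string>* cvec) : cvec_(cvec) {}
  size_t size() const { return cvec_ == nullptr ? 0 : cvec_->size(); }
  operator std::vector<std::string>() const {
    std::vector<std::string> tmp;
    tmp.reserve(size());
    if (cvec_ != nullptr) {
      tmp.assign(cvec_->begin(), cvec_->end());
    }
    return tmp;
  }

 private:
  const std::vector<std::string>* cvec_;
};

int main() {
  StringViewLike view(nullptr);
  std::vector<std::string> copied = view;  // empty, no null dereference
  return (copied.empty() && view.size() == 0) ? 0 : 1;
}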
lite/model_parser/general/block_desc.cc
@@ -24,6 +24,12 @@ VarDesc* BlockDesc::GetVar<VarDesc>(int32_t idx) {
   return &vars_[idx];
 }
 
+template <>
+VarDesc const* BlockDesc::GetVar<VarDesc>(int32_t idx) const {
+  CHECK_LT(idx, VarsSize()) << "idx >= vars.size()";
+  return &vars_[idx];
+}
+
 template <>
 VarDesc* BlockDesc::AddVar<VarDesc>() {
   vars_.emplace_back();
@@ -36,6 +42,12 @@ OpDesc* BlockDesc::GetOp<OpDesc>(int32_t idx) {
   return &ops_[idx];
 }
 
+template <>
+OpDesc const* BlockDesc::GetOp<OpDesc>(int32_t idx) const {
+  CHECK_LT(idx, OpsSize()) << "idx >= ops.size()";
+  return &ops_[idx];
+}
+
 template <>
 OpDesc* BlockDesc::AddOp<OpDesc>() {
   ops_.emplace_back();
lite/model_parser/general/block_desc.h
@@ -46,12 +46,10 @@ class BlockDesc : public BlockDescAPI {
   template <typename T>
   T* GetVar(int32_t idx);
 
-  std::vector<VarDesc>& GetVars() { return vars_; }
-
   template <typename T>
-  T const* GetVar(int32_t idx) const {
-    return GetVar<T>(idx);
-  }
+  T const* GetVar(int32_t idx) const;
+
+  std::vector<VarDesc>& GetVars() { return vars_; }
 
   template <typename T>
   T* AddVar();
@@ -64,9 +62,7 @@ class BlockDesc : public BlockDescAPI {
   T* GetOp(int32_t idx);
 
   template <typename T>
-  T const* GetOp(int32_t idx) const {
-    return GetOp<T>(idx);
-  }
+  T const* GetOp(int32_t idx) const;
 
   template <typename T>
   T* AddOp();
lite/model_parser/general/program_desc.cc
@@ -24,6 +24,12 @@ BlockDesc* ProgramDesc::GetBlock<BlockDesc>(int32_t idx) {
   return &blocks_[idx];
 }
 
+template <>
+BlockDesc const* ProgramDesc::GetBlock<BlockDesc>(int32_t idx) const {
+  CHECK_LT(idx, BlocksSize()) << "idx >= blocks.size()";
+  return &blocks_[idx];
+}
+
 template <>
 BlockDesc* ProgramDesc::AddBlock<BlockDesc>() {
   blocks_.emplace_back();
lite/model_parser/general/program_desc.h
@@ -30,6 +30,13 @@ class ProgramDesc : public ProgramDescAPI {
  public:
   ProgramDesc() = default;
 
+  void CopyFrom(const ProgramDesc& other) {
+    version_ = other.Version();
+    blocks_ = other.blocks();
+  }
+
+  const std::vector<BlockDesc>& blocks() const { return blocks_; }
+
   size_t BlocksSize() const override { return blocks_.size(); }
   void ClearBlocks() override { blocks_.clear(); }
@@ -37,12 +44,10 @@ class ProgramDesc : public ProgramDescAPI {
   template <typename T>
   T* GetBlock(int32_t idx);
 
-  std::vector<BlockDesc>& GetBlocks() { return blocks_; }
-
   template <typename T>
-  T const* GetBlock(int32_t idx) const {
-    return GetBlock<T>(idx);
-  }
+  T const* GetBlock(int32_t idx) const;
+
+  std::vector<BlockDesc>& GetBlocks() { return blocks_; }
 
   template <typename T>
   T* AddBlock();
lite/model_parser/model_parser.cc
@@ -176,7 +176,7 @@ void LoadCombinedParamsPb(const std::string &path,
                           const cpp::ProgramDesc &cpp_prog,
                           bool params_from_memory) {
   CHECK(scope);
-  auto prog = cpp_prog;
+  auto &prog = cpp_prog;
   auto &main_block_desc = *prog.GetBlock<cpp::BlockDesc>(0);
 
   // Get vars
@@ -310,7 +310,7 @@ void SaveModelPb(const std::string &model_dir,
 void SaveCombinedParamsPb(const std::string &path,
                           const lite::Scope &exec_scope,
                           const cpp::ProgramDesc &cpp_prog) {
-  auto prog = cpp_prog;
+  auto &prog = cpp_prog;
   auto &main_block_desc = *prog.GetBlock<cpp::BlockDesc>(0);
 
   // Get vars
@@ -526,7 +526,7 @@ void SaveCombinedParamsNaive(const std::string &path,
   naive_buffer::proto::CombinedParamsDesc pt_desc(&table);
   naive_buffer::CombinedParamsDesc desc(&pt_desc);
 
-  auto prog = cpp_prog;
+  auto &prog = cpp_prog;
   auto &main_block_desc = *prog.GetBlock<cpp::BlockDesc>(0);
 
   // set unique_var_names to avoid saving shared params repeatedly
   std::set<std::string> unique_var_names;
@@ -681,7 +681,7 @@ void LoadCombinedParamsNaive(const std::string &path,
   }
 
   // Check all params loaded
-  auto prog = cpp_prog;
+  auto &prog = cpp_prog;
   auto &main_block_desc = *prog.GetBlock<cpp::BlockDesc>(0);
   for (size_t i = 0; i < main_block_desc.VarsSize(); ++i) {
     auto &var = *main_block_desc.GetVar<cpp::VarDesc>(i);
lite/model_parser/naive_buffer/block_desc.h
@@ -55,11 +55,6 @@ class BlockDesc : public BlockDescAPI {
   template <typename T>
   T* GetVar(int32_t idx);
 
-  template <typename T>
-  T const* GetVar(int32_t idx) const {
-    return GetVar<T>(idx);
-  }
-
   template <typename T>
   T* AddVar();
@@ -70,11 +65,6 @@ class BlockDesc : public BlockDescAPI {
   template <typename T>
   T* GetOp(int32_t idx);
 
-  template <typename T>
-  T const* GetOp(int32_t idx) const {
-    return GetOp<T>(idx);
-  }
-
   template <typename T>
   T* AddOp();
lite/model_parser/naive_buffer/program_desc.h
@@ -45,11 +45,6 @@ class ProgramDesc : public ProgramDescAPI {
   template <typename T>
   T* GetBlock(int32_t idx);
 
-  template <typename T>
-  T const* GetBlock(int32_t idx) const {
-    return GetBlock<T>(idx);
-  }
-
   template <typename T>
   T* AddBlock();
lite/operators/activation_grad_ops.cc
@@ -41,15 +41,11 @@ bool ActivationGradOp::AttachImpl(const cpp::OpDesc& opdesc,
   if (opdesc.HasInput("X")) {
     auto X_name = opdesc.Input("X").front();
     param_.X = GetVar<lite::Tensor>(scope, X_name);
   } else {
     param_.X = param_.X_grad;
   }
 
   if (opdesc.HasInput("Out")) {
     auto Out_name = opdesc.Input("Out").front();
     param_.Out = GetVar<lite::Tensor>(scope, Out_name);
   } else {
     param_.Out = param_.Out_grad;
   }
 
   return true;
@@ -60,3 +56,5 @@ bool ActivationGradOp::AttachImpl(const cpp::OpDesc& opdesc,
 }  // namespace paddle
 
 REGISTER_LITE_OP(square_grad, paddle::lite::operators::ActivationGradOp);
+REGISTER_LITE_OP(relu_grad, paddle::lite::operators::ActivationGradOp);
+REGISTER_LITE_OP(tanh_grad, paddle::lite::operators::ActivationGradOp);
lite/operators/deformable_conv_op.h
@@ -83,7 +83,7 @@ class DeformableConvOpLite : public OpLite {
     param_.conv_param.filter =
         scope->FindVar(Filter)->GetMutable<lite::Tensor>();
     param_.conv_param.strides = op_desc.GetAttr<std::vector<int>>("strides");
-    auto paddings = op_desc.GetAttr<std::vector<int>>("paddings");
+    std::vector<int> paddings = op_desc.GetAttr<std::vector<int>>("paddings");
     auto dilations = op_desc.GetAttr<std::vector<int>>("dilations");
     param_.conv_param.groups = op_desc.GetAttr<int>("groups");
     param_.conv_param.dilations = std::make_shared<std::vector<int>>(dilations);
lite/operators/max_pool_with_index_op.h
@@ -54,7 +54,7 @@ class MaxPoolWithIndexOpLite : public OpLite {
     param_.ksize = op_desc.GetAttr<std::vector<int>>("ksize");
     param_.global_pooling = op_desc.GetAttr<bool>("global_pooling");
     param_.strides = op_desc.GetAttr<std::vector<int>>("strides");
-    auto paddings = op_desc.GetAttr<std::vector<int>>("paddings");
+    std::vector<int> paddings = op_desc.GetAttr<std::vector<int>>("paddings");
     if (op_desc.HasAttr("adaptive")) {
       param_.adaptive = op_desc.GetAttr<bool>("adaptive");
     }
lite/tests/kernels/activation_grad_compute_test.cc
@@ -12,7 +12,7 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
 
-#include "lite/kernels/arm/activation_grad_compute.h"
+#include "lite/kernels/host/activation_grad_compute.h"
 #include <gtest/gtest.h>
 #include "lite/core/op_registry.h"
 #include "lite/kernels/arm/activation_compute.h"
@@ -20,13 +20,11 @@
 namespace paddle {
 namespace lite {
 namespace kernels {
-namespace arm {
 
 using param_t = operators::ActivationParam;
 using grad_param_t = operators::ActivationGradParam;
-using kernel_t = SquareCompute;
-using grad_kernel_t = SquareGradCompute;
 
+template <class kernel_t, class grad_kernel_t>
 class ActivationGradTester {
  public:
   explicit ActivationGradTester(DDim dims) : dims_(dims) {}
@@ -71,22 +69,28 @@ class ActivationGradTester {
   void run_backward(grad_param_t* param,
                     grad_kernel_t* kernel,
                     const std::vector<float>& in_vec,
+                    const std::vector<float>& out_vec,
                     const std::vector<float>& out_grad_vec,
                     float* in_grad_vec) {
     Tensor x;
+    Tensor out;
     Tensor x_grad;
     Tensor out_grad;
     x.Resize(dims_);
+    out.Resize(dims_);
     x_grad.Resize(dims_);
     out_grad.Resize(dims_);
     auto* x_data = x.mutable_data<float>();
+    auto* out_data = out.mutable_data<float>();
     auto* out_grad_data = out_grad.mutable_data<float>();
     for (int i = 0; i < dims_.production(); i++) {
       x_data[i] = in_vec[i];
+      out_data[i] = out_vec[i];
       out_grad_data[i] = out_grad_vec[i];
     }
     param->X = &x;
+    param->Out = &out;
     param->X_grad = &x_grad;
     param->Out_grad = &out_grad;
     kernel->SetParam(*param);
@@ -102,7 +106,9 @@ class ActivationGradTester {
     std::vector<float> x(dims_.production());
     std::vector<float> out(dims_.production());
     for (int i = 0; i < dims_.production(); i++) {
-      x[i] = 1.0 * static_cast<float>(i % 128) * 0.3f - 1.1;
+      x[i] = static_cast<float>(i % 3 - 2.0) / 2.0 * 0.333 +
+             static_cast<float>(i % 19 - 10.0) / 10.0 * 0.333 +
+             static_cast<float>(i % 39 - 20.0) / 20.0 * 0.333 + 0.001213;
     }
     this->run_forward(&param_, &kernel_, x, out.data());
@@ -120,7 +126,8 @@ class ActivationGradTester {
     for (int i = 0; i < dims_.production(); i++) {
       out_grad[i] = 1.0;
     }
-    this->run_backward(&grad_param_, &grad_kernel_, x, out_grad, x_grad.data());
+    this->run_backward(
+        &grad_param_, &grad_kernel_, x, out, out_grad, x_grad.data());
     for (int i = 0; i < dims_.production(); i++) {
       EXPECT_NEAR(x_grad[i], (out_delta[i] - out[i]) / delta, max_grad_delta);
@@ -137,31 +144,58 @@ class ActivationGradTester {
   grad_param_t grad_param_;
 };
 
-void TestNormalCase(DDim dims) {
-  std::unique_ptr<ActivationGradTester> tester(new ActivationGradTester(dims));
+void TestSquareGrad(DDim dims) {
+  LOG(INFO) << "Test Square grad";
+  std::unique_ptr<ActivationGradTester<arm::SquareCompute, host::SquareGradCompute>>
+      tester(new ActivationGradTester<arm::SquareCompute, host::SquareGradCompute>(dims));
+  tester->prepare_kernel();
+  float delta = 0.001;
+  float max_grad_delta = 0.005;
+  tester->check_grad(delta, max_grad_delta);
+}
+
+void TestReluGrad(DDim dims) {
+  LOG(INFO) << "Test Relu grad";
+  std::unique_ptr<ActivationGradTester<arm::ReluCompute, host::ReluGradCompute>>
+      tester(new ActivationGradTester<arm::ReluCompute, host::ReluGradCompute>(dims));
+  tester->prepare_kernel();
+  float delta = 0.001;
+  float max_grad_delta = 0.005;
+  tester->check_grad(delta, max_grad_delta);
+}
+
+void TestTanhGrad(DDim dims) {
+  LOG(INFO) << "Test Tanh grad";
+  std::unique_ptr<ActivationGradTester<arm::TanhCompute, host::TanhGradCompute>>
+      tester(new ActivationGradTester<arm::TanhCompute, host::TanhGradCompute>(dims));
   tester->prepare_kernel();
   float delta = 0.001;
   float max_grad_delta = 0.005;
   tester->check_grad(delta, max_grad_delta);
 }
 
-TEST(activation_grad_arm, compute) {
-  LOG(INFO) << "Test Square grad";
+TEST(activation_grad_host, compute) {
   DeviceInfo::Init();
-  for (auto n : {2}) {
-    for (auto c : {2}) {
-      for (auto h : {2}) {
-        for (auto w : {2}) {
-          TestNormalCase(DDim(std::vector<int64_t>({n, c, h, w})));
+  for (auto n : {2, 1}) {
+    for (auto c : {2, 9}) {
+      for (auto h : {2, 1}) {
+        for (auto w : {2, 10}) {
+          TestSquareGrad(DDim(std::vector<int64_t>({n, c, h, w})));
+          TestReluGrad(DDim(std::vector<int64_t>({n, c, h, w})));
+          TestTanhGrad(DDim(std::vector<int64_t>({n, c, h, w})));
         }
       }
     }
   }
 }
 
-}  // namespace arm
 }  // namespace kernels
 }  // namespace lite
 }  // namespace paddle
 USE_LITE_KERNEL(square, kARM, kFloat, kNCHW, def);
-USE_LITE_KERNEL(square_grad, kARM, kFloat, kNCHW, def);
+USE_LITE_KERNEL(square_grad, kHost, kFloat, kNCHW, def);
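The check_grad routine exercised above is a standard finite-difference check: with the incoming gradient set to all ones, the analytic gradient returned by the host kernel is compared against a one-sided numeric estimate built from two forward runs,

\frac{\partial \,\mathrm{out}_i}{\partial x_i}
  \approx \frac{f(x_i + \delta) - f(x_i)}{\delta},
\qquad \delta = 10^{-3},\ \text{tolerance} = 5\times 10^{-3},

which is exactly the EXPECT_NEAR(x_grad[i], (out_delta[i] - out[i]) / delta, max_grad_delta) assertion in the tester.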
lite/tests/kernels/elementwise_grad_compute_test.cc
@@ -215,18 +215,6 @@ class ElementwiseAddGradTester {
     fill_data_rand(y.data(), -1.f, 1.f, y_dims_.production());
 
     this->run_forward(&param_, &kernel_, x, y, out.data());
 
-    for (int i = 0; i < x_dims_.production(); i++) {
-      LOG(INFO) << "x_" << i << ": " << x[i];
-    }
-
-    for (int i = 0; i < y_dims_.production(); i++) {
-      LOG(INFO) << "y_" << i << ": " << y[i];
-    }
-
-    for (int i = 0; i < out_dims_.production(); i++) {
-      LOG(INFO) << "out_" << i << ": " << out[i];
-    }
-
     // backward
     std::vector<float> out_grad(out_dims_.production());
     std::vector<float> x_grad(x_dims_.production());
@@ -242,14 +230,6 @@ class ElementwiseAddGradTester {
                        x_grad.data(),
                        y_grad.data());
 
-    for (int i = 0; i < x_grad.size(); i++) {
-      LOG(INFO) << "x_grad_" << i << ": " << x_grad[i];
-    }
-
-    for (int i = 0; i < y_grad.size(); i++) {
-      LOG(INFO) << "y_grad_" << i << ": " << y_grad[i];
-    }
-
     // get numeric gradient
     std::vector<float> x_delta(x_dims_.production());
     std::vector<float> y_delta(y_dims_.production());
@@ -443,18 +423,6 @@ class ElementwiseSubGradTester {
     fill_data_rand(y.data(), -1.f, 1.f, y_dims_.production());
 
     this->run_forward(&param_, &kernel_, x, y, out.data());
 
-    for (int i = 0; i < x_dims_.production(); i++) {
-      LOG(INFO) << "x_" << i << ": " << x[i];
-    }
-
-    for (int i = 0; i < y_dims_.production(); i++) {
-      LOG(INFO) << "y_" << i << ": " << y[i];
-    }
-
-    for (int i = 0; i < out_dims_.production(); i++) {
-      LOG(INFO) << "out_" << i << ": " << out[i];
-    }
-
     // backward
     std::vector<float> out_grad(out_dims_.production());
     std::vector<float> x_grad(x_dims_.production());
@@ -470,14 +438,6 @@ class ElementwiseSubGradTester {
                        x_grad.data(),
                        y_grad.data());
 
-    for (int i = 0; i < x_grad.size(); i++) {
-      LOG(INFO) << "x_grad_" << i << ": " << x_grad[i];
-    }
-
-    for (int i = 0; i < y_grad.size(); i++) {
-      LOG(INFO) << "y_grad_" << i << ": " << y_grad[i];
-    }
-
     // get numeric gradient
     std::vector<float> x_delta(x_dims_.production());
     std::vector<float> y_delta(y_dims_.production());
lite/tests/kernels/sequence_conv_compute_test.cc
@@ -85,21 +85,31 @@ class SequenceConvComputeTester : public arena::TestCase {
     auto output_dims = output->dims();
     auto output_data = output->mutable_data<float>();
     std::vector<std::vector<float>> res;
-    if (contextStart_ == -2) {
+    if (contextStart_ == -2 && lod_.size() == 1 &&
+        lod_[0] == std::vector<uint64_t>({0, 4})) {
       res = {{-0.08867277f, -0.17257819f, -0.2564836f},
              {0.194508f, 0.05720823f, -0.08009153f},
              {0.73512584f, 0.5749428f, 0.41475973f},
              {0.5635012f, 0.49485126f, 0.42620137f}};
-    } else if (contextStart_ == -1) {
+    } else if (contextStart_ == -1 && lod_.size() == 1 &&
+               lod_[0] == std::vector<uint64_t>({0, 4})) {
       res = {{0.194508f, 0.05720823f, -0.08009153f},
             {0.73512584f, 0.5749428f, 0.41475973f},
             {0.5635012f, 0.49485126f, 0.42620137f},
            {0.2517162f, 0.23646072f, 0.22120519f}};
-    } else if (contextStart_ == 0) {
+    } else if (contextStart_ == 0 && lod_.size() == 1 &&
+               lod_[0] == std::vector<uint64_t>({0, 4})) {
       res = {{0.73512584f, 0.5749428f, 0.41475973f},
             {0.5635012f, 0.49485126f, 0.42620137f},
             {0.2517162f, 0.23646072f, 0.22120519f},
            {0.02574372f, 0.03337148f, 0.04099924f}};
+    } else if (contextStart_ == -1 && lod_.size() == 1 &&
+               lod_[0] == std::vector<uint64_t>({0, 2, 4})) {
+      res = {{0.194508, 0.05720823, -0.08009153},
+             {0.7093821, 0.57208234, 0.43478262},
+             {0.19450802, 0.17925248, 0.16399695},
+             {0.2517162, 0.23646072, 0.22120519}};
     } else {
       fprintf(stderr, "not supported contextStart_\n");
       exit(-1);
@@ -136,12 +146,25 @@ void TestNormalCase(Place place, float abs_error = 2e-5) {
   }
 }
 
+void TestBatchCase(Place place, float abs_error = 2e-5) {
+  std::vector<std::vector<uint64_t>> lod{{0, 2, 4}};
+  std::vector<int64_t> dims{4, 5};
+  std::vector<int> candidate_pad_idx{-1};
+  for (int pad_idx : candidate_pad_idx) {
+    std::unique_ptr<arena::TestCase> tester(new SequenceConvComputeTester(
+        place, "def", lod, DDim(dims), pad_idx, 1, 3, 3));
+    arena::Arena arena(std::move(tester), place, abs_error);
+    arena.TestPrecision();
+  }
+}
+
 TEST(sequence_conv, precision) {
 #ifdef LITE_WITH_ARM
   float abs_error = 2e-5;
   Place place(TARGET(kARM));
   TestNormalCase(place, abs_error);
+  TestBatchCase(place, abs_error);
 #endif
 }
lite/tools/build_android.sh
@@ -269,6 +269,7 @@ function main {
     if [ -z "$1" ]; then
         # compiling result contains light_api lib only, recommanded.
         make_tiny_publish_so $ARCH $TOOLCHAIN $ANDROID_STL
+        exit 0
     fi
 
     # Parse command line.
@@ -358,6 +359,7 @@ function main {
     done
     # compiling result contains light_api lib only, recommanded.
     make_tiny_publish_so
+    exit 0
 }
 
 main $@
lite/tools/build_bm.sh
@@ -43,7 +43,7 @@ function prepare_thirdparty {
         # clone bmlibs
         if [ ! -d ${workspace}/third-party/bmlibs ]; then
            git clone https://github.com/AnBaolei1984/bmlibs.git ${workspace}/third-party/bmlibs
        fi
    fi
 }
 # for code gen, a source file is generated after a test, but is dependended by some targets in cmake.
@@ -70,6 +70,13 @@ function build_bm {
   mkdir -p $build_dir
   cd $build_dir
 
+  if [ $TARGET_NAME == "BM1684" ]; then
+    BM_SDK_ROOT="$workspace/third-party/bmlibs/bm_sc5_libs"
+  else
+    BM_SDK_ROOT="$workspace/third-party/bmlibs/bm_sc3_libs"
+  fi
+  echo $BM_SDK_ROOT
+
   prepare_workspace
   cmake .. \
       ${CMAKE_COMMON_OPTIONS} \
@@ -95,17 +102,7 @@ function main {
       case $i in
         --target_name=*)
            TARGET_NAME="${i#*=}"
            shift
            ;;
-        #--bm_sdk_root=*)
-        #    BM_SDK_ROOT="${i#*=}"
-        #    shift
-        #    ;;
        bm)
            build_bm
            shift
            ;;
        *)
            # unknown option
            print_usage
            exit 1
lite/tools/build_ios.sh
@@ -152,6 +152,7 @@ function main {
         esac
     done
     make_ios $ARCH
+    exit 0
 }
 
 main $@
lite/tools/check_api_approvals.sh
@@ -71,7 +71,7 @@ function CheckLibSizeDiff() {
     if [ $diff_size -gt 10485 ]; then
         echo_line="Your PR has increased basic inference lib for $diff_size Byte, exceeding maximum requirement of 10485 Byte (0.01M). You need Superjomn's (Yunchunwei) approval or you can contact DannyIsFunny(HuZhiqiang).\n"
         echo "****************"
-        echo -e "${echo_list[@]}"
+        echo -e "${echo_line[@]}"
         echo "There is an approved errors."
         echo "****************"
         exit 1