Commit b0de9835 (PaddlePaddle / PaddleDetection)
Authored June 27, 2019 by Shixiaowei02

Merge branch 'incubate/lite' of http://10.87.145.36/inference/paddlelite into shixiaowei02/calib

Parents: 5e215995, b184667e

Showing 37 changed files with 1120 additions and 100 deletions (+1120, −100)
Changed files:
  paddle/fluid/inference/analysis/dot.h                          +1    −1
  paddle/fluid/lite/CMakeLists.txt                               +6    −3
  paddle/fluid/lite/api/CMakeLists.txt                           +14   −6
  paddle/fluid/lite/api/cxx_api_bin.cc                           +17   −9
  paddle/fluid/lite/api/cxx_api_bin_int8.cc                      +77   −0
  paddle/fluid/lite/api/model_optimize_tool.cc                   +76   −0
  paddle/fluid/lite/api/paddle_api_test.cc                       +2    −0
  paddle/fluid/lite/api/paddle_place.h                           +1    −1
  paddle/fluid/lite/api/paddle_use_kernels.h                     +7    −0
  paddle/fluid/lite/api/paddle_use_ops.h                         +4    −0
  paddle/fluid/lite/api/paddle_use_passes.h                      +2    −0
  paddle/fluid/lite/core/CMakeLists.txt                          +1    −1
  paddle/fluid/lite/core/mir/CMakeLists.txt                      +2    −0
  paddle/fluid/lite/core/mir/fusion/fc_fuser.cc                  +1    −1
  paddle/fluid/lite/core/mir/precision_cast_transform_pass.cc    +166  −0
  paddle/fluid/lite/core/mir/precision_cast_transform_pass.h     +61   −0
  paddle/fluid/lite/core/mir/static_kernel_pick_pass.cc          +51   −7
  paddle/fluid/lite/core/mir/trans_weigths_pass.cc               +171  −0
  paddle/fluid/lite/core/mir/trans_weigths_pass.h                +85   −0
  paddle/fluid/lite/core/optimizer.h                             +21   −18
  paddle/fluid/lite/gen_code/CMakeLists.txt                      +2    −0
  paddle/fluid/lite/gen_code/gen_code.cc                         +15   −0
  paddle/fluid/lite/gen_code/paddle_code_generator.cc            +49   −0
  paddle/fluid/lite/kernels/arm/CMakeLists.txt                   +2    −1
  paddle/fluid/lite/kernels/arm/calib_compute.cc                 +29   −22
  paddle/fluid/lite/kernels/arm/calib_compute.h                  +15   −2
  paddle/fluid/lite/kernels/arm/calib_compute_test.cc            +2    −1
  paddle/fluid/lite/kernels/arm/conv_compute.cc                  +29   −4
  paddle/fluid/lite/kernels/arm/fc_compute.cc                    +115  −2
  paddle/fluid/lite/kernels/arm/fc_compute.h                     +23   −0
  paddle/fluid/lite/operators/calib_op.cc                        +2    −6
  paddle/fluid/lite/operators/calib_op_test.cc                   +3    −5
  paddle/fluid/lite/operators/conv_op.h                          +11   −0
  paddle/fluid/lite/operators/fc_op.h                            +11   −0
  paddle/fluid/lite/operators/op_params.h                        +11   −8
  paddle/fluid/lite/tools/build.sh                               +33   −0
  paddle/fluid/lite/utils/string.h                               +2    −2

paddle/fluid/inference/analysis/dot.h
@@ -25,7 +25,7 @@
 #include <vector>
 // #include "paddle/fluid/lite/utils/logging.h"
 // #ifndef LITE_WITH_LIGHT_WEIGHT_FRAMEWORK
-#include <glog/logging.h>
+#include <glog/logging.h>  // NOLINT
 // #endif

 namespace paddle {

paddle/fluid/lite/CMakeLists.txt
@@ -104,7 +104,7 @@ file(WRITE ${offline_lib_registry_file} "") # clean
 # LIGHT_DEPS: LITE_WITH_LIGHT_WEIGHT_FRAMEWORK
 # HVY_DEPS: NOT LITE_WITH_LIGHT_WEIGHT_FRAMEWORK
 function(lite_cc_library TARGET)
-  set(options "")
+  set(options STATIC static SHARED shared)
   set(oneValueArgs "")
   set(multiValueArgs SRCS DEPS X86_DEPS CUDA_DEPS ARM_DEPS PROFILE_DEPS LIGHT_DEPS
     HVY_DEPS ARGS)
@@ -120,8 +120,11 @@ function(lite_cc_library TARGET)
             LIGHT_DEPS ${args_LIGHT_DEPS}
             HVY_DEPS ${args_HVY_DEPS}
             )
-  cc_library(${TARGET} SRCS ${args_SRCS} DEPS ${deps} ${args_DEPS})
+  if (${args_SHARED} OR ${args_shared})
+    cc_library(${TARGET} SRCS ${args_SRCS} DEPS ${deps} ${args_DEPS} SHARED)
+  else()
+    cc_library(${TARGET} SRCS ${args_SRCS} DEPS ${deps} ${args_DEPS})
+  endif()
   # collect targets need to compile for lite
   add_dependencies(lite_compile_deps ${TARGET})

paddle/fluid/lite/api/CMakeLists.txt
@@ -100,14 +100,12 @@ lite_cc_test(test_apis_lite SRCS apis_test.cc
     ARGS --model_dir=${LITE_MODEL_DIR}/lite_naive_model
     --optimized_model=${LITE_MODEL_DIR}/lite_naive_model_opt SERIAL)

 lite_cc_library(cxx_api_impl_lite SRCS cxx_api_impl.cc DEPS cxx_api_lite)
 lite_cc_library(light_api_impl_lite SRCS light_api_impl.cc DEPS light_api_lite)
 lite_cc_library(paddle_api_lite SRCS paddle_api.cc DEPS op_params_lite)
-lite_cc_library(paddle_api_full SRCS paddle_api.cc DEPS cxx_api_impl_lite light_api_impl_lite)
-lite_cc_library(paddle_api_light SRCS paddle_api.cc DEPS light_api_impl_lite)
+lite_cc_library(paddle_api_full SRCS cxx_api_impl.cc DEPS cxx_api_lite paddle_api_lite light_api_lite)
+lite_cc_library(paddle_api_light SRCS light_api_impl.cc DEPS light_api_lite paddle_api_lite)
-lite_cc_test(test_paddle_api_lite SRCS paddle_api_test.cc DEPS cxx_api_lite light_api_lite paddle_api_full
+lite_cc_test(test_paddle_api_lite SRCS paddle_api_test.cc DEPS paddle_api_full paddle_api_light
     ${ops_lite}
     ARM_DEPS ${arm_kernels}
     X86_DEPS ${x86_kernels}
@@ -120,3 +118,13 @@ endif()
             #X86_DEPS operator
             #DEPS light_api_lite model_parser_lite target_wrapper_host mir_passes
             #ARM_DEPS ${arm_kernels})
+lite_cc_binary(cxx_api_lite_bin SRCS cxx_api_bin_int8.cc
+   DEPS
+   cxx_api_lite
+   model_parser_lite
+   target_wrapper_host
+   mir_passes
+   ${ops_lite} ${host_kernels}
+   ARM_DEPS ${arm_kernels})
+
+lite_cc_binary(model_optimize_tool SRCS model_optimize_tool.cc DEPS paddle_api_full)

paddle/fluid/lite/api/cxx_api_bin.cc
@@ -29,16 +29,18 @@ double time_diff(Time t1, Time t2) {
   return counter.count() / 1000.0;
 }

-void Run(const char* model_dir, int repeat, int thread_num) {
+void Run(const char* model_dir, int repeat) {
 #ifdef LITE_WITH_ARM
   DeviceInfo::Init();
-  DeviceInfo::Global().SetRunMode(LITE_POWER_HIGH, thread_num);
 #endif
   lite::Predictor predictor;
-  std::vector<Place> valid_places({Place{TARGET(kHost), PRECISION(kFloat)},
-                                   Place{TARGET(kARM), PRECISION(kFloat)}});
+  std::vector<Place> valid_places({
+      Place{TARGET(kHost), PRECISION(kFloat)},
+      Place{TARGET(kARM), PRECISION(kFloat)},
+      Place{TARGET(kARM), PRECISION(kInt8)},
+  });

-  predictor.Build(model_dir, Place{TARGET(kARM), PRECISION(kFloat)},
+  predictor.Build(model_dir, Place{TARGET(kARM), PRECISION(kInt8)},
                   valid_places);

   auto* input_tensor = predictor.GetInput(0);
@@ -48,8 +50,6 @@ void Run(const char* model_dir, int repeat, int thread_num) {
     data[i] = 1;
   }

-  for (int i = 0; i < 10; i++) predictor.Run();
-
   auto time1 = time();
   for (int i = 0; i < repeat; i++) predictor.Run();
   auto time2 = time();
@@ -68,8 +68,8 @@ void Run(const char* model_dir, int repeat, int thread_num) {
 }  // namespace paddle

 int main(int argc, char** argv) {
-  CHECK_EQ(argc, 4) << "usage: ./cmd <model_dir> <repeat> <thread_num>";
-  paddle::lite::Run(argv[1], std::stoi(argv[2]), std::stoi(argv[3]));
+  CHECK_EQ(argc, 3) << "usage: ./cmd <model_dir> <repeat>";
+  paddle::lite::Run(argv[1], std::stoi(argv[2]));

   return 0;
 }
@@ -93,13 +93,18 @@ USE_LITE_OP(fake_dequantize_max_abs);
 USE_LITE_KERNEL(feed, kHost, kAny, kAny, def);
 USE_LITE_KERNEL(fetch, kHost, kAny, kAny, def);
+USE_LITE_OP(calib);

 #ifdef LITE_WITH_ARM
 USE_LITE_KERNEL(fc, kARM, kFloat, kNCHW, def);
+USE_LITE_KERNEL(fc, kARM, kInt8, kNCHW, int8out);
+USE_LITE_KERNEL(fc, kARM, kInt8, kNCHW, fp32out);
 USE_LITE_KERNEL(mul, kARM, kFloat, kNCHW, def);
 USE_LITE_KERNEL(scale, kARM, kFloat, kNCHW, def);
 USE_LITE_KERNEL(conv2d, kARM, kFloat, kNCHW, def);
+USE_LITE_KERNEL(conv2d, kARM, kInt8, kNCHW, int8_out);
+USE_LITE_KERNEL(conv2d, kARM, kInt8, kNCHW, fp32_out);
 USE_LITE_KERNEL(batch_norm, kARM, kFloat, kNCHW, def);
 USE_LITE_KERNEL(relu, kARM, kFloat, kNCHW, def);
 USE_LITE_KERNEL(depthwise_conv2d, kARM, kFloat, kNCHW, def);
@@ -107,6 +112,9 @@ USE_LITE_KERNEL(pool2d, kARM, kFloat, kNCHW, def);
 USE_LITE_KERNEL(elementwise_add, kARM, kFloat, kNCHW, def);
 USE_LITE_KERNEL(softmax, kARM, kFloat, kNCHW, def);
+USE_LITE_KERNEL(calib, kARM, kInt8, kNCHW, fp32_to_int8);
+USE_LITE_KERNEL(calib, kARM, kInt8, kNCHW, int8_to_fp32);

 // USE_LITE_KERNEL(feed, kARM, kAny, kAny, def);
 // USE_LITE_KERNEL(fetch, kARM, kAny, kAny, def);
 #endif  // LITE_WITH_ARM

paddle/fluid/lite/api/cxx_api_bin_int8.cc (new file, mode 100644)
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/fluid/lite/api/cxx_api.h"
#include <chrono> // NOLINT
#include "paddle/fluid/lite/api/paddle_use_kernels.h"
#include "paddle/fluid/lite/api/paddle_use_ops.h"
#include "paddle/fluid/lite/api/paddle_use_passes.h"
#include "paddle/fluid/lite/core/op_registry.h"

namespace paddle {
namespace lite {

using Time = decltype(std::chrono::high_resolution_clock::now());
Time time() { return std::chrono::high_resolution_clock::now(); }
double time_diff(Time t1, Time t2) {
  typedef std::chrono::microseconds ms;
  auto diff = t2 - t1;
  ms counter = std::chrono::duration_cast<ms>(diff);
  return counter.count() / 1000.0;
}

void Run(const char* model_dir, int repeat) {
#ifdef LITE_WITH_ARM
  DeviceInfo::Init();
#endif
  lite::Predictor predictor;
  std::vector<Place> valid_places({
      Place{TARGET(kHost), PRECISION(kFloat)},
      Place{TARGET(kARM), PRECISION(kFloat)},
      Place{TARGET(kARM), PRECISION(kInt8)},
  });

  predictor.Build(model_dir, Place{TARGET(kARM), PRECISION(kInt8)},
                  valid_places);

  auto* input_tensor = predictor.GetInput(0);
  input_tensor->Resize(DDim(std::vector<DDim::value_type>({1, 3, 224, 224})));
  auto* data = input_tensor->mutable_data<float>();
  for (int i = 0; i < input_tensor->dims().production(); i++) {
    data[i] = 1;
  }

  auto time1 = time();
  for (int i = 0; i < repeat; i++) predictor.Run();
  auto time2 = time();
  std::cout << " predict cost: " << time_diff(time1, time2) / repeat << "ms"
            << std::endl;

  auto* out = predictor.GetOutput(0);
  LOG(INFO) << out << " memory size " << out->data_size();
  LOG(INFO) << "out " << out->data<float>()[0];
  LOG(INFO) << "out " << out->data<float>()[1];
  LOG(INFO) << "dims " << out->dims();
  LOG(INFO) << "out data size: " << out->data_size();
}

}  // namespace lite
}  // namespace paddle

int main(int argc, char** argv) {
  CHECK_EQ(argc, 3) << "usage: ./cmd <model_dir> <repeat>";
  paddle::lite::Run(argv[1], std::stoi(argv[2]));

  return 0;
}

paddle/fluid/lite/api/model_optimize_tool.cc (new file, mode 100644)
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include <gflags/gflags.h>
#include <glog/logging.h>
#include <gtest/gtest.h>
#include "paddle/fluid/lite/api/paddle_api.h"
#include "paddle/fluid/lite/api/paddle_use_kernels.h"
#include "paddle/fluid/lite/api/paddle_use_ops.h"
#include "paddle/fluid/lite/api/paddle_use_passes.h"
#include "paddle/fluid/lite/utils/string.h"

DEFINE_string(model_dir, "", "path of the model");
DEFINE_string(optimize_out, "", "path of the output optimized model");
DEFINE_string(valid_targets, "ARM",
              "The targets this model optimized for, should be one of (arm, "
              "opencl, x86), splitted by space");
DEFINE_bool(int8_mode, false, "Support Int8 quantitative mode");

namespace paddle {
namespace lite_api {

void Main() {
  lite_api::CxxConfig config;
  config.set_model_dir(FLAGS_model_dir);

  std::vector<Place> valid_places;
  auto target_reprs = lite::Split(FLAGS_valid_targets, " ");
  for (auto& target_repr : target_reprs) {
    if (target_repr == "arm") {
      valid_places.emplace_back(TARGET(kARM));
    } else if (target_repr == "opencl") {
      valid_places.emplace_back(TARGET(kOpenCL));
    } else if (target_repr == "x86") {
      valid_places.emplace_back(TARGET(kX86));
    } else {
      LOG(FATAL) << lite::string_format(
          "Wrong target '%s' found, please check the command flag "
          "'valid_targets'",
          target_repr.c_str());
    }
  }
  CHECK(!valid_places.empty())
      << "At least one target should be set, should set the "
         "command argument 'valid_targets'";
  if (FLAGS_int8_mode) {
    LOG(WARNING) << "Int8 mode is only support by ARM target";
    valid_places.push_back(Place{TARGET(kARM), PRECISION(kInt8)});
    config.set_preferred_place(Place{TARGET(kARM), PRECISION(kInt8)});
  }
  config.set_valid_places(valid_places);

  auto predictor = lite_api::CreatePaddlePredictor(config);
  predictor->SaveOptimizedModel(FLAGS_optimize_out);
}

}  // namespace lite_api
}  // namespace paddle

int main(int argc, char** argv) {
  google::ParseCommandLineFlags(&argc, &argv, false);
  paddle::lite_api::Main();
  return 0;
}

paddle/fluid/lite/api/paddle_api_test.cc
@@ -56,6 +56,7 @@ TEST(CxxApi, run) {
   predictor->SaveOptimizedModel(FLAGS_model_dir + ".opt2");
 }

+#ifdef LITE_WITH_LIGHT_WEIGHT_FRAMEWORK
 TEST(LightApi, run) {
   lite_api::MobileConfig config;
   config.set_model_dir(FLAGS_model_dir + ".opt2");
@@ -79,6 +80,7 @@ TEST(LightApi, run) {
   EXPECT_NEAR(out[0], 50.2132, 1e-3);
   EXPECT_NEAR(out[1], -28.8729, 1e-3);
 }
+#endif

 }  // namespace lite_api
 }  // namespace paddle

paddle/fluid/lite/api/paddle_place.h
@@ -83,7 +83,7 @@ struct Place {
   int16_t device{0};  // device ID

   Place() = default;
-  Place(TargetType target, PrecisionType precision,
+  Place(TargetType target, PrecisionType precision = PRECISION(kFloat),
         DataLayoutType layout = DATALAYOUT(kNCHW), int16_t device = 0)
       : target(target), precision(precision), layout(layout), device(device) {}

paddle/fluid/lite/api/paddle_use_kernels.h
@@ -38,6 +38,13 @@ USE_LITE_KERNEL(relu, kARM, kFloat, kNCHW, def);
 USE_LITE_KERNEL(transpose, kARM, kFloat, kNCHW, def);
 USE_LITE_KERNEL(transpose2, kARM, kFloat, kNCHW, def);
 USE_LITE_KERNEL(batch_norm, kARM, kFloat, kNCHW, def);
+USE_LITE_KERNEL(calib, kARM, kInt8, kNCHW, fp32_to_int8);
+USE_LITE_KERNEL(calib, kARM, kInt8, kNCHW, int8_to_fp32);
+USE_LITE_KERNEL(conv2d, kARM, kInt8, kNCHW, int8_out);
+USE_LITE_KERNEL(conv2d, kARM, kInt8, kNCHW, fp32_out);
+USE_LITE_KERNEL(fc, kARM, kInt8, kNCHW, int8out);
+USE_LITE_KERNEL(fc, kARM, kInt8, kNCHW, fp32out);
 #endif

 #ifdef LITE_WITH_X86

paddle/fluid/lite/api/paddle_use_ops.h
@@ -38,3 +38,7 @@ USE_LITE_OP(batch_norm)
 USE_LITE_OP(fusion_elementwise_sub_activation)
 USE_LITE_OP(transpose)
 USE_LITE_OP(transpose2)
+USE_LITE_OP(fake_quantize_moving_average_abs_max);
+USE_LITE_OP(fake_dequantize_max_abs);
+USE_LITE_OP(calib);

paddle/fluid/lite/api/paddle_use_passes.h
@@ -31,3 +31,5 @@ USE_MIR_PASS(identity_scale_eliminate_pass);
 USE_MIR_PASS(lite_conv_elementwise_add_activation_fuse_pass);
 USE_MIR_PASS(lite_elementwise_add_activation_fuse_pass);
 USE_MIR_PASS(lite_quant_dequant_fuse_pass);
+USE_MIR_PASS(precision_cast_transform_pass);
+USE_MIR_PASS(trans_weight_pass);

paddle/fluid/lite/core/CMakeLists.txt
@@ -31,7 +31,7 @@ cc_library(types_lite SRCS types.cc)
 cc_library(type_system SRCS type_system.cc DEPS ${tensor_lite} target_wrapper_lite)

 lite_cc_library(program_lite SRCS program.cc
-            DEPS op_lite kernel_lite compatible_pb_lite model_parser_lite
+            DEPS op_lite kernel_lite compatible_pb_lite model_parser_lite ${ops_lite}
             HVY_DEPS framework_proto
             PROFILE_DEPS basic_profiler_lite)
 cc_library(optimizer_lite SRCS optimizer.cc DEPS mir_pass_manager model_parser_lite program_lite)

paddle/fluid/lite/core/mir/CMakeLists.txt
@@ -18,10 +18,12 @@ cc_library(mir_passes
     static_kernel_pick_pass.cc
     variable_place_inference_pass.cc
     type_target_transform_pass.cc
+    precision_cast_transform_pass.cc
     io_copy_kernel_pick_pass.cc
     graph_visualize_pass.cc
     generate_program_pass.cc
     argument_type_display_pass.cc
+    trans_weigths_pass.cc
    demo_pass.cc
    runtime_context_assign_pass.cc
    DEPS mir_pass types_lite context_lite ${mir_fusers})

paddle/fluid/lite/core/mir/fusion/fc_fuser.cc
@@ -60,7 +60,7 @@ void FcFuser::InsertNewNode(SSAGraph* graph, const key2nodes_t& matched) {
 }

 cpp::OpDesc FcFuser::GenOpDesc(const key2nodes_t& matched) {
-  cpp::OpDesc op_desc;
+  cpp::OpDesc op_desc = *matched.at("mul")->stmt()->op_info();
   op_desc.SetType("fc");
   op_desc.SetInput("Input", {matched.at("x")->arg()->name});
   op_desc.SetInput("W", {matched.at("W")->arg()->name});

paddle/fluid/lite/core/mir/precision_cast_transform_pass.cc (new file, mode 100644)
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/fluid/lite/core/mir/precision_cast_transform_pass.h"
#include <list>
#include <memory>
#include <string>
#include <utility>
#include <vector>
#include "paddle/fluid/lite/core/mir/graph_visualize_pass.h"
#include "paddle/fluid/lite/core/mir/pass_registry.h"

namespace paddle {
namespace lite {
namespace mir {

void PrecisionCastPass::Apply(const std::unique_ptr<SSAGraph>& graph) {
  // Start from inputs of the graph, those should have place set.
  std::list<Node*> nodes;
  for (auto& node : graph->mutable_nodes()) {
    nodes.push_back(&node);
  }

  for (auto& node : nodes) {
    if (!node->IsStmt()) continue;
    auto inlinks = node->inlinks;
    for (auto* in : inlinks) {
      ComplementInputs(graph.get(), node, in);
    }
  }
  VLOG(3) << "\n" << Visualize(graph.get());
}

void PrecisionCastPass::ComplementInputs(SSAGraph* graph, Node* inst_node,
                                         Node* in) {
  // If this input is out of date.
  if (inst_node->inlinks.end() ==
      std::find(inst_node->inlinks.begin(), inst_node->inlinks.end(), in))
    return;

  CHECK(inst_node->IsStmt());
  auto& inst = inst_node->AsStmt();
  CHECK(in->IsRoleSet());
  CHECK(in->IsArg());
  auto in_arg_name = in->AsArg().name;
  std::string tmp;
  CHECK(inst.op_info()->GetInputArgname(in_arg_name, &tmp));
  auto decl_arg_type = inst.picked_kernel().GetInputDeclType(tmp);
  CHECK(in->AsArg().type);
  LOG(INFO) << inst.picked_kernel().name();
  // if (!in->AsArg().is_weight && !PrecisionCompatibleTo(*in->AsArg().type,
  // *decl_arg_type)) {
  if (!PrecisionCompatibleTo(*in->AsArg().type, *decl_arg_type)) {
    LOG(INFO) << "found Target unmatched tensor: " << in->AsArg().name
              << " for kernel " << inst.op()->DebugString() << " "
              << *in->AsArg().type << " -> " << *decl_arg_type;
    // Add an Cast instruction to make the input compatible with other dist.
    AddCastInst(*in->AsArg().type, *decl_arg_type, in, graph, inst_node,
                graph->valid_places());
  }
}

void PrecisionCastPass::AddCastInst(const Type& from, const Type& to, Node* in,
                                    SSAGraph* graph, Node* inst_node,
                                    const std::vector<Place>& valid_places) {
  CHECK(!valid_places.empty()) << "valid_place should be set";

  // var -> new_transform_op -> new_var -> inst
  // So there will be a new Argument node and a new Cast Statement Node.
  CHECK(in->IsArg());
  auto node_id = [&] { return graph->nodes().size(); };
  auto cast_op_output_name =
      in->AsArg().name + "/trans/" + std::to_string(node_id());
  auto* cast_op_output_arg = graph->NewArgumentNode(cast_op_output_name);
  auto* cast_inst = graph->NewInstructNode();

  // create Op and kernels.
  auto cast_op = LiteOpRegistry::Global().Create("calib");
  CHECK(cast_op) << "create op [" << cast_op << "] failed";

  // Create the new var manually.
  inst_node->AsStmt().op()->scope()->Var(cast_op_output_name);

  // Create Calib Instruction.
  cpp::OpDesc op_desc;
  op_desc.SetType("calib");
  op_desc.SetInput("Input", {in->AsArg().name});
  op_desc.SetOutput("Out", {cast_op_output_name});
  CHECK(inst_node->AsStmt().op_info()->HasAttr("input_scale"));
  op_desc.SetAttr("scale",
                  inst_node->AsStmt().op_info()->GetAttr<float>("input_scale"));

  cast_op->Attach(op_desc, inst_node->AsStmt().op()->scope());
  auto kernels = cast_op->CreateKernels(valid_places);
  std::vector<std::unique_ptr<KernelBase>> selected_kernels;
  bool is_found = false;
  for (auto& kernel : kernels) {
    const Type* in_arg_ty = kernel->GetInputDeclType("Input");
    const Type* out_arg_ty = kernel->GetOutputDeclType("Out");
    if (in_arg_ty->precision() == from.precision() &&
        out_arg_ty->precision() == to.precision()) {
      is_found = true;
      selected_kernels.emplace_back(std::move(kernel));
      // we pick the kernel
      cast_inst->AsStmt("calib", std::move(selected_kernels), cast_op);
      break;
    }
  }

  CHECK(is_found) << "Can't find a Cast kernel for Cast op: " << from << ":"
                  << in->AsArg().name << "->" << to << ":"
                  << inst_node->AsStmt().op_info()->Type();

  // Remove the old link
  RemoveDirectedLink(in, inst_node);

  // Update the original instruction OpDesc.
  // Update its input to the io_copy_output_name

  // Add new link, var -> new_inst, new_inst->newarg, newarg->inst
  DirectedLink(in, cast_inst);
  DirectedLink(cast_inst, cast_op_output_arg);
  DirectedLink(cast_op_output_arg, inst_node);

  // reset opdesc and update kernel information
  UpdateInputTo(inst_node->AsStmt().op()->mutable_op_info(), in->AsArg().name,
                cast_op_output_name);

  // recreate the op
  auto original_selected_kernel =
      std::move(inst_node->AsStmt().kernels().front());
  auto updated_op_info = *inst_node->AsStmt().mutable_op_info();

  inst_node->AsStmt().ResetOp(updated_op_info, graph->valid_places());
  inst_node->AsStmt().kernels().clear();
  inst_node->AsStmt().kernels().emplace_back(
      std::move(original_selected_kernel));
  for (auto& kernel : inst_node->AsStmt().kernels()) {
    LOG(INFO) << "kernel info: " << kernel->name();
    inst_node->AsStmt().op()->AttachKernel(kernel.get());
  }
  graph->CheckValid();
}

void PrecisionCastPass::SetValidPlaces(const std::vector<Place>& valid_places) {
  CHECK(!valid_places.empty());
  valid_places_ = valid_places;
}

}  // namespace mir
}  // namespace lite
}  // namespace paddle

REGISTER_MIR_PASS(precision_cast_transform_pass,
                  paddle::lite::mir::PrecisionCastPass);

paddle/fluid/lite/core/mir/precision_cast_transform_pass.h (new file, mode 100644)
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <memory>
#include <string>
#include <vector>
#include "paddle/fluid/lite/core/mir/pass.h"
#include "paddle/fluid/lite/core/op_registry.h"

namespace paddle {
namespace lite {
namespace mir {

static void UpdateInputTo(cpp::OpDesc* desc, const std::string& from,
                          const std::string& to) {
  for (auto& item : *desc->mutable_inputs()) {
    for (auto& input : item.second) {
      if (input == from) {
        input = to;
      }
    }
  }
}

/*
 * The pass complement the necessary instruction to make data
 * transferring or transformation between different places.
 */
class PrecisionCastPass : public ProgramPass {
 public:
  void Apply(const std::unique_ptr<SSAGraph>& graph) override;

  void ComplementInputs(SSAGraph* graph, Node* inst_node, Node* in);

  void AddCastInst(const Type& from, const Type& to, Node* in, SSAGraph* graph,
                   Node* inst_node, const std::vector<Place>& valid_places);

  void SetValidPlaces(const std::vector<Place>& valid_places);

  const std::vector<Place>& valid_places() const { return valid_places_; }

 private:
  std::vector<Place> valid_places_;
};

}  // namespace mir
}  // namespace lite
}  // namespace paddle
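
For readers skimming the new pass, the UpdateInputTo helper above only renames one input edge of an op description after the calib node is spliced in. The following is a minimal standalone sketch of that same renaming logic; the types and the RenameInput name are simplified for illustration and are not the Lite cpp::OpDesc API:

// Standalone sketch: the renaming that UpdateInputTo performs, written
// against a plain std::map instead of cpp::OpDesc (illustration only).
#include <iostream>
#include <map>
#include <string>
#include <vector>

using OpInputs = std::map<std::string, std::vector<std::string>>;

void RenameInput(OpInputs* inputs, const std::string& from,
                 const std::string& to) {
  for (auto& item : *inputs) {        // each input slot ("Input", "Filter", ...)
    for (auto& name : item.second) {  // each variable wired into that slot
      if (name == from) name = to;    // redirect the edge to the new variable
    }
  }
}

int main() {
  OpInputs inputs{{"Input", {"conv_out"}}, {"Filter", {"conv_w"}}};
  // After a calib node is inserted, the consumer reads the casted variable.
  RenameInput(&inputs, "conv_out", "conv_out/trans/7");
  std::cout << inputs["Input"][0] << "\n";  // prints: conv_out/trans/7
  return 0;
}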

paddle/fluid/lite/core/mir/static_kernel_pick_pass.cc
@@ -33,9 +33,12 @@ void StaticKernelPickPass::Apply(const std::unique_ptr<SSAGraph>& graph) {
       << "kernel_pick_factors should be specified first";
   CHECK(graph) << "graph not valid";
   // sort kernels by the factors.
   for (auto& node : graph->mutable_nodes()) {
     if (!node.IsStmt()) continue;
     auto& instruct = node.AsStmt();
+    // Get candidate kernels
     std::vector<std::pair<size_t, std::unique_ptr<KernelBase>>> scored;
     CHECK(!instruct.kernels().empty()) << "No kernels found for "
                                        << instruct.op_type();
@@ -43,15 +46,56 @@ void StaticKernelPickPass::Apply(const std::unique_ptr<SSAGraph>& graph) {
       size_t score = KernelGrade(*kernel);
       scored.emplace_back(score, std::move(kernel));
     }
     std::sort(scored.begin(), scored.end(), KernelScoreCmp);
-    // Move kernel back
-    // Just keep a single best kernel.
-    // TODO(Superjomn) reconsider this.
     instruct.kernels().clear();
-    instruct.kernels().emplace_back(std::move(scored.front().second));
-    VLOG(2) << "pick " << instruct.kernels().front()->name();
+
+    if (!instruct.op_info()->HasAttr("enable_int8")) {
+      // Move kernel back
+      // Just keep a single best kernel.
+      // TODO(Superjomn) reconsider this.
+      instruct.kernels().emplace_back(std::move(scored.front().second));
+      VLOG(2) << "pick " << instruct.kernels().front()->name();
+    } else {
+      bool out_type_int8 = true;
+      // Only if all ops linked to this op output has enable_int8 attr,
+      // then the op output type is int8, or fp32.
+      for (auto* out_n : node.outlinks) {
+        CHECK(out_n->IsArg());
+        for (auto* tmp_op : out_n->outlinks) {
+          CHECK(tmp_op->IsStmt());
+          if (!tmp_op->AsStmt().op_info()->HasAttr("enable_int8")) {
+            out_type_int8 = false;
+            break;
+          }
+        }
+        if (!out_type_int8) break;
+      }
+      // According to the out type, we pick the kernel.
+      auto output_arguments = instruct.op_info()->OutputArgumentNames();
+      for (auto& candidate : scored) {
+        bool all_output_type_match = true;
+        auto expect_output_type =
+            out_type_int8 ? PRECISION(kInt8) : PRECISION(kFloat);
+        for (auto& arg_name : output_arguments) {
+          const Type* out_arg_ty = candidate.second->GetOutputDeclType(arg_name);
+          if (out_arg_ty->precision() != expect_output_type) {
+            all_output_type_match = false;
+          }
+        }
+        if (all_output_type_match) {
+          instruct.kernels().emplace_back(std::move(candidate.second));
+          VLOG(2) << "pick " << instruct.kernels().front()->name();
+          break;
+        }
+      }
+      CHECK(!instruct.kernels().empty()) << "No kernels found for "
+                                         << instruct.op_type();
+    }
   }
 }

paddle/fluid/lite/core/mir/trans_weigths_pass.cc (new file, mode 100644)
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/fluid/lite/core/mir/trans_weigths_pass.h"
#include <list>
#include <string>
#include <utility>
#include <vector>
#include "paddle/fluid/lite/core/mir/graph_visualize_pass.h"
#include "paddle/fluid/lite/core/mir/pass_registry.h"

namespace paddle {
namespace lite {
namespace mir {

void TransWeightPass::Apply(const std::unique_ptr<SSAGraph>& graph) {
  // Start from inputs of the graph, those should have place set.
  std::list<Node*> nodes;
  for (auto& node : graph->mutable_nodes()) {
    nodes.push_back(&node);
  }

  for (auto& node : nodes) {
    if (!node->IsStmt()) continue;
    auto& instruct = node->AsStmt();
    if (!instruct.op_info()->HasAttr("enable_int8")) {
      continue;
    }
    std::vector<std::string> output_arg_names =
        instruct.op_info()->output_argnames();
    CHECK(output_arg_names.size() == 1)
        << "Currently, the op that supports int8 supports only one output";
    // After static kernel select pass, there is only one kernel here.
    const Type* out_arg_ty =
        instruct.kernels()[0]->GetOutputDeclType(output_arg_names[0]);
    auto out_precision = out_arg_ty->precision();
    bool out_type_int8 = out_precision == PRECISION(kInt8) ? true : false;
    float in_scale, out_scale;
    in_scale = instruct.op_info()->GetAttr<float>("input_scale");
    // Get next input op's input_scale
    if (out_type_int8) {
      LOG(INFO) << "output_type_int8";
      auto out_node = node->outlinks.front();
      CHECK(out_node->IsArg());
      auto one_adj_op_node = out_node->outlinks.front();
      CHECK(one_adj_op_node->IsStmt());
      auto& one_adj_instruct = one_adj_op_node->AsStmt();
      CHECK(one_adj_instruct.op_info()->HasAttr("enable_int8"));
      CHECK(one_adj_instruct.op_info()->HasAttr("input_scale"));
      out_scale = one_adj_instruct.op_info()->GetAttr<float>("input_scale");
      instruct.mutable_op_info()->SetAttr("output_scale", out_scale);
    } else {
      LOG(INFO) << "output_type_fp32";
    }
    std::string op_type = instruct.op_info()->Type();
    std::vector<float> weight_scale;
    auto* scope = instruct.op()->scope();

    if (op_type == "depthwise_conv2d" || op_type == "conv2d") {
      std::string weight_var_name = instruct.op_info()->Input("Filter").front();
      auto conv_weight_t =
          scope->FindVar(weight_var_name)->GetMutable<lite::Tensor>();
      // till now, all the weight should be float32 type
      float* conv_weight_d = conv_weight_t->mutable_data<float>();
      int64_t axis_size = conv_weight_t->dims()[0];
      int64_t inner_size = conv_weight_t->data_size() / axis_size;
      weight_scale =
          GetWeightScale(conv_weight_d, axis_size, inner_size, 127.0);

      Tensor temp_tensor;
      temp_tensor.Resize(conv_weight_t->dims());
      int8_t* temp_data = temp_tensor.mutable_data<int8_t>();
      FP32ToInt8(conv_weight_d, temp_data, weight_scale.data(), axis_size, 1,
                 inner_size);
      conv_weight_t->CopyDataFrom(temp_tensor);
    } else if (op_type == "fc" || op_type == "mul") {
      std::string weight_arg_name = "W";
      if (op_type == "mul") weight_arg_name = "Y";
      std::string weight_var_name =
          instruct.op_info()->Input(weight_arg_name).front();

      auto fc_weight_t =
          scope->FindVar(weight_var_name)->GetMutable<lite::Tensor>();
      // till now, all the weight should be float32 type
      float* fc_weight_d = fc_weight_t->mutable_data<float>();

      CHECK_EQ(fc_weight_t->dims().size(), 2UL);

      int64_t h = fc_weight_t->dims()[0];
      int64_t w = fc_weight_t->data_size() / h;
      Tensor trans_w_t, int8_temp_t;
      trans_w_t.CopyDataFrom(*fc_weight_t);
      float* trans_w_data = trans_w_t.mutable_data<float>();
      int8_temp_t.Resize(fc_weight_t->dims());
      int8_t* int8_temp_data = int8_temp_t.mutable_data<int8_t>();
      // trans weight for calc the weight scale.
      for (int i = 0; i < h; i++) {
        for (int j = 0; j < w; j++) {
          trans_w_data[i * w + j] = fc_weight_d[j * h + i];
        }
      }
      weight_scale = GetWeightScale(trans_w_data, w, h, 127.0);

      int8_t* fc_weight_int8_d = fc_weight_t->mutable_data<int8_t>();
      FP32ToInt8(trans_w_data, int8_temp_data, weight_scale.data(), w, 1, h);
      // Retrans back
      for (int i = 0; i < w; i++) {
        for (int j = 0; j < h; j++) {
          fc_weight_int8_d[i * h + j] = int8_temp_data[j * w + i];
        }
      }
    }

    // Convert fp32 bias to int8 bias
    std::vector<std::string> input_arg_names =
        instruct.op_info()->InputArgumentNames();
    if (std::find(input_arg_names.begin(), input_arg_names.end(), "Bias") !=
            input_arg_names.end() &&
        instruct.op_info()->Input("Bias").size() > 0) {
      std::string bias_var_name = instruct.op_info()->Input("Bias").front();
      auto bias_weight_t =
          scope->FindVar(bias_var_name)->GetMutable<lite::Tensor>();
      float* bias_weight_d = bias_weight_t->mutable_data<float>();

      Tensor temp_bias;
      temp_bias.Resize(bias_weight_t->dims());
      int* temp_bias_data = temp_bias.mutable_data<int>();
      TransFP32BiasToInt32(bias_weight_d, temp_bias_data,
                           temp_bias.data_size(), in_scale, weight_scale);
      bias_weight_t->CopyDataFrom(temp_bias);
    }

    instruct.mutable_op_info()->SetAttr("weight_scale", weight_scale);

    auto original_selected_kernel = std::move(instruct.kernels().front());
    auto updated_op_info = *instruct.mutable_op_info();

    instruct.ResetOp(updated_op_info, graph->valid_places());
    instruct.kernels().clear();
    instruct.kernels().emplace_back(std::move(original_selected_kernel));
    for (auto& kernel : instruct.kernels()) {
      LOG(INFO) << "kernel info: " << kernel->name();
      instruct.op()->AttachKernel(kernel.get());
    }
  }
}

void TransWeightPass::SetValidPlaces(const std::vector<Place>& valid_places) {
  CHECK(!valid_places.empty());
  valid_places_ = valid_places;
}

}  // namespace mir
}  // namespace lite
}  // namespace paddle

REGISTER_MIR_PASS(trans_weight_pass, paddle::lite::mir::TransWeightPass);

paddle/fluid/lite/core/mir/trans_weigths_pass.h (new file, mode 100644)
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <cmath>
#include <memory>
#include <string>
#include <vector>
#include "paddle/fluid/lite/arm/math/saturate.h"
#include "paddle/fluid/lite/core/mir/pass.h"
#include "paddle/fluid/lite/core/op_registry.h"

namespace paddle {
namespace lite {
namespace mir {

/*
 * IoComplementPass complement the necessary instruction to make data
 * transferring or transformation between different places.
 */
class TransWeightPass : public ProgramPass {
 public:
  void Apply(const std::unique_ptr<SSAGraph>& graph) override;

  std::vector<float> GetWeightScale(float* in_data, int64_t axis_size,
                                    int64_t inner_size, float scale_factor) {
    std::vector<float> scale_out(axis_size);
    auto calc_abs_max = [&](float* in, size_t data_size) -> float {
      float max_data = 0.0;
      for (size_t i = 0; i < data_size; i++) {
        if (max_data < std::abs(in[i])) max_data = std::abs(in[i]);
      }
      return max_data;
    };
    for (int c = 0; c < axis_size; c++) {
      float* part_in = in_data + c * inner_size;
      scale_out[c] = calc_abs_max(part_in, inner_size) / scale_factor;
    }
    return scale_out;
  }

  void FP32ToInt8(const float* din, int8_t* dout, const float* scale,
                  int axis_size, int64_t outer_size, int64_t inner_size) {
    int loop_size = axis_size * outer_size;
    for (int i = 0; i < loop_size; ++i) {
      float inv_scale = 1.f / scale[i % axis_size];
      for (int j = 0; j < inner_size; ++j) {
        dout[j] = static_cast<int8_t>(std::roundf(din[j] * inv_scale));
      }
      dout += inner_size;
      din += inner_size;
    }
  }

  void TransFP32BiasToInt32(const float* din, int* dout, size_t data_size,
                            float in_scale, std::vector<float> weight_scale) {
    CHECK(data_size == weight_scale.size())
        << "Bias data size should be equal toe the weight scale data size.";
    for (size_t i = 0; i < data_size; i++) {
      dout[i] =
          static_cast<int>(std::roundf(din[i] / in_scale / weight_scale[i]));
    }
  }

  void SetValidPlaces(const std::vector<Place>& valid_places);

  const std::vector<Place>& valid_places() const { return valid_places_; }

 private:
  std::vector<Place> valid_places_;
};

}  // namespace mir
}  // namespace lite
}  // namespace paddle
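
As a rough illustration of the quantization helpers declared above (GetWeightScale and FP32ToInt8), the following self-contained sketch computes per-output-channel abs-max scales with a 127.0 scale factor and rounds the weights to int8. It mirrors the arithmetic only, not the Lite tensor API; the sample values are made up:

// Standalone sketch of the per-channel weight quantization used by
// TransWeightPass: scale[c] = max|w_c| / 127, q = round(w / scale[c]).
#include <cmath>
#include <cstdint>
#include <cstdio>
#include <vector>

int main() {
  const int axis_size = 2;   // output channels
  const int inner_size = 3;  // weights per channel
  std::vector<float> w = {0.5f, -1.0f, 0.25f,   // channel 0, abs max 1.0
                          2.0f, -0.5f, 1.5f};   // channel 1, abs max 2.0

  // GetWeightScale: per-channel abs-max divided by the scale factor (127.0).
  std::vector<float> scale(axis_size);
  for (int c = 0; c < axis_size; ++c) {
    float max_abs = 0.f;
    for (int i = 0; i < inner_size; ++i)
      max_abs = std::fmax(max_abs, std::fabs(w[c * inner_size + i]));
    scale[c] = max_abs / 127.0f;
  }

  // FP32ToInt8: quantize each weight with its channel scale.
  for (int c = 0; c < axis_size; ++c) {
    for (int i = 0; i < inner_size; ++i) {
      float v = w[c * inner_size + i];
      int8_t q = static_cast<int8_t>(std::roundf(v / scale[c]));
      std::printf("channel %d: %+.2f -> %4d (scale %.6f)\n", c, v, q, scale[c]);
    }
  }
  return 0;
}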

paddle/fluid/lite/core/optimizer.h
@@ -49,34 +49,37 @@ class Optimizer {
     InitTargetTypeTransformPass();
     if (passes.empty()) {
       RunPasses(std::vector<std::string>{{
           "lite_quant_dequant_fuse_pass",  //
           "lite_conv_bn_fuse_pass",        //
 // This pass is disabled to force some opencl kernels selected for final
 // running, otherwise, they will be fused to ARM fusion kernels, and the OpenCL
 // devices will be discarded.
 // TODO(Superjomn) Refine the fusion related design to select fusion kernels for
 // devices automatically.
 #ifndef LITE_WITH_OPENCL
           "lite_conv_elementwise_add_activation_fuse_pass",  //
 #endif
           "lite_fc_fuse_pass",              //
           "identity_scale_eliminate_pass",  //
 #ifdef LITE_WITH_LIGHT_WEIGHT_FRAMEWORK
 #ifndef LITE_WITH_OPENCL
           "lite_elementwise_add_activation_fuse_pass",  //
 #endif
 #endif
-          "static_kernel_pick_pass",        //
-          "variable_place_inference_pass",  //
-          "argument_type_display_pass",     //
-          "type_target_transform_pass",     //
-          "variable_place_inference_pass",  //
-          "argument_type_display_pass",     //
-          "io_copy_kernel_pick_pass",       //
-          "variable_place_inference_pass",  //
-          "runtime_context_assign_pass",    //
-      }});
+          "static_kernel_pick_pass",        //
+          "variable_place_inference_pass",  //
+          "argument_type_display_pass",     //
+          "type_target_transform_pass",     //
+          "variable_place_inference_pass",  //
+          "argument_type_display_pass",     //
+          "io_copy_kernel_pick_pass",       //
+          "variable_place_inference_pass",  //
+          "precision_cast_transform_pass",  //
+          "argument_type_display_pass",     //
+          "trans_weight_pass",              //
+          "runtime_context_assign_pass",    //
+          "graph_visualze"}});
     } else {
       RunPasses(passes);
     }
@@ -134,7 +137,7 @@ class Optimizer {
     for (auto& x : passes) {
       LOG(INFO) << "== Running pass " << x;
       auto* pass = mir::PassManager::Global().LookUp(x);
-      CHECK(pass);
+      CHECK(pass) << "Can not find pass: " << x;
       pass->Apply(graph_);
     }
   }

paddle/fluid/lite/gen_code/CMakeLists.txt
@@ -26,3 +26,5 @@ if (NOT LITE_WITH_LIGHT_WEIGHT_FRAMEWORK)
     add_dependencies(__generated_code__ test_gen_code_lite)
     add_dependencies(__generated_code__ extern_lite_download_lite_naive_model_tar_gz)
 endif()
+
+lite_cc_binary(paddle_code_generator SRCS paddle_code_generator.cc DEPS model_parser_lite gen_code_lite)

paddle/fluid/lite/gen_code/gen_code.cc
@@ -111,6 +111,15 @@ void Module::AddOpDescHelper(const std::string &op_id,
         return std::to_string(desc.GetAttr<bool>(name));
       case AttrType::STRING:
         return "\"" + desc.GetAttr<std::string>(name) + "\"";
+      case AttrType::FLOATS: {
+        auto vals = desc.GetAttr<std::vector<float>>(name);
+        return "{" + Join(vals, ",") + "}";
+      }
+      case AttrType::INTS: {
+        auto vals = desc.GetAttr<std::vector<int>>(name);
+        return "{" + Join(vals, ",") + "}";
+      }
       case AttrType::STRINGS: {
         std::vector<std::string> tmp;
         auto vals = desc.GetAttr<std::vector<std::string>>(name);
@@ -137,8 +146,12 @@ void Module::AddOpDescHelper(const std::string &op_id,
         return "bool";
       case AttrType::STRING:
         return "std::string";
+      case AttrType::FLOATS:
+        return "std::vector<float>";
       case AttrType::STRINGS:
         return "std::vector<std::string>";
+      case AttrType::INTS:
+        return "std::vector<int>";
       default:
         LOG(FATAL) << "Unsupported attribute type: " << static_cast<int>(type);
     }
@@ -160,6 +173,8 @@ void Module::AddOp(const cpp::OpDesc &op) {
   auto op_name = OpUniqueName();
   AddOpDescHelper(op_name, op);

+  LOG(INFO) << "add op " << op_name;
+
   Line(string_format("// Create Op: %s", op.Type().c_str()));

   Line(string_format("auto %s = lite::LiteOpRegistry::Global().Create(\"%s\");",

paddle/fluid/lite/gen_code/paddle_code_generator.cc (new file, mode 100644)
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include <gflags/gflags.h>
#include "paddle/fluid/lite/gen_code/gen_code.h"
#include "paddle/fluid/lite/model_parser/model_parser.h"

DEFINE_string(optimized_model, "", "");
DEFINE_string(generated_code_file, "__generated_code__.cc", "");

namespace paddle {
namespace lite {
namespace gencode {

void GenCode(const std::string &model_dir, const std::string &out_file) {
  lite::Scope scope;
  framework::proto::ProgramDesc desc;
  LoadModel(model_dir, &scope, &desc);

  ProgramCodeGenerator codegen(desc, scope);

  std::ofstream file(out_file);

  file << codegen.GenCode();

  file.close();
}

}  // namespace gencode
}  // namespace lite
}  // namespace paddle

int main(int argc, char** argv) {
  google::ParseCommandLineFlags(&argc, &argv, false);
  paddle::lite::gencode::GenCode(FLAGS_optimized_model,
                                 FLAGS_generated_code_file);
  return 0;
}

paddle/fluid/lite/kernels/arm/CMakeLists.txt
@@ -31,7 +31,7 @@ lite_cc_test(test_mul_compute_arm SRCS mul_compute_test.cc DEPS mul_compute_arm)
 lite_cc_test(test_split_compute_arm SRCS split_compute_test.cc DEPS split_compute_arm)
 lite_cc_test(test_concat_compute_arm SRCS concat_compute_test.cc DEPS concat_compute_arm)
 lite_cc_test(test_dropout_compute_arm SRCS dropout_compute_test.cc DEPS dropout_compute_arm)
-lite_cc_test(test_calib_compute_arm SRCS calib_compute_test.cc DEPS calib_compute_arm)
+# lite_cc_test(test_calib_compute_arm SRCS calib_compute_test.cc DEPS calib_compute_arm)
 lite_cc_test(test_transpose_compute_arm SRCS transpose_compute_test.cc DEPS transpose_compute_arm)

 set(arm_kernels
@@ -48,6 +48,7 @@ set(arm_kernels
     concat_compute_arm
     dropout_compute_arm
     transpose_compute_arm
+    calib_compute_arm
     )

 set(arm_kernels "${arm_kernels}" CACHE INTERNAL "arm kernels")

paddle/fluid/lite/kernels/arm/calib_compute.cc
@@ -23,26 +23,24 @@ namespace lite {
 namespace kernels {
 namespace arm {

-void CalibCompute::Run() {
+void CalibComputeFp32ToInt8::Run() {
   auto& param = this->Param<operators::CalibParam>();
-  std::vector<float> scale = {param.in_scale};
-  if (param.in_dtype == PRECISION(kFloat) &&
-      param.out_dtype == PRECISION(kInt8)) {
-    const auto* din = param.input->data<float>();
-    auto* dout = param.output->mutable_data<signed char>();
-    lite::arm::math::fp32_to_int8(din, dout, scale.data(), 1, 1,
-                                  param.input->numel());
-    return;
-  }
-  if (param.in_dtype == PRECISION(kInt8) &&
-      param.out_dtype == PRECISION(kFloat)) {
-    const auto* din = param.input->data<signed char>();
-    auto* dout = param.output->mutable_data<float>();
-    lite::arm::math::int8_to_fp32(din, dout, scale.data(), 1, 1,
-                                  param.input->numel());
-    return;
-  }
-  LOG(FATAL) << "Unsupport Dtype.";
+  std::vector<float> scale = {param.scale};
+  const auto* din = param.input->data<float>();
+  auto* dout = param.output->mutable_data<signed char>();
+  lite::arm::math::fp32_to_int8(din, dout, scale.data(), 1, 1,
+                                param.input->numel());
+  return;
+}
+
+void CalibComputeInt8ToFp32::Run() {
+  auto& param = this->Param<operators::CalibParam>();
+  const auto* din = param.input->data<signed char>();
+  std::vector<float> scale = {param.scale};
+  auto* dout = param.output->mutable_data<float>();
+  lite::arm::math::int8_to_fp32(din, dout, scale.data(), 1, 1,
+                                param.input->numel());
+  return;
 }

 }  // namespace arm
@@ -51,7 +49,16 @@ void CalibCompute::Run() {
 }  // namespace paddle

 REGISTER_LITE_KERNEL(calib, kARM, kInt8, kNCHW,
-                     paddle::lite::kernels::arm::CalibCompute, def)
-    .BindInput("Input", {LiteType::GetTensorTy(TARGET(kARM))})
-    .BindOutput("Out", {LiteType::GetTensorTy(TARGET(kARM))})
+                     paddle::lite::kernels::arm::CalibComputeFp32ToInt8,
+                     fp32_to_int8)
+    .BindInput("Input",
+               {LiteType::GetTensorTy(TARGET(kARM), PRECISION(kFloat))})
+    .BindOutput("Out",
+                {LiteType::GetTensorTy(TARGET(kARM), PRECISION(kInt8))})
+    .Finalize();
+
+REGISTER_LITE_KERNEL(calib, kARM, kInt8, kNCHW,
+                     paddle::lite::kernels::arm::CalibComputeInt8ToFp32,
+                     int8_to_fp32)
+    .BindInput("Input",
+               {LiteType::GetTensorTy(TARGET(kARM), PRECISION(kInt8))})
+    .BindOutput("Out",
+                {LiteType::GetTensorTy(TARGET(kARM), PRECISION(kFloat))})
     .Finalize();

paddle/fluid/lite/kernels/arm/calib_compute.h
@@ -21,13 +21,26 @@ namespace lite {
 namespace kernels {
 namespace arm {

-class CalibCompute : public KernelLite<TARGET(kARM), PRECISION(kInt8)> {
+class CalibComputeFp32ToInt8
+    : public KernelLite<TARGET(kARM), PRECISION(kInt8)> {
  public:
   using param_t = operators::CalibParam;

   void Run() override;

-  ~CalibCompute() override{};
+  ~CalibComputeFp32ToInt8() override{};
+
+ private:
+};
+
+class CalibComputeInt8ToFp32
+    : public KernelLite<TARGET(kARM), PRECISION(kInt8)> {
+ public:
+  using param_t = operators::CalibParam;
+
+  void Run() override;
+
+  ~CalibComputeInt8ToFp32() override{};

  private:
 };

paddle/fluid/lite/kernels/arm/calib_compute_test.cc
@@ -146,4 +146,5 @@ TEST(calib_arm, int8_to_fp32) {
 }  // namespace lite
 }  // namespace paddle

-USE_LITE_KERNEL(calib, kARM, kInt8, kNCHW, def);
+USE_LITE_KERNEL(calib, kARM, kInt8, kNCHW, int8_to_fp32);
+USE_LITE_KERNEL(calib, kARM, kInt8, kNCHW, fp32_to_int8);

paddle/fluid/lite/kernels/arm/conv_compute.cc
@@ -123,13 +123,16 @@ void ConvComputeInt8<Ptype_out>::PrepareForRun() {
   // weigth is int8 and bias is int32 so do not need trans
   if (param.groups == ic && ic == oc && kps_equal && no_dilation && flag_dw) {
-    impl_ = new lite::arm::math::DepthwiseConvInt8<Ptype_out>;
-    VLOG(3) << "DepthwiseConv Int8";
+    // impl_ = new lite::arm::math::DepthwiseConvInt8<Ptype_out>;
+    impl_ = new lite::arm::math::GemmLikeConvInt8<Ptype_out>;
+    VLOG(3) << "Run DepthwiseConv Int8";
   } else if (param.groups == 1 && kw == 3 && (sw == 1 || sw == 2) &&
              kps_equal && no_dilation) {
-    impl_ = new lite::arm::math::DirectConvInt8<Ptype_out>;
     VLOG(3) << "Run DirectConv Int8";
+    impl_ = new lite::arm::math::GemmLikeConvInt8<Ptype_out>;
+    // impl_ = new lite::arm::math::DirectConvInt8<Ptype_out>;
   } else {
-    VLOG(3) << "GemmLikeConvInt8";
+    VLOG(3) << "Run GemmLikeConvInt8";
     impl_ = new lite::arm::math::GemmLikeConvInt8<Ptype_out>;
   }
@@ -189,3 +192,25 @@ REGISTER_LITE_KERNEL(
     .BindOutput("Output",
                 {LiteType::GetTensorTy(TARGET(kARM), PRECISION(kFloat))})
     .Finalize();
+
+REGISTER_LITE_KERNEL(depthwise_conv2d, kARM, kInt8, kNCHW,
+                     paddle::lite::kernels::arm::ConvComputeInt8<PRECISION(kInt8)>,
+                     int8_out)
+    .BindInput("Input", {LiteType::GetTensorTy(TARGET(kARM), PRECISION(kInt8))})
+    .BindInput("Bias", {LiteType::GetTensorTy(TARGET(kARM), PRECISION(kInt32))})
+    .BindInput("Filter", {LiteType::GetTensorTy(TARGET(kARM), PRECISION(kInt8))})
+    .BindOutput("Output",
+                {LiteType::GetTensorTy(TARGET(kARM), PRECISION(kInt8))})
+    .Finalize();
+
+REGISTER_LITE_KERNEL(depthwise_conv2d, kARM, kInt8, kNCHW,
+                     paddle::lite::kernels::arm::ConvComputeInt8<PRECISION(kFloat)>,
+                     fp32_out)
+    .BindInput("Input", {LiteType::GetTensorTy(TARGET(kARM), PRECISION(kInt8))})
+    .BindInput("Bias", {LiteType::GetTensorTy(TARGET(kARM), PRECISION(kInt32))})
+    .BindInput("Filter", {LiteType::GetTensorTy(TARGET(kARM), PRECISION(kInt8))})
+    .BindOutput("Output",
+                {LiteType::GetTensorTy(TARGET(kARM), PRECISION(kFloat))})
+    .Finalize();
paddle/fluid/lite/kernels/arm/fc_compute.cc
浏览文件 @
b0de9835
...
...
@@ -14,9 +14,13 @@
#include "paddle/fluid/lite/kernels/arm/fc_compute.h"
#include <vector>
#include "paddle/fluid/lite/api/paddle_place.h"
#include "paddle/fluid/lite/arm/math/funcs.h"
#include "paddle/fluid/lite/arm/math/gemm_prepacked_int8.h"
#include "paddle/fluid/lite/arm/math/gemv_arm_int8.h"
#include "paddle/fluid/lite/core/op_registry.h"
#include "paddle/fluid/lite/core/type_system.h"
namespace
paddle
{
namespace
lite
{
namespace
kernels
{
...
...
@@ -71,8 +75,8 @@ void FcCompute::Run() {
auto
&
ctx
=
this
->
ctx_
->
template
As
<
ARMContext
>();
if
(
m_
>
1
)
{
float
*
packed_in
=
static_cast
<
float
*>
(
ctx
.
workspace_data
<
float
>
())
+
ctx
.
l2_cache_size
()
/
sizeof
(
float
);
float
*
packed_in
=
ctx
.
workspace_data
<
float
>
()
+
ctx
.
l2_cache_size
()
/
sizeof
(
float
);
lite
::
arm
::
math
::
prepackA
(
packed_in
,
i_data
,
k_
,
0
,
m_
,
0
,
k_
,
false
,
&
ctx
);
lite
::
arm
::
math
::
sgemm_prepack
(
packed_in
,
w_data
,
b_data
,
o_data
,
m_
,
n_
,
k_
,
false
,
false
,
false
,
&
ctx
);
...
...
@@ -89,6 +93,97 @@ void FcCompute::Run() {
}
}
template <PrecisionType Ptype_out>
void FcComputeInt8<Ptype_out>::PrepareForRun() {
  auto& param = this->Param<operators::FcParam>();
  auto x_dims = param.input->dims();
  auto w_dims = param.w->dims();
  auto& ctx = this->ctx_->template As<ARMContext>();
  if (!tmp_int32_out_) {
    tmp_int32_out_ = new Tensor;
    tmp_int32_out_->Resize(param.output->dims());
  }

  CHECK_GE(x_dims.size(), 2UL);
  CHECK_EQ(w_dims.size(), 2UL);
  CHECK_EQ(param.output->dims().size(), 2UL);

  this->m_ = x_dims.Slice(0, param.in_num_col_dims).production();
  this->k_ = x_dims.Slice(param.in_num_col_dims, x_dims.size()).production();
  this->n_ = w_dims[1];
  CHECK_EQ(k_, static_cast<int>(w_dims[0]));

  if (this->m_ == 1) {
    if (!this->transed_weight_) {
      this->transed_weight_ = new Tensor;
    }
    this->transed_weight_->Resize({this->n_, this->k_});
    const auto* w_data = param.w->template data<int8_t>();
    auto* t_data = this->transed_weight_->template mutable_data<int8_t>();
    int i = 0;
    for (int nn = 0; nn < this->n_; ++nn) {
      for (int kk = 0; kk < this->k_; ++kk) {
        t_data[i++] = w_data[kk * this->n_ + nn];
      }
    }
  }
  if (this->m_ > 1) {
    int hblock = lite::arm::math::get_hblock(ctx.arch());
    int m_round = hblock * ((this->m_ + hblock - 1) / hblock);
    ctx.ExtendWorkspace(DDimLite(std::vector<int64_t>({m_round * this->k_})));
  }
}

template <PrecisionType Ptype_out>
void FcComputeInt8<Ptype_out>::Run() {
  auto& param = this->Param<operators::FcParam>();

  const auto* i_data = param.input->template data<int8_t>();
  const auto* w_data = param.w->template data<int8_t>();
  const auto* b_data = param.bias ? param.bias->template data<int>() : nullptr;
  int* o_data = nullptr;

  auto& ctx = this->ctx_->template As<ARMContext>();

  o_data = this->tmp_int32_out_->template mutable_data<int>();
  if (m_ > 1) {
    int8_t* packed_in =
        static_cast<int8_t*>(ctx.template workspace_data<int8_t>()) +
        ctx.l2_cache_size() / sizeof(int8_t);
    lite::arm::math::prepackA_int8(packed_in, i_data, k_, 0, m_, 0, k_, false);
    lite::arm::math::gemm_prepack_int8(packed_in, w_data, b_data, o_data, m_, n_,
                                       k_, false, false, false, nullptr, &ctx);
    if (param.bias) {
      CHECK_EQ(param.bias->numel(), n_);
      lite::arm::math::fill_bias_fc(o_data, b_data, m_, n_);
    }
  } else {
    CHECK(transed_weight_);
    const auto* t_data = transed_weight_->template data<int8_t>();
    lite::arm::math::gemv_int8(t_data, i_data, o_data, false, n_, k_, nullptr,
                               b_data != nullptr, b_data, false);
  }

  float i_scale = param.input_scale;
  std::vector<float> weight_scale = param.weight_scale;
  if (Ptype_out == PRECISION(kInt8)) {
    float o_scale = param.output_scale;
    param.output->template mutable_data<int8_t>();
    lite::arm::math::trans_tensor_dtype<PRECISION(kInt32), PRECISION(kInt8)>(
        tmp_int32_out_, param.output, i_scale, o_scale, weight_scale);
  } else if (Ptype_out == PRECISION(kFloat)) {
    param.output->template mutable_data<float>();
    lite::arm::math::trans_tensor_dtype<PRECISION(kInt32), PRECISION(kFloat)>(
        tmp_int32_out_, param.output, i_scale, 1.f, weight_scale);
  } else {
    LOG(ERROR) << "unsupported precision type!!";
  }
}

}  // namespace arm
}  // namespace kernels
}  // namespace lite
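Run() above accumulates the int8 GEMM/GEMV result into the int32 buffer tmp_int32_out_, then converts it to the requested output precision with trans_tensor_dtype: for kInt8 output the int32 values are rescaled by input_scale * weight_scale[col] / output_scale, and for kFloat the same product is applied with output_scale fixed to 1.f. The following is only a minimal scalar sketch of that assumed requantization, not the library routine (which lives in paddle/fluid/lite/arm/math/type_trans.h and may differ in rounding and saturation details); all names below are illustrative.

#include <algorithm>
#include <cmath>
#include <cstdint>
#include <vector>

// Hypothetical helper: int32 accumulators -> int8 output, scaled by
// in_scale * w_scale[col] / out_scale and saturated to [-127, 127].
void requantize_int32_to_int8(const int* in, int8_t* out, int m, int n,
                              float in_scale, float out_scale,
                              const std::vector<float>& w_scale) {
  for (int i = 0; i < m; ++i) {
    for (int j = 0; j < n; ++j) {
      // Per-output-channel weight scale; fall back to a single scale otherwise.
      float ws = static_cast<int>(w_scale.size()) == n ? w_scale[j] : w_scale[0];
      float v = in[i * n + j] * in_scale * ws / out_scale;
      int q = static_cast<int>(std::round(v));
      out[i * n + j] = static_cast<int8_t>(std::max(-127, std::min(127, q)));
    }
  }
}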
...
...
@@ -101,3 +196,21 @@ REGISTER_LITE_KERNEL(fc, kARM, kFloat, kNCHW,
    .BindInput("W", {LiteType::GetTensorTy(TARGET(kARM))})
    .BindOutput("Out", {LiteType::GetTensorTy(TARGET(kARM))})
    .Finalize();

REGISTER_LITE_KERNEL(fc, kARM, kInt8, kNCHW,
                     paddle::lite::kernels::arm::FcComputeInt8<PRECISION(kInt8)>,
                     int8out)
    .BindInput("Input", {LiteType::GetTensorTy(TARGET(kARM), PRECISION(kInt8))})
    .BindInput("Bias", {LiteType::GetTensorTy(TARGET(kARM), PRECISION(kInt32))})
    .BindInput("W", {LiteType::GetTensorTy(TARGET(kARM), PRECISION(kInt8))})
    .BindOutput("Out", {LiteType::GetTensorTy(TARGET(kARM), PRECISION(kInt8))})
    .Finalize();

REGISTER_LITE_KERNEL(fc, kARM, kInt8, kNCHW,
                     paddle::lite::kernels::arm::FcComputeInt8<PRECISION(kFloat)>,
                     fp32out)
    .BindInput("Input", {LiteType::GetTensorTy(TARGET(kARM), PRECISION(kInt8))})
    .BindInput("Bias", {LiteType::GetTensorTy(TARGET(kARM), PRECISION(kInt32))})
    .BindInput("W", {LiteType::GetTensorTy(TARGET(kARM), PRECISION(kInt8))})
    .BindOutput("Out", {LiteType::GetTensorTy(TARGET(kARM), PRECISION(kFloat))})
    .Finalize();
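The two new registrations share the op type fc and the kInt8 kernel precision; only the template argument and the alias (int8out vs fp32out) differ, presumably so the graph passes can pick whichever output precision fits the surrounding ops. By analogy with the calib_op test further down, a binary that wants to force-link these kernels would declare them like this (a sketch, not taken from this commit):

USE_LITE_KERNEL(fc, kARM, kInt8, kNCHW, int8out);
USE_LITE_KERNEL(fc, kARM, kInt8, kNCHW, fp32out);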
paddle/fluid/lite/kernels/arm/fc_compute.h
...
...
@@ -13,6 +13,8 @@
// limitations under the License.
#pragma once
#include <stdint.h>
#include "paddle/fluid/lite/arm/math/type_trans.h"
#include "paddle/fluid/lite/core/kernel.h"
#include "paddle/fluid/lite/operators/fc_op.h"
...
...
@@ -40,6 +42,27 @@ class FcCompute : public KernelLite<TARGET(kARM), PRECISION(kFloat)> {
  int m_, n_, k_;
};
template <PrecisionType Ptype_out>
class FcComputeInt8 : public KernelLite<TARGET(kARM), PRECISION(kInt8)> {
 public:
  using param_t = operators::FcParam;

  void PrepareForRun() override;

  void Run() override;

  ~FcComputeInt8() override {
    if (transed_weight_) {
      delete transed_weight_;
    }
  };

 private:
  lite::Tensor* transed_weight_{nullptr};
  Tensor* tmp_int32_out_{nullptr};
  int m_, n_, k_;
};

}  // namespace arm
}  // namespace kernels
}  // namespace lite
...
...
paddle/fluid/lite/operators/calib_op.cc
...
...
@@ -37,12 +37,8 @@ bool CalibOpLite::AttachImpl(const cpp::OpDesc &opdesc, lite::Scope *scope) {
  param_.input = const_cast<lite::Tensor *>(&(x_var->Get<lite::Tensor>()));
  param_.output = output_var->GetMutable<lite::Tensor>();
  std::vector<std::string> input_arg_names = opdesc.InputArgumentNames();
  param_.in_dtype = static_cast<lite::PrecisionType>(opdesc.GetAttr<int>("in_dtype"));
  param_.out_dtype = static_cast<lite::PrecisionType>(opdesc.GetAttr<int>("out_dtype"));
  if (opdesc.HasAttr("in_scale")) {
    param_.in_scale = opdesc.GetAttr<float>("in_scale");
  if (opdesc.HasAttr("scale")) {
    param_.scale = opdesc.GetAttr<float>("scale");
  }
  CHECK(param_.input) << "Input(X) of CalibOp should not be null.";
  CHECK(param_.output) << "Output(Out) of CalibOp should not be null.";
...
...
paddle/fluid/lite/operators/calib_op_test.cc
...
...
@@ -11,7 +11,6 @@
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/fluid/lite/operators/calib_op.h"
#include <gtest/gtest.h>
#include "paddle/fluid/lite/core/op_registry.h"
...
...
@@ -42,9 +41,7 @@ TEST(calib_op_lite, TestARM) {
  desc.SetType("calib");
  desc.SetInput("Input", {"Input"});
  desc.SetOutput("Out", {"output"});
  desc.SetAttr("in_dtype", static_cast<int>(PRECISION(kInt8)));
  desc.SetAttr("out_dtype", static_cast<int>(PRECISION(kFloat)));
  desc.SetAttr("in_scale", 10.0f);
  desc.SetAttr("scale", 10.0f);

  CalibOpLite calib("calib");
...
...
@@ -60,5 +57,6 @@ TEST(calib_op_lite, TestARM) {
}  // namespace paddle

#ifdef LITE_WITH_ARM
USE_LITE_KERNEL(calib, kARM, kInt8, kNCHW, def);
USE_LITE_KERNEL(calib, kARM, kInt8, kNCHW, fp32_to_int8);
USE_LITE_KERNEL(calib, kARM, kInt8, kNCHW, int8_to_fp32);
#endif
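The two new kernel aliases (fp32_to_int8 and int8_to_fp32) suggest that calib now converts in both directions using the single "scale" attribute set in the test above. A minimal scalar sketch of the assumed element-wise conversion follows; the actual ARM kernels live in calib_compute.cc, are vectorized, and may use a different scale convention, so treat these helpers as illustrative only.

#include <algorithm>
#include <cmath>
#include <cstdint>

// Hypothetical scalar versions of the two calib directions.
inline int8_t calib_fp32_to_int8(float x, float scale) {
  int q = static_cast<int>(std::round(x / scale));
  return static_cast<int8_t>(std::max(-127, std::min(127, q)));
}
inline float calib_int8_to_fp32(int8_t q, float scale) {
  return static_cast<float>(q) * scale;
}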
paddle/fluid/lite/operators/conv_op.h
...
...
@@ -76,6 +76,17 @@ class ConvOpLite : public OpLite {
    }
  }

  param_.fuse_relu = op_desc.GetAttr<bool>("fuse_relu");

  // For Int8
  if (op_desc.HasAttr("enable_int8")) {
    param_.enable_int8 = op_desc.GetAttr<bool>("enable_int8");
    if (op_desc.HasAttr("input_scale"))
      param_.input_scale = op_desc.GetAttr<float>("input_scale");
    if (op_desc.HasAttr("weight_scale"))
      param_.weight_scale = op_desc.GetAttr<std::vector<float>>("weight_scale");
    if (op_desc.HasAttr("output_scale"))
      param_.output_scale = op_desc.GetAttr<float>("output_scale");
  }

  return true;
}
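For the branch above to fire, some upstream pass or model converter has to attach the int8 attributes to the conv op desc. A hypothetical producer-side sketch, following the same SetAttr pattern as the calib_op test earlier and assuming cpp::OpDesc provides matching overloads for bool and std::vector<float> (the attribute values are made up):

#include <vector>

void mark_conv_as_int8(paddle::lite::cpp::OpDesc* desc) {
  desc->SetType("conv2d");
  desc->SetAttr("enable_int8", true);                                   // assumed bool overload
  desc->SetAttr("input_scale", 0.017f);
  desc->SetAttr("weight_scale", std::vector<float>{0.02f, 0.018f});     // per-channel scales
  desc->SetAttr("output_scale", 0.05f);
}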
...
...
paddle/fluid/lite/operators/fc_op.h
...
...
@@ -59,6 +59,17 @@ class FcOpLite : public OpLite {
  param_.output = scope->FindVar(out)->GetMutable<lite::Tensor>();
  param_.in_num_col_dims = op_desc.GetAttr<int>("in_num_col_dims");

  // For Int8
  if (op_desc.HasAttr("enable_int8")) {
    param_.enable_int8 = op_desc.GetAttr<bool>("enable_int8");
    if (op_desc.HasAttr("input_scale"))
      param_.input_scale = op_desc.GetAttr<float>("input_scale");
    if (op_desc.HasAttr("weight_scale"))
      param_.weight_scale = op_desc.GetAttr<std::vector<float>>("weight_scale");
    if (op_desc.HasAttr("output_scale"))
      param_.output_scale = op_desc.GetAttr<float>("output_scale");
  }

  return true;
}
...
...
paddle/fluid/lite/operators/op_params.h
...
...
@@ -19,11 +19,6 @@
#include "paddle/fluid/lite/core/framework.pb.h"
#include "paddle/fluid/lite/utils/all.h"
#define WITH_INT8_CONFIG \
bool enable_int8; \
float input_scale; \
std::vector<float> weight_scale{}; \
float output_scale;
/*
* This file contains all the argument parameter data structure for operators.
*/
...
...
@@ -33,6 +28,11 @@ namespace lite {
namespace operators {

using param_t = Any;
#define WITH_INT8_CONFIG \
bool enable_int8{false}; \
float input_scale{1.0}; \
std::vector<float> weight_scale{}; \
float output_scale{1.0};
/// ----------------------- Functional operators ------------------------------
struct FeedParam {
...
...
@@ -56,9 +56,7 @@ struct IoCopyParam {
struct CalibParam {
  const lite::Tensor* input{};
  lite::Tensor* output{};
  float in_scale;
  PrecisionType in_dtype;
  PrecisionType out_dtype;
  float scale;
};
/// -------------------------- NN operators ------------------------------------
...
...
@@ -71,6 +69,8 @@ struct FcParam {
  lite::DDim in_mat_dims;
  int in_num_col_dims{1};
  bool weight_transposed{false};
  // for int8
  WITH_INT8_CONFIG
};
// For Mul Op
...
...
@@ -81,6 +81,8 @@ struct MulParam {
  int x_num_col_dims{1};
  int y_num_col_dims{1};
  // for int8
  WITH_INT8_CONFIG
};

struct MulGradParam {
...
...
@@ -152,6 +154,7 @@ struct ConvParam {
  float scale_weights{1.0f};       // only used with mkl-dnn int8
  bool force_fp32_output{false};   // only used in mkl-dnn int8
  std::string data_format{"Anylayout"};
  // for int8
  WITH_INT8_CONFIG
};
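With WITH_INT8_CONFIG relocated after the namespace opening and given default initializers, FcParam, MulParam, and ConvParam all pick up the same quantization fields. Expanded by hand from the macro shown above, each struct effectively ends with the following tail (illustration of the expansion only; the struct name is hypothetical):

#include <vector>

struct ParamInt8TailSketch {
  bool enable_int8{false};
  float input_scale{1.0};
  std::vector<float> weight_scale{};
  float output_scale{1.0};
};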
...
...
paddle/fluid/lite/tools/build.sh
...
...
@@ -4,6 +4,7 @@ set -ex
TESTS_FILE="./lite_tests.txt"
LIBS_FILE="./lite_libs.txt"
readonly ADB_WORK_DIR="/data/local/tmp"
readonly common_flags="-DWITH_LITE=ON -DLITE_WITH_LIGHT_WEIGHT_FRAMEWORK=OFF -DWITH_PYTHON=OFF -DWITH_TESTING=ON -DLITE_WITH_ARM=OFF"
NUM_CORES_FOR_COMPILE=8
...
...
@@ -183,7 +184,36 @@ function test_arm_model {
    adb -s emulator-${port} shell chmod +x "${adb_work_dir}/${test_name}"
    local adb_model_path="${adb_work_dir}/`basename ${model_dir}`"
    adb -s emulator-${port} shell "${adb_work_dir}/${test_name} --model_dir=$adb_model_path"
}

function _test_model_optimize_tool {
    local port=$1
    local remote_model_path=$ADB_WORK_DIR/lite_naive_model
    local remote_test=$ADB_WORK_DIR/model_optimize_tool
    local adb="adb -s emulator-${port}"

    make model_optimize_tool -j$NUM_CORES_FOR_COMPILE
    local test_path=$(find . -name model_optimize_tool)
    local model_path=$(find . -name lite_naive_model)
    $adb push ${test_path} ${ADB_WORK_DIR}
    $adb shell mkdir -p $remote_model_path
    $adb push $model_path/* $remote_model_path
    $adb shell $remote_test --model_dir $remote_model_path --optimize_out ${remote_model_path}.opt \
         --valid_targets "arm"
}

function _test_paddle_code_generator {
    local port=$1
    local test_name=paddle_code_generator
    local remote_test=$ADB_WORK_DIR/$test_name
    local remote_model=$ADB_WORK_DIR/lite_naive_model.opt
    local adb="adb -s emulator-${port}"

    make paddle_code_generator -j$NUM_CORES_FOR_COMPILE
    local test_path=$(find . -name $test_name)

    $adb push $test_path $remote_test
    $adb shell $remote_test --optimized_model $remote_model --generated_code_file $ADB_WORK_DIR/gen_code.cc
}

function cmake_arm {
...
...
@@ -273,6 +303,9 @@ function test_arm {
# test finally
    test_arm_api $port
    _test_model_optimize_tool $port
    _test_paddle_code_generator $port
}
function prepare_emulator {
...
...
paddle/fluid/lite/utils/string.h
...
...
@@ -52,8 +52,8 @@ static std::string to_string_with_precision(const T& v, const int n = 6) {
  return ss.str();
}

static std::string Join(const std::vector<std::string>& vec,
                        const std::string& delim) {
template <typename T>
std::string Join(const std::vector<T>& vec, const std::string& delim) {
  if (vec.empty()) return "";

  std::stringstream ss;
...
...
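The Join signature is generalized from std::vector<std::string> to an arbitrary element type T. A usage sketch, assuming the templated body stringifies each element through the stringstream shown above (namespace qualification omitted):

#include <string>
#include <vector>

std::vector<int> ids{1, 2, 3};
std::string csv = Join(ids, ", ");  // expected: "1, 2, 3"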