Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
PaddlePaddle
Paddle-Lite
提交
662e4d7c
P
Paddle-Lite
项目概览
PaddlePaddle
/
Paddle-Lite
通知
331
Star
4
Fork
1
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
271
列表
看板
标记
里程碑
合并请求
78
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle-Lite
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
271
Issue
271
列表
看板
标记
里程碑
合并请求
78
合并请求
78
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
662e4d7c
编写于
12月 06, 2019
作者:
C
cen.li
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
* change to bridge way
* fix code_style test=develop
上级
87271863
变更
37
显示空白变更内容
内联
并排
Showing
37 changed file
with
710 addition
and
1011 deletion
+710
-1011
lite/api/test_resnet50_lite_bm.cc
lite/api/test_resnet50_lite_bm.cc
+1
-3
lite/backends/bm/CMakeLists.txt
lite/backends/bm/CMakeLists.txt
+1
-1
lite/backends/bm/bm_context.cc
lite/backends/bm/bm_context.cc
+28
-0
lite/backends/bm/target_wrapper.cc
lite/backends/bm/target_wrapper.cc
+8
-2
lite/core/context.h
lite/core/context.h
+8
-3
lite/core/memory.cc
lite/core/memory.cc
+16
-0
lite/core/memory.h
lite/core/memory.h
+9
-0
lite/core/mir/static_kernel_pick_pass.cc
lite/core/mir/static_kernel_pick_pass.cc
+1
-1
lite/core/mir/subgraph/generate_bm_program_pass.cc
lite/core/mir/subgraph/generate_bm_program_pass.cc
+183
-0
lite/core/mir/subgraph/generate_bm_program_pass.h
lite/core/mir/subgraph/generate_bm_program_pass.h
+60
-0
lite/kernels/bm/CMakeLists.txt
lite/kernels/bm/CMakeLists.txt
+2
-12
lite/kernels/bm/batch_norm_compute.cc
lite/kernels/bm/batch_norm_compute.cc
+0
-75
lite/kernels/bm/bridges/CMakeLists.txt
lite/kernels/bm/bridges/CMakeLists.txt
+23
-0
lite/kernels/bm/bridges/act_op.cc
lite/kernels/bm/bridges/act_op.cc
+11
-21
lite/kernels/bm/bridges/batch_norm_op.cc
lite/kernels/bm/bridges/batch_norm_op.cc
+11
-25
lite/kernels/bm/bridges/conv_op.cc
lite/kernels/bm/bridges/conv_op.cc
+36
-0
lite/kernels/bm/bridges/elementwise_ops.cc
lite/kernels/bm/bridges/elementwise_ops.cc
+36
-0
lite/kernels/bm/bridges/mul_op.cc
lite/kernels/bm/bridges/mul_op.cc
+36
-0
lite/kernels/bm/bridges/pool_op.cc
lite/kernels/bm/bridges/pool_op.cc
+36
-0
lite/kernels/bm/bridges/registry.cc
lite/kernels/bm/bridges/registry.cc
+14
-24
lite/kernels/bm/bridges/registry.h
lite/kernels/bm/bridges/registry.h
+85
-0
lite/kernels/bm/bridges/scale_op.cc
lite/kernels/bm/bridges/scale_op.cc
+36
-0
lite/kernels/bm/bridges/softmax_op.cc
lite/kernels/bm/bridges/softmax_op.cc
+36
-0
lite/kernels/bm/calib_compute.cc
lite/kernels/bm/calib_compute.cc
+0
-76
lite/kernels/bm/conv_compute.cc
lite/kernels/bm/conv_compute.cc
+0
-71
lite/kernels/bm/elementwise_compute.cc
lite/kernels/bm/elementwise_compute.cc
+0
-56
lite/kernels/bm/graph_compute.cc
lite/kernels/bm/graph_compute.cc
+22
-24
lite/kernels/bm/graph_compute.h
lite/kernels/bm/graph_compute.h
+11
-12
lite/kernels/bm/io_copy_compute.cc
lite/kernels/bm/io_copy_compute.cc
+0
-167
lite/kernels/bm/mul_compute.cc
lite/kernels/bm/mul_compute.cc
+0
-61
lite/kernels/bm/pool_compute.cc
lite/kernels/bm/pool_compute.cc
+0
-59
lite/kernels/bm/pool_compute.h
lite/kernels/bm/pool_compute.h
+0
-50
lite/kernels/bm/relu_compute.h
lite/kernels/bm/relu_compute.h
+0
-50
lite/kernels/bm/scale_compute.cc
lite/kernels/bm/scale_compute.cc
+0
-59
lite/kernels/bm/scale_compute.h
lite/kernels/bm/scale_compute.h
+0
-50
lite/kernels/bm/softmax_compute.cc
lite/kernels/bm/softmax_compute.cc
+0
-59
lite/kernels/bm/softmax_compute.h
lite/kernels/bm/softmax_compute.h
+0
-50
未找到文件。
lite/api/test_resnet50_lite_bm.cc
浏览文件 @
662e4d7c
...
@@ -35,7 +35,7 @@ void TestModel(const std::vector<Place>& valid_places) {
...
@@ -35,7 +35,7 @@ void TestModel(const std::vector<Place>& valid_places) {
//DeviceInfo::Global().SetRunMode(lite_api::LITE_POWER_NO_BIND, FLAGS_threads);
//DeviceInfo::Global().SetRunMode(lite_api::LITE_POWER_NO_BIND, FLAGS_threads);
lite
::
Predictor
predictor
;
lite
::
Predictor
predictor
;
predictor
.
Build
(
FLAGS_model_dir
,
""
,
""
,
valid_places
);
predictor
.
Build
(
FLAGS_model_dir
,
""
,
""
,
valid_places
);
#if 0
auto
*
input_tensor
=
predictor
.
GetInput
(
0
);
auto
*
input_tensor
=
predictor
.
GetInput
(
0
);
input_tensor
->
Resize
(
DDim
(
std
::
vector
<
DDim
::
value_type
>
({
1
,
3
,
224
,
224
})));
input_tensor
->
Resize
(
DDim
(
std
::
vector
<
DDim
::
value_type
>
({
1
,
3
,
224
,
224
})));
auto
*
data
=
input_tensor
->
mutable_data
<
float
>
();
auto
*
data
=
input_tensor
->
mutable_data
<
float
>
();
...
@@ -53,7 +53,6 @@ void TestModel(const std::vector<Place>& valid_places) {
...
@@ -53,7 +53,6 @@ void TestModel(const std::vector<Place>& valid_places) {
fs
>>
data
[
i
];
fs
>>
data
[
i
];
}
}
}
}
for
(
int
i
=
0
;
i
<
FLAGS_warmup
;
++
i
)
{
for
(
int
i
=
0
;
i
<
FLAGS_warmup
;
++
i
)
{
predictor
.
Run
();
predictor
.
Run
();
}
}
...
@@ -102,7 +101,6 @@ void TestModel(const std::vector<Place>& valid_places) {
...
@@ -102,7 +101,6 @@ void TestModel(const std::vector<Place>& valid_places) {
}
}
}
}
LOG
(
INFO
)
<<
"max val:"
<<
max_val
<<
", max_val_arg:"
<<
max_val_arg
;
LOG
(
INFO
)
<<
"max val:"
<<
max_val
<<
", max_val_arg:"
<<
max_val_arg
;
#endif
}
}
TEST
(
ResNet50
,
test_bm
)
{
TEST
(
ResNet50
,
test_bm
)
{
...
...
lite/backends/bm/CMakeLists.txt
浏览文件 @
662e4d7c
...
@@ -2,4 +2,4 @@ if (NOT LITE_WITH_BM)
...
@@ -2,4 +2,4 @@ if (NOT LITE_WITH_BM)
return
()
return
()
endif
()
endif
()
lite_cc_library
(
target_wrapper_bm SRCS target_wrapper.cc DEPS
${
bm_runtime_libs
}
)
lite_cc_library
(
target_wrapper_bm SRCS target_wrapper.cc
bm_context.cc
DEPS
${
bm_runtime_libs
}
)
lite/backends/bm/bm_context.cc
0 → 100644
浏览文件 @
662e4d7c
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "lite/core/context.h"
#include "bmcompiler_if.h"
namespace
paddle
{
namespace
lite
{
static
const
char
*
CHIP_NAME
=
"BM1684"
;
void
BMContext
::
InitOnce
()
{
compiler_handle_
=
create_bmcompiler
(
CHIP_NAME
);
CHECK
(
NULL
!=
compiler_handle_
);
}
}
// namespace lite
}
// namespace paddle
lite/backends/bm/target_wrapper.cc
浏览文件 @
662e4d7c
...
@@ -14,6 +14,7 @@
...
@@ -14,6 +14,7 @@
#include <map>
#include <map>
#include "lite/backends/bm/target_wrapper.h"
#include "lite/backends/bm/target_wrapper.h"
#include "bmlib_runtime.h"
#include "bmlib_runtime.h"
#include "bmcompiler_if.h"
namespace
paddle
{
namespace
paddle
{
namespace
lite
{
namespace
lite
{
...
@@ -32,7 +33,8 @@ void TargetWrapperBM::SetDevice(int id) {
...
@@ -32,7 +33,8 @@ void TargetWrapperBM::SetDevice(int id) {
if
(
g_bm_handles
.
find
(
id
)
==
g_bm_handles
.
end
())
{
if
(
g_bm_handles
.
find
(
id
)
==
g_bm_handles
.
end
())
{
bm_handle_t
bm_handle
;
bm_handle_t
bm_handle
;
bm_dev_request
(
&
bm_handle
,
id
);
bm_status_t
ret
=
bm_dev_request
(
&
bm_handle
,
id
);
CHECK_EQ
(
ret
,
BM_SUCCESS
)
<<
"Failed with error code: "
<<
(
int
)
ret
;
g_bm_handles
.
insert
(
std
::
pair
<
int
,
bm_handle_t
>
(
id
,
bm_handle
));
g_bm_handles
.
insert
(
std
::
pair
<
int
,
bm_handle_t
>
(
id
,
bm_handle
));
}
}
return
;
return
;
...
@@ -41,6 +43,10 @@ void TargetWrapperBM::SetDevice(int id) {
...
@@ -41,6 +43,10 @@ void TargetWrapperBM::SetDevice(int id) {
void
*
TargetWrapperBM
::
Malloc
(
size_t
size
)
{
void
*
TargetWrapperBM
::
Malloc
(
size_t
size
)
{
void
*
ptr
{};
void
*
ptr
{};
if
(
g_bm_handles
.
find
(
g_current_device_id
)
==
g_bm_handles
.
end
())
{
SetDevice
(
g_current_device_id
);
}
bm_handle_t
bm_handle
=
g_bm_handles
.
at
(
g_current_device_id
);
bm_handle_t
bm_handle
=
g_bm_handles
.
at
(
g_current_device_id
);
bm_device_mem_t
*
p_mem
=
(
bm_device_mem_t
*
)
malloc
(
sizeof
(
bm_device_mem_t
));
bm_device_mem_t
*
p_mem
=
(
bm_device_mem_t
*
)
malloc
(
sizeof
(
bm_device_mem_t
));
bm_malloc_device_byte
(
bm_handle
,
p_mem
,
size
);
bm_malloc_device_byte
(
bm_handle
,
p_mem
,
size
);
...
...
lite/core/context.h
浏览文件 @
662e4d7c
...
@@ -96,11 +96,17 @@ class Context<TargetType::kBM> {
...
@@ -96,11 +96,17 @@ class Context<TargetType::kBM> {
Context
()
{}
Context
()
{}
explicit
Context
(
const
BMContext
&
ctx
);
explicit
Context
(
const
BMContext
&
ctx
);
// NOTE: InitOnce should only be used by ContextScheduler
// NOTE: InitOnce should only be used by ContextScheduler
void
InitOnce
()
{}
void
InitOnce
()
;
void
CopySharedTo
(
BMContext
*
ctx
)
{}
void
CopySharedTo
(
BMContext
*
ctx
)
{}
std
::
string
name
()
const
{
return
"BMContext"
;
}
std
::
string
name
()
const
{
return
"BMContext"
;
}
};
void
*
compiler_handle
()
{
return
compiler_handle_
;
}
private:
void
*
compiler_handle_
{
nullptr
};
};
#endif
#endif
#ifdef LITE_WITH_XPU
#ifdef LITE_WITH_XPU
...
@@ -340,7 +346,6 @@ class ContextScheduler {
...
@@ -340,7 +346,6 @@ class ContextScheduler {
std
::
unique_ptr
<
KernelContext
>
NewContext
(
TargetType
target
)
{
std
::
unique_ptr
<
KernelContext
>
NewContext
(
TargetType
target
)
{
std
::
unique_ptr
<
KernelContext
>
ctx
(
new
KernelContext
);
std
::
unique_ptr
<
KernelContext
>
ctx
(
new
KernelContext
);
LOG
(
INFO
)
<<
"aaaaaaaaaaaaaaaaaaaaaaaaaaaaa "
<<
int
(
target
)
<<
" "
<<
int
(
TARGET
(
kBM
));
switch
(
target
)
{
switch
(
target
)
{
case
TARGET
(
kHost
):
case
TARGET
(
kHost
):
kernel_contexts_
[
TargetType
::
kHost
].
As
<
HostContext
>
().
CopySharedTo
(
kernel_contexts_
[
TargetType
::
kHost
].
As
<
HostContext
>
().
CopySharedTo
(
...
...
lite/core/memory.cc
浏览文件 @
662e4d7c
...
@@ -40,6 +40,11 @@ void* TargetMalloc(TargetType target, size_t size) {
...
@@ -40,6 +40,11 @@ void* TargetMalloc(TargetType target, size_t size) {
data
=
TargetWrapper
<
TARGET
(
kFPGA
)
>::
Malloc
(
size
);
data
=
TargetWrapper
<
TARGET
(
kFPGA
)
>::
Malloc
(
size
);
break
;
break
;
#endif // LITE_WITH_OPENCL
#endif // LITE_WITH_OPENCL
#ifdef LITE_WITH_BM
case
TargetType
::
kBM
:
data
=
TargetWrapper
<
TARGET
(
kBM
)
>::
Malloc
(
size
);
break
;
#endif
default:
default:
LOG
(
FATAL
)
<<
"Unknown supported target "
<<
TargetToStr
(
target
);
LOG
(
FATAL
)
<<
"Unknown supported target "
<<
TargetToStr
(
target
);
}
}
...
@@ -69,6 +74,11 @@ void TargetFree(TargetType target, void* data) {
...
@@ -69,6 +74,11 @@ void TargetFree(TargetType target, void* data) {
TargetWrapper
<
TARGET
(
kFPGA
)
>::
Free
(
data
);
TargetWrapper
<
TARGET
(
kFPGA
)
>::
Free
(
data
);
break
;
break
;
#endif // LITE_WITH_CUDA
#endif // LITE_WITH_CUDA
#ifdef LITE_WITH_BM
case
TargetType
::
kBM
:
TargetWrapper
<
TARGET
(
kBM
)
>::
Free
(
data
);
break
;
#endif
default:
default:
LOG
(
FATAL
)
<<
"Unknown type"
;
LOG
(
FATAL
)
<<
"Unknown type"
;
}
}
...
@@ -95,6 +105,12 @@ void TargetCopy(TargetType target, void* dst, const void* src, size_t size) {
...
@@ -95,6 +105,12 @@ void TargetCopy(TargetType target, void* dst, const void* src, size_t size) {
dst
,
src
,
size
,
IoDirection
::
DtoD
);
dst
,
src
,
size
,
IoDirection
::
DtoD
);
break
;
break
;
#endif
#endif
#ifdef LITE_WITH_BM
case
TargetType
::
kBM
:
TargetWrapper
<
TARGET
(
kBM
)
>::
MemcpySync
(
dst
,
src
,
size
,
IoDirection
::
DtoD
);
break
;
#endif
#ifdef LITE_WITH_OPENCL
#ifdef LITE_WITH_OPENCL
case
TargetType
::
kOpenCL
:
case
TargetType
::
kOpenCL
:
TargetWrapperCL
::
MemcpySync
(
dst
,
src
,
size
,
IoDirection
::
DtoD
);
TargetWrapperCL
::
MemcpySync
(
dst
,
src
,
size
,
IoDirection
::
DtoD
);
...
...
lite/core/memory.h
浏览文件 @
662e4d7c
...
@@ -25,6 +25,10 @@
...
@@ -25,6 +25,10 @@
#include "lite/backends/cuda/target_wrapper.h"
#include "lite/backends/cuda/target_wrapper.h"
#endif // LITE_WITH_CUDA
#endif // LITE_WITH_CUDA
#ifdef LITE_WITH_BM
#include "lite/backends/bm/target_wrapper.h"
#endif // LITE_WITH_BM
namespace
paddle
{
namespace
paddle
{
namespace
lite
{
namespace
lite
{
...
@@ -71,6 +75,11 @@ void CopySync(void* dst, const void* src, size_t size, IoDirection dir) {
...
@@ -71,6 +75,11 @@ void CopySync(void* dst, const void* src, size_t size, IoDirection dir) {
case
TARGET
(
kFPGA
):
case
TARGET
(
kFPGA
):
TargetWrapper
<
TARGET
(
kFPGA
)
>::
MemcpySync
(
dst
,
src
,
size
,
dir
);
TargetWrapper
<
TARGET
(
kFPGA
)
>::
MemcpySync
(
dst
,
src
,
size
,
dir
);
break
;
break
;
#endif
#ifdef LITE_WITH_BM
case
TARGET
(
kBM
):
TargetWrapper
<
TARGET
(
kBM
)
>::
MemcpySync
(
dst
,
src
,
size
,
dir
);
break
;
#endif
#endif
}
}
}
}
...
...
lite/core/mir/static_kernel_pick_pass.cc
浏览文件 @
662e4d7c
...
@@ -33,6 +33,7 @@ void StaticKernelPickPass::Apply(const std::unique_ptr<SSAGraph>& graph) {
...
@@ -33,6 +33,7 @@ void StaticKernelPickPass::Apply(const std::unique_ptr<SSAGraph>& graph) {
kernel_pick_factors_
.
ConsiderTarget
();
kernel_pick_factors_
.
ConsiderTarget
();
kernel_pick_factors_
.
ConsiderPrecision
();
kernel_pick_factors_
.
ConsiderPrecision
();
kernel_pick_factors_
.
ConsiderDataLayout
();
kernel_pick_factors_
.
ConsiderDataLayout
();
CHECK
(
kernel_pick_factors_
.
any_factor_considered
())
CHECK
(
kernel_pick_factors_
.
any_factor_considered
())
<<
"kernel_pick_factors should be specified first"
;
<<
"kernel_pick_factors should be specified first"
;
CHECK
(
graph
)
<<
"graph not valid"
;
CHECK
(
graph
)
<<
"graph not valid"
;
...
@@ -114,7 +115,6 @@ void StaticKernelPickPass::Apply(const std::unique_ptr<SSAGraph>& graph) {
...
@@ -114,7 +115,6 @@ void StaticKernelPickPass::Apply(const std::unique_ptr<SSAGraph>& graph) {
bool
all_output_type_match
=
true
;
bool
all_output_type_match
=
true
;
auto
expect_output_type
=
auto
expect_output_type
=
out_type_int8
?
PRECISION
(
kInt8
)
:
PRECISION
(
kFloat
);
out_type_int8
?
PRECISION
(
kInt8
)
:
PRECISION
(
kFloat
);
for
(
auto
&
arg_name
:
output_arguments
)
{
for
(
auto
&
arg_name
:
output_arguments
)
{
const
Type
*
out_arg_ty
=
const
Type
*
out_arg_ty
=
candidate
.
second
->
GetOutputDeclType
(
arg_name
);
candidate
.
second
->
GetOutputDeclType
(
arg_name
);
...
...
lite/core/mir/subgraph/generate_bm_program_pass.cc
0 → 100644
浏览文件 @
662e4d7c
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "lite/core/mir/subgraph/generate_bm_program_pass.h"
#include <memory>
#include <unordered_map>
#include <unordered_set>
#include <utility>
#include <vector>
#include "lite/core/mir/graph_visualize_pass.h"
#include "lite/core/mir/pass_registry.h"
#include "lite/core/mir/pattern_matcher.h"
namespace
paddle
{
namespace
lite
{
namespace
mir
{
namespace
subgraph
{
std
::
shared_ptr
<
ge
::
Operator
>
GenerateBMProgramPass
::
CvtVarNode
(
lite
::
mir
::
Node
*
var_node
,
const
Scope
*
scope
)
{
CHECK
(
var_node
->
IsArg
());
const
auto
&
arg
=
var_node
->
AsArg
();
VLOG
(
4
)
<<
"Convert var node "
<<
arg
.
name
;
auto
*
var
=
scope
->
FindVar
(
arg
.
name
);
CHECK
(
var
);
auto
*
tensor
=
var
->
GetMutable
<
lite
::
Tensor
>
();
CHECK
(
tensor
);
auto
dims
=
tensor
->
dims
();
if
(
arg
.
is_weight
)
{
auto
wgt
=
std
::
make_shared
<
ge
::
op
::
Const
>
(
arg
.
name
);
LOG
(
INFO
)
<<
" Convert const var node "
<<
arg
.
name
;
VLOG
(
4
)
<<
dims
;
wgt
->
set_attr_value
(
lite
::
npu
::
CvtTensor
(
tensor
));
return
wgt
;
}
else
{
CHECK_EQ
(
dims
.
size
(),
4
);
LOG
(
INFO
)
<<
"[NPU] Convert data var node "
<<
arg
.
name
;
LOG
(
INFO
)
<<
dims
;
// TODO(xxx): support more types and dims size
ge
::
TensorDesc
desc
(
ge
::
Shape
(
dims
.
Vectorize
()),
ge
::
Format
::
FORMAT_NCHW
,
ge
::
DataType
::
DT_FLOAT
);
// auto size = desc.GetShape().GetShapeSize();
// ge::TensorUtils::SetSize(desc, size*sizeof(float));
// ge::TensorUtils::SetRealDimCnt(desc, 4);
auto
data
=
std
::
make_shared
<
ge
::
op
::
Data
>
(
arg
.
name
);
data
->
update_input_desc_x
(
desc
);
return
data
;
}
return
nullptr
;
}
void
GenerateNPUProgramPass
::
CvtAllOpNodes
(
const
std
::
vector
<
Node
*>&
nodes2cvt
,
lite
::
kernels
::
npu
::
bridges
::
node_map_type
*
converted_vars
)
{
const
auto
&
bridges
=
lite
::
kernels
::
npu
::
bridges
::
Factory
::
Instance
();
const
auto
&
cvtfunc_map
=
bridges
.
AllFunctions
();
// return record all converted vars
// op node's inputs must be found in converted_vars
for
(
auto
&
node
:
nodes2cvt
)
{
lite
::
kernels
::
npu
::
bridges
::
node_map_type
node_inputs
;
auto
&
stmt
=
node
->
AsStmt
();
for
(
auto
&
var_node
:
node
->
inlinks
)
{
auto
&
arg
=
var_node
->
AsArg
();
// weight should be handled in the converter, so skip here
if
(
arg
.
is_weight
)
{
continue
;
}
auto
var_name
=
arg
.
name
;
if
(
!
converted_vars
->
count
(
var_name
))
{
converted_vars
->
insert
(
std
::
make_pair
(
var_name
,
CvtVarNode
(
var_node
,
stmt
.
op
()
->
scope
())));
}
node_inputs
.
insert
(
*
converted_vars
->
find
(
var_name
));
}
auto
node_outputs
=
cvtfunc_map
.
at
(
stmt
.
op_type
())(
stmt
.
op
(),
node_inputs
);
converted_vars
->
insert
(
node_outputs
.
begin
(),
node_outputs
.
end
());
}
}
std
::
string
GenerateNPUProgramPass
::
BuildNPUGraph
(
const
std
::
unordered_set
<
Node
*>&
op_nodes
,
const
std
::
unordered_set
<
Node
*>&
in_data_vars
,
const
std
::
unordered_set
<
Node
*>&
out_data_vars
,
int
sub_id
)
{
auto
ordered_nodes
=
GetTopologicalOrder
(
op_nodes
);
lite
::
kernels
::
npu
::
bridges
::
node_map_type
converted_vars
;
CvtAllOpNodes
(
ordered_nodes
,
&
converted_vars
);
std
::
vector
<
std
::
string
>
in_var_names
;
std
::
vector
<
std
::
string
>
out_var_names
;
std
::
vector
<
ge
::
Operator
>
inputs
;
std
::
vector
<
ge
::
Operator
>
outputs
;
for
(
auto
i
:
in_data_vars
)
{
auto
argname
=
i
->
AsArg
().
name
;
in_var_names
.
push_back
(
argname
);
inputs
.
push_back
(
*
converted_vars
.
at
(
argname
));
}
for
(
auto
i
:
out_data_vars
)
{
auto
argname
=
i
->
AsArg
().
name
;
out_var_names
.
push_back
(
argname
);
outputs
.
push_back
(
*
converted_vars
.
at
(
argname
));
}
std
::
string
weight_var_name
=
"graph"
+
std
::
to_string
(
sub_id
)
+
"_weights"
;
auto
any_op
=
(
*
op_nodes
.
begin
())
->
AsStmt
().
op
();
auto
weight
=
any_op
->
scope
()
->
Var
(
weight_var_name
)
->
GetMutable
<
Tensor
>
();
weight
->
set_persistable
(
true
);
weight
->
set_precision
(
PRECISION
(
kInt8
));
// Compiling IR graph to NPU model and store mode data into weight tensor with
// persistable=true, Sothat the model parser can recognize it and save it to
// param files
if
(
!
lite
::
npu
::
BuildModel
(
inputs
,
outputs
,
weight
))
{
LOG
(
WARNING
)
<<
"[NPU] Build NPU graph failed (subgraph="
<<
sub_id
<<
")"
;
throw
std
::
runtime_error
(
"Build NPU graph failed."
);
}
LOG
(
INFO
)
<<
"[NPU] Build NPU graph success (subgraph="
<<
sub_id
<<
")"
;
return
weight_var_name
;
}
void
GenerateBMProgramPass
::
GenSubgraph
(
const
std
::
unique_ptr
<
SSAGraph
>&
graph
,
const
std
::
unordered_set
<
Node
*>&
op_nodes
,
int
sub_id
)
{
#if 0
std::unordered_set<Node*> in_data_vars;
std::unordered_set<Node*> in_wgt_vars;
std::unordered_set<Node*> out_data_vars;
std::unordered_set<Node*> out_unused_vars;
FindInputOutputVars(
op_nodes, &in_data_vars, &in_wgt_vars, &out_data_vars, &out_unused_vars);
auto weight_var_name =
BuildNPUGraph(op_nodes, in_data_vars, out_data_vars, sub_id);
auto any_op = (*op_nodes.begin())->AsStmt().op();
InsertNewNode(graph,
weight_var_name,
any_op->scope(),
any_op->valid_places(),
in_data_vars,
in_wgt_vars,
out_data_vars,
out_unused_vars);
auto nodes2rm = GetNode2rm(
op_nodes, {in_data_vars, in_wgt_vars, out_data_vars, out_unused_vars});
GraphSafeRemoveNodes(graph.get(), nodes2rm);
#endif
}
void
GenerateBMProgramPass
::
Apply
(
const
std
::
unique_ptr
<
SSAGraph
>&
graph
)
{
}
std
::
unique_ptr
<
RuntimeProgram
>
GenerateBMProgramPass
::
GenProgram
()
{
std
::
unique_ptr
<
RuntimeProgram
>
program
(
new
RuntimeProgram
(
std
::
move
(
insts_
)));
return
program
;
}
}
// namespace subgraph
}
// namespace mir
}
// namespace lite
}
// namespace paddle
REGISTER_MIR_PASS
(
generate_bm_program_pass
,
paddle
::
lite
::
mir
::
subgraph
::
GenerateBMProgramPass
)
.
BindTargets
({
TARGET
(
kBM
)});
lite/
kernels/bm/relu_compute.cc
→
lite/
core/mir/subgraph/generate_bm_program_pass.h
浏览文件 @
662e4d7c
...
@@ -12,48 +12,49 @@
...
@@ -12,48 +12,49 @@
// See the License for the specific language governing permissions and
// See the License for the specific language governing permissions and
// limitations under the License.
// limitations under the License.
#include "lite/kernels/bm/relu_compute.h"
#pragma once
#include <map>
#include <memory>
#include <string>
#include <string>
#include <unordered_map>
#include <unordered_set>
#include <vector>
#include <vector>
#include "lite/core/op_registry.h"
#include "lite/core/context.h"
#include "lite/core/type_system.h"
#include "lite/core/mir/pass.h"
#include "lite/core/mir/subgraph/subgraph_program_pass.h"
namespace
paddle
{
namespace
paddle
{
namespace
lite
{
namespace
lite
{
namespace
kernels
{
namespace
mir
{
namespace
bm
{
namespace
subgraph
{
class
GenerateBMProgramPass
:
public
SubgraphProgramPass
{
public:
using
key2nodes_t
=
std
::
map
<
std
::
string
,
Node
*>
;
void
ReluCompute
::
PrepareForRun
()
{
void
Apply
(
const
std
::
unique_ptr
<
SSAGraph
>&
graph
)
override
;
return
;
std
::
unique_ptr
<
RuntimeProgram
>
GenProgram
();
}
void
ReluCompute
::
Run
()
{
protected:
return
;
// nodes2cvt: op nodes to convert
}
// return cvted_vars: converted var nodes
void
CvtAllOpNodes
(
const
std
::
vector
<
Node
*>&
nodes2cvt
,
lite
::
kernels
::
npu
::
bridges
::
node_map_type
*
cvted_vars
);
template
<
PrecisionType
Ptype_out
>
std
::
shared_ptr
<
ge
::
Operator
>
CvtVarNode
(
lite
::
mir
::
Node
*
var_node
,
void
ReluComputeInt8
<
Ptype_out
>::
PrepareForRun
()
{
const
Scope
*
scope
);
return
;
}
template
<
PrecisionType
Ptype_out
>
std
::
string
BuildGraph
(
const
std
::
unordered_set
<
Node
*>&
op_nodes
,
void
ReluComputeInt8
<
Ptype_out
>::
Run
()
{
const
std
::
unordered_set
<
Node
*>&
in_data_vars
,
return
;
const
std
::
unordered_set
<
Node
*>&
out_data_vars
,
}
int
sub_id
);
}
// namespace bm
private:
}
// namespace kernels
std
::
vector
<
Instruction
>
insts_
;
};
}
// namespace subgraph
}
// namespace mir
}
// namespace lite
}
// namespace lite
}
// namespace paddle
}
// namespace paddle
REGISTER_LITE_KERNEL
(
relu
,
kBM
,
kFloat
,
kNCHW
,
paddle
::
lite
::
kernels
::
bm
::
ReluCompute
,
def
)
.
BindInput
(
"X"
,
{
LiteType
::
GetTensorTy
(
TARGET
(
kBM
))})
.
BindOutput
(
"Out"
,
{
LiteType
::
GetTensorTy
(
TARGET
(
kBM
))})
.
Finalize
();
REGISTER_LITE_KERNEL
(
relu
,
kBM
,
kInt8
,
kNCHW
,
paddle
::
lite
::
kernels
::
bm
::
ReluComputeInt8
<
PRECISION
(
kInt8
)
>
,
def
)
.
BindInput
(
"X"
,
{
LiteType
::
GetTensorTy
(
TARGET
(
kBM
))})
.
BindOutput
(
"Out"
,
{
LiteType
::
GetTensorTy
(
TARGET
(
kBM
))})
.
Finalize
();
lite/kernels/bm/CMakeLists.txt
浏览文件 @
662e4d7c
...
@@ -2,16 +2,6 @@ if(NOT LITE_WITH_BM)
...
@@ -2,16 +2,6 @@ if(NOT LITE_WITH_BM)
return
()
return
()
endif
()
endif
()
add_kernel
(
conv_compute_bm BM basic SRCS conv_compute.cc DEPS
${
lite_kernel_deps
}
)
add_kernel
(
graph_compute_bm BM basic SRCS graph_compute.cc DEPS
${
lite_kernel_deps
}
)
add_kernel
(
calib_compute_bm BM basic SRCS calib_compute.cc DEPS
${
lite_kernel_deps
}
)
add_kernel
(
pool_compute_bm BM basic SRCS pool_compute.cc DEPS
${
lite_kernel_deps
}
)
add_kernel
(
elementwise_compute_bm BM basic SRCS elementwise_compute.cc DEPS
${
lite_kernel_deps
}
)
add_kernel
(
relu_compute_bm BM basic SRCS relu_compute.cc DEPS
${
lite_kernel_deps
}
)
add_kernel
(
softmax_compute_bm BM basic SRCS softmax_compute.cc DEPS
${
lite_kernel_deps
}
)
add_kernel
(
batch_norm_compute_bm BM basic SRCS batch_norm_compute.cc DEPS
${
lite_kernel_deps
}
)
add_kernel
(
scale_compute_bm BM basic SRCS scale_compute.cc DEPS
${
lite_kernel_deps
}
)
add_kernel
(
mul_compute_bm BM basic SRCS mul_compute.cc DEPS
${
lite_kernel_deps
}
)
add_kernel
(
io_copy_compute_bm BM basic SRCS io_copy_compute.cc DEPS
${
lite_kernel_deps
}
)
message
(
STATUS
"compile with lite BM kernels"
)
add_subdirectory
(
bridges
)
lite/kernels/bm/batch_norm_compute.cc
已删除
100644 → 0
浏览文件 @
87271863
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "lite/kernels/bm/batch_norm_compute.h"
#include <string>
#include <vector>
#include "lite/core/op_registry.h"
#include "lite/core/type_system.h"
namespace
paddle
{
namespace
lite
{
namespace
kernels
{
namespace
bm
{
void
BatchNormCompute
::
PrepareForRun
()
{
return
;
}
void
BatchNormCompute
::
Run
()
{
return
;
}
template
<
PrecisionType
Ptype_out
>
void
BatchNormComputeInt8
<
Ptype_out
>::
PrepareForRun
()
{
return
;
}
template
<
PrecisionType
Ptype_out
>
void
BatchNormComputeInt8
<
Ptype_out
>::
Run
()
{
return
;
}
}
// namespace bm
}
// namespace kernels
}
// namespace lite
}
// namespace paddle
REGISTER_LITE_KERNEL
(
batch_norm
,
kBM
,
kFloat
,
kNCHW
,
paddle
::
lite
::
kernels
::
bm
::
BatchNormCompute
,
def
)
.
BindInput
(
"X"
,
{
LiteType
::
GetTensorTy
(
TARGET
(
kBM
))})
.
BindInput
(
"Scale"
,
{
LiteType
::
GetTensorTy
(
TARGET
(
kBM
))})
.
BindInput
(
"Bias"
,
{
LiteType
::
GetTensorTy
(
TARGET
(
kBM
))})
.
BindInput
(
"Mean"
,
{
LiteType
::
GetTensorTy
(
TARGET
(
kBM
))})
.
BindInput
(
"Variance"
,
{
LiteType
::
GetTensorTy
(
TARGET
(
kBM
))})
.
BindOutput
(
"Y"
,
{
LiteType
::
GetTensorTy
(
TARGET
(
kBM
))})
.
BindOutput
(
"MeanOut"
,
{
LiteType
::
GetTensorTy
(
TARGET
(
kBM
))})
.
BindOutput
(
"VarianceOut"
,
{
LiteType
::
GetTensorTy
(
TARGET
(
kBM
))})
.
BindOutput
(
"SavedMean"
,
{
LiteType
::
GetTensorTy
(
TARGET
(
kBM
))})
.
BindOutput
(
"SavedVariance"
,
{
LiteType
::
GetTensorTy
(
TARGET
(
kBM
))})
.
Finalize
();
REGISTER_LITE_KERNEL
(
batch_norm
,
kBM
,
kInt8
,
kNCHW
,
paddle
::
lite
::
kernels
::
bm
::
BatchNormComputeInt8
<
PRECISION
(
kInt8
)
>
,
def
)
.
BindInput
(
"X"
,
{
LiteType
::
GetTensorTy
(
TARGET
(
kBM
))})
.
BindInput
(
"Scale"
,
{
LiteType
::
GetTensorTy
(
TARGET
(
kBM
))})
.
BindInput
(
"Bias"
,
{
LiteType
::
GetTensorTy
(
TARGET
(
kBM
))})
.
BindInput
(
"Mean"
,
{
LiteType
::
GetTensorTy
(
TARGET
(
kBM
))})
.
BindInput
(
"Variance"
,
{
LiteType
::
GetTensorTy
(
TARGET
(
kBM
))})
.
BindOutput
(
"Y"
,
{
LiteType
::
GetTensorTy
(
TARGET
(
kBM
))})
.
BindOutput
(
"MeanOut"
,
{
LiteType
::
GetTensorTy
(
TARGET
(
kBM
))})
.
BindOutput
(
"VarianceOut"
,
{
LiteType
::
GetTensorTy
(
TARGET
(
kBM
))})
.
BindOutput
(
"SavedMean"
,
{
LiteType
::
GetTensorTy
(
TARGET
(
kBM
))})
.
BindOutput
(
"SavedVariance"
,
{
LiteType
::
GetTensorTy
(
TARGET
(
kBM
))})
.
Finalize
();
lite/kernels/bm/bridges/CMakeLists.txt
0 → 100644
浏览文件 @
662e4d7c
lite_cc_library
(
bm_bridge_registry SRCS registry.cc
)
set
(
bm_bridge_deps bm_bridge_registry op
)
lite_cc_library
(
bm_bridge_act_op SRCS act_op.cc DEPS
${
bm_bridge_deps
}
)
lite_cc_library
(
bm_bridge_conv_op SRCS conv_op.cc DEPS
${
bm_bridge_deps
}
)
lite_cc_library
(
bm_bridge_elementwise_ops SRCS elementwise_ops.cc DEPS
${
bm_bridge_deps
}
)
lite_cc_library
(
bm_bridge_pool_op SRCS pool_op.cc DEPS
${
bm_bridge_deps
}
)
lite_cc_library
(
bm_bridge_softmax_op SRCS softmax_op.cc DEPS
${
bm_bridge_deps
}
)
lite_cc_library
(
bm_bridge_mul_op SRCS mul_op.cc DEPS
${
bm_bridge_deps
}
)
lite_cc_library
(
bm_bridge_batch_norm_op SRCS batch_norm_op.cc DEPS
${
bm_bridge_deps
}
)
set
(
bm_bridges
bm_bridge_registry
bm_bridge_act_op
bm_bridge_conv_op
bm_bridge_elementwise_ops
bm_bridge_pool_op
bm_bridge_softmax_op
bm_bridge_mul_op
bm_bridge_batch_norm_op
CACHE INTERNAL
"bm_bridges"
)
lite/kernels/bm/
conv_compute.h
→
lite/kernels/bm/
bridges/act_op.cc
浏览文件 @
662e4d7c
...
@@ -12,35 +12,25 @@
...
@@ -12,35 +12,25 @@
// See the License for the specific language governing permissions and
// See the License for the specific language governing permissions and
// limitations under the License.
// limitations under the License.
#pragma once
#include "lite/kernels/bm/bridges/registry.h"
#include "lite/core/kernel.h"
namespace
paddle
{
namespace
paddle
{
namespace
lite
{
namespace
lite
{
namespace
kernels
{
namespace
kernels
{
namespace
bm
{
namespace
bm
{
namespace
bridges
{
class
ConvCompute
:
public
KernelLite
<
TARGET
(
kBM
),
PRECISION
(
kFloat
)
>
{
node_map_type
ActConverter
(
const
std
::
shared_ptr
<
lite
::
OpLite
>
op
,
public:
const
node_map_type
&
input_nodes
)
{
using
param_t
=
operators
::
ConvParam
;
// output converted nodes
node_map_type
output_nodes
;
void
PrepareForRun
()
{};
return
output_nodes
;
void
Run
()
{};
}
virtual
~
ConvCompute
()
=
default
;
};
template
<
PrecisionType
Ptype_out
>
class
ConvComputeInt8
:
public
KernelLite
<
TARGET
(
kBM
),
PRECISION
(
kInt8
),
DATALAYOUT
(
kNCHW
)
>
{
public:
using
param_t
=
operators
::
ConvParam
;
void
PrepareForRun
()
{};
void
Run
()
{};
virtual
~
ConvComputeInt8
()
=
default
;
};
}
// namespace bridges
}
// namespace bm
}
// namespace bm
}
// namespace kernels
}
// namespace kernels
}
// namespace lite
}
// namespace lite
}
// namespace paddle
}
// namespace paddle
REGISTER_BM_BRIDGE
(
relu
,
paddle
::
lite
::
kernels
::
bm
::
bridges
::
ActConverter
);
lite/kernels/bm/
mul_compute.h
→
lite/kernels/bm/
bridges/batch_norm_op.cc
浏览文件 @
662e4d7c
...
@@ -12,39 +12,25 @@
...
@@ -12,39 +12,25 @@
// See the License for the specific language governing permissions and
// See the License for the specific language governing permissions and
// limitations under the License.
// limitations under the License.
#pragma once
#include "lite/kernels/bm/bridges/registry.h"
#include <algorithm>
#include "lite/core/kernel.h"
#include "lite/operators/mul_op.h"
namespace
paddle
{
namespace
paddle
{
namespace
lite
{
namespace
lite
{
namespace
kernels
{
namespace
kernels
{
namespace
bm
{
namespace
bm
{
namespace
bridges
{
class
MulCompute
:
public
KernelLite
<
TARGET
(
kBM
),
PRECISION
(
kFloat
)
>
{
node_map_type
BatchNormConverter
(
const
std
::
shared_ptr
<
lite
::
OpLite
>
op
,
public:
const
node_map_type
&
input_nodes
)
{
using
param_t
=
operators
::
MulParam
;
// output converted nodes
node_map_type
output_nodes
;
void
PrepareForRun
()
override
;
return
output_nodes
;
void
Run
()
override
;
}
virtual
~
MulCompute
()
=
default
;
};
template
<
PrecisionType
Ptype_out
>
class
MulComputeInt8
:
public
KernelLite
<
TARGET
(
kBM
),
PRECISION
(
kInt8
)
>
{
public:
using
param_t
=
operators
::
MulParam
;
void
PrepareForRun
()
override
;
void
Run
()
override
;
virtual
~
MulComputeInt8
()
=
default
;
};
}
// namespace bridges
}
// namespace bm
}
// namespace bm
}
// namespace kernels
}
// namespace kernels
}
// namespace lite
}
// namespace lite
}
// namespace paddle
}
// namespace paddle
REGISTER_BM_BRIDGE
(
batch_norm
,
paddle
::
lite
::
kernels
::
bm
::
bridges
::
BatchNormConverter
);
lite/kernels/bm/bridges/conv_op.cc
0 → 100644
浏览文件 @
662e4d7c
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "lite/kernels/bm/bridges/registry.h"

namespace paddle {
namespace lite {
namespace kernels {
namespace bm {
namespace bridges {

// Converts a conv2d op into BM graph nodes. Placeholder implementation:
// produces no nodes and yields an empty map.
node_map_type ConvConverter(const std::shared_ptr<lite::OpLite> op,
                            const node_map_type& input_nodes) {
  return node_map_type{};
}

}  // namespace bridges
}  // namespace bm
}  // namespace kernels
}  // namespace lite
}  // namespace paddle

REGISTER_BM_BRIDGE(conv2d, paddle::lite::kernels::bm::bridges::ConvConverter);
lite/kernels/bm/bridges/elementwise_ops.cc
0 → 100644
浏览文件 @
662e4d7c
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "lite/kernels/bm/bridges/registry.h"

namespace paddle {
namespace lite {
namespace kernels {
namespace bm {
namespace bridges {

// Converts an elementwise op into BM graph nodes. Placeholder
// implementation: produces no nodes and yields an empty map.
node_map_type ElementwiseConverter(const std::shared_ptr<lite::OpLite> op,
                                   const node_map_type& input_nodes) {
  return node_map_type{};
}

}  // namespace bridges
}  // namespace bm
}  // namespace kernels
}  // namespace lite
}  // namespace paddle

REGISTER_BM_BRIDGE(elementwise,
                   paddle::lite::kernels::bm::bridges::ElementwiseConverter);
lite/kernels/bm/bridges/mul_op.cc
0 → 100644
浏览文件 @
662e4d7c
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "lite/kernels/bm/bridges/registry.h"

namespace paddle {
namespace lite {
namespace kernels {
namespace bm {
namespace bridges {

// Converts a mul op into BM graph nodes. Placeholder implementation:
// produces no nodes and yields an empty map.
node_map_type MulConverter(const std::shared_ptr<lite::OpLite> op,
                           const node_map_type& input_nodes) {
  return node_map_type{};
}

}  // namespace bridges
}  // namespace bm
}  // namespace kernels
}  // namespace lite
}  // namespace paddle

REGISTER_BM_BRIDGE(mul, paddle::lite::kernels::bm::bridges::MulConverter);
lite/kernels/bm/bridges/pool_op.cc
0 → 100644
浏览文件 @
662e4d7c
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "lite/kernels/bm/bridges/registry.h"

namespace paddle {
namespace lite {
namespace kernels {
namespace bm {
namespace bridges {

// Converts a pool2d op into BM graph nodes. Placeholder implementation:
// produces no nodes and yields an empty map.
node_map_type PoolConverter(const std::shared_ptr<lite::OpLite> op,
                            const node_map_type& input_nodes) {
  return node_map_type{};
}

}  // namespace bridges
}  // namespace bm
}  // namespace kernels
}  // namespace lite
}  // namespace paddle

REGISTER_BM_BRIDGE(pool2d, paddle::lite::kernels::bm::bridges::PoolConverter);
lite/kernels/bm/
calib_compute.h
→
lite/kernels/bm/
bridges/registry.cc
浏览文件 @
662e4d7c
...
@@ -12,39 +12,29 @@
...
@@ -12,39 +12,29 @@
// See the License for the specific language governing permissions and
// limitations under the License.

#include "lite/kernels/bm/bridges/registry.h"
#include <utility>

namespace paddle {
namespace lite {
namespace kernels {
namespace bm {
namespace bridges {

// Meyers-singleton accessor: the registry is created on first use and is
// shared process-wide.
Factory& Factory::Instance() {
  static Factory g_bm_bridge;
  return g_bm_bridge;
}

// Returns true if a converter has been registered for `op_type`.
bool Factory::HasType(const std::string& op_type) const {
  return map_.count(op_type);
}

// Registers `func_name` as the converter for `op_type`.
// NOTE(review): std::unordered_map::insert silently keeps the first entry on
// duplicate registration — confirm that is the intended policy.
void Factory::Insert(const std::string& op_type, const func_type& func_name) {
  map_.insert(std::make_pair(op_type, func_name));
}

}  // namespace bridges
}  // namespace bm
}  // namespace kernels
}  // namespace lite
}  // namespace paddle
lite/kernels/bm/bridges/registry.h
0 → 100644
浏览文件 @
662e4d7c
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#pragma once

#include <functional>
#include <memory>
#include <string>
#include <type_traits>  // std::is_same, used by the STATIC_ASSERT macro below
#include <unordered_map>
#include <vector>
#include "lite/core/op_lite.h"
#include "lite/utils/macros.h"

namespace paddle {
namespace lite {
namespace kernels {
namespace bm {
namespace bridges {

// var_name -> BM node pointer produced for that variable.
// NOTE(review): the mapped type is shared_ptr<void*> (a shared pointer to a
// raw pointer) — confirm shared_ptr<void> was not intended.
using node_map_type =
    std::unordered_map<std::string, std::shared_ptr<void*>>;

// Signature of an op converter: takes the op and its input nodes, returns
// the nodes created for its outputs.
using func_type = std::function<node_map_type(const std::shared_ptr<OpLite>,
                                              const node_map_type&)>;

// op type name -> converter function.
using cvt_map_type = std::unordered_map<std::string, func_type>;

// Process-wide registry mapping op types to BM converter functions.
class Factory {
 public:
  static Factory& Instance();

  const cvt_map_type& AllFunctions() const { return map_; }
  bool HasType(const std::string& op_type) const;
  void Insert(const std::string& op_type, const func_type& func_name);
  Factory() = default;

 private:
  cvt_map_type map_;
  DISALLOW_COPY_AND_ASSIGN(Factory);
};

}  // namespace bridges
}  // namespace bm
}  // namespace kernels
}  // namespace lite
}  // namespace paddle

// some platform-independent definition
#if defined(_WIN32)
#define UNUSED
#define __builtin_expect(EXP, C) (EXP)
#else
#define UNUSED __attribute__((unused))
#endif

// Fails compilation when the registration macro is expanded inside a
// namespace (the qualified and unqualified lookups would then differ).
#define STATIC_ASSERT_JITKERNEL_GLOBAL_NAMESPACE(uniq_name, msg)              \
  struct __test_global_namespace_##uniq_name##__ {};                          \
  static_assert(std::is_same<::__test_global_namespace_##uniq_name##__,       \
                             __test_global_namespace_##uniq_name##__>::value, \
                msg)

// Defines a function that inserts `cvt_func_name` for `op_type` into the
// bridge factory; pair with USE_BM_BRIDGE to trigger the insertion.
#define REGISTER_BM_BRIDGE(op_type, cvt_func_name)                         \
  STATIC_ASSERT_JITKERNEL_GLOBAL_NAMESPACE(                                \
      __reg_bm_bridge_##op_type##__,                                       \
      "REGISTER_BM_BRIDGE must be called in global namespace only once!"); \
  int __reg_bm_bridge_##op_type##_Insert() {                               \
    paddle::lite::kernels::bm::bridges::Factory::Instance().Insert(        \
        #op_type, cvt_func_name);                                          \
    return 0;                                                              \
  }

#define USE_BM_BRIDGE(op_type)                                  \
  extern int __reg_bm_bridge_##op_type##_Insert();              \
  static int __reg_bm_bridge_##op_type##_Insert_return UNUSED = \
      __reg_bm_bridge_##op_type##_Insert();
lite/kernels/bm/bridges/scale_op.cc
0 → 100644
浏览文件 @
662e4d7c
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "lite/kernels/bm/bridges/registry.h"

namespace paddle {
namespace lite {
namespace kernels {
namespace bm {
namespace bridges {

// Converts a scale op into BM graph nodes. Placeholder implementation:
// produces no nodes and yields an empty map.
node_map_type ScaleConverter(const std::shared_ptr<lite::OpLite> op,
                             const node_map_type& input_nodes) {
  return node_map_type{};
}

}  // namespace bridges
}  // namespace bm
}  // namespace kernels
}  // namespace lite
}  // namespace paddle

REGISTER_BM_BRIDGE(scale, paddle::lite::kernels::bm::bridges::ScaleConverter);
lite/kernels/bm/bridges/softmax_op.cc
0 → 100644
浏览文件 @
662e4d7c
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "lite/kernels/bm/bridges/registry.h"

namespace paddle {
namespace lite {
namespace kernels {
namespace bm {
namespace bridges {

// Converts a softmax op into BM graph nodes. Placeholder implementation:
// produces no nodes and yields an empty map.
node_map_type SoftmaxConverter(const std::shared_ptr<lite::OpLite> op,
                               const node_map_type& input_nodes) {
  return node_map_type{};
}

}  // namespace bridges
}  // namespace bm
}  // namespace kernels
}  // namespace lite
}  // namespace paddle

REGISTER_BM_BRIDGE(softmax,
                   paddle::lite::kernels::bm::bridges::SoftmaxConverter);
lite/kernels/bm/calib_compute.cc
已删除
100644 → 0
浏览文件 @
87271863
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "lite/kernels/bm/calib_compute.h"
#include <vector>
#include "lite/core/op_registry.h"
#include "lite/core/type_system.h"

namespace paddle {
namespace lite {
namespace kernels {
namespace bm {

// Placeholder: fp32 -> int8 quantization is not implemented yet.
void CalibComputeFp32ToInt8::Run() {}

// Placeholder: int8 -> fp32 dequantization is not implemented yet.
void CalibComputeInt8ToFp32::Run() {}

}  // namespace bm
}  // namespace kernels
}  // namespace lite
}  // namespace paddle

REGISTER_LITE_KERNEL(calib,
                     kBM,
                     kInt8,
                     kNCHW,
                     paddle::lite::kernels::bm::CalibComputeFp32ToInt8,
                     fp32_to_int8)
    .BindInput("Input",
               {LiteType::GetTensorTy(TARGET(kBM), PRECISION(kFloat))})
    .BindOutput("Out", {LiteType::GetTensorTy(TARGET(kBM), PRECISION(kInt8))})
    .Finalize();

REGISTER_LITE_KERNEL(calib,
                     kBM,
                     kInt8,
                     kNCHW,
                     paddle::lite::kernels::bm::CalibComputeInt8ToFp32,
                     int8_to_fp32)
    .BindInput("Input", {LiteType::GetTensorTy(TARGET(kBM), PRECISION(kInt8))})
    .BindOutput("Out", {LiteType::GetTensorTy(TARGET(kBM), PRECISION(kFloat))})
    .Finalize();

REGISTER_LITE_KERNEL(calib_once,
                     kBM,
                     kInt8,
                     kNCHW,
                     paddle::lite::kernels::bm::CalibComputeFp32ToInt8,
                     fp32_to_int8)
    .BindInput("Input",
               {LiteType::GetTensorTy(TARGET(kBM), PRECISION(kFloat))})
    .BindOutput("Out", {LiteType::GetTensorTy(TARGET(kBM), PRECISION(kInt8))})
    .Finalize();

REGISTER_LITE_KERNEL(calib_once,
                     kBM,
                     kInt8,
                     kNCHW,
                     paddle::lite::kernels::bm::CalibComputeInt8ToFp32,
                     int8_to_fp32)
    .BindInput("Input", {LiteType::GetTensorTy(TARGET(kBM), PRECISION(kInt8))})
    .BindOutput("Out", {LiteType::GetTensorTy(TARGET(kBM), PRECISION(kFloat))})
    .Finalize();
lite/kernels/bm/conv_compute.cc
已删除
100644 → 0
浏览文件 @
87271863
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "lite/kernels/bm/conv_compute.h"
#include <vector>
#include "lite/core/op_registry.h"

namespace paddle {
namespace lite {
namespace kernels {
namespace bm {

// Explicit instantiations of the int8 conv kernel for both output precisions.
template class ConvComputeInt8<PRECISION(kInt8)>;
template class ConvComputeInt8<PRECISION(kFloat)>;

}  // namespace bm
}  // namespace kernels
}  // namespace lite
}  // namespace paddle

REGISTER_LITE_KERNEL(
    conv2d, kBM, kFloat, kNCHW, paddle::lite::kernels::bm::ConvCompute, def)
    .BindInput("Input",
               {LiteType::GetTensorTy(
                   TARGET(kBM), PRECISION(kFloat), DATALAYOUT(kNCHW))})
    .BindInput("Bias", {LiteType::GetTensorTy(TARGET(kBM), PRECISION(kFloat))})
    .BindInput("Filter",
               {LiteType::GetTensorTy(
                   TARGET(kBM), PRECISION(kFloat), DATALAYOUT(kNCHW))})
    .BindOutput("Output",
                {LiteType::GetTensorTy(
                    TARGET(kBM), PRECISION(kFloat), DATALAYOUT(kNCHW))})
    .Finalize();

REGISTER_LITE_KERNEL(
    conv2d,
    kBM,
    kInt8,
    kNCHW,
    paddle::lite::kernels::bm::ConvComputeInt8<PRECISION(kInt8)>,
    int8_out)
    .BindInput("Input",
               {LiteType::GetTensorTy(
                   TARGET(kBM), PRECISION(kInt8), DATALAYOUT(kNCHW))})
    .BindInput("Bias", {LiteType::GetTensorTy(TARGET(kBM), PRECISION(kFloat))})
    .BindInput("Filter",
               {LiteType::GetTensorTy(
                   TARGET(kBM), PRECISION(kInt8), DATALAYOUT(kNCHW))})
    .BindOutput("Output",
                {LiteType::GetTensorTy(
                    TARGET(kBM), PRECISION(kFloat), DATALAYOUT(kNCHW))})
    .Finalize();
lite/kernels/bm/elementwise_compute.cc
已删除
100644 → 0
浏览文件 @
87271863
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "lite/kernels/bm/elementwise_compute.h"
#include <string>
#include <vector>

namespace paddle {
namespace lite {
namespace kernels {
namespace bm {

// Placeholder: elementwise add on BM is not implemented in this kernel.
void ElementwiseAddCompute::Run() {}

// Placeholder int8 variant; instantiated implicitly by the registration.
template <PrecisionType Ptype_out>
void ElementwiseAddComputeInt8<Ptype_out>::Run() {}

}  // namespace bm
}  // namespace kernels
}  // namespace lite
}  // namespace paddle

REGISTER_LITE_KERNEL(elementwise_add,
                     kBM,
                     kFloat,
                     kNCHW,
                     paddle::lite::kernels::bm::ElementwiseAddCompute,
                     def)
    .BindInput("X", {LiteType::GetTensorTy(TARGET(kBM))})
    .BindInput("Y", {LiteType::GetTensorTy(TARGET(kBM))})
    .BindOutput("Out", {LiteType::GetTensorTy(TARGET(kBM))})
    .Finalize();

REGISTER_LITE_KERNEL(
    elementwise_add,
    kBM,
    kInt8,
    kNCHW,
    paddle::lite::kernels::bm::ElementwiseAddComputeInt8<PRECISION(kInt8)>,
    def)
    .BindInput("X", {LiteType::GetTensorTy(TARGET(kBM))})
    .BindInput("Y", {LiteType::GetTensorTy(TARGET(kBM))})
    .BindOutput("Out", {LiteType::GetTensorTy(TARGET(kBM))})
    .Finalize();
lite/kernels/bm/
batch_norm_compute.h
→
lite/kernels/bm/
graph_compute.cc
浏览文件 @
662e4d7c
...
@@ -12,39 +12,37 @@
...
@@ -12,39 +12,37 @@
// See the License for the specific language governing permissions and
// limitations under the License.

#include "lite/kernels/bm/graph_compute.h"
#include <sys/time.h>
#include <time.h>
#include <string>
#include <vector>
#include "lite/core/op_registry.h"
#include "lite/core/type_system.h"

namespace paddle {
namespace lite {
namespace kernels {
namespace bm {

// Placeholder: BM runtime setup for the fused subgraph is not implemented yet.
void GraphCompute::PrepareForRun() {}

// Placeholder: BM subgraph execution is not implemented yet.
void GraphCompute::Run() {}

}  // namespace bm
}  // namespace kernels
}  // namespace lite
}  // namespace paddle

REGISTER_LITE_KERNEL(graph_op,
                     kBM,
                     kFloat,
                     kNCHW,
                     paddle::lite::kernels::bm::GraphCompute,
                     def)
    .BindInput("Inputs", {LiteType::GetTensorTy(TARGET(kHost))})
    .BindInput("Weight", {LiteType::GetTensorTy(TARGET(kHost))})
    .BindOutput("Outputs", {LiteType::GetTensorTy(TARGET(kHost))})
    .Finalize();
lite/kernels/bm/
elementwise
_compute.h
→
lite/kernels/bm/
graph
_compute.h
浏览文件 @
662e4d7c
...
@@ -13,29 +13,28 @@
...
@@ -13,29 +13,28 @@
// limitations under the License.

#pragma once

#include <memory>
#include <string>
#include <vector>
#include "lite/core/kernel.h"
#include "lite/core/op_registry.h"

namespace paddle {
namespace lite {
namespace kernels {
namespace bm {

// Kernel that runs a fused BM subgraph (graph_op).
// PrepareForRun/Run bodies live in graph_compute.cc.
class GraphCompute : public KernelLite<TARGET(kBM), PRECISION(kFloat)> {
 public:
  using param_t = operators::GraphParam;

  void PrepareForRun() override;
  void Run() override;
  virtual ~GraphCompute() = default;
};

}  // namespace bm
}  // namespace kernels
}  // namespace lite
}  // namespace paddle
lite/kernels/bm/io_copy_compute.cc
已删除
100644 → 0
浏览文件 @
87271863
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "lite/backends/bm/target_wrapper.h"
#include "lite/core/kernel.h"
#include "lite/core/op_registry.h"

namespace paddle {
namespace lite {
namespace kernels {
namespace bm {

using TargetW = TargetWrapper<TARGET(kBM)>;

// Host to BM device memory, synchronous.
void CopyFromHostSync(void* target, const void* source, size_t size) {
  TargetW::MemcpySync(target, source, size, IoDirection::HtoD);
}

// Host to BM device memory, asynchronous on the given stream.
void CopyFromHostAsync(void* target,
                       const void* source,
                       size_t size,
                       TargetW::stream_t stream) {
  TargetW::MemcpyAsync(target, source, size, IoDirection::HtoD, stream);
}

// BM device to host memory, synchronous.
// (Fixed: previous comment said "Host to Host", but the copy direction is
// DtoH — device to host.)
void CopyToHostSync(void* target, const void* source, size_t size) {
  TargetW::MemcpySync(target, source, size, IoDirection::DtoH);
}

/*
 * This kernel copies a tensor from host to BM space.
 */
class IoCopyHostToBMCompute
    : public KernelLite<TARGET(kBM), PRECISION(kAny), DATALAYOUT(kAny)> {
 public:
  void Run() override {
    auto& param = Param<operators::IoCopyParam>();
    // The source tensor must live on the host (kHost or kX86).
    CHECK(param.x->target() == TARGET(kHost) ||
          param.x->target() == TARGET(kX86));
    auto mem_size = param.x->memory_size();
    VLOG(4) << "copy size " << mem_size;
    auto* data = param.y->mutable_data(TARGET(kBM), mem_size);
    CopyFromHostSync(data, param.x->raw_data(), mem_size);
  }

  // Infers the output type: same id/precision/layout as the input, with the
  // target retargeted from kHost to kBM.
  std::unique_ptr<type_infer_handler_t> GetTypeInferHandler() override {
    std::unique_ptr<type_infer_handler_t> res(new type_infer_handler_t);
    *res = [](const std::map<std::string, const Type*>& inputs,
              const std::string& out) -> const Type* {
      CHECK(!inputs.empty());
      auto* type = inputs.at("Input");
      CHECK(type->target() == TARGET(kHost));
      auto out_place = type->place();
      out_place.target = TARGET(kBM);
      auto* out_type = Type::Get(type->id(),
                                 out_place.target,
                                 out_place.precision,
                                 out_place.layout,
                                 out_place.device);
      return out_type;
    };
    return res;
  }

  std::string doc() const override { return "Copy IO from HOST to BM"; }
};

/*
 * This kernel copies a tensor from BM to host space.
 */
class IoCopyBMToHostCompute
    : public KernelLite<TARGET(kBM), PRECISION(kAny), DATALAYOUT(kAny)> {
 public:
  void Run() override {
    auto& param = Param<operators::IoCopyParam>();
    CHECK(param.x->target() == TARGET(kBM));
    auto mem_size = param.x->memory_size();
    VLOG(4) << "io copy bm to host " << mem_size;
    auto* data = param.y->mutable_data(TARGET(kHost), mem_size);
    CopyToHostSync(data, param.x->raw_data(), mem_size);
  }

  std::string doc() const override { return "Copy IO from BM to HOST"; }
};

}  // namespace bm
}  // namespace kernels
}  // namespace lite
}  // namespace paddle

REGISTER_LITE_KERNEL(io_copy,
                     kBM,
                     kAny,
                     kAny,
                     paddle::lite::kernels::bm::IoCopyHostToBMCompute,
                     host_to_device)
    .BindInput("Input",
               {LiteType::GetTensorTy(
                   TARGET(kHost), PRECISION(kAny), DATALAYOUT(kAny))})
    .BindOutput("Out",
                {LiteType::GetTensorTy(
                    TARGET(kBM), PRECISION(kAny), DATALAYOUT(kAny))})
    .Finalize();

REGISTER_LITE_KERNEL(io_copy,
                     kBM,
                     kAny,
                     kAny,
                     paddle::lite::kernels::bm::IoCopyBMToHostCompute,
                     device_to_host)
    .BindInput("Input",
               {LiteType::GetTensorTy(
                   TARGET(kBM), PRECISION(kAny), DATALAYOUT(kAny))})
    .BindOutput("Out",
                {LiteType::GetTensorTy(
                    TARGET(kHost), PRECISION(kAny), DATALAYOUT(kAny))})
    .Finalize();

REGISTER_LITE_KERNEL(io_copy_once,
                     kBM,
                     kAny,
                     kAny,
                     paddle::lite::kernels::bm::IoCopyHostToBMCompute,
                     host_to_device)
    .BindInput("Input",
               {LiteType::GetTensorTy(
                   TARGET(kHost), PRECISION(kAny), DATALAYOUT(kAny))})
    .BindOutput("Out",
                {LiteType::GetTensorTy(
                    TARGET(kBM), PRECISION(kAny), DATALAYOUT(kAny))})
    .Finalize();

REGISTER_LITE_KERNEL(io_copy_once,
                     kBM,
                     kAny,
                     kAny,
                     paddle::lite::kernels::bm::IoCopyBMToHostCompute,
                     device_to_host)
    .BindInput("Input",
               {LiteType::GetTensorTy(
                   TARGET(kBM), PRECISION(kAny), DATALAYOUT(kAny))})
    .BindOutput("Out",
                {LiteType::GetTensorTy(
                    TARGET(kHost), PRECISION(kAny), DATALAYOUT(kAny))})
    .Finalize();
lite/kernels/bm/mul_compute.cc
已删除
100644 → 0
浏览文件 @
87271863
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "lite/kernels/bm/mul_compute.h"
#include <string>
#include <vector>
#include "lite/core/op_registry.h"
#include "lite/core/type_system.h"

namespace paddle {
namespace lite {
namespace kernels {
namespace bm {

// All bodies below are placeholders: no BM computation is performed yet.

void MulCompute::PrepareForRun() {}

void MulCompute::Run() {}

template <PrecisionType Ptype_out>
void MulComputeInt8<Ptype_out>::PrepareForRun() {}

template <PrecisionType Ptype_out>
void MulComputeInt8<Ptype_out>::Run() {}

}  // namespace bm
}  // namespace kernels
}  // namespace lite
}  // namespace paddle

REGISTER_LITE_KERNEL(
    mul, kBM, kFloat, kNCHW, paddle::lite::kernels::bm::MulCompute, def)
    .BindInput("X", {LiteType::GetTensorTy(TARGET(kBM))})
    .BindInput("Y", {LiteType::GetTensorTy(TARGET(kBM))})
    .BindOutput("Out", {LiteType::GetTensorTy(TARGET(kBM))})
    .Finalize();

REGISTER_LITE_KERNEL(
    mul,
    kBM,
    kInt8,
    kNCHW,
    paddle::lite::kernels::bm::MulComputeInt8<PRECISION(kInt8)>,
    def)
    .BindInput("X", {LiteType::GetTensorTy(TARGET(kBM))})
    .BindInput("Y", {LiteType::GetTensorTy(TARGET(kBM))})
    .BindOutput("Out", {LiteType::GetTensorTy(TARGET(kBM))})
    .Finalize();
lite/kernels/bm/pool_compute.cc
已删除
100644 → 0
浏览文件 @
87271863
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "lite/kernels/bm/pool_compute.h"
#include <string>
#include <vector>
#include "lite/core/op_registry.h"
#include "lite/core/type_system.h"

namespace paddle {
namespace lite {
namespace kernels {
namespace bm {

// All bodies below are placeholders: no BM computation is performed yet.

void PoolCompute::PrepareForRun() {}

void PoolCompute::Run() {}

template <PrecisionType Ptype_out>
void PoolComputeInt8<Ptype_out>::PrepareForRun() {}

template <PrecisionType Ptype_out>
void PoolComputeInt8<Ptype_out>::Run() {}

}  // namespace bm
}  // namespace kernels
}  // namespace lite
}  // namespace paddle

REGISTER_LITE_KERNEL(
    pool2d, kBM, kFloat, kNCHW, paddle::lite::kernels::bm::PoolCompute, def)
    .BindInput("X", {LiteType::GetTensorTy(TARGET(kBM))})
    .BindOutput("Out", {LiteType::GetTensorTy(TARGET(kBM))})
    .Finalize();

REGISTER_LITE_KERNEL(
    pool2d,
    kBM,
    kInt8,
    kNCHW,
    paddle::lite::kernels::bm::PoolComputeInt8<PRECISION(kInt8)>,
    def)
    .BindInput("X", {LiteType::GetTensorTy(TARGET(kBM))})
    .BindOutput("Out", {LiteType::GetTensorTy(TARGET(kBM))})
    .Finalize();
lite/kernels/bm/pool_compute.h
已删除
100644 → 0
浏览文件 @
87271863
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#pragma once

#include <algorithm>
#include "lite/core/kernel.h"
#include "lite/operators/pool_op.h"

namespace paddle {
namespace lite {
namespace kernels {
namespace bm {

// Float pool2d kernel for the BM target; bodies live in pool_compute.cc.
class PoolCompute : public KernelLite<TARGET(kBM), PRECISION(kFloat)> {
 public:
  using param_t = operators::PoolParam;

  void PrepareForRun() override;
  void Run() override;
  virtual ~PoolCompute() = default;
};

// Int8 pool2d kernel, parameterized on the output precision.
template <PrecisionType Ptype_out>
class PoolComputeInt8 : public KernelLite<TARGET(kBM), PRECISION(kInt8)> {
 public:
  using param_t = operators::PoolParam;

  void PrepareForRun() override;
  void Run() override;
  virtual ~PoolComputeInt8() = default;
};

}  // namespace bm
}  // namespace kernels
}  // namespace lite
}  // namespace paddle
lite/kernels/bm/relu_compute.h
已删除
100644 → 0
浏览文件 @
87271863
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#pragma once

#include <algorithm>
#include "lite/core/kernel.h"
#include "lite/operators/relu_op.h"

namespace paddle {
namespace lite {
namespace kernels {
namespace bm {

// Float relu kernel for the BM target.
class ReluCompute : public KernelLite<TARGET(kBM), PRECISION(kFloat)> {
 public:
  using param_t = operators::ActivationParam;

  void PrepareForRun() override;
  void Run() override;
  virtual ~ReluCompute() = default;
};

// Int8 relu kernel, parameterized on the output precision.
template <PrecisionType Ptype_out>
class ReluComputeInt8 : public KernelLite<TARGET(kBM), PRECISION(kInt8)> {
 public:
  using param_t = operators::ActivationParam;

  void PrepareForRun() override;
  void Run() override;
  virtual ~ReluComputeInt8() = default;
};

}  // namespace bm
}  // namespace kernels
}  // namespace lite
}  // namespace paddle
lite/kernels/bm/scale_compute.cc
已删除
100644 → 0
浏览文件 @
87271863
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "lite/kernels/bm/scale_compute.h"
#include <string>
#include <vector>
#include "lite/core/op_registry.h"
#include "lite/core/type_system.h"
namespace paddle {
namespace lite {
namespace kernels {
namespace bm {

// TODO(review): all four definitions below are intentional no-op stubs;
// the BM scale kernel has no host-side implementation yet.

void ScaleCompute::PrepareForRun() {}

void ScaleCompute::Run() {}

template <PrecisionType Ptype_out>
void ScaleComputeInt8<Ptype_out>::PrepareForRun() {}

template <PrecisionType Ptype_out>
void ScaleComputeInt8<Ptype_out>::Run() {}

}  // namespace bm
}  // namespace kernels
}  // namespace lite
}  // namespace paddle
// Register the FP32 scale kernel for the BM target (NCHW layout).
REGISTER_LITE_KERNEL(scale,
                     kBM,
                     kFloat,
                     kNCHW,
                     paddle::lite::kernels::bm::ScaleCompute,
                     def)
    .BindInput("X", {LiteType::GetTensorTy(TARGET(kBM))})
    .BindOutput("Out", {LiteType::GetTensorTy(TARGET(kBM))})
    .Finalize();
// Register the INT8 scale kernel for the BM target (NCHW layout).
REGISTER_LITE_KERNEL(scale,
                     kBM,
                     kInt8,
                     kNCHW,
                     paddle::lite::kernels::bm::ScaleComputeInt8<PRECISION(kInt8)>,
                     def)
    .BindInput("X", {LiteType::GetTensorTy(TARGET(kBM))})
    .BindOutput("Out", {LiteType::GetTensorTy(TARGET(kBM))})
    .Finalize();
lite/kernels/bm/scale_compute.h
已删除
100644 → 0
浏览文件 @
87271863
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <algorithm>
#include "lite/core/kernel.h"
#include "lite/operators/scale_op.h"
namespace paddle {
namespace lite {
namespace kernels {
namespace bm {

// FP32 scale kernel for the Bitmain (BM) target.
// NOTE(review): PrepareForRun/Run are declared here and defined in the
// matching .cc file, where they are currently empty stubs.
class ScaleCompute : public KernelLite<TARGET(kBM), PRECISION(kFloat)> {
 public:
  using param_t = operators::ScaleParam;

  void PrepareForRun() override;
  void Run() override;

  virtual ~ScaleCompute() = default;
};

// INT8 scale kernel; Ptype_out selects the kernel's output precision.
template <PrecisionType Ptype_out>
class ScaleComputeInt8 : public KernelLite<TARGET(kBM), PRECISION(kInt8)> {
 public:
  using param_t = operators::ScaleParam;

  void PrepareForRun() override;
  void Run() override;

  virtual ~ScaleComputeInt8() = default;
};

}  // namespace bm
}  // namespace kernels
}  // namespace lite
}  // namespace paddle
lite/kernels/bm/softmax_compute.cc
已删除
100644 → 0
浏览文件 @
87271863
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "lite/kernels/bm/softmax_compute.h"
#include <string>
#include <vector>
#include "lite/core/op_registry.h"
#include "lite/core/type_system.h"
namespace paddle {
namespace lite {
namespace kernels {
namespace bm {

// TODO(review): all four definitions below are intentional no-op stubs;
// the BM softmax kernel has no host-side implementation yet.

void SoftmaxCompute::PrepareForRun() {}

void SoftmaxCompute::Run() {}

template <PrecisionType Ptype_out>
void SoftmaxComputeInt8<Ptype_out>::PrepareForRun() {}

template <PrecisionType Ptype_out>
void SoftmaxComputeInt8<Ptype_out>::Run() {}

}  // namespace bm
}  // namespace kernels
}  // namespace lite
}  // namespace paddle
// Register the FP32 softmax kernel for the BM target (NCHW layout).
REGISTER_LITE_KERNEL(softmax,
                     kBM,
                     kFloat,
                     kNCHW,
                     paddle::lite::kernels::bm::SoftmaxCompute,
                     def)
    .BindInput("X", {LiteType::GetTensorTy(TARGET(kBM))})
    .BindOutput("Out", {LiteType::GetTensorTy(TARGET(kBM))})
    .Finalize();
// Register the INT8 softmax kernel for the BM target (NCHW layout).
REGISTER_LITE_KERNEL(softmax,
                     kBM,
                     kInt8,
                     kNCHW,
                     paddle::lite::kernels::bm::SoftmaxComputeInt8<PRECISION(kInt8)>,
                     def)
    .BindInput("X", {LiteType::GetTensorTy(TARGET(kBM))})
    .BindOutput("Out", {LiteType::GetTensorTy(TARGET(kBM))})
    .Finalize();
lite/kernels/bm/softmax_compute.h
已删除
100644 → 0
浏览文件 @
87271863
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <algorithm>
#include "lite/core/kernel.h"
#include "lite/operators/softmax_op.h"
namespace paddle {
namespace lite {
namespace kernels {
namespace bm {

// FP32 softmax kernel for the Bitmain (BM) target.
// NOTE(review): PrepareForRun/Run are declared here and defined in the
// matching .cc file, where they are currently empty stubs.
class SoftmaxCompute : public KernelLite<TARGET(kBM), PRECISION(kFloat)> {
 public:
  using param_t = operators::SoftmaxParam;

  void PrepareForRun() override;
  void Run() override;

  virtual ~SoftmaxCompute() = default;
};

// INT8 softmax kernel; Ptype_out selects the kernel's output precision.
template <PrecisionType Ptype_out>
class SoftmaxComputeInt8 : public KernelLite<TARGET(kBM), PRECISION(kInt8)> {
 public:
  using param_t = operators::SoftmaxParam;

  void PrepareForRun() override;
  void Run() override;

  virtual ~SoftmaxComputeInt8() = default;
};

}  // namespace bm
}  // namespace kernels
}  // namespace lite
}  // namespace paddle
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录