Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
PaddlePaddle
Paddle
提交
3a425e7e
P
Paddle
项目概览
PaddlePaddle
/
Paddle
大约 2 年 前同步成功
通知
2325
Star
20933
Fork
5424
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
1423
列表
看板
标记
里程碑
合并请求
543
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
1,423
Issue
1,423
列表
看板
标记
里程碑
合并请求
543
合并请求
543
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
3a425e7e
编写于
6月 24, 2019
作者:
C
Chunwei
浏览文件
操作
浏览文件
下载
差异文件
Merge branch 'chunwei/framework-support-cl' into 'incubate/lite'
framework support cl See merge request inference/paddlelite!48
上级
17ef40f3
0f03e23b
变更
32
隐藏空白更改
内联
并排
Showing
32 changed file
with
340 addition
and
82 deletion
+340
-82
paddle/fluid/framework/ir/graph_viz_pass.cc
paddle/fluid/framework/ir/graph_viz_pass.cc
+3
-1
paddle/fluid/inference/analysis/dot.h
paddle/fluid/inference/analysis/dot.h
+5
-3
paddle/fluid/lite/CMakeLists.txt
paddle/fluid/lite/CMakeLists.txt
+7
-1
paddle/fluid/lite/api/CMakeLists.txt
paddle/fluid/lite/api/CMakeLists.txt
+5
-4
paddle/fluid/lite/api/cxx_api.cc
paddle/fluid/lite/api/cxx_api.cc
+0
-3
paddle/fluid/lite/api/mobilenetv1_test.cc
paddle/fluid/lite/api/mobilenetv1_test.cc
+23
-7
paddle/fluid/lite/core/context.cc
paddle/fluid/lite/core/context.cc
+4
-0
paddle/fluid/lite/core/context.h
paddle/fluid/lite/core/context.h
+61
-8
paddle/fluid/lite/core/mir/fusion/quant_dequant_op_fuser.cc
paddle/fluid/lite/core/mir/fusion/quant_dequant_op_fuser.cc
+11
-10
paddle/fluid/lite/core/mir/graph_visualize_pass.cc
paddle/fluid/lite/core/mir/graph_visualize_pass.cc
+2
-1
paddle/fluid/lite/core/mir/pattern_matcher.cc
paddle/fluid/lite/core/mir/pattern_matcher.cc
+1
-1
paddle/fluid/lite/core/mir/pattern_matcher.h
paddle/fluid/lite/core/mir/pattern_matcher.h
+2
-1
paddle/fluid/lite/core/mir/type_target_transform_pass.cc
paddle/fluid/lite/core/mir/type_target_transform_pass.cc
+2
-1
paddle/fluid/lite/core/op_lite.cc
paddle/fluid/lite/core/op_lite.cc
+2
-0
paddle/fluid/lite/core/op_registry.cc
paddle/fluid/lite/core/op_registry.cc
+7
-0
paddle/fluid/lite/core/op_registry.h
paddle/fluid/lite/core/op_registry.h
+4
-0
paddle/fluid/lite/core/optimizer.h
paddle/fluid/lite/core/optimizer.h
+13
-4
paddle/fluid/lite/core/program.h
paddle/fluid/lite/core/program.h
+2
-1
paddle/fluid/lite/core/target_wrapper.h
paddle/fluid/lite/core/target_wrapper.h
+5
-4
paddle/fluid/lite/kernels/CMakeLists.txt
paddle/fluid/lite/kernels/CMakeLists.txt
+1
-1
paddle/fluid/lite/kernels/opencl/CMakeLists.txt
paddle/fluid/lite/kernels/opencl/CMakeLists.txt
+16
-0
paddle/fluid/lite/kernels/opencl/elementwise_add_compute.cc
paddle/fluid/lite/kernels/opencl/elementwise_add_compute.cc
+53
-0
paddle/fluid/lite/kernels/opencl/elementwise_add_compute_test.cc
...fluid/lite/kernels/opencl/elementwise_add_compute_test.cc
+66
-0
paddle/fluid/lite/kernels/use_kernels.h
paddle/fluid/lite/kernels/use_kernels.h
+3
-0
paddle/fluid/lite/opencl/CMakeLists.txt
paddle/fluid/lite/opencl/CMakeLists.txt
+13
-15
paddle/fluid/lite/opencl/cl_caller.cc
paddle/fluid/lite/opencl/cl_caller.cc
+3
-3
paddle/fluid/lite/opencl/cl_caller.h
paddle/fluid/lite/opencl/cl_caller.h
+7
-2
paddle/fluid/lite/opencl/cl_helper.cc
paddle/fluid/lite/opencl/cl_helper.cc
+7
-6
paddle/fluid/lite/opencl/cl_helper.h
paddle/fluid/lite/opencl/cl_helper.h
+9
-2
paddle/fluid/lite/opencl/cl_image.cc
paddle/fluid/lite/opencl/cl_image.cc
+1
-1
paddle/fluid/lite/opencl/cl_image.h
paddle/fluid/lite/opencl/cl_image.h
+1
-1
paddle/fluid/lite/opencl/cl_test.cc
paddle/fluid/lite/opencl/cl_test.cc
+1
-1
未找到文件。
paddle/fluid/framework/ir/graph_viz_pass.cc
浏览文件 @
3a425e7e
...
@@ -18,6 +18,7 @@ limitations under the License. */
...
@@ -18,6 +18,7 @@ limitations under the License. */
#include <unordered_set>
#include <unordered_set>
#include "paddle/fluid/framework/op_proto_maker.h"
#include "paddle/fluid/framework/op_proto_maker.h"
#include "paddle/fluid/inference/analysis/dot.h"
#include "paddle/fluid/inference/analysis/dot.h"
#include "paddle/fluid/lite/utils/string.h"
#include "paddle/fluid/string/printf.h"
#include "paddle/fluid/string/printf.h"
namespace
paddle
{
namespace
paddle
{
...
@@ -84,7 +85,8 @@ void GraphVizPass::ApplyImpl(ir::Graph* graph) const {
...
@@ -84,7 +85,8 @@ void GraphVizPass::ApplyImpl(ir::Graph* graph) const {
auto
marked_nodes
=
ConsumeMarkedNodes
(
graph
);
auto
marked_nodes
=
ConsumeMarkedNodes
(
graph
);
// Create nodes
// Create nodes
for
(
const
Node
*
n
:
graph
->
Nodes
())
{
for
(
const
Node
*
n
:
graph
->
Nodes
())
{
std
::
string
node_id
=
FormatName
(
n
)
+
"("
+
std
::
to_string
(
n
->
id
())
+
")"
;
std
::
string
node_id
=
lite
::
string_format
(
"%s(%d)"
,
FormatName
(
n
).
c_str
(),
n
->
id
());
if
(
n
->
IsOp
())
{
if
(
n
->
IsOp
())
{
decltype
(
op_attrs
)
attr
=
decltype
(
op_attrs
)
attr
=
marked_nodes
.
count
(
n
)
?
marked_op_attrs
:
op_attrs
;
marked_nodes
.
count
(
n
)
?
marked_op_attrs
:
op_attrs
;
...
...
paddle/fluid/inference/analysis/dot.h
浏览文件 @
3a425e7e
...
@@ -58,9 +58,11 @@ class Dot {
...
@@ -58,9 +58,11 @@ class Dot {
std
::
vector
<
Attr
>
attrs
;
std
::
vector
<
Attr
>
attrs
;
Node
(
const
std
::
string
&
name
,
const
std
::
vector
<
Attr
>&
attrs
)
Node
(
const
std
::
string
&
name
,
const
std
::
vector
<
Attr
>&
attrs
)
:
name
(
name
),
:
name
(
name
),
attrs
(
attrs
)
{
attrs
(
attrs
),
std
::
stringstream
ss
;
id_
(
"node_"
+
std
::
to_string
(
dot_node_counter
++
))
{}
ss
<<
"node_"
<<
dot_node_counter
++
;
id_
=
ss
.
str
();
}
std
::
string
id
()
const
{
return
id_
;
}
std
::
string
id
()
const
{
return
id_
;
}
...
...
paddle/fluid/lite/CMakeLists.txt
浏览文件 @
3a425e7e
...
@@ -37,7 +37,7 @@ endfunction()
...
@@ -37,7 +37,7 @@ endfunction()
function
(
lite_deps TARGET
)
function
(
lite_deps TARGET
)
set
(
options
""
)
set
(
options
""
)
set
(
oneValueArgs
""
)
set
(
oneValueArgs
""
)
set
(
multiValueArgs DEPS X86_DEPS CUDA_DEPS ARM_DEPS PROFILE_DEPS LIGHT_DEPS HVY_DEPS ARGS
)
set
(
multiValueArgs DEPS X86_DEPS CUDA_DEPS ARM_DEPS PROFILE_DEPS LIGHT_DEPS HVY_DEPS
CL_DEPS
ARGS
)
cmake_parse_arguments
(
lite_deps
"
${
options
}
"
"
${
oneValueArgs
}
"
"
${
multiValueArgs
}
"
${
ARGN
}
)
cmake_parse_arguments
(
lite_deps
"
${
options
}
"
"
${
oneValueArgs
}
"
"
${
multiValueArgs
}
"
${
ARGN
}
)
set
(
deps
${
lite_deps_DEPS
}
)
set
(
deps
${
lite_deps_DEPS
}
)
...
@@ -78,6 +78,12 @@ function (lite_deps TARGET)
...
@@ -78,6 +78,12 @@ function (lite_deps TARGET)
endforeach
(
var
)
endforeach
(
var
)
endif
()
endif
()
if
(
LITE_WITH_OPENCL
)
foreach
(
var
${
lite_deps_CL_DEPS
}
)
set
(
deps
${
deps
}
${
var
}
)
endforeach
(
var
)
endif
()
set
(
${
TARGET
}
${
deps
}
PARENT_SCOPE
)
set
(
${
TARGET
}
${
deps
}
PARENT_SCOPE
)
endfunction
()
endfunction
()
...
...
paddle/fluid/lite/api/CMakeLists.txt
浏览文件 @
3a425e7e
...
@@ -52,22 +52,23 @@ if(LITE_WITH_LIGHT_WEIGHT_FRAMEWORK AND WITH_TESTING)
...
@@ -52,22 +52,23 @@ if(LITE_WITH_LIGHT_WEIGHT_FRAMEWORK AND WITH_TESTING)
set
(
lite_model_test_DEPS cxx_api_lite mir_passes
${
ops_lite
}
${
host_kernels
}
${
arm_kernels
}
)
set
(
lite_model_test_DEPS cxx_api_lite mir_passes
${
ops_lite
}
${
host_kernels
}
${
arm_kernels
}
)
lite_cc_test
(
test_mobilenetv1_lite SRCS mobilenetv1_test.cc
lite_cc_test
(
test_mobilenetv1_lite SRCS mobilenetv1_test.cc
DEPS
${
lite_model_test_DEPS
}
DEPS
${
lite_model_test_DEPS
}
CL_DEPS
${
opencl_kernels
}
ARGS --model_dir=
${
LITE_MODEL_DIR
}
/mobilenet_v1 SERIAL
)
ARGS --model_dir=
${
LITE_MODEL_DIR
}
/mobilenet_v1 SERIAL
)
add_dependencies
(
test_mobilenetv1_lite extern_lite_download_mobilenet_v1_tar_gz
)
add_dependencies
(
test_mobilenetv1_lite extern_lite_download_mobilenet_v1_tar_gz
)
lite_cc_test
(
test_mobilenetv2_lite SRCS mobilenetv2_test.cc
lite_cc_test
(
test_mobilenetv2_lite SRCS mobilenetv2_test.cc
DEPS
${
lite_model_test_DEPS
}
DEPS
${
lite_model_test_DEPS
}
ARGS --model_dir=
${
LITE_MODEL_DIR
}
/mobilenet_v2 SERIAL
)
ARGS --model_dir=
${
LITE_MODEL_DIR
}
/mobilenet_v2 SERIAL
)
add_dependencies
(
test_mobilenetv2_lite extern_lite_download_mobilenet_v2_tar_gz
)
add_dependencies
(
test_mobilenetv2_lite extern_lite_download_mobilenet_v2_tar_gz
)
lite_cc_test
(
test_resnet50_lite SRCS resnet50_test.cc
lite_cc_test
(
test_resnet50_lite SRCS resnet50_test.cc
DEPS
${
lite_model_test_DEPS
}
DEPS
${
lite_model_test_DEPS
}
ARGS --model_dir=
${
LITE_MODEL_DIR
}
/resnet50 SERIAL
)
ARGS --model_dir=
${
LITE_MODEL_DIR
}
/resnet50 SERIAL
)
add_dependencies
(
test_resnet50_lite extern_lite_download_resnet50_tar_gz
)
add_dependencies
(
test_resnet50_lite extern_lite_download_resnet50_tar_gz
)
lite_cc_test
(
test_inceptionv4_lite SRCS inceptionv4_test.cc
lite_cc_test
(
test_inceptionv4_lite SRCS inceptionv4_test.cc
DEPS
${
lite_model_test_DEPS
}
DEPS
${
lite_model_test_DEPS
}
ARGS --model_dir=
${
LITE_MODEL_DIR
}
/inception_v4 SERIAL
)
ARGS --model_dir=
${
LITE_MODEL_DIR
}
/inception_v4 SERIAL
)
add_dependencies
(
test_inceptionv4_lite extern_lite_download_inception_v4_tar_gz
)
add_dependencies
(
test_inceptionv4_lite extern_lite_download_inception_v4_tar_gz
)
endif
()
endif
()
...
...
paddle/fluid/lite/api/cxx_api.cc
浏览文件 @
3a425e7e
...
@@ -23,10 +23,7 @@ namespace paddle {
...
@@ -23,10 +23,7 @@ namespace paddle {
namespace
lite
{
namespace
lite
{
void
Predictor
::
SaveModel
(
const
std
::
string
&
dir
)
{
void
Predictor
::
SaveModel
(
const
std
::
string
&
dir
)
{
#ifndef LITE_WITH_ARM
MkDirRecur
(
dir
);
MkDirRecur
(
dir
);
#else
#endif
program_
->
PersistModel
(
dir
,
program_desc_
);
program_
->
PersistModel
(
dir
,
program_desc_
);
LOG
(
INFO
)
<<
"Save model to "
<<
dir
;
LOG
(
INFO
)
<<
"Save model to "
<<
dir
;
}
}
...
...
paddle/fluid/lite/api/mobilenetv1_test.cc
浏览文件 @
3a425e7e
...
@@ -25,16 +25,13 @@
...
@@ -25,16 +25,13 @@
namespace
paddle
{
namespace
paddle
{
namespace
lite
{
namespace
lite
{
#ifdef LITE_WITH_ARM
void
TestModel
(
const
std
::
vector
<
Place
>&
valid_places
,
TEST
(
MobileNetV1
,
test
)
{
const
Place
&
preferred_place
)
{
DeviceInfo
::
Init
();
DeviceInfo
::
Init
();
DeviceInfo
::
Global
().
SetRunMode
(
LITE_POWER_HIGH
,
FLAGS_threads
);
DeviceInfo
::
Global
().
SetRunMode
(
LITE_POWER_HIGH
,
FLAGS_threads
);
lite
::
Predictor
predictor
;
lite
::
Predictor
predictor
;
std
::
vector
<
Place
>
valid_places
({
Place
{
TARGET
(
kHost
),
PRECISION
(
kFloat
)},
Place
{
TARGET
(
kARM
),
PRECISION
(
kFloat
)}});
predictor
.
Build
(
FLAGS_model_dir
,
Place
{
TARGET
(
kARM
),
PRECISION
(
kFloat
)},
predictor
.
Build
(
FLAGS_model_dir
,
preferred_place
,
valid_places
);
valid_places
);
auto
*
input_tensor
=
predictor
.
GetInput
(
0
);
auto
*
input_tensor
=
predictor
.
GetInput
(
0
);
input_tensor
->
Resize
(
DDim
(
std
::
vector
<
DDim
::
value_type
>
({
1
,
3
,
224
,
224
})));
input_tensor
->
Resize
(
DDim
(
std
::
vector
<
DDim
::
value_type
>
({
1
,
3
,
224
,
224
})));
...
@@ -70,7 +67,26 @@ TEST(MobileNetV1, test) {
...
@@ -70,7 +67,26 @@ TEST(MobileNetV1, test) {
ASSERT_EQ
(
out
->
dims
()[
0
],
1
);
ASSERT_EQ
(
out
->
dims
()[
0
],
1
);
ASSERT_EQ
(
out
->
dims
()[
1
],
1000
);
ASSERT_EQ
(
out
->
dims
()[
1
],
1000
);
}
}
#endif
TEST
(
MobileNetV1
,
test_arm
)
{
std
::
vector
<
Place
>
valid_places
({
Place
{
TARGET
(
kHost
),
PRECISION
(
kFloat
)},
Place
{
TARGET
(
kARM
),
PRECISION
(
kFloat
)},
// Place{TARGET(kOpenCL), PRECISION(kFloat)},
});
TestModel
(
valid_places
,
Place
({
TARGET
(
kARM
),
PRECISION
(
kFloat
)}));
}
TEST
(
MobileNetV1
,
test_opencl
)
{
std
::
vector
<
Place
>
valid_places
({
Place
{
TARGET
(
kHost
),
PRECISION
(
kFloat
)},
Place
{
TARGET
(
kARM
),
PRECISION
(
kFloat
)},
Place
{
TARGET
(
kOpenCL
),
PRECISION
(
kFloat
)},
});
TestModel
(
valid_places
,
Place
({
TARGET
(
kOpenCL
),
PRECISION
(
kFloat
)}));
}
}
// namespace lite
}
// namespace lite
}
// namespace paddle
}
// namespace paddle
paddle/fluid/lite/core/context.cc
浏览文件 @
3a425e7e
...
@@ -14,6 +14,10 @@
...
@@ -14,6 +14,10 @@
#include "paddle/fluid/lite/core/context.h"
#include "paddle/fluid/lite/core/context.h"
#ifdef LITE_WITH_OPENCL
DEFINE_string
(
cl_path
,
"/data/local/tmp/opencl"
,
"The OpenCL kernels path."
);
#endif
namespace
paddle
{
namespace
paddle
{
namespace
lite
{}
// namespace lite
namespace
lite
{}
// namespace lite
}
// namespace paddle
}
// namespace paddle
paddle/fluid/lite/core/context.h
浏览文件 @
3a425e7e
...
@@ -23,6 +23,11 @@
...
@@ -23,6 +23,11 @@
#include "paddle/fluid/framework/operator.h"
#include "paddle/fluid/framework/operator.h"
#include "paddle/fluid/platform/device_context.h"
#include "paddle/fluid/platform/device_context.h"
#endif
#endif
#ifdef LITE_WITH_OPENCL
#include "paddle/fluid/lite/opencl/cl_context.h"
#include "paddle/fluid/lite/opencl/cl_engine.h"
#include "paddle/fluid/lite/opencl/cl_helper.h"
#endif
#include <map>
#include <map>
#include <memory>
#include <memory>
#include <set>
#include <set>
...
@@ -34,6 +39,10 @@
...
@@ -34,6 +39,10 @@
#include "paddle/fluid/lite/core/target_wrapper.h"
#include "paddle/fluid/lite/core/target_wrapper.h"
#include "paddle/fluid/lite/utils/all.h"
#include "paddle/fluid/lite/utils/all.h"
#ifdef LITE_WITH_OPENCL
DECLARE_string
(
cl_path
);
#endif
namespace
paddle
{
namespace
paddle
{
namespace
lite
{
namespace
lite
{
...
@@ -44,6 +53,7 @@ using HostContext = Context<TargetType::kHost>;
...
@@ -44,6 +53,7 @@ using HostContext = Context<TargetType::kHost>;
using
X86Context
=
Context
<
TargetType
::
kX86
>
;
using
X86Context
=
Context
<
TargetType
::
kX86
>
;
using
CUDAContext
=
Context
<
TargetType
::
kCUDA
>
;
using
CUDAContext
=
Context
<
TargetType
::
kCUDA
>
;
using
ARMContext
=
Context
<
TargetType
::
kARM
>
;
using
ARMContext
=
Context
<
TargetType
::
kARM
>
;
using
OpenClContext
=
Context
<
TargetType
::
kOpenCL
>
;
template
<
>
template
<
>
class
Context
<
TargetType
::
kHost
>
{
class
Context
<
TargetType
::
kHost
>
{
...
@@ -51,7 +61,7 @@ class Context<TargetType::kHost> {
...
@@ -51,7 +61,7 @@ class Context<TargetType::kHost> {
// NOTE: InitOnce should only be used by ContextScheduler
// NOTE: InitOnce should only be used by ContextScheduler
void
InitOnce
()
{}
void
InitOnce
()
{}
void
CopyShared
(
const
HostContext
*
ctx
)
{}
void
CopyShared
To
(
const
HostContext
*
ctx
)
{}
std
::
string
name
()
const
{
return
"HostContext"
;
}
std
::
string
name
()
const
{
return
"HostContext"
;
}
};
};
...
@@ -69,7 +79,7 @@ class Context<TargetType::kARM> {
...
@@ -69,7 +79,7 @@ class Context<TargetType::kARM> {
// NOTE: InitOnce should only be used by ContextScheduler
// NOTE: InitOnce should only be used by ContextScheduler
void
InitOnce
()
{
DeviceInfo
::
Init
();
}
void
InitOnce
()
{
DeviceInfo
::
Init
();
}
void
CopyShared
(
const
ARMContext
*
ctx
)
{}
void
CopyShared
To
(
const
ARMContext
*
ctx
)
{}
void
SetRunMode
(
PowerMode
mode
,
int
threads
)
{
void
SetRunMode
(
PowerMode
mode
,
int
threads
)
{
return
DeviceInfo
::
Global
().
SetRunMode
(
mode
,
threads
);
return
DeviceInfo
::
Global
().
SetRunMode
(
mode
,
threads
);
...
@@ -109,7 +119,7 @@ class Context<TargetType::kCUDA> {
...
@@ -109,7 +119,7 @@ class Context<TargetType::kCUDA> {
cublas_fp32_
=
std
::
make_shared
<
lite
::
cuda
::
Blas
<
float
>>
();
cublas_fp32_
=
std
::
make_shared
<
lite
::
cuda
::
Blas
<
float
>>
();
}
}
void
CopyShared
(
const
CUDAContext
*
ctx
)
{
void
CopyShared
To
(
const
CUDAContext
*
ctx
)
{
CHECK
(
ctx
);
CHECK
(
ctx
);
CHECK
(
cublas_fp32_
)
<<
"cublas_fp32 should be set first"
;
CHECK
(
cublas_fp32_
)
<<
"cublas_fp32 should be set first"
;
ctx
->
cublas_fp32_
=
cublas_fp32_
;
ctx
->
cublas_fp32_
=
cublas_fp32_
;
...
@@ -175,7 +185,7 @@ class Context<TargetType::kX86> {
...
@@ -175,7 +185,7 @@ class Context<TargetType::kX86> {
// NOTE: InitOnce should only be used by ContextScheduler
// NOTE: InitOnce should only be used by ContextScheduler
void
InitOnce
()
{}
void
InitOnce
()
{}
void
CopyShared
(
const
X86Context
*
ctx
)
{}
void
CopyShared
To
(
const
X86Context
*
ctx
)
{}
const
device_ctx_t
*
x86_device_context
()
{
return
x86_device_context_
.
get
();
}
const
device_ctx_t
*
x86_device_context
()
{
return
x86_device_context_
.
get
();
}
void
SetX86DeviceContext
(
std
::
unique_ptr
<
device_ctx_t
>&&
ctx
)
{
void
SetX86DeviceContext
(
std
::
unique_ptr
<
device_ctx_t
>&&
ctx
)
{
...
@@ -202,6 +212,40 @@ class Context<TargetType::kX86> {
...
@@ -202,6 +212,40 @@ class Context<TargetType::kX86> {
};
};
#endif
#endif
#ifdef LITE_WITH_OPENCL
template
<
>
class
Context
<
TargetType
::
kOpenCL
>
{
mutable
std
::
shared_ptr
<
CLContext
>
cl_context_
;
mutable
std
::
shared_ptr
<
CLHelper
>
cl_helper_
;
public:
CLContext
*
cl_context
()
{
return
cl_context_
.
get
();
}
CLHelper
*
cl_helper
()
{
return
cl_helper_
.
get
();
}
void
InitOnce
()
{
// Init cl engine.
CHECK
(
CLEngine
::
Global
()
->
IsInitSuccess
())
<<
"OpenCL engine init failed"
;
CLEngine
::
Global
()
->
set_cl_path
(
FLAGS_cl_path
);
cl_context_
=
std
::
make_shared
<
CLContext
>
();
cl_helper_
=
std
::
make_shared
<
CLHelper
>
();
cl_helper_
->
set_context
(
cl_context_
.
get
());
PrepareKernels
();
}
void
CopySharedTo
(
const
OpenClContext
*
ctx
)
{
ctx
->
cl_context_
=
cl_context_
;
}
private:
void
PrepareKernels
()
{
cl_helper_
->
AddKernel
(
"elementwise_add"
,
"elementwise_add_kernel.cl"
);
cl_helper_
->
AddKernel
(
"pool_max"
,
"pool_kernel.cl"
);
}
};
#endif
// Context for running a kernel.
// Context for running a kernel.
// Holds the necessary resource and information.
// Holds the necessary resource and information.
class
KernelContext
{
class
KernelContext
{
...
@@ -230,26 +274,32 @@ class ContextScheduler {
...
@@ -230,26 +274,32 @@ class ContextScheduler {
std
::
unique_ptr
<
KernelContext
>
ctx
(
new
KernelContext
);
std
::
unique_ptr
<
KernelContext
>
ctx
(
new
KernelContext
);
switch
(
target
)
{
switch
(
target
)
{
case
TARGET
(
kHost
):
case
TARGET
(
kHost
):
kernel_contexts_
[
TargetType
::
kHost
].
As
<
HostContext
>
().
CopyShared
(
kernel_contexts_
[
TargetType
::
kHost
].
As
<
HostContext
>
().
CopyShared
To
(
&
ctx
->
As
<
HostContext
>
());
&
ctx
->
As
<
HostContext
>
());
break
;
break
;
#ifdef LITE_WITH_X86
#ifdef LITE_WITH_X86
case
TARGET
(
kX86
):
case
TARGET
(
kX86
):
kernel_contexts_
[
TargetType
::
kX86
].
As
<
X86Context
>
().
CopyShared
(
kernel_contexts_
[
TargetType
::
kX86
].
As
<
X86Context
>
().
CopyShared
To
(
&
ctx
->
As
<
X86Context
>
());
&
ctx
->
As
<
X86Context
>
());
break
;
break
;
#endif
#endif
#ifdef LITE_WITH_CUDA
#ifdef LITE_WITH_CUDA
case
TARGET
(
kCUDA
):
case
TARGET
(
kCUDA
):
kernel_contexts_
[
TargetType
::
kCUDA
].
As
<
CUDAContext
>
().
CopyShared
(
kernel_contexts_
[
TargetType
::
kCUDA
].
As
<
CUDAContext
>
().
CopyShared
To
(
&
ctx
->
As
<
CUDAContext
>
());
&
ctx
->
As
<
CUDAContext
>
());
break
;
break
;
#endif
#endif
#ifdef LITE_WITH_ARM
#ifdef LITE_WITH_ARM
case
TARGET
(
kARM
):
case
TARGET
(
kARM
):
kernel_contexts_
[
TargetType
::
kARM
].
As
<
ARMContext
>
().
CopyShared
(
kernel_contexts_
[
TargetType
::
kARM
].
As
<
ARMContext
>
().
CopyShared
To
(
&
ctx
->
As
<
ARMContext
>
());
&
ctx
->
As
<
ARMContext
>
());
break
;
break
;
#endif
#ifdef LITE_WITH_OPENCL
case
TARGET
(
kOpenCL
):
kernel_contexts_
[
TargetType
::
kOpenCL
].
As
<
OpenClContext
>
().
CopySharedTo
(
&
ctx
->
As
<
OpenClContext
>
());
break
;
#endif
#endif
default:
default:
LOG
(
FATAL
)
<<
"unsupported target "
<<
TargetToStr
(
target
);
LOG
(
FATAL
)
<<
"unsupported target "
<<
TargetToStr
(
target
);
...
@@ -273,6 +323,9 @@ class ContextScheduler {
...
@@ -273,6 +323,9 @@ class ContextScheduler {
#endif
#endif
#ifdef LITE_WITH_ARM
#ifdef LITE_WITH_ARM
InitContext
<
TargetType
::
kARM
,
ARMContext
>
();
InitContext
<
TargetType
::
kARM
,
ARMContext
>
();
#endif
#ifdef LITE_WITH_OPENCL
InitContext
<
TargetType
::
kOpenCL
,
OpenClContext
>
();
#endif
#endif
}
}
...
...
paddle/fluid/lite/core/mir/fusion/quant_dequant_op_fuser.cc
浏览文件 @
3a425e7e
...
@@ -15,6 +15,7 @@
...
@@ -15,6 +15,7 @@
#include "paddle/fluid/lite/core/mir/fusion/quant_dequant_op_fuser.h"
#include "paddle/fluid/lite/core/mir/fusion/quant_dequant_op_fuser.h"
#include <memory>
#include <memory>
#include <vector>
#include <vector>
#include "paddle/fluid/lite/utils/string.h"
namespace
paddle
{
namespace
paddle
{
namespace
lite
{
namespace
lite
{
...
@@ -57,24 +58,24 @@ void QuantDequantOpFuser::BuildPattern() {
...
@@ -57,24 +58,24 @@ void QuantDequantOpFuser::BuildPattern() {
->
AsIntermediate
();
->
AsIntermediate
();
std
::
vector
<
PMNode
*>
nodes
;
std
::
vector
<
PMNode
*>
nodes
;
for
(
int
i
=
0
;
i
<
times_
;
i
++
)
{
for
(
int
i
=
0
;
i
<
times_
;
i
++
)
{
nodes
.
push_back
(
VarNode
(
"quantized_op_weight"
+
std
::
to_string
(
i
))
nodes
.
push_back
(
VarNode
(
string_format
(
"quantized_op_weight%d"
,
i
))
->
assert_is_op_input
(
op_type_
,
weight_name
)
->
assert_is_op_input
(
op_type_
,
weight_name
)
->
AsInput
());
->
AsInput
());
nodes
.
push_back
(
OpNode
(
"quantized_op"
+
std
::
to_string
(
i
),
op_type_
)
nodes
.
push_back
(
OpNode
(
string_format
(
"quantized_op%d"
,
i
),
op_type_
)
->
assert_is_op
(
op_type_
)
->
assert_is_op
(
op_type_
)
->
AsIntermediate
());
->
AsIntermediate
());
nodes
.
push_back
(
VarNode
(
"quantized_op_out"
+
std
::
to_string
(
i
))
nodes
.
push_back
(
VarNode
(
string_format
(
"quantized_op_out%d"
,
i
))
->
assert_is_op_output
(
op_type_
)
->
assert_is_op_output
(
op_type_
)
->
assert_is_op_input
(
"fake_dequantize_max_abs"
,
"X"
)
->
assert_is_op_input
(
"fake_dequantize_max_abs"
,
"X"
)
->
AsIntermediate
());
->
AsIntermediate
());
nodes
.
push_back
(
nodes
.
push_back
(
OpNode
(
"dequant_op"
+
std
::
to_string
(
i
),
"fake_dequantize_max_abs"
)
OpNode
(
string_format
(
"dequant_op%d"
,
i
),
"fake_dequantize_max_abs"
)
->
assert_is_op
(
"fake_dequantize_max_abs"
)
->
assert_is_op
(
"fake_dequantize_max_abs"
)
->
AsIntermediate
());
->
AsIntermediate
());
nodes
.
push_back
(
VarNode
(
"dequant_op_out"
+
std
::
to_string
(
i
))
nodes
.
push_back
(
VarNode
(
string_format
(
"dequant_op_out%d"
,
i
))
->
assert_is_op_output
(
"fake_dequantize_max_abs"
,
"Out"
)
->
assert_is_op_output
(
"fake_dequantize_max_abs"
,
"Out"
)
->
AsOutput
());
->
AsOutput
());
}
}
...
@@ -108,11 +109,11 @@ void QuantDequantOpFuser::InsertNewNode(SSAGraph* graph,
...
@@ -108,11 +109,11 @@ void QuantDequantOpFuser::InsertNewNode(SSAGraph* graph,
std
::
vector
<
Node
*>
nodes
;
std
::
vector
<
Node
*>
nodes
;
for
(
int
i
=
0
;
i
<
times_
;
i
++
)
{
for
(
int
i
=
0
;
i
<
times_
;
i
++
)
{
nodes
.
push_back
(
matched
.
at
(
"quantized_op_weight"
+
std
::
to_string
(
i
)));
nodes
.
push_back
(
matched
.
at
(
string_format
(
"quantized_op_weight%d"
,
i
)));
nodes
.
push_back
(
matched
.
at
(
"quantized_op"
+
std
::
to_string
(
i
)));
nodes
.
push_back
(
matched
.
at
(
string_format
(
"quantized_op%d"
,
i
)));
nodes
.
push_back
(
matched
.
at
(
"quantized_op_out"
+
std
::
to_string
(
i
)));
nodes
.
push_back
(
matched
.
at
(
string_format
(
"quantized_op_out%d"
,
i
)));
nodes
.
push_back
(
matched
.
at
(
"dequant_op"
+
std
::
to_string
(
i
)));
nodes
.
push_back
(
matched
.
at
(
string_format
(
"dequant_op%d"
,
i
)));
nodes
.
push_back
(
matched
.
at
(
"dequant_op_out"
+
std
::
to_string
(
i
)));
nodes
.
push_back
(
matched
.
at
(
string_format
(
"dequant_op_out%d"
,
i
)));
}
}
int
bit_length
=
quant_op
->
stmt
()
->
op_info
()
->
GetAttr
<
int
>
(
"bit_length"
);
int
bit_length
=
quant_op
->
stmt
()
->
op_info
()
->
GetAttr
<
int
>
(
"bit_length"
);
auto
*
scope
=
quant_op
->
stmt
()
->
op
()
->
scope
();
auto
*
scope
=
quant_op
->
stmt
()
->
op
()
->
scope
();
...
...
paddle/fluid/lite/core/mir/graph_visualize_pass.cc
浏览文件 @
3a425e7e
...
@@ -17,6 +17,7 @@
...
@@ -17,6 +17,7 @@
#include <set>
#include <set>
#include <string>
#include <string>
#include "paddle/fluid/lite/core/mir/pass_registry.h"
#include "paddle/fluid/lite/core/mir/pass_registry.h"
#include "paddle/fluid/lite/utils/string.h"
namespace
paddle
{
namespace
paddle
{
namespace
lite
{
namespace
lite
{
...
@@ -39,7 +40,7 @@ std::string Visualize(mir::SSAGraph* graph) {
...
@@ -39,7 +40,7 @@ std::string Visualize(mir::SSAGraph* graph) {
if
(
node
.
IsArg
())
{
if
(
node
.
IsArg
())
{
key
=
node
.
AsArg
().
name
;
key
=
node
.
AsArg
().
name
;
}
else
{
}
else
{
key
=
node
.
AsStmt
().
op_type
()
+
std
::
to_string
(
id
++
);
key
=
string_format
(
"%s%d"
,
node
.
AsStmt
().
op_type
().
c_str
(),
id
++
);
}
}
if
(
node
.
IsStmt
())
{
if
(
node
.
IsStmt
())
{
...
...
paddle/fluid/lite/core/mir/pattern_matcher.cc
浏览文件 @
3a425e7e
...
@@ -325,7 +325,7 @@ std::string PMPattern::DotString() const {
...
@@ -325,7 +325,7 @@ std::string PMPattern::DotString() const {
// Create Nodes
// Create Nodes
std
::
unordered_map
<
PMNode
*
,
std
::
string
>
node2dot
;
std
::
unordered_map
<
PMNode
*
,
std
::
string
>
node2dot
;
for
(
const
auto
&
node
:
nodes
())
{
for
(
const
auto
&
node
:
nodes
())
{
std
::
string
node_id
=
"Node"
+
std
::
to_string
(
id
++
);
std
::
string
node_id
=
string_format
(
"Node%d"
,
id
++
);
dot
.
AddNode
(
node_id
,
{},
node
->
name
());
dot
.
AddNode
(
node_id
,
{},
node
->
name
());
node2dot
[
node
.
get
()]
=
node_id
;
node2dot
[
node
.
get
()]
=
node_id
;
}
}
...
...
paddle/fluid/lite/core/mir/pattern_matcher.h
浏览文件 @
3a425e7e
...
@@ -30,6 +30,7 @@
...
@@ -30,6 +30,7 @@
#include "paddle/fluid/lite/core/mir/node.h"
#include "paddle/fluid/lite/core/mir/node.h"
#include "paddle/fluid/lite/core/mir/ssa_graph.h"
#include "paddle/fluid/lite/core/mir/ssa_graph.h"
#include "paddle/fluid/lite/model_parser/pb/op_desc.h"
#include "paddle/fluid/lite/model_parser/pb/op_desc.h"
#include "paddle/fluid/lite/utils/string.h"
namespace
paddle
{
namespace
paddle
{
namespace
lite
{
namespace
lite
{
...
@@ -228,7 +229,7 @@ class PMPattern {
...
@@ -228,7 +229,7 @@ class PMPattern {
FRIEND_TEST
(
PMPattern
,
NewNode
);
FRIEND_TEST
(
PMPattern
,
NewNode
);
#endif
#endif
static
std
::
string
NewID
()
{
return
"pmnode-"
+
std
::
to_string
(
id_
++
);
}
static
std
::
string
NewID
()
{
return
string_format
(
"pmnode-%d"
,
id_
++
);
}
std
::
vector
<
std
::
unique_ptr
<
PMNode
>>
nodes_
;
std
::
vector
<
std
::
unique_ptr
<
PMNode
>>
nodes_
;
std
::
vector
<
edge_t
>
edges_
;
std
::
vector
<
edge_t
>
edges_
;
...
...
paddle/fluid/lite/core/mir/type_target_transform_pass.cc
浏览文件 @
3a425e7e
...
@@ -20,6 +20,7 @@
...
@@ -20,6 +20,7 @@
#include <vector>
#include <vector>
#include "paddle/fluid/lite/core/mir/graph_visualize_pass.h"
#include "paddle/fluid/lite/core/mir/graph_visualize_pass.h"
#include "paddle/fluid/lite/core/mir/pass_registry.h"
#include "paddle/fluid/lite/core/mir/pass_registry.h"
#include "paddle/fluid/lite/utils/string.h"
namespace
paddle
{
namespace
paddle
{
namespace
lite
{
namespace
lite
{
...
@@ -80,7 +81,7 @@ void TypeTargetTransformPass::AddIoCopyInst(
...
@@ -80,7 +81,7 @@ void TypeTargetTransformPass::AddIoCopyInst(
CHECK
(
in
->
IsArg
());
CHECK
(
in
->
IsArg
());
auto
node_id
=
[
&
]
{
return
graph
->
nodes
().
size
();
};
auto
node_id
=
[
&
]
{
return
graph
->
nodes
().
size
();
};
auto
io_copy_output_name
=
auto
io_copy_output_name
=
in
->
AsArg
().
name
+
"/trans/"
+
std
::
to_string
(
node_id
());
string_format
(
"%s/trans/%d"
,
in
->
AsArg
().
name
.
c_str
(),
node_id
());
auto
*
io_copy_output_arg
=
graph
->
NewArgumentNode
(
io_copy_output_name
);
auto
*
io_copy_output_arg
=
graph
->
NewArgumentNode
(
io_copy_output_name
);
auto
*
io_copy_inst
=
graph
->
NewInstructNode
();
auto
*
io_copy_inst
=
graph
->
NewInstructNode
();
...
...
paddle/fluid/lite/core/op_lite.cc
浏览文件 @
3a425e7e
...
@@ -30,6 +30,8 @@ std::vector<std::unique_ptr<KernelBase>> OpLite::CreateKernels(
...
@@ -30,6 +30,8 @@ std::vector<std::unique_ptr<KernelBase>> OpLite::CreateKernels(
auto
pick_kernel
=
[
&
](
const
Place
&
place
)
{
auto
pick_kernel
=
[
&
](
const
Place
&
place
)
{
auto
ks
=
KernelRegistry
::
Global
().
Create
(
op_type_
,
place
.
target
,
auto
ks
=
KernelRegistry
::
Global
().
Create
(
op_type_
,
place
.
target
,
place
.
precision
,
place
.
layout
);
place
.
precision
,
place
.
layout
);
VLOG
(
5
)
<<
"pick kernel for "
<<
op_info
()
->
Type
()
<<
" "
<<
place
<<
" get "
<<
ks
.
size
()
<<
" kernels"
;
for
(
auto
&&
it
:
ks
)
{
for
(
auto
&&
it
:
ks
)
{
AttachKernel
(
it
.
get
());
AttachKernel
(
it
.
get
());
kernels
.
emplace_back
(
std
::
move
(
it
));
kernels
.
emplace_back
(
std
::
move
(
it
));
...
...
paddle/fluid/lite/core/op_registry.cc
浏览文件 @
3a425e7e
...
@@ -62,6 +62,9 @@ std::list<std::unique_ptr<KernelBase>> KernelRegistry::Create(
...
@@ -62,6 +62,9 @@ std::list<std::unique_ptr<KernelBase>> KernelRegistry::Create(
case
TARGET
(
kARM
):
{
case
TARGET
(
kARM
):
{
CREATE_KERNEL
(
kARM
);
CREATE_KERNEL
(
kARM
);
}
break
;
}
break
;
case
TARGET
(
kOpenCL
):
{
CREATE_KERNEL
(
kOpenCL
);
}
break
;
default:
default:
CHECK
(
false
)
<<
"not supported kernel target "
<<
TargetToStr
(
target
);
CHECK
(
false
)
<<
"not supported kernel target "
<<
TargetToStr
(
target
);
}
}
...
@@ -99,6 +102,10 @@ KernelRegistry::KernelRegistry()
...
@@ -99,6 +102,10 @@ KernelRegistry::KernelRegistry()
INIT_FOR
(
kARM
,
kInt8
,
kNCHW
);
INIT_FOR
(
kARM
,
kInt8
,
kNCHW
);
INIT_FOR
(
kARM
,
kAny
,
kNCHW
);
INIT_FOR
(
kARM
,
kAny
,
kNCHW
);
INIT_FOR
(
kARM
,
kAny
,
kAny
);
INIT_FOR
(
kARM
,
kAny
,
kAny
);
INIT_FOR
(
kOpenCL
,
kFloat
,
kNCHW
);
INIT_FOR
(
kOpenCL
,
kAny
,
kNCHW
);
INIT_FOR
(
kOpenCL
,
kAny
,
kAny
);
#undef INIT_FOR
#undef INIT_FOR
}
}
...
...
paddle/fluid/lite/core/op_registry.h
浏览文件 @
3a425e7e
...
@@ -82,6 +82,10 @@ class KernelRegistry final {
...
@@ -82,6 +82,10 @@ class KernelRegistry final {
KernelRegistryForTarget
<
TARGET
(
kARM
),
PRECISION
(
kFloat
),
KernelRegistryForTarget
<
TARGET
(
kARM
),
PRECISION
(
kFloat
),
DATALAYOUT
(
kNCHW
)
>
*
,
//
DATALAYOUT
(
kNCHW
)
>
*
,
//
KernelRegistryForTarget
<
TARGET
(
kARM
),
PRECISION
(
kInt8
),
KernelRegistryForTarget
<
TARGET
(
kARM
),
PRECISION
(
kInt8
),
DATALAYOUT
(
kNCHW
)
>
*
,
//
KernelRegistryForTarget
<
TARGET
(
kOpenCL
),
PRECISION
(
kFloat
),
DATALAYOUT
(
kNCHW
)
>
*
,
//
KernelRegistryForTarget
<
TARGET
(
kOpenCL
),
PRECISION
(
kInt8
),
DATALAYOUT
(
kNCHW
)
>
*
//
DATALAYOUT
(
kNCHW
)
>
*
//
>
;
>
;
...
...
paddle/fluid/lite/core/optimizer.h
浏览文件 @
3a425e7e
...
@@ -50,13 +50,22 @@ class Optimizer {
...
@@ -50,13 +50,22 @@ class Optimizer {
if
(
passes
.
empty
())
{
if
(
passes
.
empty
())
{
RunPasses
(
std
::
vector
<
std
::
string
>
{{
RunPasses
(
std
::
vector
<
std
::
string
>
{{
"lite_quant_dequant_fuse_pass"
,
//
"lite_quant_dequant_fuse_pass"
,
//
"lite_conv_bn_fuse_pass"
,
//
"lite_conv_bn_fuse_pass"
,
//
// This pass is disabled to force some opencl kernels selected for final
// running, otherwise, they will be fused to ARM fusion kernels, and the OpenCL
// devices will be discarded.
// TODO(Superjomn) Refine the fusion related design to select fusion kernels for
// devices automatically.
#ifndef LITE_WITH_OPENCL
"lite_conv_elementwise_add_activation_fuse_pass"
,
//
"lite_conv_elementwise_add_activation_fuse_pass"
,
//
"lite_fc_fuse_pass"
,
//
#endif
"identity_scale_eliminate_pass"
,
//
"lite_fc_fuse_pass"
,
//
"identity_scale_eliminate_pass"
,
//
#ifdef LITE_WITH_LIGHT_WEIGHT_FRAMEWORK
#ifdef LITE_WITH_LIGHT_WEIGHT_FRAMEWORK
#ifndef LITE_WITH_OPENCL
"lite_elementwise_add_activation_fuse_pass"
,
//
"lite_elementwise_add_activation_fuse_pass"
,
//
#endif
#endif
#endif
"static_kernel_pick_pass"
,
//
"static_kernel_pick_pass"
,
//
"variable_place_inference_pass"
,
//
"variable_place_inference_pass"
,
//
...
...
paddle/fluid/lite/core/program.h
浏览文件 @
3a425e7e
...
@@ -140,7 +140,8 @@ class RuntimeProgram {
...
@@ -140,7 +140,8 @@ class RuntimeProgram {
void
Run
()
{
void
Run
()
{
for
(
auto
&
inst
:
instructions_
)
{
for
(
auto
&
inst
:
instructions_
)
{
VLOG
(
3
)
<<
">> Running kernel: "
<<
inst
.
op
()
->
op_info
()
->
Repr
();
VLOG
(
4
)
<<
">> Running kernel: "
<<
inst
.
op
()
->
op_info
()
->
Repr
()
<<
" on Target "
<<
TargetToStr
(
inst
.
kernel
()
->
target
());
inst
.
Run
();
inst
.
Run
();
}
}
}
}
...
...
paddle/fluid/lite/core/target_wrapper.h
浏览文件 @
3a425e7e
...
@@ -31,6 +31,7 @@ enum class TargetType : int {
...
@@ -31,6 +31,7 @@ enum class TargetType : int {
kX86
,
kX86
,
kCUDA
,
kCUDA
,
kARM
,
kARM
,
kOpenCL
,
kAny
,
// any target
kAny
,
// any target
NUM
,
// number of fields.
NUM
,
// number of fields.
};
};
...
@@ -69,8 +70,8 @@ static size_t PrecisionTypeLength(PrecisionType type) {
...
@@ -69,8 +70,8 @@ static size_t PrecisionTypeLength(PrecisionType type) {
#define DATALAYOUT(item__) paddle::lite::DataLayoutType::item__
#define DATALAYOUT(item__) paddle::lite::DataLayoutType::item__
static
const
std
::
string
&
TargetToStr
(
TargetType
target
)
{
static
const
std
::
string
&
TargetToStr
(
TargetType
target
)
{
static
const
std
::
string
target2string
[]
=
{
"unk"
,
"host"
,
"x86
"
,
static
const
std
::
string
target2string
[]
=
{
"unk"
,
"host"
,
"x86"
,
"cuda
"
,
"
cuda"
,
"arm"
,
"any"
};
"
arm"
,
"opencl"
,
"any"
};
auto
x
=
static_cast
<
int
>
(
target
);
auto
x
=
static_cast
<
int
>
(
target
);
CHECK_LT
(
x
,
static_cast
<
int
>
(
TARGET
(
NUM
)));
CHECK_LT
(
x
,
static_cast
<
int
>
(
TARGET
(
NUM
)));
return
target2string
[
x
];
return
target2string
[
x
];
...
@@ -92,8 +93,8 @@ static const std::string& DataLayoutToStr(DataLayoutType layout) {
...
@@ -92,8 +93,8 @@ static const std::string& DataLayoutToStr(DataLayoutType layout) {
}
}
static
const
std
::
string
&
TargetRepr
(
TargetType
target
)
{
static
const
std
::
string
&
TargetRepr
(
TargetType
target
)
{
static
const
std
::
string
target2string
[]
=
{
"kUnk"
,
"kHost"
,
"kX86"
,
"kCUDA"
,
static
const
std
::
string
target2string
[]
=
{
"kAny"
};
"kUnk"
,
"kHost"
,
"kX86"
,
"kCUDA"
,
"kARM"
,
"kOpenCL"
,
"kAny"
};
auto
x
=
static_cast
<
int
>
(
target
);
auto
x
=
static_cast
<
int
>
(
target
);
CHECK_LT
(
x
,
static_cast
<
int
>
(
TARGET
(
NUM
)));
CHECK_LT
(
x
,
static_cast
<
int
>
(
TARGET
(
NUM
)));
return
target2string
[
x
];
return
target2string
[
x
];
...
...
paddle/fluid/lite/kernels/CMakeLists.txt
浏览文件 @
3a425e7e
...
@@ -4,5 +4,5 @@ add_subdirectory(host)
...
@@ -4,5 +4,5 @@ add_subdirectory(host)
add_subdirectory
(
arm
)
add_subdirectory
(
arm
)
add_subdirectory
(
cuda
)
add_subdirectory
(
cuda
)
add_subdirectory
(
x86
)
add_subdirectory
(
x86
)
add_subdirectory
(
opencl
)
paddle/fluid/lite/kernels/opencl/CMakeLists.txt
0 → 100644
浏览文件 @
3a425e7e
if
(
NOT LITE_WITH_OPENCL
)
return
()
endif
()
set
(
cl_kernel_deps op_params_lite cl_caller cl_engine cl_context cl_wrapper
)
cc_library
(
elementwise_add_opencl SRCS elementwise_add_compute.cc DEPS
${
cl_kernel_deps
}
)
lite_cc_test
(
test_elementwise_add_opencl SRCS elementwise_add_compute_test.cc DEPS elementwise_add_opencl
op_registry_lite program_lite
context_lite
)
set
(
opencl_kernels
elementwise_add_opencl
CACHE INTERNAL
""
)
paddle/fluid/lite/kernels/opencl/elementwise_add_compute.cc
0 → 100644
浏览文件 @
3a425e7e
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/fluid/lite/core/kernel.h"
#include "paddle/fluid/lite/core/op_registry.h"
#include "paddle/fluid/lite/operators/op_params.h"
// NOTE ugly here, hide these.
#include "paddle/fluid/lite/opencl/cl_caller.h"
namespace
paddle
{
namespace
lite
{
namespace
kernels
{
namespace
opencl
{
class
ElementwiseAddCompute
:
public
KernelLite
<
TARGET
(
kOpenCL
),
PRECISION
(
kFloat
),
DATALAYOUT
(
kNCHW
)
>
{
public:
using
param_t
=
operators
::
ElementwiseParam
;
void
Run
()
override
{
auto
&
param
=
*
param_
.
get_mutable
<
param_t
>
();
auto
&
context
=
ctx_
->
As
<
OpenClContext
>
();
CHECK
(
context
.
cl_context
());
elementwise_add
(
context
.
cl_context
(),
static_cast
<
const
float
*>
(
param
.
X
->
raw_data
()),
param
.
X
->
dims
(),
static_cast
<
const
float
*>
(
param
.
Y
->
raw_data
()),
param
.
Y
->
dims
(),
param
.
Out
->
mutable_data
<
float
>
(),
param
.
Out
->
dims
());
}
};
}
// namespace opencl
}
// namespace kernels
}
// namespace lite
}
// namespace paddle
REGISTER_LITE_KERNEL
(
elementwise_add
,
kOpenCL
,
kFloat
,
kNCHW
,
paddle
::
lite
::
kernels
::
opencl
::
ElementwiseAddCompute
,
def
)
.
BindInput
(
"X"
,
{
LiteType
::
GetTensorTy
(
TARGET
(
kHost
))})
.
BindInput
(
"Y"
,
{
LiteType
::
GetTensorTy
(
TARGET
(
kHost
))})
.
BindOutput
(
"Out"
,
{
LiteType
::
GetTensorTy
(
TARGET
(
kHost
))})
.
Finalize
();
paddle/fluid/lite/kernels/opencl/elementwise_add_compute_test.cc
0 → 100644
浏览文件 @
3a425e7e
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include <gtest/gtest.h>
#include "paddle/fluid/lite/core/op_registry.h"
namespace
paddle
{
namespace
lite
{
TEST
(
elementwise_add
,
init
)
{
LOG
(
INFO
)
<<
"to get kernel ..."
;
auto
kernels
=
KernelRegistry
::
Global
().
Create
(
"elementwise_add"
,
TARGET
(
kOpenCL
),
PRECISION
(
kFloat
),
DATALAYOUT
(
kNCHW
));
ASSERT_FALSE
(
kernels
.
empty
());
auto
kernel
=
std
::
move
(
kernels
.
front
());
LOG
(
INFO
)
<<
"get kernel"
;
lite
::
Tensor
X
,
Y
,
Out
;
operators
::
ElementwiseParam
param
;
param
.
X
=
&
X
;
param
.
Y
=
&
Y
;
param
.
Out
=
&
Out
;
std
::
unique_ptr
<
KernelContext
>
context
(
new
KernelContext
);
context
->
As
<
OpenClContext
>
().
InitOnce
();
kernel
->
SetParam
(
param
);
kernel
->
SetContext
(
std
::
move
(
context
));
X
.
Resize
({
1
,
10
});
Y
.
Resize
({
1
,
10
});
Out
.
Resize
({
1
,
10
});
auto
*
x_data
=
X
.
mutable_data
<
float
>
();
auto
*
y_data
=
Y
.
mutable_data
<
float
>
();
auto
*
out_data
=
Out
.
mutable_data
<
float
>
();
for
(
int
i
=
0
;
i
<
10
;
i
++
)
{
x_data
[
i
]
=
1.1
*
i
;
y_data
[
i
]
=
2.3
*
i
;
}
kernel
->
Launch
();
for
(
int
i
=
0
;
i
<
10
;
i
++
)
{
EXPECT_NEAR
(
out_data
[
i
],
3.4
*
i
,
1e-1
);
}
}
}
// namespace lite
}
// namespace paddle
USE_LITE_KERNEL
(
elementwise_add
,
kOpenCL
,
kFloat
,
kNCHW
,
def
);
paddle/fluid/lite/kernels/use_kernels.h
浏览文件 @
3a425e7e
...
@@ -61,3 +61,6 @@ USE_LITE_KERNEL(mul, kCUDA, kFloat, kNCHW, def);
...
@@ -61,3 +61,6 @@ USE_LITE_KERNEL(mul, kCUDA, kFloat, kNCHW, def);
USE_LITE_KERNEL
(
io_copy
,
kCUDA
,
kAny
,
kAny
,
host_to_device
);
USE_LITE_KERNEL
(
io_copy
,
kCUDA
,
kAny
,
kAny
,
host_to_device
);
USE_LITE_KERNEL
(
io_copy
,
kCUDA
,
kAny
,
kAny
,
device_to_host
);
USE_LITE_KERNEL
(
io_copy
,
kCUDA
,
kAny
,
kAny
,
device_to_host
);
#endif
#endif
#ifdef LITE_WITH_OPENCL
USE_LITE_KERNEL
(
elementwise_add
,
kOpenCL
,
kFloat
,
kNCHW
,
def
);
#endif
paddle/fluid/lite/opencl/CMakeLists.txt
浏览文件 @
3a425e7e
...
@@ -2,18 +2,16 @@ if (NOT LITE_WITH_OPENCL)
...
@@ -2,18 +2,16 @@ if (NOT LITE_WITH_OPENCL)
return
()
return
()
endif
()
endif
()
if
(
WITH_LITE AND LITE_WITH_LIGHT_WEIGHT_FRAMEWORK
)
cc_library
(
cl_wrapper SRCS cl_wrapper.cc
)
cc_library
(
cl_wrapper SRCS cl_wrapper.cc
)
cc_library
(
cl_tool SRCS cl_tool.cc
)
cc_library
(
cl_tool SRCS cl_tool.cc
)
target_compile_options
(
cl_tool BEFORE PUBLIC -Wno-ignored-qualifiers
)
target_compile_options
(
cl_tool BEFORE PUBLIC -Wno-ignored-qualifiers
)
cc_library
(
cl_half SRCS cl_half.cc
)
cc_library
(
cl_half SRCS cl_half.cc
)
target_compile_options
(
cl_half BEFORE PUBLIC -fno-strict-aliasing
)
target_compile_options
(
cl_half BEFORE PUBLIC -fno-strict-aliasing
)
cc_library
(
cl_engine SRCS cl_engine.cc DEPS cl_tool
)
cc_library
(
cl_engine SRCS cl_engine.cc DEPS cl_tool
)
cc_library
(
cl_context SRCS cl_context.cc DEPS cl_engine
)
cc_library
(
cl_context SRCS cl_context.cc DEPS cl_engine
)
cc_library
(
cl_helper SRCS cl_helper.cc DEPS cl_context
)
cc_library
(
cl_helper SRCS cl_helper.cc DEPS cl_context
)
cc_library
(
cl_image_converter SRCS cl_image_converter.cc DEPS cl_half lite_tensor
)
cc_library
(
cl_image_converter SRCS cl_image_converter.cc DEPS cl_half lite_tensor
)
cc_library
(
cl_image SRCS cl_image.cc DEPS cl_half lite_tensor cl_image_converter cl_engine
)
cc_library
(
cl_image SRCS cl_image.cc DEPS cl_half lite_tensor cl_image_converter cl_engine
)
cc_library
(
cl_caller SRCS cl_caller.cc DEPS cl_helper cl_image
)
cc_library
(
cl_caller SRCS cl_caller.cc DEPS cl_helper cl_image
)
lite_cc_test
(
test_cl_runtime SRCS cl_test.cc DEPS cl_helper cl_image cl_caller cl_wrapper
)
lite_cc_test
(
test_cl_runtime SRCS cl_test.cc DEPS cl_helper cl_image cl_caller cl_wrapper
)
add_dependencies
(
cl_tool opencl_clhpp
)
add_dependencies
(
cl_tool opencl_clhpp
)
endif
()
paddle/fluid/lite/opencl/cl_caller.cc
浏览文件 @
3a425e7e
...
@@ -49,12 +49,12 @@ bool InitOpenCLEngine(std::string cl_path) {
...
@@ -49,12 +49,12 @@ bool InitOpenCLEngine(std::string cl_path) {
return
engine
->
IsInitSuccess
();
return
engine
->
IsInitSuccess
();
}
}
void
elementwise_add
(
CLContext
*
context
,
float
*
in
,
const
DDim
&
in_dim
,
void
elementwise_add
(
CLContext
*
context
,
const
float
*
in
,
const
DDim
&
in_dim
,
float
*
bias
,
const
DDim
&
bias_dim
,
float
*
out
,
const
float
*
bias
,
const
DDim
&
bias_dim
,
float
*
out
,
const
DDim
&
out_dim
)
{
const
DDim
&
out_dim
)
{
CLHelper
helper
(
context
);
CLHelper
helper
(
context
);
helper
.
AddKernel
(
"elementwise_add"
,
"elementwise_add_kernel.cl"
);
helper
.
AddKernel
(
"elementwise_add"
,
"elementwise_add_kernel.cl"
);
auto
kernel
=
helper
.
KernelAt
(
0
);
auto
kernel
=
helper
.
GetKernel
(
0
);
CLImage
in_image
;
CLImage
in_image
;
in_image
.
set_tensor_data
(
in
,
in_dim
);
in_image
.
set_tensor_data
(
in
,
in_dim
);
in_image
.
InitNormalCLImage
(
helper
.
OpenCLContext
());
in_image
.
InitNormalCLImage
(
helper
.
OpenCLContext
());
...
...
paddle/fluid/lite/opencl/cl_caller.h
浏览文件 @
3a425e7e
...
@@ -22,8 +22,13 @@ namespace paddle {
...
@@ -22,8 +22,13 @@ namespace paddle {
namespace
lite
{
namespace
lite
{
bool
InitOpenCLEngine
(
std
::
string
cl_path
);
bool
InitOpenCLEngine
(
std
::
string
cl_path
);
void
elementwise_add
(
CLContext
*
context
,
float
*
in
,
const
DDim
&
in_dim
,
float
*
bias
,
const
DDim
&
bias_dim
,
float
*
out
,
/// An elementwise_add method to embed OpenCL logic inside, it is used as a
/// black box so that the framework can remain simple.
/// NOTE Currently, these methods are quite expensive, we will optimize them
/// latter.
void
elementwise_add
(
CLContext
*
context
,
const
float
*
in
,
const
DDim
&
in_dim
,
const
float
*
bias
,
const
DDim
&
bias_dim
,
float
*
out
,
const
DDim
&
out_dim
);
const
DDim
&
out_dim
);
}
// namespace lite
}
// namespace lite
...
...
paddle/fluid/lite/opencl/cl_helper.cc
浏览文件 @
3a425e7e
...
@@ -29,17 +29,18 @@ void CLHelper::AddKernel(const std::string &kernel_name,
...
@@ -29,17 +29,18 @@ void CLHelper::AddKernel(const std::string &kernel_name,
CHECK
(
context_
!=
nullptr
)
<<
"Please use set_context first!"
;
CHECK
(
context_
!=
nullptr
)
<<
"Please use set_context first!"
;
VLOG
(
3
)
<<
" --- begin to add kernel ---"
;
VLOG
(
3
)
<<
" --- begin to add kernel ---"
;
auto
kernel
=
context_
->
GetKernel
(
kernel_name
,
file_name
,
options
);
auto
kernel
=
context_
->
GetKernel
(
kernel_name
,
file_name
,
options
);
kernels
.
emplace_back
(
std
::
move
(
kernel
));
kernels_
.
emplace_back
(
std
::
move
(
kernel
));
kernel_offset_
[
kernel_name
]
=
kernels_
.
size
()
-
1
;
VLOG
(
3
)
<<
" --- end to add kernel --- "
;
VLOG
(
3
)
<<
" --- end to add kernel --- "
;
}
}
cl
::
Kernel
&
CLHelper
::
KernelAt
(
const
int
index
)
{
cl
::
Kernel
&
CLHelper
::
GetKernel
(
const
int
index
)
{
VLOG
(
3
)
<<
" --- kernel count: "
<<
kernels
.
size
()
<<
" --- "
;
VLOG
(
3
)
<<
" --- kernel count: "
<<
kernels
_
.
size
()
<<
" --- "
;
CHECK
(
static_cast
<
size_t
>
(
index
)
<
kernels
.
size
())
CHECK
(
static_cast
<
size_t
>
(
index
)
<
kernels
_
.
size
())
<<
"The index must be less than the size of kernels."
;
<<
"The index must be less than the size of kernels."
;
CHECK
(
kernels
[
index
]
!=
nullptr
)
CHECK
(
kernels
_
[
index
]
!=
nullptr
)
<<
"The target kernel pointer cannot be null."
;
<<
"The target kernel pointer cannot be null."
;
return
*
(
kernels
[
index
]);
return
*
(
kernels
_
[
index
]);
}
}
cl
::
CommandQueue
&
CLHelper
::
OpenCLCommandQueue
()
{
cl
::
CommandQueue
&
CLHelper
::
OpenCLCommandQueue
()
{
...
...
paddle/fluid/lite/opencl/cl_helper.h
浏览文件 @
3a425e7e
...
@@ -14,6 +14,7 @@ limitations under the License. */
...
@@ -14,6 +14,7 @@ limitations under the License. */
#pragma once
#pragma once
#include <map>
#include <memory>
#include <memory>
#include <string>
#include <string>
#include <vector>
#include <vector>
...
@@ -35,7 +36,12 @@ class CLHelper {
...
@@ -35,7 +36,12 @@ class CLHelper {
void
AddKernel
(
const
std
::
string
&
kernel_name
,
const
std
::
string
&
file_name
,
void
AddKernel
(
const
std
::
string
&
kernel_name
,
const
std
::
string
&
file_name
,
const
std
::
string
&
options
=
""
);
const
std
::
string
&
options
=
""
);
cl
::
Kernel
&
KernelAt
(
const
int
index
);
cl
::
Kernel
&
GetKernel
(
const
int
index
);
cl
::
Kernel
&
GetKernel
(
const
std
::
string
&
name
)
{
auto
it
=
kernel_offset_
.
find
(
name
);
CHECK
(
it
!=
kernel_offset_
.
end
());
return
GetKernel
(
it
->
second
);
}
cl
::
CommandQueue
&
OpenCLCommandQueue
();
cl
::
CommandQueue
&
OpenCLCommandQueue
();
...
@@ -45,7 +51,8 @@ class CLHelper {
...
@@ -45,7 +51,8 @@ class CLHelper {
private:
private:
CLContext
*
context_
{
nullptr
};
CLContext
*
context_
{
nullptr
};
std
::
vector
<
std
::
unique_ptr
<
cl
::
Kernel
>>
kernels
;
std
::
map
<
std
::
string
,
int
>
kernel_offset_
;
std
::
vector
<
std
::
unique_ptr
<
cl
::
Kernel
>>
kernels_
;
};
};
}
// namespace lite
}
// namespace lite
...
...
paddle/fluid/lite/opencl/cl_image.cc
浏览文件 @
3a425e7e
...
@@ -53,7 +53,7 @@ std::ostream& operator<<(std::ostream& os, const CLImage& cl_image) {
...
@@ -53,7 +53,7 @@ std::ostream& operator<<(std::ostream& os, const CLImage& cl_image) {
return
os
;
return
os
;
}
}
void
CLImage
::
set_tensor_data
(
float
*
tensor_data
,
const
DDim
&
dim
)
{
void
CLImage
::
set_tensor_data
(
const
float
*
tensor_data
,
const
DDim
&
dim
)
{
#ifdef LITE_WITH_LIGHT_WEIGHT_FRAMEWORK
#ifdef LITE_WITH_LIGHT_WEIGHT_FRAMEWORK
auto
numel
=
dim
.
product
();
auto
numel
=
dim
.
product
();
#else
#else
...
...
paddle/fluid/lite/opencl/cl_image.h
浏览文件 @
3a425e7e
...
@@ -33,7 +33,7 @@ class CLImage {
...
@@ -33,7 +33,7 @@ class CLImage {
/*
/*
* Will not hold input tensor data, memcpy in this method.
* Will not hold input tensor data, memcpy in this method.
* */
* */
void
set_tensor_data
(
float
*
tensor_data
,
const
DDim
&
dim
);
void
set_tensor_data
(
const
float
*
tensor_data
,
const
DDim
&
dim
);
bool
IsInit
()
{
return
initialized_
;
}
bool
IsInit
()
{
return
initialized_
;
}
/*
/*
...
...
paddle/fluid/lite/opencl/cl_test.cc
浏览文件 @
3a425e7e
...
@@ -65,7 +65,7 @@ TEST(cl_test, kernel_test) {
...
@@ -65,7 +65,7 @@ TEST(cl_test, kernel_test) {
helper
->
AddKernel
(
"elementwise_add"
,
"elementwise_add_kernel.cl"
);
helper
->
AddKernel
(
"elementwise_add"
,
"elementwise_add_kernel.cl"
);
helper
->
AddKernel
(
"pool_max"
,
"pool_kernel.cl"
);
helper
->
AddKernel
(
"pool_max"
,
"pool_kernel.cl"
);
helper
->
AddKernel
(
"elementwise_add"
,
"elementwise_add_kernel.cl"
);
helper
->
AddKernel
(
"elementwise_add"
,
"elementwise_add_kernel.cl"
);
auto
kernel
=
helper
->
KernelAt
(
2
);
auto
kernel
=
helper
->
GetKernel
(
2
);
std
::
unique_ptr
<
float
[]
>
in_data
(
new
float
[
1024
*
512
]);
std
::
unique_ptr
<
float
[]
>
in_data
(
new
float
[
1024
*
512
]);
for
(
int
i
=
0
;
i
<
1024
*
512
;
i
++
)
{
for
(
int
i
=
0
;
i
<
1024
*
512
;
i
++
)
{
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录