机器未来 / Paddle · Commit 0f03e23b
Forked from PaddlePaddle / Paddle (in sync with the fork source)
0f03e23b
编写于
6月 24, 2019
作者:
C
Chunwei
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
framework support cl
上级
17ef40f3
Showing 32 changed files with 340 additions and 82 deletions (+340 −82)
paddle/fluid/framework/ir/graph_viz_pass.cc                       +3   −1
paddle/fluid/inference/analysis/dot.h                             +5   −3
paddle/fluid/lite/CMakeLists.txt                                  +7   −1
paddle/fluid/lite/api/CMakeLists.txt                              +5   −4
paddle/fluid/lite/api/cxx_api.cc                                  +0   −3
paddle/fluid/lite/api/mobilenetv1_test.cc                         +23  −7
paddle/fluid/lite/core/context.cc                                 +4   −0
paddle/fluid/lite/core/context.h                                  +61  −8
paddle/fluid/lite/core/mir/fusion/quant_dequant_op_fuser.cc       +11  −10
paddle/fluid/lite/core/mir/graph_visualize_pass.cc                +2   −1
paddle/fluid/lite/core/mir/pattern_matcher.cc                     +1   −1
paddle/fluid/lite/core/mir/pattern_matcher.h                      +2   −1
paddle/fluid/lite/core/mir/type_target_transform_pass.cc          +2   −1
paddle/fluid/lite/core/op_lite.cc                                 +2   −0
paddle/fluid/lite/core/op_registry.cc                             +7   −0
paddle/fluid/lite/core/op_registry.h                              +4   −0
paddle/fluid/lite/core/optimizer.h                                +13  −4
paddle/fluid/lite/core/program.h                                  +2   −1
paddle/fluid/lite/core/target_wrapper.h                           +5   −4
paddle/fluid/lite/kernels/CMakeLists.txt                          +1   −1
paddle/fluid/lite/kernels/opencl/CMakeLists.txt                   +16  −0
paddle/fluid/lite/kernels/opencl/elementwise_add_compute.cc       +53  −0
paddle/fluid/lite/kernels/opencl/elementwise_add_compute_test.cc  +66  −0
paddle/fluid/lite/kernels/use_kernels.h                           +3   −0
paddle/fluid/lite/opencl/CMakeLists.txt                           +13  −15
paddle/fluid/lite/opencl/cl_caller.cc                             +3   −3
paddle/fluid/lite/opencl/cl_caller.h                              +7   −2
paddle/fluid/lite/opencl/cl_helper.cc                             +7   −6
paddle/fluid/lite/opencl/cl_helper.h                              +9   −2
paddle/fluid/lite/opencl/cl_image.cc                              +1   −1
paddle/fluid/lite/opencl/cl_image.h                               +1   −1
paddle/fluid/lite/opencl/cl_test.cc                               +1   −1
paddle/fluid/framework/ir/graph_viz_pass.cc

@@ -18,6 +18,7 @@ limitations under the License. */
 #include <unordered_set>
 #include "paddle/fluid/framework/op_proto_maker.h"
 #include "paddle/fluid/inference/analysis/dot.h"
+#include "paddle/fluid/lite/utils/string.h"
 #include "paddle/fluid/string/printf.h"

 namespace paddle {
@@ -84,7 +85,8 @@ void GraphVizPass::ApplyImpl(ir::Graph* graph) const {
   auto marked_nodes = ConsumeMarkedNodes(graph);
   // Create nodes
   for (const Node* n : graph->Nodes()) {
-    std::string node_id = FormatName(n) + "(" + std::to_string(n->id()) + ")";
+    std::string node_id =
+        lite::string_format("%s(%d)", FormatName(n).c_str(), n->id());
     if (n->IsOp()) {
       decltype(op_attrs) attr =
           marked_nodes.count(n) ? marked_op_attrs : op_attrs;
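Many hunks in this commit replace "prefix" + std::to_string(i) concatenation with lite::string_format(...). The helper itself is not part of this diff (it lives in paddle/fluid/lite/utils/string.h, included above); as a rough sketch under that assumption, it behaves like a vsnprintf wrapper:

#include <cstdarg>
#include <cstdio>
#include <string>

// Assumed sketch of a printf-style helper like lite::string_format;
// the real implementation is in paddle/fluid/lite/utils/string.h.
std::string string_format(const char* fmt, ...) {
  char buf[256];
  va_list ap;
  va_start(ap, fmt);
  vsnprintf(buf, sizeof(buf), fmt, ap);  // truncates very long results
  va_end(ap);
  return std::string(buf);
}

A plausible motivation for the switch: some Android NDK STL builds targeted by the light-weight framework lack std::to_string, so funnelling all formatting through one helper keeps the code portable.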
paddle/fluid/inference/analysis/dot.h

@@ -58,9 +58,11 @@ class Dot {
     std::vector<Attr> attrs;

     Node(const std::string& name, const std::vector<Attr>& attrs)
-        : name(name),
-          attrs(attrs),
-          id_("node_" + std::to_string(dot_node_counter++)) {}
+        : name(name), attrs(attrs) {
+      std::stringstream ss;
+      ss << "node_" << dot_node_counter++;
+      id_ = ss.str();
+    }

     std::string id() const { return id_; }
paddle/fluid/lite/CMakeLists.txt

@@ -37,7 +37,7 @@ endfunction()
 function (lite_deps TARGET)
   set(options "")
   set(oneValueArgs "")
-  set(multiValueArgs DEPS X86_DEPS CUDA_DEPS ARM_DEPS PROFILE_DEPS LIGHT_DEPS HVY_DEPS ARGS)
+  set(multiValueArgs DEPS X86_DEPS CUDA_DEPS ARM_DEPS PROFILE_DEPS LIGHT_DEPS HVY_DEPS CL_DEPS ARGS)
   cmake_parse_arguments(lite_deps "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN})

   set(deps ${lite_deps_DEPS})
@@ -78,6 +78,12 @@ function (lite_deps TARGET)
     endforeach(var)
   endif()

+  if (LITE_WITH_OPENCL)
+    foreach(var ${lite_deps_CL_DEPS})
+      set(deps ${deps} ${var})
+    endforeach(var)
+  endif()
+
   set(${TARGET} ${deps} PARENT_SCOPE)
 endfunction()
paddle/fluid/lite/api/CMakeLists.txt

@@ -53,6 +53,7 @@ if(LITE_WITH_LIGHT_WEIGHT_FRAMEWORK AND WITH_TESTING)
     lite_cc_test(test_mobilenetv1_lite SRCS mobilenetv1_test.cc
        DEPS ${lite_model_test_DEPS}
+       CL_DEPS ${opencl_kernels}
        ARGS --model_dir=${LITE_MODEL_DIR}/mobilenet_v1 SERIAL)
     add_dependencies(test_mobilenetv1_lite extern_lite_download_mobilenet_v1_tar_gz)
paddle/fluid/lite/api/cxx_api.cc

@@ -23,10 +23,7 @@ namespace paddle {
 namespace lite {

 void Predictor::SaveModel(const std::string &dir) {
-#ifndef LITE_WITH_ARM
   MkDirRecur(dir);
-#else
-#endif
   program_->PersistModel(dir, program_desc_);
   LOG(INFO) << "Save model to " << dir;
 }
paddle/fluid/lite/api/mobilenetv1_test.cc

@@ -25,16 +25,13 @@
 namespace paddle {
 namespace lite {

-TEST(MobileNetV1, test) {
+#ifdef LITE_WITH_ARM
+void TestModel(const std::vector<Place>& valid_places,
+               const Place& preferred_place) {
   DeviceInfo::Init();
   DeviceInfo::Global().SetRunMode(LITE_POWER_HIGH, FLAGS_threads);
   lite::Predictor predictor;
-  std::vector<Place> valid_places({Place{TARGET(kHost), PRECISION(kFloat)},
-                                   Place{TARGET(kARM), PRECISION(kFloat)}});

-  predictor.Build(FLAGS_model_dir, Place{TARGET(kARM), PRECISION(kFloat)},
-                  valid_places);
+  predictor.Build(FLAGS_model_dir, preferred_place, valid_places);

   auto* input_tensor = predictor.GetInput(0);
   input_tensor->Resize(DDim(std::vector<DDim::value_type>({1, 3, 224, 224})));
@@ -70,7 +67,26 @@ TEST(MobileNetV1, test) {
   ASSERT_EQ(out->dims()[0], 1);
   ASSERT_EQ(out->dims()[1], 1000);
 }
+#endif

+TEST(MobileNetV1, test_arm) {
+  std::vector<Place> valid_places({
+      Place{TARGET(kHost), PRECISION(kFloat)},
+      Place{TARGET(kARM), PRECISION(kFloat)},
+      // Place{TARGET(kOpenCL), PRECISION(kFloat)},
+  });
+
+  TestModel(valid_places, Place({TARGET(kARM), PRECISION(kFloat)}));
+}
+
+TEST(MobileNetV1, test_opencl) {
+  std::vector<Place> valid_places({
+      Place{TARGET(kHost), PRECISION(kFloat)},
+      Place{TARGET(kARM), PRECISION(kFloat)},
+      Place{TARGET(kOpenCL), PRECISION(kFloat)},
+  });
+
+  TestModel(valid_places, Place({TARGET(kOpenCL), PRECISION(kFloat)}));
+}

 }  // namespace lite
 }  // namespace paddle
paddle/fluid/lite/core/context.cc

@@ -14,6 +14,10 @@
 #include "paddle/fluid/lite/core/context.h"

+#ifdef LITE_WITH_OPENCL
+DEFINE_string(cl_path, "/data/local/tmp/opencl", "The OpenCL kernels path.");
+#endif
+
 namespace paddle {
 namespace lite {}  // namespace lite
 }  // namespace paddle
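This DEFINE_string pairs with the DECLARE_string(cl_path) added to context.h below: gflags requires a flag to be defined exactly once in a .cc file, while any number of translation units may declare and read it. A minimal sketch of the pairing:

// In exactly one .cc file (here: context.cc) - defines the flag and default.
#include <gflags/gflags.h>
DEFINE_string(cl_path, "/data/local/tmp/opencl", "The OpenCL kernels path.");

// In any consumer (here: via context.h) - references the same flag:
//   DECLARE_string(cl_path);
// after which the generated global is readable, e.g.
//   CLEngine::Global()->set_cl_path(FLAGS_cl_path);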
paddle/fluid/lite/core/context.h

@@ -23,6 +23,11 @@
 #include "paddle/fluid/framework/operator.h"
 #include "paddle/fluid/platform/device_context.h"
 #endif
+#ifdef LITE_WITH_OPENCL
+#include "paddle/fluid/lite/opencl/cl_context.h"
+#include "paddle/fluid/lite/opencl/cl_engine.h"
+#include "paddle/fluid/lite/opencl/cl_helper.h"
+#endif
 #include <map>
 #include <memory>
 #include <set>
@@ -34,6 +39,10 @@
 #include "paddle/fluid/lite/core/target_wrapper.h"
 #include "paddle/fluid/lite/utils/all.h"

+#ifdef LITE_WITH_OPENCL
+DECLARE_string(cl_path);
+#endif
+
 namespace paddle {
 namespace lite {
@@ -44,6 +53,7 @@ using HostContext = Context<TargetType::kHost>;
 using X86Context = Context<TargetType::kX86>;
 using CUDAContext = Context<TargetType::kCUDA>;
 using ARMContext = Context<TargetType::kARM>;
+using OpenClContext = Context<TargetType::kOpenCL>;

 template <>
 class Context<TargetType::kHost> {
@@ -51,7 +61,7 @@ class Context<TargetType::kHost> {
   // NOTE: InitOnce should only be used by ContextScheduler
   void InitOnce() {}

-  void CopyShared(const HostContext* ctx) {}
+  void CopySharedTo(const HostContext* ctx) {}

   std::string name() const { return "HostContext"; }
 };
@@ -69,7 +79,7 @@ class Context<TargetType::kARM> {
   // NOTE: InitOnce should only be used by ContextScheduler
   void InitOnce() { DeviceInfo::Init(); }

-  void CopyShared(const ARMContext* ctx) {}
+  void CopySharedTo(const ARMContext* ctx) {}

   void SetRunMode(PowerMode mode, int threads) {
     return DeviceInfo::Global().SetRunMode(mode, threads);
@@ -109,7 +119,7 @@ class Context<TargetType::kCUDA> {
     cublas_fp32_ = std::make_shared<lite::cuda::Blas<float>>();
   }

-  void CopyShared(const CUDAContext* ctx) {
+  void CopySharedTo(const CUDAContext* ctx) {
     CHECK(ctx);
     CHECK(cublas_fp32_) << "cublas_fp32 should be set first";
     ctx->cublas_fp32_ = cublas_fp32_;
@@ -175,7 +185,7 @@ class Context<TargetType::kX86> {
   // NOTE: InitOnce should only be used by ContextScheduler
   void InitOnce() {}

-  void CopyShared(const X86Context* ctx) {}
+  void CopySharedTo(const X86Context* ctx) {}

   const device_ctx_t* x86_device_context() { return x86_device_context_.get(); }
   void SetX86DeviceContext(std::unique_ptr<device_ctx_t>&& ctx) {
@@ -202,6 +212,40 @@ class Context<TargetType::kX86> {
 };
 #endif

+#ifdef LITE_WITH_OPENCL
+template <>
+class Context<TargetType::kOpenCL> {
+  mutable std::shared_ptr<CLContext> cl_context_;
+  mutable std::shared_ptr<CLHelper> cl_helper_;
+
+ public:
+  CLContext* cl_context() { return cl_context_.get(); }
+  CLHelper* cl_helper() { return cl_helper_.get(); }
+
+  void InitOnce() {
+    // Init cl engine.
+    CHECK(CLEngine::Global()->IsInitSuccess()) << "OpenCL engine init failed";
+    CLEngine::Global()->set_cl_path(FLAGS_cl_path);
+
+    cl_context_ = std::make_shared<CLContext>();
+    cl_helper_ = std::make_shared<CLHelper>();
+    cl_helper_->set_context(cl_context_.get());
+
+    PrepareKernels();
+  }
+
+  void CopySharedTo(const OpenClContext* ctx) {
+    ctx->cl_context_ = cl_context_;
+  }
+
+ private:
+  void PrepareKernels() {
+    cl_helper_->AddKernel("elementwise_add", "elementwise_add_kernel.cl");
+    cl_helper_->AddKernel("pool_max", "pool_kernel.cl");
+  }
+};
+#endif
+
 // Context for running a kernel.
 // Holds the necessary resource and information.
 class KernelContext {
@@ -230,26 +274,32 @@ class ContextScheduler {
     std::unique_ptr<KernelContext> ctx(new KernelContext);
     switch (target) {
       case TARGET(kHost):
-        kernel_contexts_[TargetType::kHost].As<HostContext>().CopyShared(
+        kernel_contexts_[TargetType::kHost].As<HostContext>().CopySharedTo(
             &ctx->As<HostContext>());
         break;
 #ifdef LITE_WITH_X86
       case TARGET(kX86):
-        kernel_contexts_[TargetType::kX86].As<X86Context>().CopyShared(
+        kernel_contexts_[TargetType::kX86].As<X86Context>().CopySharedTo(
             &ctx->As<X86Context>());
         break;
 #endif
 #ifdef LITE_WITH_CUDA
       case TARGET(kCUDA):
-        kernel_contexts_[TargetType::kCUDA].As<CUDAContext>().CopyShared(
+        kernel_contexts_[TargetType::kCUDA].As<CUDAContext>().CopySharedTo(
             &ctx->As<CUDAContext>());
         break;
 #endif
 #ifdef LITE_WITH_ARM
       case TARGET(kARM):
-        kernel_contexts_[TargetType::kARM].As<ARMContext>().CopyShared(
+        kernel_contexts_[TargetType::kARM].As<ARMContext>().CopySharedTo(
            &ctx->As<ARMContext>());
         break;
 #endif
+#ifdef LITE_WITH_OPENCL
+      case TARGET(kOpenCL):
+        kernel_contexts_[TargetType::kOpenCL].As<OpenClContext>().CopySharedTo(
+            &ctx->As<OpenClContext>());
+        break;
+#endif
       default:
         LOG(FATAL) << "unsupported target " << TargetToStr(target);
@@ -273,6 +323,9 @@ class ContextScheduler {
 #endif
 #ifdef LITE_WITH_ARM
     InitContext<TargetType::kARM, ARMContext>();
+#endif
+#ifdef LITE_WITH_OPENCL
+    InitContext<TargetType::kOpenCL, OpenClContext>();
 #endif
   }
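To see how a kernel reaches these shared OpenCL resources at run time: ContextScheduler::NewContext builds a KernelContext and CopySharedTo fills its Context<kOpenCL> slot; a kernel body then pulls it back out via As<>(). A minimal sketch, mirroring ElementwiseAddCompute::Run later in this commit:

#include "paddle/fluid/lite/core/context.h"

// Sketch: how an OpenCL kernel body reaches the CLContext that
// ContextScheduler shared into its KernelContext (compare
// elementwise_add_compute.cc below).
void RunWithOpenCl(paddle::lite::KernelContext* ctx) {
  auto& octx = ctx->As<paddle::lite::OpenClContext>();
  CHECK(octx.cl_context());  // populated by OpenClContext::CopySharedTo
  // octx.cl_helper() already holds the kernels loaded in PrepareKernels().
}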
paddle/fluid/lite/core/mir/fusion/quant_dequant_op_fuser.cc

@@ -15,6 +15,7 @@
 #include "paddle/fluid/lite/core/mir/fusion/quant_dequant_op_fuser.h"
 #include <memory>
 #include <vector>
+#include "paddle/fluid/lite/utils/string.h"

 namespace paddle {
 namespace lite {
@@ -57,24 +58,24 @@ void QuantDequantOpFuser::BuildPattern() {
                         ->AsIntermediate();

   std::vector<PMNode*> nodes;
   for (int i = 0; i < times_; i++) {
-    nodes.push_back(VarNode("quantized_op_weight" + std::to_string(i))
+    nodes.push_back(VarNode(string_format("quantized_op_weight%d", i))
                         ->assert_is_op_input(op_type_, weight_name)
                         ->AsInput());
-    nodes.push_back(OpNode("quantized_op" + std::to_string(i), op_type_)
+    nodes.push_back(OpNode(string_format("quantized_op%d", i), op_type_)
                         ->assert_is_op(op_type_)
                         ->AsIntermediate());
-    nodes.push_back(VarNode("quantized_op_out" + std::to_string(i))
+    nodes.push_back(VarNode(string_format("quantized_op_out%d", i))
                         ->assert_is_op_output(op_type_)
                         ->assert_is_op_input("fake_dequantize_max_abs", "X")
                         ->AsIntermediate());
     nodes.push_back(
-        OpNode("dequant_op" + std::to_string(i), "fake_dequantize_max_abs")
+        OpNode(string_format("dequant_op%d", i), "fake_dequantize_max_abs")
             ->assert_is_op("fake_dequantize_max_abs")
             ->AsIntermediate());
-    nodes.push_back(VarNode("dequant_op_out" + std::to_string(i))
+    nodes.push_back(VarNode(string_format("dequant_op_out%d", i))
                         ->assert_is_op_output("fake_dequantize_max_abs", "Out")
                         ->AsOutput());
   }
@@ -108,11 +109,11 @@ void QuantDequantOpFuser::InsertNewNode(SSAGraph* graph,
   std::vector<Node*> nodes;
   for (int i = 0; i < times_; i++) {
-    nodes.push_back(matched.at("quantized_op_weight" + std::to_string(i)));
-    nodes.push_back(matched.at("quantized_op" + std::to_string(i)));
-    nodes.push_back(matched.at("quantized_op_out" + std::to_string(i)));
-    nodes.push_back(matched.at("dequant_op" + std::to_string(i)));
-    nodes.push_back(matched.at("dequant_op_out" + std::to_string(i)));
+    nodes.push_back(matched.at(string_format("quantized_op_weight%d", i)));
+    nodes.push_back(matched.at(string_format("quantized_op%d", i)));
+    nodes.push_back(matched.at(string_format("quantized_op_out%d", i)));
+    nodes.push_back(matched.at(string_format("dequant_op%d", i)));
+    nodes.push_back(matched.at(string_format("dequant_op_out%d", i)));
   }
   int bit_length = quant_op->stmt()->op_info()->GetAttr<int>("bit_length");
   auto* scope = quant_op->stmt()->op()->scope();
paddle/fluid/lite/core/mir/graph_visualize_pass.cc

@@ -17,6 +17,7 @@
 #include <set>
 #include <string>
 #include "paddle/fluid/lite/core/mir/pass_registry.h"
+#include "paddle/fluid/lite/utils/string.h"

 namespace paddle {
 namespace lite {
@@ -39,7 +40,7 @@ std::string Visualize(mir::SSAGraph* graph) {
     if (node.IsArg()) {
       key = node.AsArg().name;
     } else {
-      key = node.AsStmt().op_type() + std::to_string(id++);
+      key = string_format("%s%d", node.AsStmt().op_type().c_str(), id++);
     }
     if (node.IsStmt()) {
paddle/fluid/lite/core/mir/pattern_matcher.cc

@@ -325,7 +325,7 @@ std::string PMPattern::DotString() const {
   // Create Nodes
   std::unordered_map<PMNode*, std::string> node2dot;
   for (const auto& node : nodes()) {
-    std::string node_id = "Node" + std::to_string(id++);
+    std::string node_id = string_format("Node%d", id++);
     dot.AddNode(node_id, {}, node->name());
     node2dot[node.get()] = node_id;
   }
paddle/fluid/lite/core/mir/pattern_matcher.h

@@ -30,6 +30,7 @@
 #include "paddle/fluid/lite/core/mir/node.h"
 #include "paddle/fluid/lite/core/mir/ssa_graph.h"
 #include "paddle/fluid/lite/model_parser/pb/op_desc.h"
+#include "paddle/fluid/lite/utils/string.h"

 namespace paddle {
 namespace lite {
@@ -228,7 +229,7 @@ class PMPattern {
   FRIEND_TEST(PMPattern, NewNode);
 #endif

-  static std::string NewID() { return "pmnode-" + std::to_string(id_++); }
+  static std::string NewID() { return string_format("pmnode-%d", id_++); }

   std::vector<std::unique_ptr<PMNode>> nodes_;
   std::vector<edge_t> edges_;
paddle/fluid/lite/core/mir/type_target_transform_pass.cc

@@ -20,6 +20,7 @@
 #include <vector>
 #include "paddle/fluid/lite/core/mir/graph_visualize_pass.h"
 #include "paddle/fluid/lite/core/mir/pass_registry.h"
+#include "paddle/fluid/lite/utils/string.h"

 namespace paddle {
 namespace lite {
@@ -80,7 +81,7 @@ void TypeTargetTransformPass::AddIoCopyInst(
   CHECK(in->IsArg());
   auto node_id = [&] { return graph->nodes().size(); };
   auto io_copy_output_name =
-      in->AsArg().name + "/trans/" + std::to_string(node_id());
+      string_format("%s/trans/%d", in->AsArg().name.c_str(), node_id());
   auto* io_copy_output_arg = graph->NewArgumentNode(io_copy_output_name);
   auto* io_copy_inst = graph->NewInstructNode();
paddle/fluid/lite/core/op_lite.cc

@@ -30,6 +30,8 @@ std::vector<std::unique_ptr<KernelBase>> OpLite::CreateKernels(
   auto pick_kernel = [&](const Place& place) {
     auto ks = KernelRegistry::Global().Create(op_type_, place.target,
                                               place.precision, place.layout);
+    VLOG(5) << "pick kernel for " << op_info()->Type() << " " << place
+            << " get " << ks.size() << " kernels";
     for (auto&& it : ks) {
       AttachKernel(it.get());
       kernels.emplace_back(std::move(it));
paddle/fluid/lite/core/op_registry.cc

@@ -62,6 +62,9 @@ std::list<std::unique_ptr<KernelBase>> KernelRegistry::Create(
     case TARGET(kARM): {
       CREATE_KERNEL(kARM);
     } break;
+    case TARGET(kOpenCL): {
+      CREATE_KERNEL(kOpenCL);
+    } break;
     default:
       CHECK(false) << "not supported kernel target " << TargetToStr(target);
   }
@@ -99,6 +102,10 @@ KernelRegistry::KernelRegistry()
   INIT_FOR(kARM, kInt8, kNCHW);
   INIT_FOR(kARM, kAny, kNCHW);
   INIT_FOR(kARM, kAny, kAny);
+  INIT_FOR(kOpenCL, kFloat, kNCHW);
+  INIT_FOR(kOpenCL, kAny, kNCHW);
+  INIT_FOR(kOpenCL, kAny, kAny);
+
 #undef INIT_FOR
 }
paddle/fluid/lite/core/op_registry.h

@@ -82,6 +82,10 @@ class KernelRegistry final {
       KernelRegistryForTarget<TARGET(kARM), PRECISION(kFloat),
                               DATALAYOUT(kNCHW)> *,  //
       KernelRegistryForTarget<TARGET(kARM), PRECISION(kInt8),
-                              DATALAYOUT(kNCHW)> *  //
+                              DATALAYOUT(kNCHW)> *,  //
+      KernelRegistryForTarget<TARGET(kOpenCL), PRECISION(kFloat),
+                              DATALAYOUT(kNCHW)> *,  //
+      KernelRegistryForTarget<TARGET(kOpenCL), PRECISION(kInt8),
+                              DATALAYOUT(kNCHW)> *  //
       >;
paddle/fluid/lite/core/optimizer.h

@@ -52,11 +52,20 @@ class Optimizer {
       RunPasses(std::vector<std::string>{{
           "lite_quant_dequant_fuse_pass",  //
           "lite_conv_bn_fuse_pass",        //
+// This pass is disabled to force some opencl kernels selected for final
+// running, otherwise, they will be fused to ARM fusion kernels, and the OpenCL
+// devices will be discarded.
+// TODO(Superjomn) Refine the fusion related design to select fusion kernels for
+// devices automatically.
+#ifndef LITE_WITH_OPENCL
           "lite_conv_elementwise_add_activation_fuse_pass",  //
+#endif
           "lite_fc_fuse_pass",              //
           "identity_scale_eliminate_pass",  //
 #ifdef LITE_WITH_LIGHT_WEIGHT_FRAMEWORK
+#ifndef LITE_WITH_OPENCL
           "lite_elementwise_add_activation_fuse_pass",  //
+#endif
 #endif
           "static_kernel_pick_pass",        //
           "variable_place_inference_pass",  //
paddle/fluid/lite/core/program.h

@@ -140,7 +140,8 @@ class RuntimeProgram {
   void Run() {
     for (auto& inst : instructions_) {
-      VLOG(3) << ">> Running kernel: " << inst.op()->op_info()->Repr();
+      VLOG(4) << ">> Running kernel: " << inst.op()->op_info()->Repr()
+              << " on Target " << TargetToStr(inst.kernel()->target());
       inst.Run();
     }
   }
paddle/fluid/lite/core/target_wrapper.h

@@ -31,6 +31,7 @@ enum class TargetType : int {
   kX86,
   kCUDA,
   kARM,
+  kOpenCL,
   kAny,  // any target
   NUM,   // number of fields.
 };
@@ -69,8 +70,8 @@ static size_t PrecisionTypeLength(PrecisionType type) {
 #define DATALAYOUT(item__) paddle::lite::DataLayoutType::item__

 static const std::string& TargetToStr(TargetType target) {
-  static const std::string target2string[] = {"unk", "host", "x86", "cuda",
-                                              "arm", "any"};
+  static const std::string target2string[] = {"unk", "host",   "x86", "cuda",
+                                              "arm", "opencl", "any"};
   auto x = static_cast<int>(target);
   CHECK_LT(x, static_cast<int>(TARGET(NUM)));
   return target2string[x];
@@ -92,8 +93,8 @@ static const std::string& DataLayoutToStr(DataLayoutType layout) {
 }

 static const std::string& TargetRepr(TargetType target) {
-  static const std::string target2string[] = {"kUnk", "kHost", "kX86", "kCUDA",
-                                              "kAny"};
+  static const std::string target2string[] = {
+      "kUnk", "kHost", "kX86", "kCUDA", "kARM", "kOpenCL", "kAny"};
   auto x = static_cast<int>(target);
   CHECK_LT(x, static_cast<int>(TARGET(NUM)));
   return target2string[x];
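Both string tables are indexed by static_cast<int>(target), so a new enumerator has to land at the same position in the enum and in both arrays (which is why kOpenCL sits between kARM and kAny everywhere). A small sanity check, as a sketch:

#include <cassert>
#include "paddle/fluid/lite/core/target_wrapper.h"

// Sketch: enum order and array order must stay in lockstep,
// since the lookup is a plain index.
int main() {
  using paddle::lite::TargetType;
  assert(paddle::lite::TargetToStr(TargetType::kOpenCL) == "opencl");
  assert(paddle::lite::TargetRepr(TargetType::kOpenCL) == "kOpenCL");
  return 0;
}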
paddle/fluid/lite/kernels/CMakeLists.txt

@@ -4,5 +4,5 @@ add_subdirectory(host)
 add_subdirectory(arm)
 add_subdirectory(cuda)
 add_subdirectory(x86)
+add_subdirectory(opencl)
paddle/fluid/lite/kernels/opencl/CMakeLists.txt (new file, mode 100644)

if (NOT LITE_WITH_OPENCL)
    return()
endif()

set(cl_kernel_deps op_params_lite cl_caller cl_engine cl_context cl_wrapper)

cc_library(elementwise_add_opencl SRCS elementwise_add_compute.cc DEPS ${cl_kernel_deps})

lite_cc_test(test_elementwise_add_opencl SRCS elementwise_add_compute_test.cc
    DEPS elementwise_add_opencl op_registry_lite program_lite context_lite)

set(opencl_kernels elementwise_add_opencl CACHE INTERNAL "")
paddle/fluid/lite/kernels/opencl/elementwise_add_compute.cc (new file, mode 100644)

// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "paddle/fluid/lite/core/kernel.h"
#include "paddle/fluid/lite/core/op_registry.h"
#include "paddle/fluid/lite/operators/op_params.h"
// NOTE ugly here, hide these.
#include "paddle/fluid/lite/opencl/cl_caller.h"

namespace paddle {
namespace lite {
namespace kernels {
namespace opencl {

class ElementwiseAddCompute
    : public KernelLite<TARGET(kOpenCL), PRECISION(kFloat),
                        DATALAYOUT(kNCHW)> {
 public:
  using param_t = operators::ElementwiseParam;

  void Run() override {
    auto& param = *param_.get_mutable<param_t>();
    auto& context = ctx_->As<OpenClContext>();
    CHECK(context.cl_context());
    elementwise_add(context.cl_context(),
                    static_cast<const float*>(param.X->raw_data()),
                    param.X->dims(),
                    static_cast<const float*>(param.Y->raw_data()),
                    param.Y->dims(), param.Out->mutable_data<float>(),
                    param.Out->dims());
  }
};

}  // namespace opencl
}  // namespace kernels
}  // namespace lite
}  // namespace paddle

REGISTER_LITE_KERNEL(elementwise_add, kOpenCL, kFloat, kNCHW,
                     paddle::lite::kernels::opencl::ElementwiseAddCompute, def)
    .BindInput("X", {LiteType::GetTensorTy(TARGET(kHost))})
    .BindInput("Y", {LiteType::GetTensorTy(TARGET(kHost))})
    .BindOutput("Out", {LiteType::GetTensorTy(TARGET(kHost))})
    .Finalize();
paddle/fluid/lite/kernels/opencl/elementwise_add_compute_test.cc (new file, mode 100644)

// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include <gtest/gtest.h>
#include "paddle/fluid/lite/core/op_registry.h"

namespace paddle {
namespace lite {

TEST(elementwise_add, init) {
  LOG(INFO) << "to get kernel ...";
  auto kernels =
      KernelRegistry::Global().Create("elementwise_add", TARGET(kOpenCL),
                                      PRECISION(kFloat), DATALAYOUT(kNCHW));
  ASSERT_FALSE(kernels.empty());

  auto kernel = std::move(kernels.front());

  LOG(INFO) << "get kernel";

  lite::Tensor X, Y, Out;
  operators::ElementwiseParam param;
  param.X = &X;
  param.Y = &Y;
  param.Out = &Out;

  std::unique_ptr<KernelContext> context(new KernelContext);
  context->As<OpenClContext>().InitOnce();

  kernel->SetParam(param);
  kernel->SetContext(std::move(context));

  X.Resize({1, 10});
  Y.Resize({1, 10});
  Out.Resize({1, 10});

  auto* x_data = X.mutable_data<float>();
  auto* y_data = Y.mutable_data<float>();
  auto* out_data = Out.mutable_data<float>();

  for (int i = 0; i < 10; i++) {
    x_data[i] = 1.1 * i;
    y_data[i] = 2.3 * i;
  }

  kernel->Launch();

  for (int i = 0; i < 10; i++) {
    EXPECT_NEAR(out_data[i], 3.4 * i, 1e-1);
  }
}

}  // namespace lite
}  // namespace paddle

USE_LITE_KERNEL(elementwise_add, kOpenCL, kFloat, kNCHW, def);
paddle/fluid/lite/kernels/use_kernels.h

@@ -61,3 +61,6 @@ USE_LITE_KERNEL(mul, kCUDA, kFloat, kNCHW, def);
 USE_LITE_KERNEL(io_copy, kCUDA, kAny, kAny, host_to_device);
 USE_LITE_KERNEL(io_copy, kCUDA, kAny, kAny, device_to_host);
 #endif
+
+#ifdef LITE_WITH_OPENCL
+USE_LITE_KERNEL(elementwise_add, kOpenCL, kFloat, kNCHW, def);
+#endif
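USE_LITE_KERNEL is the usual "touch symbol" idiom: static registration such as REGISTER_LITE_KERNEL in elementwise_add_compute.cc can be discarded by the linker unless something in the final binary references that object file. A generic sketch of the idiom (names hypothetical, not the actual macro expansion; see op_registry.h for the real one):

// kernel.cc - the registration side defines a trivial "touch" function
// next to its static registrar object:
//   int touch_elementwise_add_opencl() { return 0; }

// user.cc - the USE_* macro expands to roughly this: an extern declaration
// plus a variable whose initializer calls it, forcing kernel.o to link in.
extern int touch_elementwise_add_opencl();
static int use_elementwise_add_opencl __attribute__((unused)) =
    touch_elementwise_add_opencl();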
paddle/fluid/lite/opencl/CMakeLists.txt

@@ -2,18 +2,16 @@ if (NOT LITE_WITH_OPENCL)
   return()
 endif()

-if (WITH_LITE AND LITE_WITH_LIGHT_WEIGHT_FRAMEWORK)
-  cc_library(cl_wrapper SRCS cl_wrapper.cc)
-  cc_library(cl_tool SRCS cl_tool.cc)
-  target_compile_options(cl_tool BEFORE PUBLIC -Wno-ignored-qualifiers)
-  cc_library(cl_half SRCS cl_half.cc)
-  target_compile_options(cl_half BEFORE PUBLIC -fno-strict-aliasing)
-  cc_library(cl_engine SRCS cl_engine.cc DEPS cl_tool)
-  cc_library(cl_context SRCS cl_context.cc DEPS cl_engine)
-  cc_library(cl_helper SRCS cl_helper.cc DEPS cl_context)
-  cc_library(cl_image_converter SRCS cl_image_converter.cc DEPS cl_half lite_tensor)
-  cc_library(cl_image SRCS cl_image.cc DEPS cl_half lite_tensor cl_image_converter cl_engine)
-  cc_library(cl_caller SRCS cl_caller.cc DEPS cl_helper cl_image)
-  lite_cc_test(test_cl_runtime SRCS cl_test.cc DEPS cl_helper cl_image cl_caller cl_wrapper)
-  add_dependencies(cl_tool opencl_clhpp)
-endif()
+cc_library(cl_wrapper SRCS cl_wrapper.cc)
+cc_library(cl_tool SRCS cl_tool.cc)
+target_compile_options(cl_tool BEFORE PUBLIC -Wno-ignored-qualifiers)
+cc_library(cl_half SRCS cl_half.cc)
+target_compile_options(cl_half BEFORE PUBLIC -fno-strict-aliasing)
+cc_library(cl_engine SRCS cl_engine.cc DEPS cl_tool)
+cc_library(cl_context SRCS cl_context.cc DEPS cl_engine)
+cc_library(cl_helper SRCS cl_helper.cc DEPS cl_context)
+cc_library(cl_image_converter SRCS cl_image_converter.cc DEPS cl_half lite_tensor)
+cc_library(cl_image SRCS cl_image.cc DEPS cl_half lite_tensor cl_image_converter cl_engine)
+cc_library(cl_caller SRCS cl_caller.cc DEPS cl_helper cl_image)
+lite_cc_test(test_cl_runtime SRCS cl_test.cc DEPS cl_helper cl_image cl_caller cl_wrapper)
+add_dependencies(cl_tool opencl_clhpp)
paddle/fluid/lite/opencl/cl_caller.cc

@@ -49,12 +49,12 @@ bool InitOpenCLEngine(std::string cl_path) {
   return engine->IsInitSuccess();
 }

-void elementwise_add(CLContext* context, float* in, const DDim& in_dim,
-                     float* bias, const DDim& bias_dim, float* out,
+void elementwise_add(CLContext* context, const float* in, const DDim& in_dim,
+                     const float* bias, const DDim& bias_dim, float* out,
                      const DDim& out_dim) {
   CLHelper helper(context);
   helper.AddKernel("elementwise_add", "elementwise_add_kernel.cl");
-  auto kernel = helper.KernelAt(0);
+  auto kernel = helper.GetKernel(0);
   CLImage in_image;
   in_image.set_tensor_data(in, in_dim);
   in_image.InitNormalCLImage(helper.OpenCLContext());
paddle/fluid/lite/opencl/cl_caller.h

@@ -22,8 +22,13 @@ namespace paddle {
 namespace lite {

 bool InitOpenCLEngine(std::string cl_path);
-void elementwise_add(CLContext* context, float* in, const DDim& in_dim,
-                     float* bias, const DDim& bias_dim, float* out,
-                     const DDim& out_dim);
+
+/// An elementwise_add method to embed OpenCL logic inside, it is used as a
+/// black box so that the framework can remain simple.
+/// NOTE Currently, these methods are quite expensive, we will optimize them
+/// latter.
+void elementwise_add(CLContext* context, const float* in, const DDim& in_dim,
+                     const float* bias, const DDim& bias_dim, float* out,
+                     const DDim& out_dim);

 }  // namespace lite
paddle/fluid/lite/opencl/cl_helper.cc

@@ -29,17 +29,18 @@ void CLHelper::AddKernel(const std::string &kernel_name,
   CHECK(context_ != nullptr) << "Please use set_context first!";
   VLOG(3) << " --- begin to add kernel ---";
   auto kernel = context_->GetKernel(kernel_name, file_name, options);
-  kernels.emplace_back(std::move(kernel));
+  kernels_.emplace_back(std::move(kernel));
+  kernel_offset_[kernel_name] = kernels_.size() - 1;
   VLOG(3) << " --- end to add kernel --- ";
 }

-cl::Kernel &CLHelper::KernelAt(const int index) {
-  VLOG(3) << " --- kernel count: " << kernels.size() << " --- ";
-  CHECK(static_cast<size_t>(index) < kernels.size())
+cl::Kernel &CLHelper::GetKernel(const int index) {
+  VLOG(3) << " --- kernel count: " << kernels_.size() << " --- ";
+  CHECK(static_cast<size_t>(index) < kernels_.size())
       << "The index must be less than the size of kernels.";
-  CHECK(kernels[index] != nullptr)
+  CHECK(kernels_[index] != nullptr)
       << "The target kernel pointer cannot be null.";
-  return *(kernels[index]);
+  return *(kernels_[index]);
 }

 cl::CommandQueue &CLHelper::OpenCLCommandQueue() {
paddle/fluid/lite/opencl/cl_helper.h

@@ -14,6 +14,7 @@ limitations under the License. */
 #pragma once

+#include <map>
 #include <memory>
 #include <string>
 #include <vector>
@@ -35,7 +36,12 @@ class CLHelper {
   void AddKernel(const std::string &kernel_name, const std::string &file_name,
                  const std::string &options = "");

-  cl::Kernel &KernelAt(const int index);
+  cl::Kernel &GetKernel(const int index);
+
+  cl::Kernel &GetKernel(const std::string &name) {
+    auto it = kernel_offset_.find(name);
+    CHECK(it != kernel_offset_.end());
+    return GetKernel(it->second);
+  }

   cl::CommandQueue &OpenCLCommandQueue();
@@ -45,7 +51,8 @@ class CLHelper {
  private:
   CLContext *context_{nullptr};
-  std::vector<std::unique_ptr<cl::Kernel>> kernels;
+  std::map<std::string, int> kernel_offset_;
+  std::vector<std::unique_ptr<cl::Kernel>> kernels_;
 };

 }  // namespace lite
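With the kernel_offset_ map, callers can now address kernels by the name passed to AddKernel instead of remembering insertion order. A usage sketch (assuming a valid CLContext*):

#include "paddle/fluid/lite/opencl/cl_helper.h"

void Demo(paddle::lite::CLContext* context) {
  paddle::lite::CLHelper helper(context);
  helper.AddKernel("elementwise_add", "elementwise_add_kernel.cl");
  helper.AddKernel("pool_max", "pool_kernel.cl");

  cl::Kernel& by_index = helper.GetKernel(1);          // old KernelAt, renamed
  cl::Kernel& by_name = helper.GetKernel("pool_max");  // new name-based lookup
  (void)by_index;  // both refer to the same kernel object
  (void)by_name;
}

Note that adding the same name twice (as cl_test.cc below does) leaves kernel_offset_ pointing at the most recently added entry.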
paddle/fluid/lite/opencl/cl_image.cc

@@ -53,7 +53,7 @@ std::ostream& operator<<(std::ostream& os, const CLImage& cl_image) {
   return os;
 }

-void CLImage::set_tensor_data(float* tensor_data, const DDim& dim) {
+void CLImage::set_tensor_data(const float* tensor_data, const DDim& dim) {
 #ifdef LITE_WITH_LIGHT_WEIGHT_FRAMEWORK
   auto numel = dim.product();
 #else
paddle/fluid/lite/opencl/cl_image.h

@@ -33,7 +33,7 @@ class CLImage {
   /*
    * Will not hold input tensor data, memcpy in this method.
    * */
-  void set_tensor_data(float* tensor_data, const DDim& dim);
+  void set_tensor_data(const float* tensor_data, const DDim& dim);

   bool IsInit() { return initialized_; }
   /*
paddle/fluid/lite/opencl/cl_test.cc

@@ -65,7 +65,7 @@ TEST(cl_test, kernel_test) {
   helper->AddKernel("elementwise_add", "elementwise_add_kernel.cl");
   helper->AddKernel("pool_max", "pool_kernel.cl");
   helper->AddKernel("elementwise_add", "elementwise_add_kernel.cl");
-  auto kernel = helper->KernelAt(2);
+  auto kernel = helper->GetKernel(2);

   std::unique_ptr<float[]> in_data(new float[1024 * 512]);
   for (int i = 0; i < 1024 * 512; i++) {