Commit 5945175f

fix memcpy size in opencl fetch kernel (#1630)

Authored May 17, 2019 by Jiaying Zhao; committed May 17, 2019 by xiebaiyuan.
Parent: b05774ca

Showing 4 changed files with 27 additions and 37 deletions (+27 -37):
src/framework/cl/cl_tensor.h               +1  -1
src/operators/kernel/cl/feed_kernel.cpp    +0  -2
src/operators/kernel/cl/fetch_kernel.cpp   +20 -30
test/net/test_super.cpp                    +6  -4
src/framework/cl/cl_tensor.h

```diff
@@ -26,7 +26,7 @@ limitations under the License. */
 namespace paddle_mobile {
 namespace framework {
 
-class CLTensor : TensorBase {
+class CLTensor : public TensorBase {
  public:
  CLTensor(cl_context context, cl_command_queue command_queue)
      : context_(context), command_queue_(command_queue) {}
```
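Note on the change above: for `class` (unlike `struct`), base classes are private by default, so the original `class CLTensor : TensorBase` did not let a `CLTensor` be used as a `TensorBase` outside the class itself. A minimal sketch of the failure mode, using stand-in types rather than the Paddle-Lite headers:

```cpp
#include <iostream>

struct TensorBase {  // stand-in for framework::TensorBase
  virtual ~TensorBase() = default;
};

// class CLTensor : TensorBase {};      // private inheritance by default:
//                                      // the conversion in main() would
//                                      // fail to compile
class CLTensor : public TensorBase {};  // public inheritance, as in the fix

int main() {
  CLTensor t;
  TensorBase *base = &t;  // derived-to-base conversion now allowed
  std::cout << "CLTensor is-a TensorBase: " << (base != nullptr) << "\n";
}
```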
src/operators/kernel/cl/feed_kernel.cpp

```diff
@@ -31,8 +31,6 @@ void FeedKernel<GPU_CL, float>::Compute(const FeedParam<GPU_CL> &param) {
   auto kernel = this->cl_helper_.KernelAt(0);
   auto default_work_size = this->cl_helper_.DefaultWorkSize(*(param.Out()));
   cl_int status;
-  param.Out()->InitEmptyImage(cl_helper_.CLContext(),
-                              cl_helper_.CLCommandQueue(), param.Out()->dims());
   auto output = param.Out();
   const Tensor *input = &param.InputX()->at(col);
   // DLOG << *input;
```
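The deleted lines re-initialized the output image on every `Compute` call; presumably the output is already initialized before the feed kernel runs, which would make the per-call `InitEmptyImage` redundant. As a generic illustration of the allocate-once-and-reuse pattern with the plain OpenCL C API (a hypothetical helper, not Paddle-Lite code):

```cpp
#include <CL/cl.h>

// Hypothetical helper: create the output image on first use and cache it,
// instead of re-creating it on every kernel launch.
cl_mem GetOrCreateOutputImage(cl_context ctx, size_t w, size_t h) {
  static cl_mem cached = nullptr;  // sketch only; real code would key on shape
  if (cached == nullptr) {
    cl_image_format fmt = {CL_RGBA, CL_HALF_FLOAT};
    cl_image_desc desc = {};
    desc.image_type = CL_MEM_OBJECT_TYPE_IMAGE2D;
    desc.image_width = w;
    desc.image_height = h;
    cl_int err = CL_SUCCESS;
    cached = clCreateImage(ctx, CL_MEM_READ_WRITE, &fmt, &desc, nullptr, &err);
  }
  return cached;
}
```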
src/operators/kernel/cl/fetch_kernel.cpp

```diff
@@ -14,19 +14,13 @@ limitations under the License. */
 #include "operators/kernel/fetch_kernel.h"
 #include "framework/cl/cl_tensor.h"
-// #include "common/common.h"
-// #include <iostream>
 
 namespace paddle_mobile {
 namespace operators {
 
 template <>
 bool FetchKernel<GPU_CL, float>::Init(FetchParam<GPU_CL> *param) {
-  // if (param->InputX()->dims().size() <= 2) {
-  //   this->cl_helper_.AddKernel("fetch_2d", "fetch_kernel.cl");
-  // } else {
   this->cl_helper_.AddKernel("fetch", "fetch_kernel.cl");
-  // }
   return true;
 }
@@ -40,24 +34,27 @@ void FetchKernel<GPU_CL, float>::Compute(const FetchParam<GPU_CL> &param) {
   auto *out = &param.Out()->at(col);
   out->Resize(param.InputX()->dims());
   out->mutable_data<float>();
-  const auto &dim = param.InputX()->dims();
+  DLOG << "fetch kernel out dims = " << out->dims();
+  DLOG << "fetch kernel out memory size = " << out->memory_size();
+  auto dim = param.InputX()->dims();
   size_t new_dims[] = {1, 1, 1, 1};
   for (int j = 0; j < dim.size(); ++j) {
     new_dims[4 - dim.size() + j] = dim[j];
   }
-  size_t C, in_height, in_width;
-  C = new_dims[1];
+  size_t in_ch, in_height, in_width;
+  in_ch = new_dims[1];
   in_height = new_dims[2];
-  // if (dim.size() <= 2) {
-  //   in_width = param.InputX()->ImageWidth();
-  // } else {
   in_width = new_dims[3];
-  // }
+  int size_ch = in_height * in_width;
+  int size_block = size_ch * 4;
+  int size_batch = size_ch * in_ch;
-  CLTensor out_cl_tensor(this->cl_helper_.CLContext(),
-                         this->cl_helper_.CLCommandQueue());
+  framework::CLTensor out_cl_tensor(this->cl_helper_.CLContext(),
+                                    this->cl_helper_.CLCommandQueue());
   out_cl_tensor.Resize(out->dims());
   cl_mem outBuffer = out_cl_tensor.mutable_data<float>();
@@ -66,35 +63,28 @@ void FetchKernel<GPU_CL, float>::Compute(const FetchParam<GPU_CL> &param) {
   clSetKernelArg(kernel, 1, sizeof(int), &in_width);
   clSetKernelArg(kernel, 2, sizeof(cl_mem), &input);
   clSetKernelArg(kernel, 3, sizeof(cl_mem), &outBuffer);
-  // if (dim.size() > 2) {
-  int size_ch = in_height * in_width;
-  int size_block = size_ch * 4;
-  int size_batch = size_ch * C;
-  int out_c = new_dims[1];
   clSetKernelArg(kernel, 4, sizeof(int), &size_ch);
   clSetKernelArg(kernel, 5, sizeof(int), &size_block);
   clSetKernelArg(kernel, 6, sizeof(int), &size_batch);
-  clSetKernelArg(kernel, 7, sizeof(int), &out_c);
-  // }
+  clSetKernelArg(kernel, 7, sizeof(int), &in_ch);
   // cl_event wait_event = param.InpdutX()->GetClEvent();
   clEnqueueNDRangeKernel(this->cl_helper_.CLCommandQueue(), kernel, 3, NULL,
                          default_work_size.data(), NULL, 0, NULL, NULL);
-  // auto time1 = paddle_mobile::time();
   // printf(" before finish \n");
   // clFlsh(this->cl_helper_.CLCommandQueue());
   clFinish(this->cl_helper_.CLCommandQueue());
   // printf(" after finish \n");
-  // auto time2 = paddle_mobile::time();
-  DLOG << "fetch kernel out dims = " << out->dims();
-  DLOG << "fetch kernel out memory size = " << out->memory_size();
-  // std::cout << " finish cost :" << paddle_mobile::time_diff(time1, time2)
-  //           << "ms" << std::endl;
-  memcpy(out->data<float>(), out_cl_tensor.Data<float>(), out->memory_size());
+  DLOG << "fetch kernel out_cl_tensor dims = " << out_cl_tensor.dims();
+  DLOG << "fetch kernel out_cl_tensor memery size = "
+       << out_cl_tensor.memory_size();
+  memcpy(out->data<float>(), out_cl_tensor.Data<float>(),
+         sizeof(float) * out->numel());
 }
 
 template class FetchKernel<GPU_CL, float>;
```
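This last hunk is the fix the commit title names. `out->memory_size()` reports the byte size of the tensor's current allocation, which can exceed `sizeof(float) * out->numel()` when the holder keeps a larger buffer from an earlier shape; copying `memory_size()` bytes then reads past the end of `out_cl_tensor`'s data. A minimal sketch of the mismatch, using a toy tensor type that only grows its allocation (an assumption about the real tensor's behavior, not the Paddle-Lite class):

```cpp
#include <cstring>
#include <vector>

// Toy stand-in for the tensor: Resize() changes the logical element count,
// but the underlying allocation is only ever grown, never shrunk.
struct ToyTensor {
  std::vector<float> buf;
  size_t n = 0;  // logical element count
  void Resize(size_t count) {
    n = count;
    if (count > buf.size()) buf.resize(count);  // keep larger allocations
  }
  size_t numel() const { return n; }
  size_t memory_size() const { return buf.size() * sizeof(float); }
  float *data() { return buf.data(); }
};

int main() {
  ToyTensor out;
  out.Resize(1000);  // earlier, larger shape
  out.Resize(10);    // current shape: 10 elements, allocation still 1000

  float src[10] = {};  // device result staged on the host, 10 floats

  // Buggy: copies memory_size() == 4000 bytes from a 40-byte source.
  // std::memcpy(out.data(), src, out.memory_size());

  // Fixed, as in the commit: copy exactly what the result occupies.
  std::memcpy(out.data(), src, sizeof(float) * out.numel());
}
```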
test/net/test_super.cpp

```diff
@@ -21,12 +21,14 @@ int main() {
   paddle_mobile::PaddleMobileConfigInternal config;
   config.load_when_predict = true;
-  paddle_mobile::PaddleMobile<paddle_mobile::CPU> paddle_mobile(config);
-  // paddle_mobile.SetThreadNum(4);
   auto time1 = paddle_mobile::time();
 #ifdef PADDLE_MOBILE_CL
+  paddle_mobile::PaddleMobile<paddle_mobile::GPU_CL> paddle_mobile(config);
   paddle_mobile.SetCLPath("/data/local/tmp/bin");
+#else
+  paddle_mobile::PaddleMobile<paddle_mobile::CPU> paddle_mobile(config);
 #endif
+  // paddle_mobile.SetThreadNum(4);
   auto isok = paddle_mobile.Load(std::string(g_super) + "/model",
                                  std::string(g_super) + "/params", true, false,
@@ -131,12 +133,12 @@ int main() {
       auto time5 = paddle_mobile::time();
       vec_result4 = paddle_mobile.Predict(input4, dims4);
       auto time6 = paddle_mobile::time();
-      std::cout << "224*224 predict cost :第" << i << ": "
+      std::cout << "300*300 predict cost :第" << i << ": "
                 << paddle_mobile::time_diff(time5, time6) << "ms" << std::endl;
     }
     auto time4 = paddle_mobile::time();
-    std::cout << "224*224 predict cost :"
+    std::cout << "300*300 predict cost :"
               << paddle_mobile::time_diff(time3, time4) / max << "ms"
               << std::endl;
     // biggest = std::max_element(std::begin(vec_result4),
```
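The test change makes the benchmark labels match the 300*300 input actually being timed, and selects the backend at compile time via `PADDLE_MOBILE_CL`. For reference, a chrono-based equivalent of the `paddle_mobile::time()` / `time_diff()` pattern; that these helpers return a monotonic time point and milliseconds is an assumption, not something this diff confirms:

```cpp
#include <chrono>
#include <iostream>

// Assumed semantics of paddle_mobile::time()/time_diff(): a monotonic
// time point, and the elapsed interval in milliseconds.
static std::chrono::steady_clock::time_point now() {
  return std::chrono::steady_clock::now();
}
static double ms_between(std::chrono::steady_clock::time_point a,
                         std::chrono::steady_clock::time_point b) {
  return std::chrono::duration<double, std::milli>(b - a).count();
}

int main() {
  auto t1 = now();
  volatile double acc = 0;  // stand-in for paddle_mobile.Predict(...)
  for (int i = 0; i < 1000000; ++i) acc += i;
  auto t2 = now();
  std::cout << "300*300 predict cost :" << ms_between(t1, t2) << "ms\n";
}
```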