Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
PaddlePaddle
Paddle-Lite
提交
4a6dd33f
P
Paddle-Lite
项目概览
PaddlePaddle
/
Paddle-Lite
通知
332
Star
4
Fork
1
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
271
列表
看板
标记
里程碑
合并请求
78
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle-Lite
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
271
Issue
271
列表
看板
标记
里程碑
合并请求
78
合并请求
78
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
4a6dd33f
编写于
10月 11, 2018
作者:
R
Ray Liu
提交者:
GitHub
10月 11, 2018
浏览文件
操作
浏览文件
下载
差异文件
Merge pull request #1054 from codeWorm2015/opencl
resolve complier error
上级
20d2c1cd
11b2bd57
变更
12
隐藏空白更改
内联
并排
Showing
12 changed file
with
178 addition
and
66 deletion
+178
-66
src/framework/cl/cl_engine.h
src/framework/cl/cl_engine.h
+1
-0
src/framework/cl/cl_image.h
src/framework/cl/cl_image.h
+25
-12
src/framework/loader.cpp
src/framework/loader.cpp
+6
-6
src/framework/loader.h
src/framework/loader.h
+1
-1
src/framework/program/program.h
src/framework/program/program.h
+1
-1
src/io/paddle_mobile.cpp
src/io/paddle_mobile.cpp
+4
-3
src/io/paddle_mobile.h
src/io/paddle_mobile.h
+1
-1
src/operators/kernel/cl/cl_kernel/elementwise_add_kernel.cl
src/operators/kernel/cl/cl_kernel/elementwise_add_kernel.cl
+14
-0
src/operators/kernel/cl/conv_add_bn_kernel.cpp
src/operators/kernel/cl/conv_add_bn_kernel.cpp
+0
-37
src/operators/kernel/cl/conv_add_bn_relu_kernel.cpp
src/operators/kernel/cl/conv_add_bn_relu_kernel.cpp
+22
-3
src/operators/kernel/cl/conv_add_kernel.cpp
src/operators/kernel/cl/conv_add_kernel.cpp
+52
-1
src/operators/kernel/cl/conv_kernel.cpp
src/operators/kernel/cl/conv_kernel.cpp
+51
-1
未找到文件。
src/framework/cl/cl_engine.h
浏览文件 @
4a6dd33f
...
...
@@ -20,6 +20,7 @@ limitations under the License. */
#include "CL/cl.h"
#include "common/enforce.h"
#include "framework/cl/cl_deleter.h"
#include "framework/cl/cl_tool.h"
namespace
paddle_mobile
{
namespace
framework
{
...
...
src/framework/cl/cl_image.h
浏览文件 @
4a6dd33f
...
...
@@ -43,17 +43,28 @@ class CLImage {
C
=
tensor_dims_
[
1
];
H
=
tensor_dims_
[
2
];
W
=
tensor_dims_
[
3
];
width_of_one_block_
=
W
;
height_of_one_block_
=
H
;
}
else
if
(
tensor_dims_
.
size
()
==
1
)
{
N
=
1
;
C
=
tensor_dims_
[
0
];
H
=
1
;
W
=
1
;
width_of_one_block_
=
W
;
height_of_one_block_
=
H
;
}
DLOG
<<
"-------InitMemory-------"
;
size_t
width
=
W
*
((
C
+
3
)
/
4
);
size_t
height
=
H
*
N
;
image_width_
=
width
;
image_height_
=
height
;
std
::
unique_ptr
<
half_t
[]
>
imageData
{};
int
count
=
0
;
if
(
tensorInput
!=
nullptr
)
{
...
...
@@ -95,9 +106,13 @@ class CLImage {
0
,
// size_t image_row_pitch
reinterpret_cast
<
void
*>
(
imageData
.
get
()),
// void *host_ptr
&
err
);
if
(
err
!=
CL_SUCCESS
)
{
// TODO(HaiPeng): error handling
PADDLE_MOBILE_THROW_EXCEPTION
(
" create image 2d error "
);
}
initialized_
=
true
;
}
void
Init
(
cl_context
context
,
DDim
ddim
)
{
Init
(
context
,
nullptr
,
ddim
);
}
...
...
@@ -109,8 +124,6 @@ class CLImage {
const
DDim
&
dims
()
const
{
return
tensor_dims_
;
}
std
::
vector
<
size_t
>
DefaultWorkSize
()
{
return
{};
}
cl_mem
GetCLImage
()
const
{
return
cl_image_
;
}
template
<
typename
T
>
...
...
@@ -120,24 +133,24 @@ class CLImage {
inline
int64_t
numel
()
const
{
return
product
(
tensor_dims_
);
}
int
ImageWidth
()
const
{
return
image_width_
;
}
in
line
size_
t
ImageWidth
()
const
{
return
image_width_
;
}
int
ImageHeight
()
const
{
return
image_height_
;
}
in
line
size_
t
ImageHeight
()
const
{
return
image_height_
;
}
int
CBlock
()
const
{
return
c_block_
;
}
in
line
size_
t
CBlock
()
const
{
return
c_block_
;
}
int
WidthOfOneBlock
()
const
{
return
width_of_one_block_
;
}
in
line
size_
t
WidthOfOneBlock
()
const
{
return
width_of_one_block_
;
}
int
HeightOfOneBlock
()
const
{
return
height_of_one_block_
;
}
in
line
size_
t
HeightOfOneBlock
()
const
{
return
height_of_one_block_
;
}
private:
bool
initialized_
=
false
;
cl_mem
cl_image_
;
in
t
image_width_
;
in
t
width_of_one_block_
;
in
t
height_of_one_block_
;
in
t
image_height_
;
in
t
c_block_
;
size_
t
image_width_
;
size_
t
width_of_one_block_
;
size_
t
height_of_one_block_
;
size_
t
image_height_
;
size_
t
c_block_
;
DDim
tensor_dims_
;
float
*
tensor_input_
;
cl_context
context_
;
...
...
src/framework/loader.cpp
浏览文件 @
4a6dd33f
...
...
@@ -95,15 +95,15 @@ void Loader<GPU_CL, Precision::FP32>::InitMemoryFromProgram(
*/
template
<
typename
Dtype
,
Precision
P
>
void
FusionAndPrintInfos
(
bool
optimize
,
bool
can_add_split
,
const
Program
<
Dtype
,
P
>
&
program
,
bool
optimize
,
bool
can_add_split
,
Program
<
Dtype
,
P
>
*
program
,
const
std
::
shared_ptr
<
ProgramDesc
>
&
originProgramDesc
)
{
if
(
optimize
)
{
ProgramOptimize
program_optimize
;
program
.
optimizeProgram
=
program
->
optimizeProgram
=
program_optimize
.
FusionOptimize
(
originProgramDesc
,
can_add_split
);
}
if
(
optimize
)
{
program
.
optimizeProgram
->
Description
(
"optimize: "
);
program
->
optimizeProgram
->
Description
(
"optimize: "
);
}
else
{
originProgramDesc
->
Description
(
"program: "
);
}
...
...
@@ -186,7 +186,7 @@ const Program<Dtype, P> Loader<Dtype, P>::LoadProgram(
// use originProgramDesc and scope to init tensors
InitMemoryFromProgram
(
originProgramDesc
,
scope
);
// perform fusion and print infos
FusionAndPrintInfos
(
optimize
,
can_add_split
,
program
,
originProgramDesc
);
FusionAndPrintInfos
(
optimize
,
can_add_split
,
&
program
,
originProgramDesc
);
paddle_mobile__framework__proto__program_desc__free_unpacked
(
c_program
,
NULL
);
return
program
;
...
...
@@ -195,7 +195,7 @@ const Program<Dtype, P> Loader<Dtype, P>::LoadProgram(
template
<
typename
Dtype
,
Precision
P
>
const
Program
<
Dtype
,
P
>
Loader
<
Dtype
,
P
>::
LoadCombinedMemory
(
size_t
read_size
,
const
uint8_t
*
buf
,
size_t
combined_params_len
,
const
uint8_t
*
combined_params_buf
,
bool
optimize
,
bool
quantification
)
{
uint8_t
*
combined_params_buf
,
bool
optimize
,
bool
quantification
)
{
bool
can_add_split
=
false
;
PaddleMobile__Framework__Proto__ProgramDesc
*
c_program
;
...
...
@@ -221,7 +221,7 @@ const Program<Dtype, P> Loader<Dtype, P>::LoadCombinedMemory(
auto
scope
=
std
::
make_shared
<
Scope
>
();
program
.
scope
=
scope
;
InitMemoryFromProgram
(
originProgramDesc
,
scope
);
FusionAndPrintInfos
(
optimize
,
can_add_split
,
program
,
originProgramDesc
);
FusionAndPrintInfos
(
optimize
,
can_add_split
,
&
program
,
originProgramDesc
);
paddle_mobile__framework__proto__program_desc__free_unpacked
(
c_program
,
nullptr
);
return
program
;
...
...
src/framework/loader.h
浏览文件 @
4a6dd33f
...
...
@@ -46,7 +46,7 @@ class Loader {
const
Program
<
Dtype
,
P
>
LoadCombinedMemory
(
size_t
model_len
,
const
uint8_t
*
model_buf
,
size_t
combined_params_len
,
const
uint8_t
*
combined_params_buf
,
uint8_t
*
combined_params_buf
,
bool
optimize
=
false
,
bool
quantification
=
false
);
...
...
src/framework/program/program.h
浏览文件 @
4a6dd33f
...
...
@@ -32,7 +32,7 @@ class Program {
bool
combined
=
false
;
bool
quantification
=
false
;
size_t
combined_params_len
;
const
uint8_t
*
combined_params_buf
;
uint8_t
*
combined_params_buf
;
private:
};
...
...
src/io/paddle_mobile.cpp
浏览文件 @
4a6dd33f
...
...
@@ -68,9 +68,10 @@ bool PaddleMobile<Dtype, P>::Load(const std::string &model_path,
}
template
<
typename
Dtype
,
Precision
P
>
bool
PaddleMobile
<
Dtype
,
P
>::
LoadCombinedMemory
(
size_t
model_len
,
const
uint8_t
*
model_buf
,
size_t
combined_params_len
,
const
uint8_t
*
combined_params_buf
)
{
bool
PaddleMobile
<
Dtype
,
P
>::
LoadCombinedMemory
(
size_t
model_len
,
const
uint8_t
*
model_buf
,
size_t
combined_params_len
,
uint8_t
*
combined_params_buf
)
{
int
batch_size
=
1
;
bool
optimise
=
true
;
bool
quantification
=
false
;
...
...
src/io/paddle_mobile.h
浏览文件 @
4a6dd33f
...
...
@@ -83,7 +83,7 @@ class PaddleMobile {
*/
bool
LoadCombinedMemory
(
size_t
model_len
,
const
uint8_t
*
model_buf
,
size_t
combined_params_len
,
const
uint8_t
*
combined_params_buf
);
uint8_t
*
combined_params_buf
);
void
Clear
();
...
...
src/operators/kernel/cl/cl_kernel/elementwise_add_kernel.cl
浏览文件 @
4a6dd33f
/*
Copyright
(
c
)
2018
PaddlePaddle
Authors.
All
Rights
Reserved.
Licensed
under
the
Apache
License,
Version
2.0
(
the
"License"
)
;
you
may
not
use
this
file
except
in
compliance
with
the
License.
You
may
obtain
a
copy
of
the
License
at
http://www.apache.org/licenses/LICENSE-2.0
Unless
required
by
applicable
law
or
agreed
to
in
writing,
software
distributed
under
the
License
is
distributed
on
an
"AS IS"
BASIS,
WITHOUT
WARRANTIES
OR
CONDITIONS
OF
ANY
KIND,
either
express
or
implied.
See
the
License
for
the
specific
language
governing
permissions
and
limitations
under
the
License.
*/
__kernel
void
elementwise_add
(
__global
float*
in,
__global
float*
out
)
{
int
num
=
get_global_id
(
0
)
;
out[num]
=
in[num]
*
0.1
+
102
;
...
...
src/operators/kernel/cl/conv_add_bn_kernel.cpp
已删除
100644 → 0
浏览文件 @
20d2c1cd
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#ifdef FUSION_CONVADDBN_OP
#include "operators/kernel/conv_add_bn_kernel.h"
namespace
paddle_mobile
{
namespace
operators
{
template
<
>
bool
ConvAddBNReluKernel
<
GPU_CL
,
float
>::
Init
(
FusionConvAddBNReluParam
<
GPU_CL
>
*
param
)
{
return
true
;
}
template
<
>
void
ConvAddBNReluKernel
<
GPU_CL
,
float
>::
Compute
(
const
FusionConvAddBNReluParam
<
GPU_CL
>
&
param
)
{}
template
class
ConvAddBNReluKernel
<
GPU_CL
,
float
>;
}
// namespace operators
}
// namespace paddle_mobile
#endif
src/operators/kernel/cl/conv_add_bn_relu_kernel.cpp
浏览文件 @
4a6dd33f
...
...
@@ -37,6 +37,7 @@ bool ConvAddBNReluKernel<GPU_CL, float>::Init(
auto
bias_ptr
=
bias
->
data
<
float
>
();
const
int
C
=
mean
->
numel
();
float
inv_std_ptr
[
C
];
for
(
int
i
=
0
;
i
<
C
;
i
++
)
{
inv_std_ptr
[
i
]
=
...
...
@@ -55,8 +56,13 @@ bool ConvAddBNReluKernel<GPU_CL, float>::Init(
framework
::
CLImage
*
new_scale
=
new
framework
::
CLImage
();
new_scale
->
Init
(
this
->
cl_helper_
.
CLContext
(),
new_scale_ptr
,
variance
->
dims
());
framework
::
CLImage
*
new_bias
=
new
framework
::
CLImage
();
new_bias
->
Init
(
this
->
cl_helper_
.
CLContext
(),
new_bias_ptr
,
variance
->
dims
());
param
->
SetNewScale
(
new_scale
);
param
->
SetNewBias
(
new_bias
);
...
...
@@ -65,10 +71,23 @@ bool ConvAddBNReluKernel<GPU_CL, float>::Init(
param
->
Filter
()
->
dims
()[
2
]
==
param
->
Filter
()
->
dims
()[
3
]
&&
param
->
Paddings
()[
0
]
==
param
->
Paddings
()[
1
],
"need equal"
);
param
->
SetOffset
(
param
->
Filter
()
->
dims
()[
2
]
/
2
-
static_cast
<
int
>
(
param
->
Paddings
()[
1
]));
this
->
cl_helper_
.
AddKernel
(
"conv_3x3"
,
"conv_add_bn_relu_kernel.cl"
);
int
offset
=
static_cast
<
int
>
(
param
->
Filter
()
->
dims
()[
2
])
/
2
-
static_cast
<
int
>
(
param
->
Paddings
()[
1
]);
param
->
SetOffset
(
offset
);
if
(
param
->
Filter
()
->
WidthOfOneBlock
()
==
1
&&
param
->
Filter
()
->
HeightOfOneBlock
()
==
1
)
{
this
->
cl_helper_
.
AddKernel
(
"conv_1x1"
,
"conv_add_bn_relu_kernel.cl"
);
}
else
if
(
param
->
Filter
()
->
dims
()[
1
]
==
1
)
{
this
->
cl_helper_
.
AddKernel
(
"depth_conv_3x3"
,
"conv_add_bn_relu_kernel.cl"
);
}
else
if
(
param
->
Filter
()
->
WidthOfOneBlock
()
==
3
&&
param
->
Filter
()
->
HeightOfOneBlock
()
==
3
)
{
this
->
cl_helper_
.
AddKernel
(
"conv_3x3"
,
"conv_add_bn_relu_kernel.cl"
);
}
else
{
PADDLE_MOBILE_THROW_EXCEPTION
(
" not support "
);
}
return
true
;
}
...
...
src/operators/kernel/cl/conv_add_kernel.cpp
浏览文件 @
4a6dd33f
...
...
@@ -21,12 +21,63 @@ namespace operators {
template
<
>
bool
ConvAddKernel
<
GPU_CL
,
float
>::
Init
(
FusionConvAddParam
<
GPU_CL
>
*
param
)
{
PADDLE_MOBILE_ENFORCE
(
param
->
Filter
()
->
dims
()[
2
]
==
param
->
Filter
()
->
dims
()[
3
]
&&
param
->
Paddings
()[
0
]
==
param
->
Paddings
()[
1
],
"need equal"
);
int
offset
=
static_cast
<
int
>
(
param
->
Filter
()
->
dims
()[
2
])
/
2
-
static_cast
<
int
>
(
param
->
Paddings
()[
1
]);
param
->
SetOffset
(
offset
);
if
(
param
->
Filter
()
->
WidthOfOneBlock
()
==
1
&&
param
->
Filter
()
->
HeightOfOneBlock
()
==
1
)
{
this
->
cl_helper_
.
AddKernel
(
"conv_1x1"
,
"conv_add_bn_relu_kernel.cl"
);
}
else
if
(
param
->
Filter
()
->
dims
()[
1
]
==
1
)
{
this
->
cl_helper_
.
AddKernel
(
"depth_conv_3x3"
,
"conv_add_bn_relu_kernel.cl"
);
}
else
if
(
param
->
Filter
()
->
WidthOfOneBlock
()
==
3
&&
param
->
Filter
()
->
HeightOfOneBlock
()
==
3
)
{
this
->
cl_helper_
.
AddKernel
(
"conv_3x3"
,
"conv_add_bn_relu_kernel.cl"
);
}
else
{
PADDLE_MOBILE_THROW_EXCEPTION
(
" not support "
);
}
return
true
;
}
template
<
>
void
ConvAddKernel
<
GPU_CL
,
float
>::
Compute
(
const
FusionConvAddParam
<
GPU_CL
>
&
param
)
{}
const
FusionConvAddParam
<
GPU_CL
>
&
param
)
{
auto
kernel
=
this
->
cl_helper_
.
KernelAt
(
0
);
auto
default_work_size
=
this
->
cl_helper_
.
DefaultWorkSize
(
*
param
.
Output
());
int
c_block
=
default_work_size
[
0
];
int
w
=
default_work_size
[
1
];
int
nh
=
default_work_size
[
2
];
auto
input
=
param
.
Input
()
->
GetCLImage
();
auto
filter
=
param
.
Filter
()
->
GetCLImage
();
auto
biase
=
param
.
Bias
()
->
GetCLImage
();
auto
output
=
param
.
Output
();
int
stride
=
param
.
Strides
()[
0
];
int
offset
=
param
.
Offset
();
int
input_c
=
param
.
Input
()
->
CBlock
();
int
input_width
=
param
.
Input
()
->
WidthOfOneBlock
();
int
input_height
=
param
.
Input
()
->
HeightOfOneBlock
();
clSetKernelArg
(
kernel
,
0
,
sizeof
(
int
),
&
c_block
);
clSetKernelArg
(
kernel
,
1
,
sizeof
(
int
),
&
w
);
clSetKernelArg
(
kernel
,
2
,
sizeof
(
int
),
&
nh
);
clSetKernelArg
(
kernel
,
3
,
sizeof
(
cl_mem
),
&
input
);
clSetKernelArg
(
kernel
,
4
,
sizeof
(
cl_mem
),
&
filter
);
clSetKernelArg
(
kernel
,
5
,
sizeof
(
cl_mem
),
&
biase
);
clSetKernelArg
(
kernel
,
8
,
sizeof
(
cl_mem
),
&
output
);
clSetKernelArg
(
kernel
,
9
,
sizeof
(
int
),
&
stride
);
clSetKernelArg
(
kernel
,
10
,
sizeof
(
int
),
&
offset
);
clSetKernelArg
(
kernel
,
11
,
sizeof
(
int
),
&
input_c
);
clSetKernelArg
(
kernel
,
12
,
sizeof
(
int
),
&
input_width
);
clSetKernelArg
(
kernel
,
13
,
sizeof
(
int
),
&
input_height
);
clEnqueueNDRangeKernel
(
this
->
cl_helper_
.
CLCommandQueue
(),
kernel
,
3
,
NULL
,
default_work_size
.
data
(),
NULL
,
0
,
NULL
,
NULL
);
}
template
class
ConvAddKernel
<
GPU_CL
,
float
>;
...
...
src/operators/kernel/cl/conv_kernel.cpp
浏览文件 @
4a6dd33f
...
...
@@ -21,12 +21,62 @@ namespace operators {
template
<
>
bool
ConvKernel
<
GPU_CL
,
float
>::
Init
(
ConvParam
<
GPU_CL
>
*
param
)
{
// this->cl_helper_.AddKernel("conv_3x3", "conv_kernel.cl");
PADDLE_MOBILE_ENFORCE
(
param
->
Filter
()
->
dims
()[
2
]
==
param
->
Filter
()
->
dims
()[
3
]
&&
param
->
Paddings
()[
0
]
==
param
->
Paddings
()[
1
],
"need equal"
);
int
offset
=
static_cast
<
int
>
(
param
->
Filter
()
->
dims
()[
2
])
/
2
-
static_cast
<
int
>
(
param
->
Paddings
()[
1
]);
param
->
SetOffset
(
offset
);
if
(
param
->
Filter
()
->
WidthOfOneBlock
()
==
1
&&
param
->
Filter
()
->
HeightOfOneBlock
()
==
1
)
{
this
->
cl_helper_
.
AddKernel
(
"conv_1x1"
,
"conv_add_bn_relu_kernel.cl"
);
}
else
if
(
param
->
Filter
()
->
dims
()[
1
]
==
1
)
{
this
->
cl_helper_
.
AddKernel
(
"depth_conv_3x3"
,
"conv_add_bn_relu_kernel.cl"
);
}
else
if
(
param
->
Filter
()
->
WidthOfOneBlock
()
==
3
&&
param
->
Filter
()
->
HeightOfOneBlock
()
==
3
)
{
this
->
cl_helper_
.
AddKernel
(
"conv_3x3"
,
"conv_add_bn_relu_kernel.cl"
);
}
else
{
PADDLE_MOBILE_THROW_EXCEPTION
(
" not support "
);
}
return
true
;
}
template
<
>
void
ConvKernel
<
GPU_CL
,
float
>::
Compute
(
const
ConvParam
<
GPU_CL
>
&
param
)
{
auto
kernel
=
this
->
cl_helper_
.
KernelAt
(
0
);
auto
default_work_size
=
this
->
cl_helper_
.
DefaultWorkSize
(
*
param
.
Output
());
int
c_block
=
default_work_size
[
0
];
int
w
=
default_work_size
[
1
];
int
nh
=
default_work_size
[
2
];
auto
input
=
param
.
Input
()
->
GetCLImage
();
auto
filter
=
param
.
Filter
()
->
GetCLImage
();
auto
output
=
param
.
Output
();
int
stride
=
param
.
Strides
()[
0
];
int
offset
=
param
.
Offset
();
int
input_c
=
param
.
Input
()
->
CBlock
();
int
dilation
=
param
.
Dilations
()[
0
];
int
input_width
=
param
.
Input
()
->
WidthOfOneBlock
();
int
input_height
=
param
.
Input
()
->
HeightOfOneBlock
();
clSetKernelArg
(
kernel
,
0
,
sizeof
(
int
),
&
c_block
);
clSetKernelArg
(
kernel
,
1
,
sizeof
(
int
),
&
w
);
clSetKernelArg
(
kernel
,
2
,
sizeof
(
int
),
&
nh
);
clSetKernelArg
(
kernel
,
3
,
sizeof
(
cl_mem
),
&
input
);
clSetKernelArg
(
kernel
,
4
,
sizeof
(
cl_mem
),
&
filter
);
clSetKernelArg
(
kernel
,
5
,
sizeof
(
cl_mem
),
&
output
);
clSetKernelArg
(
kernel
,
6
,
sizeof
(
int
),
&
stride
);
clSetKernelArg
(
kernel
,
7
,
sizeof
(
int
),
&
offset
);
clSetKernelArg
(
kernel
,
8
,
sizeof
(
int
),
&
input_c
);
clSetKernelArg
(
kernel
,
9
,
sizeof
(
int
),
&
dilation
);
clSetKernelArg
(
kernel
,
10
,
sizeof
(
int
),
&
input_width
);
clSetKernelArg
(
kernel
,
11
,
sizeof
(
int
),
&
input_height
);
clEnqueueNDRangeKernel
(
this
->
cl_helper_
.
CLCommandQueue
(),
kernel
,
3
,
NULL
,
default_work_size
.
data
(),
NULL
,
0
,
NULL
,
NULL
);
// auto kernel = this->cl_helper_.KernelAt(0);
// size_t global_work_size[3] = {1, 2, 3};
// clEnqueueNDRangeKernel(this->cl_helper_.CLCommandQueue(), kernel, 3, NULL,
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录