Commit 438e456c
Authored Nov 20, 2017 by Alexander Alekhin

Merge pull request #10113 from wzw-intel:fusion

Parents: 75b980ac 45d11dde

6 changed files, +613 -449
modules/dnn/src/dnn.cpp                                     +75   -2
modules/dnn/src/layers/convolution_layer.cpp                +39   -4
modules/dnn/src/ocl4dnn/include/default_kernel_config.hpp   +432  -432
modules/dnn/src/ocl4dnn/include/ocl4dnn.hpp                 +7    -2
modules/dnn/src/ocl4dnn/src/ocl4dnn_conv_spatial.cpp        +48   -9
modules/dnn/src/opencl/conv_layer_spatial.cl                +12   -0
modules/dnn/src/dnn.cpp
@@ -1233,12 +1233,13 @@ struct Net::Impl
                 }
             }
 
-            // For now, OpenCL target only support fusion with activation of ReLU/ChannelsPReLU
+            // For now, OpenCL target only support fusion with activation of ReLU/ChannelsPReLU/Power
             if ( preferableTarget != DNN_TARGET_OPENCL ||
                  (preferableTarget == DNN_TARGET_OPENCL &&
                   nextData &&
                   (!nextData->type.compare("ReLU") ||
-                   !nextData->type.compare("ChannelsPReLU"))) )
+                   !nextData->type.compare("ChannelsPReLU") ||
+                   !nextData->type.compare("Power"))) )
             {
                 Ptr<ActivationLayer> nextActivLayer;
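A note on the gate above: std::string::compare() returns 0 when the strings are equal, so !nextData->type.compare("ReLU") means "the next layer's type is exactly ReLU". A minimal stand-alone illustration (not part of the patch):

#include <cassert>
#include <string>

int main()
{
    std::string type = "Power";         // layer type, as in LayerData::type
    assert(!type.compare("Power"));     // compare() == 0 -> eligible for fusion
    assert(type.compare("ReLU") != 0);  // non-zero -> not a ReLU layer
    return 0;
}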
@@ -1253,6 +1254,78 @@ struct Net::Impl
                     printf_(("\tfused with %s\n", nextActivLayer->name.c_str()));
                     activData->skipFlags[DNN_BACKEND_DEFAULT] = true;
                     ld.outputBlobs = layers[lpNext.lid].outputBlobs;
+
+                    if ( preferableTarget == DNN_TARGET_OPENCL )
+                    {
+                        nextData = &layers[activData->consumers[0].lid];
+                        lpNext = LayerPin(activData->consumers[0].lid, 0);
+                    }
                 }
             }
+
+            // fuse convlution layer followed by eltwise + relu
+            if ( preferableTarget == DNN_TARGET_OPENCL )
+            {
+                Ptr<EltwiseLayer> nextEltwiseLayer;
+                if( nextData )
+                    nextEltwiseLayer = nextData->layerInstance.dynamicCast<EltwiseLayer>();
+
+                if( !nextEltwiseLayer.empty() && pinsToKeep.count(lpNext) == 0 )
+                {
+                    LayerData *eltwiseData = nextData;
+                    // go down from the second input and find the first non-skipped layer.
+                    LayerData *downLayerData = &layers[eltwiseData->inputBlobsId[1].lid];
+                    while (downLayerData->skipFlags[DNN_BACKEND_DEFAULT])
+                    {
+                        downLayerData = &layers[downLayerData->inputBlobsId[0].lid];
+                    }
+
+                    // second input layer is current layer.
+                    if ( ld.id == downLayerData->id )
+                    {
+                        // go down from the first input and find the first non-skipped layer
+                        downLayerData = &layers[eltwiseData->inputBlobsId[0].lid];
+                        while (downLayerData->skipFlags[DNN_BACKEND_DEFAULT])
+                        {
+                            if ( !downLayerData->type.compare("Eltwise") )
+                                downLayerData = &layers[downLayerData->inputBlobsId[1].lid];
+                            else
+                                downLayerData = &layers[downLayerData->inputBlobsId[0].lid];
+                        }
+
+                        Ptr<ConvolutionLayer> convLayer;
+                        if( downLayerData )
+                            convLayer = downLayerData->layerInstance.dynamicCast<ConvolutionLayer>();
+
+                        // first input layer is convolution layer
+                        if( !convLayer.empty() )
+                        {
+                            // fuse eltwise + activation layer
+                            LayerData *firstConvLayerData = downLayerData;
+                            {
+                                nextData = &layers[eltwiseData->consumers[0].lid];
+                                lpNext = LayerPin(eltwiseData->consumers[0].lid, 0);
+                                Ptr<ActivationLayer> nextActivLayer;
+                                if( nextData )
+                                    nextActivLayer = nextData->layerInstance.dynamicCast<ActivationLayer>();
+
+                                if( !nextActivLayer.empty() && pinsToKeep.count(lpNext) == 0 &&
+                                    (!nextData->type.compare("ReLU") ||
+                                     !nextData->type.compare("ChannelsPReLU") ||
+                                     !nextData->type.compare("Power")) &&
+                                    currLayer->setActivation(nextActivLayer) )
+                                {
+                                    CV_Assert(firstConvLayerData->outputBlobs.size() == 1 && ld.inputBlobs.size() == 1);
+                                    ld.inputBlobs.push_back(&firstConvLayerData->outputBlobs[0]);
+                                    printf_(("\tfused with %s\n", nextEltwiseLayer->name.c_str()));
+                                    printf_(("\tfused with %s\n", nextActivLayer->name.c_str()));
+                                    eltwiseData->skipFlags[DNN_BACKEND_DEFAULT] = true;
+                                    nextData->skipFlags[DNN_BACKEND_DEFAULT] = true;
+                                    ld.outputBlobs = layers[lpNext.lid].outputBlobs;
+                                }
+                            }
+                        }
+                    }
+                }
+            }
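The block added above targets the conv -> Eltwise -> ReLU shape typical of residual networks: it walks down each Eltwise input, skipping layers already fused away, and only fuses when the second input resolves to the current layer and the first to another convolution. A hedged usage sketch (the model file names are hypothetical; any net with residual blocks exercises this path on an OpenCL target):

#include <opencv2/dnn.hpp>

int main()
{
    // Hypothetical files; e.g. a ResNet contains conv -> eltwise -> ReLU blocks.
    cv::dnn::Net net = cv::dnn::readNetFromCaffe("resnet50.prototxt",
                                                 "resnet50.caffemodel");
    net.setPreferableTarget(cv::dnn::DNN_TARGET_OPENCL);  // enables this fusion pass

    cv::Mat img = cv::Mat::zeros(224, 224, CV_8UC3);      // dummy input
    net.setInput(cv::dnn::blobFromImage(img));
    cv::Mat out = net.forward();  // layer fusion runs when the net is first set up
    return 0;
}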
modules/dnn/src/layers/convolution_layer.cpp
@@ -142,6 +142,9 @@ public:
 }
 };
 
+#define IS_POWER_LAYER(layer) \
+    (!layer.empty() && !layer->type.compare("Power"))
+
 //TODO: simultaneously convolution and bias addition for cache optimization
 class ConvolutionLayerImpl : public BaseConvolutionLayerImpl
 {
@@ -161,6 +164,7 @@ public:
     bool newWeightAndBias;
     bool newActiv;
     ocl4dnnFusedActiv_t activType;
+    float power;
 #endif
 
     ConvolutionLayerImpl()
     {
@@ -169,6 +173,7 @@ public:
         newWeightAndBias = false;
         newActiv = false;
         activType = OCL4DNN_CONV_FUSED_ACTIV_NONE;
+        power = 0.f;
 #endif
     }
@@ -225,6 +230,22 @@ public:
 #ifdef HAVE_OPENCL
         newActiv = true;
         activType = OCL4DNN_CONV_FUSED_ACTIV_NONE;
+
+        if (preferableTarget == DNN_TARGET_OPENCL)
+        {
+            Ptr<PowerLayer> activ_power = activ.dynamicCast<PowerLayer>();
+            if (!activ_power.empty())
+            {
+                if (activ_power->scale != 1.f || activ_power->shift != 0.f)
+                    newWeightAndBias = true;
+
+                if (activ_power->scale != 1.f)
+                    weightsMat.release();
+
+                power = activ_power->power;
+                activType = OCL4DNN_CONV_FUSED_ACTIV_POWER;
+            }
+        }
 #endif
         return !activ.empty();
     }
@@ -727,11 +748,12 @@ public:
                 biasvec[k] = biasMat.at<float>(k);
         }
 
-        if( !bnorm.empty() || !scaleLayer.empty() )
+        if( !bnorm.empty() || !scaleLayer.empty() || IS_POWER_LAYER(activ) )
         {
             Mat scale, shift, scale2, shift2;
             const float *scaleptr = 0, *shiftptr = 0;
             const float *scaleptr2 = 0, *shiftptr2 = 0;
+            float a = 1.f, b = 0.f;
 
             if( !bnorm.empty() )
             {
@@ -758,7 +780,14 @@ public:
                 }
             }
 
-            if( shiftptr || shiftptr2 )
+            if (IS_POWER_LAYER(activ))
+            {
+                Ptr<PowerLayer> activ_power = activ.dynamicCast<PowerLayer>();
+                a = activ_power->scale;
+                b = activ_power->shift;
+            }
+
+            if( shiftptr || shiftptr2 || b != 0.f )
                 fusedBias = true;
 
             for( int i = 0; i < outCn; i++ )
@@ -771,9 +800,9 @@ public:
                 int j, wcols = weightsMat.cols;
 
                 for( j = 0; j < wcols; j++ )
-                    w_i[j] *= (s1*s2);
+                    w_i[j] *= (s1*s2*a);
 
-                biasvec[i] = biasvec[i]*(s1*s2) + (delta1*s2 + delta2);
+                biasvec[i] = biasvec[i]*(s1*s2*a) + (delta1*s2*a + delta2*a + b);
             }
         }
         biasvec[outCn] = biasvec[outCn+1] = biasvec[outCn-1];
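The rewrite above extends the batch-norm/scale folding with the affine part of a Power layer, y = (a*x + b)^p: a and b are absorbed into the convolution weights and bias, so the kernel only has to apply pow(., p). A hedged numeric check of the identity used (values arbitrary):

#include <cassert>
#include <cmath>

int main()
{
    const float s1 = 1.5f, delta1 = 0.2f;      // folded batch-norm: s1*x + delta1
    const float s2 = 0.8f, delta2 = -0.1f;     // folded scale layer: s2*x + delta2
    const float a = 2.0f, b = 0.5f, p = 2.0f;  // Power layer: (a*x + b)^p
    const float w = 0.7f, bias = 0.3f, x = 1.1f;

    // Unfused pipeline: conv -> bnorm -> scale -> Power, step by step.
    float conv  = w * x + bias;
    float bn    = s1 * conv + delta1;
    float sc    = s2 * bn + delta2;
    float ref   = std::pow(a * sc + b, p);

    // Fused: weights/bias rewritten as in the patch, pow() left for the kernel.
    float wf    = w * (s1 * s2 * a);
    float biasf = bias * (s1 * s2 * a) + (delta1 * s2 * a + delta2 * a + b);
    float got   = std::pow(wf * x + biasf, p);

    assert(std::fabs(ref - got) < 1e-4f);
    return 0;
}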
@@ -827,10 +856,15 @@ public:
                 CV_Assert(!reluslope.empty());
                 convolutionOp->setActivPReLU(true, reluslope);
             }
+            else if ( activType == OCL4DNN_CONV_FUSED_ACTIV_POWER)
+            {
+                convolutionOp->setActivPower(true, power);
+            }
             else
             {
                 convolutionOp->setActivReLU(false, 0);
                 convolutionOp->setActivPReLU(false, reluslope);
+                convolutionOp->setActivPower(false, 1.f);
             }
             newActiv = false;
         }
@@ -840,6 +874,7 @@ public:
             int batch_size = inpMat.size[0];
 
             return convolutionOp->Forward(inpMat,
+                                          inputs.size() == 2 ? inputs[1] : UMat(),
                                           umat_blobs[0],
                                           (hasBias() || fusedBias) ? umat_blobs[1] : UMat(),
                                           outMat,
modules/dnn/src/ocl4dnn/include/default_kernel_config.hpp

(This diff is collapsed in the original view: +432 -432.)
modules/dnn/src/ocl4dnn/include/ocl4dnn.hpp
@@ -77,6 +77,7 @@ typedef enum {
     OCL4DNN_CONV_FUSED_ACTIV_NONE = 0,
     OCL4DNN_CONV_FUSED_ACTIV_RELU = 1,
-    OCL4DNN_CONV_FUSED_ACTIV_PRELU = 2
+    OCL4DNN_CONV_FUSED_ACTIV_PRELU = 2,
+    OCL4DNN_CONV_FUSED_ACTIV_POWER = 3
 } ocl4dnnFusedActiv_t;
 
 template<typename Dtype>
@@ -86,11 +87,13 @@ class OCL4DNNConvSpatial
     explicit OCL4DNNConvSpatial(OCL4DNNConvConfig config);
     ~OCL4DNNConvSpatial();
 
-    bool Forward(const UMat& bottom_data, const UMat& weight,
+    bool Forward(const UMat& bottom_data, const UMat& bottom_data2,
+                 const UMat& weight,
                  const UMat& bias,
                  UMat& top_data, int32_t batch_size);
     void setActivReLU(bool fuse_activ, float slope);
     void setActivPReLU(bool fuse_activ, std::vector<float> &slope);
+    void setActivPower(bool fuse_activ, float power);
     void setBias(bool bias_term);
 
 private:
@@ -252,8 +255,8 @@ class OCL4DNNConvSpatial
                          int lx, int ly, int lz,
                          bool swizzle, bool nullLocal);
     void generateTunerItems(std::vector< cv::Ptr<tunerParam> > &tunerItems);
-    void setFusionDefine(ocl4dnnFusedActiv_t fused_activ);
-    void setFusionArg(ocl4dnnFusedActiv_t fused_activ, ocl::Kernel &kernel, cl_uint &argIdx);
+    void setFusionDefine(ocl4dnnFusedActiv_t fused_activ, bool fused_eltwise);
+    void setFusionArg(ocl4dnnFusedActiv_t fused_activ, bool fused_eltwise, ocl::Kernel &kernel, cl_uint &argIdx);
 
     int32_t group_;
     bool bias_term_;
@@ -305,6 +308,8 @@ class OCL4DNNConvSpatial
     float negative_slope_;
     UMat negative_slope_umat_;
     ocl4dnnFusedActiv_t fused_activ_;
+    float power_;
+    bool fused_eltwise_;
 };
 
 typedef enum {
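The two new members track the fusion state; Forward() (see the .cpp diff below) flips fused_eltwise_ based on whether the caller passed a second input. A hedged stand-in with illustrative types, not the real class:

#include <cassert>
#include <vector>

struct ConvSketch
{
    bool fused_eltwise_ = false;

    bool Forward(const std::vector<float>& bottom, const std::vector<float>& bottom2)
    {
        fused_eltwise_ = !bottom2.empty();  // mirrors the UMat::empty() test
        return true;
    }
};

int main()
{
    ConvSketch conv;
    conv.Forward({1.f}, {});     // no second operand: plain convolution
    assert(!conv.fused_eltwise_);
    conv.Forward({1.f}, {2.f});  // second operand present: eltwise fusion enabled
    assert(conv.fused_eltwise_);
    return 0;
}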
modules/dnn/src/ocl4dnn/src/ocl4dnn_conv_spatial.cpp
@@ -79,6 +79,8 @@ OCL4DNNConvSpatial<Dtype>::OCL4DNNConvSpatial(OCL4DNNConvConfig config)
     group_ = config.group;
 
     fused_activ_ = OCL4DNN_CONV_FUSED_ACTIV_NONE;
+    fused_eltwise_ = false;
+    power_ = 1.f;
     negative_slope_ = 0;
     prev_kernel_type_ = -1;
     tuned_ = false;
@@ -141,8 +143,11 @@ OCL4DNNConvSpatial<Dtype>::~OCL4DNNConvSpatial()
 }
 
 template<typename Dtype>
-void OCL4DNNConvSpatial<Dtype>::setFusionDefine(ocl4dnnFusedActiv_t fused_activ)
+void OCL4DNNConvSpatial<Dtype>::setFusionDefine(ocl4dnnFusedActiv_t fused_activ, bool fused_eltwise)
 {
+    if (fused_eltwise)
+        addDef("FUSED_CONV_ELTWISE", 1);
+
     switch (fused_activ) {
         case OCL4DNN_CONV_FUSED_ACTIV_RELU:
             addDef("FUSED_CONV_RELU", 1);
@@ -150,6 +155,9 @@ void OCL4DNNConvSpatial<Dtype>::setFusionDefine(ocl4dnnFusedActiv_t fused_activ)
         case OCL4DNN_CONV_FUSED_ACTIV_PRELU:
             addDef("FUSED_CONV_PRELU", 1);
             break;
+        case OCL4DNN_CONV_FUSED_ACTIV_POWER:
+            addDef("FUSED_CONV_POWER", 1);
+            break;
         default:
             ;
     }
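setFusionDefine() specializes the generic kernel source at compile time; each addDef() becomes a -D preprocessor option for the OpenCL build. A hypothetical stand-alone rendition of the option string it effectively produces (the real method calls an internal addDef() instead):

#include <cassert>
#include <string>

enum Activ { ACTIV_NONE = 0, ACTIV_RELU, ACTIV_PRELU, ACTIV_POWER };

static std::string fusionBuildOptions(Activ activ, bool fusedEltwise)
{
    std::string opts;
    if (fusedEltwise)
        opts += " -D FUSED_CONV_ELTWISE=1";
    switch (activ)
    {
    case ACTIV_RELU:  opts += " -D FUSED_CONV_RELU=1";  break;
    case ACTIV_PRELU: opts += " -D FUSED_CONV_PRELU=1"; break;
    case ACTIV_POWER: opts += " -D FUSED_CONV_POWER=1"; break;
    default: break;
    }
    return opts;
}

int main()
{
    assert(fusionBuildOptions(ACTIV_POWER, true) ==
           " -D FUSED_CONV_ELTWISE=1 -D FUSED_CONV_POWER=1");
    return 0;
}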
@@ -157,8 +165,11 @@ void OCL4DNNConvSpatial<Dtype>::setFusionDefine(ocl4dnnFusedActiv_t fused_activ)
 }
 
 template<typename Dtype>
-void OCL4DNNConvSpatial<Dtype>::setFusionArg(ocl4dnnFusedActiv_t fused_activ, ocl::Kernel &kernel, cl_uint &argIdx)
+void OCL4DNNConvSpatial<Dtype>::setFusionArg(ocl4dnnFusedActiv_t fused_activ, bool fused_eltwise, ocl::Kernel &kernel, cl_uint &argIdx)
 {
+    if (fused_eltwise)
+        kernel.set(argIdx++, (cl_mem)bottom_data2_.handle(ACCESS_READ));
+
     switch (fused_activ) {
         case OCL4DNN_CONV_FUSED_ACTIV_RELU:
             kernel.set(argIdx++, (float)negative_slope_);
@@ -166,6 +177,9 @@ void OCL4DNNConvSpatial<Dtype>::setFusionArg(ocl4dnnFusedActiv_t fused_activ, oc
         case OCL4DNN_CONV_FUSED_ACTIV_PRELU:
             kernel.set(argIdx++, (cl_mem)negative_slope_umat_.handle(ACCESS_READ));
             break;
+        case OCL4DNN_CONV_FUSED_ACTIV_POWER:
+            kernel.set(argIdx++, (float)power_);
+            break;
         default:
             ;
     }
@@ -255,7 +269,7 @@ void OCL4DNNConvSpatial<Dtype>::setupKernelDetails(int32_t kernelType,
         addDef("ALIGNED_NUM_FILTERS", (int)alignSize(M_, simd_size));
         addDef("OUT_BLOCK_SIZE", (output_block_width*output_block_height));
         addDef("APPLY_BIAS", bias_term_);
-        setFusionDefine(fused_activ_);
+        setFusionDefine(fused_activ_, fused_eltwise_);
 
         src_ = cv::ocl::dnn::conv_layer_spatial_oclsrc;
     }
@@ -277,7 +291,7 @@ void OCL4DNNConvSpatial<Dtype>::setupKernelDetails(int32_t kernelType,
         addDef("APPLY_BIAS", bias_term_);
         addDef("OUTPUT_Z", M_);
         addDef("ZPAR", 1);
-        setFusionDefine(fused_activ_);
+        setFusionDefine(fused_activ_, fused_eltwise_);
 
         src_ = cv::ocl::dnn::conv_layer_spatial_oclsrc;
     }
@@ -314,7 +328,7 @@ void OCL4DNNConvSpatial<Dtype>::setupKernelDetails(int32_t kernelType,
         addDef("TILE_N_LAST", M_ % 32);
         addDef("TILE_N_LAST_DIV8", (M_ % 32) / 8);
         addDef("APPLY_BIAS", bias_term_);
-        setFusionDefine(fused_activ_);
+        setFusionDefine(fused_activ_, fused_eltwise_);
         src_ = ocl::dnn::conv_layer_spatial_oclsrc;
     }
 }
@@ -370,14 +384,37 @@ void OCL4DNNConvSpatial<Dtype>::setActivPReLU(bool fuse_activ, std::vector<float
         fused_activ_ = OCL4DNN_CONV_FUSED_ACTIV_NONE;
 }
 
+template<typename Dtype>
+void OCL4DNNConvSpatial<Dtype>::setActivPower(bool fuse_activ, float power)
+{
+    if ( fuse_activ )
+    {
+        fused_activ_ = OCL4DNN_CONV_FUSED_ACTIV_POWER;
+        power_ = power;
+    }
+    else
+        fused_activ_ = OCL4DNN_CONV_FUSED_ACTIV_NONE;
+}
+
 template<typename Dtype>
 bool OCL4DNNConvSpatial<Dtype>::Forward(const UMat& bottom,
+                                        const UMat& bottom2,
                                         const UMat& weight,
                                         const UMat& bias,
                                         UMat& top,
                                         int32_t numImages)
 {
     num_ = numImages;
+
+    if (!bottom2.empty())
+    {
+        fused_eltwise_ = true;
+        bottom_data2_ = bottom2;
+    }
+    else
+    {
+        fused_eltwise_ = false;
+    }
+
     prepareKernel(bottom, top, weight, bias, numImages);
     if (bestKernelConfig.empty())
         return false;
@@ -428,7 +465,8 @@ void OCL4DNNConvSpatial<Dtype>::generateKey()
                << "p" << pad_w_ << "x" << pad_h_ << "_"
                << "num" << num_ << "_"
                << "M" << M_ << "_"
-               << "activ" << fused_activ_;
+               << "activ" << fused_activ_ << "_"
+               << "eltwise" << fused_eltwise_;
 
     key_ = ocl::Device::getDefault().vendorName() + "_EU" +
            cv::format("%d", ocl::Device::getDefault().maxComputeUnits()) + "_" + keyBuilder.str();
@@ -678,7 +716,7 @@ bool OCL4DNNConvSpatial<float>::convolve(const UMat &bottom, UMat &top,
         return false;
 
     cl_uint argIdx = 0;
-    setFusionArg(fused_activ_, kernel, argIdx);
+    setFusionArg(fused_activ_, fused_eltwise_, kernel, argIdx);
 
     UMat img_buffer;
     if (image_offset)
@@ -771,7 +809,7 @@ bool OCL4DNNConvSpatial<float>::convolve(const UMat &bottom, UMat &top,
         return false;
 
     cl_uint argIdx = 0;
-    setFusionArg(fused_activ_, kernel, argIdx);
+    setFusionArg(fused_activ_, fused_eltwise_, kernel, argIdx);
 
     UMat img_buffer;
     if (image_offset)
@@ -888,7 +926,7 @@ bool OCL4DNNConvSpatial<float>::convolve(const UMat &bottom, UMat &top,
         return false;
 
     cl_uint argIdx = 0;
-    setFusionArg(fused_activ_, kernel, argIdx);
+    setFusionArg(fused_activ_, fused_eltwise_, kernel, argIdx);
     kernel.set(argIdx++, ocl::KernelArg::PtrReadOnly(bottom));
     kernel.set(argIdx++, image_offset);
     kernel.set(argIdx++, ocl::KernelArg::PtrReadOnly(weight));
@@ -1491,6 +1529,7 @@ void OCL4DNNConvSpatial<Dtype>::prepareKernel(const UMat &bottom, UMat &top,
     if (loadCachedConfig()) // check in-memory cache
         return;
 
     if (loadTunedConfig()) // check external storage
         return;
modules/dnn/src/opencl/conv_layer_spatial.cl
@@ -52,12 +52,21 @@
 #elif defined(FUSED_CONV_PRELU)
 #define ACTIVATION_RELU_FUNCTION(x, c) ((Dtype)(x) > 0 ? (Dtype)(x) : ((Dtype)(x) * (Dtype)(negative_slope[c])))
 #define NEGATIVE_SLOPE_ARG __global const Dtype *negative_slope,
+#elif defined(FUSED_CONV_POWER)
+#define ACTIVATION_RELU_FUNCTION(x, c) pow(x, power)
+#define NEGATIVE_SLOPE_ARG Dtype power,
 #else
 #define ACTIVATION_RELU_FUNCTION(x, c) (x)
 #define NEGATIVE_SLOPE_ARG
 #endif
 
+#ifdef FUSED_CONV_ELTWISE
+#define ACTIVATION_FUNCTION(_dst_, _offset_, _data_, _channel_) do { (_dst_)[(_offset_)] = ACTIVATION_RELU_FUNCTION(eltwise_data[(_offset_)] + (_data_), _channel_); } while(0)
+#define ELTWISE_DATA_ARG __global Dtype* eltwise_data,
+#else
 #define ACTIVATION_FUNCTION(_dst_, _offset_, _data_, _channel_) do { (_dst_)[(_offset_)] = ACTIVATION_RELU_FUNCTION(_data_, _channel_); } while(0)
+#define ELTWISE_DATA_ARG
+#endif
+
 #define __CAT(x, y) x##y
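To make the write-back concrete, here is a hedged host-side C++ rendition of the two macros with FUSED_CONV_POWER and FUSED_CONV_ELTWISE active; in the real kernel, eltwise_data and power are the extra arguments appended by setFusionArg():

#include <cassert>
#include <cmath>

typedef float Dtype;
using std::pow;

// FUSED_CONV_POWER variant of the activation, as defined above:
#define ACTIVATION_RELU_FUNCTION(x, c) pow((Dtype)(x), power)
// FUSED_CONV_ELTWISE variant: add the second operand before activating.
#define ACTIVATION_FUNCTION(_dst_, _offset_, _data_, _channel_) \
    do { (_dst_)[(_offset_)] = ACTIVATION_RELU_FUNCTION(eltwise_data[(_offset_)] + (_data_), _channel_); } while(0)

int main()
{
    Dtype dst[1] = { 0.f };
    Dtype eltwise_data[1] = { 2.0f };  // residual (second eltwise input)
    Dtype power = 2.0f;                // kernel argument set by setFusionArg()

    ACTIVATION_FUNCTION(dst, 0, 1.5f, 0);        // 1.5f stands in for the conv result

    assert(std::fabs(dst[0] - 12.25f) < 1e-4f);  // pow(2.0 + 1.5, 2) == 12.25
    return 0;
}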
@@ -99,6 +108,7 @@
 #ifdef KERNEL_BASIC
 
 __kernel void ConvolveBasic(
+    ELTWISE_DATA_ARG
     NEGATIVE_SLOPE_ARG
     __global Dtype* image_data,
     int image_offset,
@@ -193,6 +203,7 @@ __attribute__((intel_reqd_sub_group_size(SIMD_SIZE)))
 #endif
 __kernel void
 convolve_simd(
+    ELTWISE_DATA_ARG
     NEGATIVE_SLOPE_ARG
     __global Dtype* inputs_base,
     filter_qualifier Dtype* weights_base,
@@ -413,6 +424,7 @@ typedef struct float0 { float s0; } float0; //never used but makes compiler happ
 #define ROW_PITCH input_width
 
 #define GEMM_LIKE_KERNEL_ARGS \
+    ELTWISE_DATA_ARG \
     NEGATIVE_SLOPE_ARG \
     const __global Dtype *src0, \
     const __global Dtype *src1, \