Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
Greenplum
Opencv
提交
e1e92614
O
Opencv
项目概览
Greenplum
/
Opencv
大约 1 年 前同步成功
通知
7
Star
0
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
DevOps
流水线
流水线任务
计划
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
O
Opencv
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
DevOps
DevOps
流水线
流水线任务
计划
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
流水线任务
提交
Issue看板
体验新版 GitCode,发现更多精彩内容 >>
未验证
提交
e1e92614
编写于
9月 16, 2022
作者:
A
Alexander Smorkalov
提交者:
GitHub
9月 16, 2022
浏览文件
操作
浏览文件
下载
差异文件
Merge pull request #22479 from scottchou007:master
Fix issues in opencv_test_dnn from conv48 kernels without bias
上级
819e5b19
a3cb2020
变更
4
展开全部
隐藏空白更改
内联
并排
Showing
4 changed file
with
1050 addition
and
1 deletion
+1050
-1
modules/dnn/src/vkcom/shader/conv48_nobias.comp
modules/dnn/src/vkcom/shader/conv48_nobias.comp
+134
-0
modules/dnn/src/vkcom/shader/conv48_nobias_spv.cpp
modules/dnn/src/vkcom/shader/conv48_nobias_spv.cpp
+913
-0
modules/dnn/src/vkcom/shader/spv_shader.hpp
modules/dnn/src/vkcom/shader/spv_shader.hpp
+1
-0
modules/dnn/src/vkcom/src/op_conv.cpp
modules/dnn/src/vkcom/src/op_conv.cpp
+2
-1
未找到文件。
modules/dnn/src/vkcom/shader/conv48_nobias.comp
0 → 100644
浏览文件 @
e1e92614
#version 450
layout (constant_id = 0) const int LOCAL_SZ_X = 0;
layout (constant_id = 1) const int LOCAL_SZ_Y = 0;
layout (constant_id = 2) const int LOCAL_SZ_Z = 0;
layout (constant_id = 3) const int IN_H = 0;
layout (constant_id = 4) const int IN_W = 0;
layout (constant_id = 5) const int OUT_W = 0;
layout (constant_id = 6) const int STRIDE_H = 0;
layout (constant_id = 7) const int STRIDE_W = 0;
layout (constant_id = 8) const int PAD_H = 0;
layout (constant_id = 9) const int PAD_W = 0;
layout (constant_id = 10) const int FILTER_H = 0;
layout (constant_id = 11) const int FILTER_W = 0;
layout (constant_id = 12) const int CHANNELS = 0;
layout (constant_id = 13) const int BATCH = 0;
layout (constant_id = 14) const int M = 0;
layout (constant_id = 15) const int K = 0;
layout (constant_id = 16) const int N = 0;
layout (constant_id = 17) const int TAIL_M = 0;
layout (constant_id = 18) const int DILATION_H = 0;
layout (constant_id = 19) const int DILATION_W = 0;
#if defined(ACTIVATION_RELU)
#define ACTIVATION_FUNCTION(x) clamp(x, vec4(0.0), vec4(999999999.0))
#elif defined(ACTIVATION_RELU1)
#define ACTIVATION_FUNCTION(x) clamp(x, vec4(-1.0), vec4(1.0))
#elif defined(ACTIVATION_RELU6)
#define ACTIVATION_FUNCTION(x) clamp(x, vec4(0.0), vec4(6.0))
#else
#define ACTIVATION_FUNCTION(x) (x)
#endif
layout(binding = 0) readonly buffer Input0{
float data[];
} src0;
layout(binding = 1) readonly buffer Input1 {
vec4 data[];
} bias;
layout(binding = 2) readonly buffer Input3{
vec4 data[];
} src1;
layout(binding = 3) writeonly buffer Output{
vec4 data[];
} out0;
layout(local_size_x_id = 0, local_size_y_id = 1, local_size_z_id = 2) in;
#define VEC_SIZE 4
#define BLOCK_H 4
#define BLOCK_W 8
#define FILTER_AREA (FILTER_H * FILTER_W)
#define LOAD_A(elm_idx, a_component) \
src0_x = org_x + ((i * VEC_SIZE + elm_idx) % FILTER_W) * DILATION_W; \
src0_y = org_y + (((i * VEC_SIZE + elm_idx) % FILTER_AREA) / FILTER_W) * DILATION_H; \
src0_z = (i * VEC_SIZE + elm_idx) / FILTER_AREA; \
if(src0_y >= 0 && src0_y < IN_H && src0_x >= 0 && src0_x < IN_W) \
{ \
a_component = src0.data[input_batch_offset + src0_z * (IN_H * IN_W) + src0_y * IN_W + src0_x]; \
}
#define A_MULTIPLY_BTILE(a, sliver_num, comp) \
dst_x = (out_y + sliver_num) % OUT_W; \
dst_y = (out_y + sliver_num) / OUT_W; \
org_y = dst_y * STRIDE_H - PAD_H; \
org_x = dst_x * STRIDE_W - PAD_W; \
LOAD_A(0, a.x); \
LOAD_A(1, a.y); \
LOAD_A(2, a.z); \
LOAD_A(3, a.w); \
dot0.comp += dot(brow0, a); \
dot1.comp += dot(brow1, a); \
dot2.comp += dot(brow2, a); \
dot3.comp += dot(brow3, a); \
dot4.comp += dot(brow4, a); \
dot5.comp += dot(brow5, a); \
dot6.comp += dot(brow6, a); \
dot7.comp += dot(brow7, a);
void main()
{
int gx = int(gl_GlobalInvocationID.x);
int gy = int(gl_GlobalInvocationID.y);
int gz = int(gl_GlobalInvocationID.z);
int out_x = BLOCK_W * gx;
int out_y = BLOCK_H * gy;
int input_batch_offset = gz * IN_H * IN_W * CHANNELS;
int output_batch_offset = gz * M * N / VEC_SIZE;
if (out_x < N && gy < M / BLOCK_H)
{
int width0 = K / VEC_SIZE;
int width1 = N / VEC_SIZE;
int src1_read0_offset = out_x * width0;
vec4 dot0 = vec4(0.f);
vec4 dot1 = vec4(0.f);
vec4 dot2 = vec4(0.f);
vec4 dot3 = vec4(0.f);
vec4 dot4 = vec4(0.f);
vec4 dot5 = vec4(0.f);
vec4 dot6 = vec4(0.f);
vec4 dot7 = vec4(0.f);
int i = 0;
do
{
int dst_x, dst_y, org_x, org_y, src0_x, src0_y, src0_z;
vec4 a0 = vec4(0.f), a1 = vec4(0.f), a2 = vec4(0.f), a3 = vec4(0.f);
vec4 brow0 = src1.data[src1_read0_offset]; src1_read0_offset += width0;
vec4 brow1 = src1.data[src1_read0_offset]; src1_read0_offset += width0;
vec4 brow2 = src1.data[src1_read0_offset]; src1_read0_offset += width0;
vec4 brow3 = src1.data[src1_read0_offset]; src1_read0_offset += width0;
vec4 brow4 = src1.data[src1_read0_offset]; src1_read0_offset += width0;
vec4 brow5 = src1.data[src1_read0_offset]; src1_read0_offset += width0;
vec4 brow6 = src1.data[src1_read0_offset]; src1_read0_offset += width0;
vec4 brow7 = src1.data[src1_read0_offset]; src1_read0_offset += width0;
src1_read0_offset += 1 - BLOCK_W * width0;
A_MULTIPLY_BTILE(a0, 0, x);
A_MULTIPLY_BTILE(a1, 1, y);
A_MULTIPLY_BTILE(a2, 2, z);
A_MULTIPLY_BTILE(a3, 3, w);
i++;
}
while( i < width0 );
out0.data[output_batch_offset + (out_x + 0) * M / VEC_SIZE + gy] = ACTIVATION_FUNCTION(dot0);
out0.data[output_batch_offset + (out_x + 1) * M / VEC_SIZE + gy] = ACTIVATION_FUNCTION(dot1);
out0.data[output_batch_offset + (out_x + 2) * M / VEC_SIZE + gy] = ACTIVATION_FUNCTION(dot2);
out0.data[output_batch_offset + (out_x + 3) * M / VEC_SIZE + gy] = ACTIVATION_FUNCTION(dot3);
out0.data[output_batch_offset + (out_x + 4) * M / VEC_SIZE + gy] = ACTIVATION_FUNCTION(dot4);
out0.data[output_batch_offset + (out_x + 5) * M / VEC_SIZE + gy] = ACTIVATION_FUNCTION(dot5);
out0.data[output_batch_offset + (out_x + 6) * M / VEC_SIZE + gy] = ACTIVATION_FUNCTION(dot6);
out0.data[output_batch_offset + (out_x + 7) * M / VEC_SIZE + gy] = ACTIVATION_FUNCTION(dot7);
}
}
modules/dnn/src/vkcom/shader/conv48_nobias_spv.cpp
0 → 100644
浏览文件 @
e1e92614
此差异已折叠。
点击以展开。
modules/dnn/src/vkcom/shader/spv_shader.hpp
浏览文件 @
e1e92614
...
...
@@ -14,6 +14,7 @@ namespace cv { namespace dnn { namespace vkcom {
extern
const
unsigned
int
dw_conv_spv
[
1760
];
extern
const
unsigned
int
permute_spv
[
765
];
extern
const
unsigned
int
conv48_spv
[
7458
];
extern
const
unsigned
int
conv48_nobias_spv
[
7182
];
extern
const
unsigned
int
lrn_spv
[
1845
];
extern
const
unsigned
int
concat_spv
[
541
];
extern
const
unsigned
int
avg_pool_spv
[
1538
];
...
...
modules/dnn/src/vkcom/src/op_conv.cpp
浏览文件 @
e1e92614
...
...
@@ -167,7 +167,8 @@ bool OpConv::forward(Tensor& in, Tensor& filter_weights, Tensor& bias, Tensor& o
config_
.
local_size_z
=
1
;
config_
.
block_height
=
4
;
config_
.
block_width
=
8
;
createShaderModule
(
conv48_spv
,
sizeof
(
conv48_spv
));
has_bias_
?
createShaderModule
(
conv48_spv
,
sizeof
(
conv48_spv
))
:
createShaderModule
(
conv48_nobias_spv
,
sizeof
(
conv48_nobias_spv
));
// specialization constants
VkSpecializationInfo
spec_info
;
ShaderConstant
shader_constant
;
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录