Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
MegEngine 天元
MegEngine
提交
177dec94
MegEngine
项目概览
MegEngine 天元
/
MegEngine
大约 1 年 前同步成功
通知
399
Star
4705
Fork
582
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
DevOps
流水线
流水线任务
计划
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
MegEngine
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
DevOps
DevOps
流水线
流水线任务
计划
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
流水线任务
提交
Issue看板
体验新版 GitCode,发现更多精彩内容 >>
提交
177dec94
编写于
9月 20, 2021
作者:
M
Megvii Engine Team
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
feat(mgb/opr): add bgr2gray mode for cvtcolor opr
GitOrigin-RevId: d50415b236080a13d31c43158f9d03e2aef48d59
上级
000517c6
变更
7
显示空白变更内容
内联
并排
Showing
7 changed file
with
170 addition
and
5 deletion
+170
-5
dnn/src/arm_common/cvt_color/opr_impl.cpp
dnn/src/arm_common/cvt_color/opr_impl.cpp
+19
-4
dnn/src/common/cv/cvt_color.h
dnn/src/common/cv/cvt_color.h
+0
-1
dnn/src/cuda/cvt_color/cvt_color.cu
dnn/src/cuda/cvt_color/cvt_color.cu
+73
-0
dnn/src/naive/cvt_color/opr_impl.cpp
dnn/src/naive/cvt_color/opr_impl.cpp
+20
-0
dnn/src/x86/cvt_color/opr_impl.cpp
dnn/src/x86/cvt_color/opr_impl.cpp
+45
-0
dnn/test/common/cvt_color.h
dnn/test/common/cvt_color.h
+6
-0
imperative/python/test/unit/functional/test_functional.py
imperative/python/test/unit/functional/test_functional.py
+7
-0
未找到文件。
dnn/src/arm_common/cvt_color/opr_impl.cpp
浏览文件 @
177dec94
...
...
@@ -748,11 +748,16 @@ void cvt_BT601_yuv_transform(const Mat8u& src, Mat8u& dst) {
}
// namespace
template
<
bool
rgb
=
true
>
void
cvt_rgb2gray_32f_neon
(
const
Mat32f
&
src
,
Mat32f
&
dst
)
{
static
const
float
coef
[]
=
{
0.299
f
,
0.587
f
,
0.114
f
};
// load coef into neon types
const
float32x4_t
v_cr
(
vdupq_n_f32
(
coef
[
0
])),
v_cg
(
vdupq_n_f32
(
coef
[
1
])),
v_cb
(
vdupq_n_f32
(
coef
[
2
]));
float
coef_c0
=
rgb
?
coef
[
0
]
:
coef
[
2
];
float
coef_c1
=
coef
[
1
];
float
coef_c2
=
rgb
?
coef
[
2
]
:
coef
[
0
];
const
float32x4_t
v_cr
(
vdupq_n_f32
(
coef_c0
)),
v_cg
(
vdupq_n_f32
(
coef_c1
)),
v_cb
(
vdupq_n_f32
(
coef_c2
));
#define EXPAND(offset) \
v_src = vld3q_f32(psrc + offset * 3); \
...
...
@@ -796,7 +801,7 @@ void cvt_rgb2gray_32f_neon(const Mat32f& src, Mat32f& dst) {
}
// loop over left pixels
for
(;
psrc
<
pend
;
psrc
+=
3
,
pdst
+=
1
)
{
*
pdst
=
psrc
[
0
]
*
coef
[
0
]
+
psrc
[
1
]
*
coef
[
1
]
+
psrc
[
2
]
*
coef
[
2
]
;
*
pdst
=
psrc
[
0
]
*
coef
_c0
+
psrc
[
1
]
*
coef_c1
+
psrc
[
2
]
*
coef_c2
;
}
}
#undef EXPAND
...
...
@@ -1187,7 +1192,7 @@ void cvt_rgb2gray<float>(const Mat32f& src, Mat32f& dst) {
megdnn_assert
(
src
.
rows
()
==
dst
.
rows
());
megdnn_assert
(
src
.
cols
()
==
dst
.
cols
());
return
cvt_rgb2gray_32f_neon
(
src
,
dst
);
return
cvt_rgb2gray_32f_neon
<
true
>
(
src
,
dst
);
}
// gray2rgb
...
...
@@ -1381,6 +1386,16 @@ void cvt_bgr2gray<uchar>(const Mat8u& src, Mat8u& dst) {
}
}
// Float BGR -> gray conversion.
// Verifies that src has 3 channels, dst has 1 channel, and that both images
// have the same number of rows and columns, then runs the shared NEON routine.
template <>
void cvt_bgr2gray<float>(const Mat32f& src, Mat32f& dst) {
    megdnn_assert(src.channels() == 3);
    megdnn_assert(dst.channels() == 1);
    megdnn_assert(src.rows() == dst.rows());
    megdnn_assert(src.cols() == dst.cols());

    // rgb = false makes the routine swap the weights applied to the first
    // and third channel.
    cvt_rgb2gray_32f_neon<false>(src, dst);
}
template
<
>
void
cvt_bgr2rgb
<
uchar
>
(
const
Mat8u
&
src
,
Mat8u
&
dst
)
{
return
cvt_rgb2bgr
<
uchar
>
(
src
,
dst
);
...
...
dnn/src/common/cv/cvt_color.h
浏览文件 @
177dec94
...
...
@@ -45,7 +45,6 @@
_cb(cvt_rgba2bgr, float) \
_cb(cvt_rgba2gray, float) \
_cb(cvt_rgb2bgr, float) \
_cb(cvt_bgr2gray, float) \
_cb(cvt_bgr2rgb, float) \
_cb(cvt_yuv2gray_nv21, float) \
_cb(cvt_yuv2rgb_nv21, float) \
...
...
dnn/src/cuda/cvt_color/cvt_color.cu
浏览文件 @
177dec94
...
...
@@ -145,6 +145,73 @@ __global__ void cvt_rgb2gray_32f_kernel(const float* src, float* dst,
}
}
// Converts packed 8-bit BGR pixels to 8-bit gray values.
//
// The image is addressed as one run of rows * cols pixels (3 source bytes and
// 1 destination byte per pixel). Threads whose global index t is below
// (rows * cols) / U8_PROCESS_PER_THREADS_X each convert one group of pixels
// through vector loads/stores; the thread whose index equals that quotient
// converts the remaining (rows * cols) % U8_PROCESS_PER_THREADS_X pixels one
// by one. All other threads do nothing.
//
// Gray is computed in fixed point with 14 fractional bits:
//   (c0 * 1868 + c1 * 9617 + c2 * 4899 + (1 << 13)) >> 14
// where (1 << 13) adds one half before the shift.
//
// src       packed 3-channel input
// dst       1-channel output
// rows/cols image size in pixels
// src_step, dst_step  not referenced by this kernel
//
// NOTE(review): the vector path moves exactly 12 source bytes and 4
// destination bytes, so it presumably requires U8_PROCESS_PER_THREADS_X == 4
// -- confirm against the macro definition.
// NOTE(review): the uint3 / uint32_t accesses on src and dst assume both base
// pointers are 4-byte aligned -- confirm against the caller.
__global__ void cvt_bgr2gray_8u_kernel(const uchar* src, uchar* dst,
                                       const size_t rows, const size_t cols,
                                       const size_t src_step,
                                       const size_t dst_step) {
    size_t t = blockIdx.x * blockDim.x + threadIdx.x;

    if (t < (rows * cols) / U8_PROCESS_PER_THREADS_X) {
        size_t offset = t * U8_PROCESS_PER_THREADS_X;
        src += 3 * offset;
        dst += 1 * offset;

        // The staging buffers are read/written through 4-byte-aligned vector
        // types, so they must not be left at the default byte alignment.
        alignas(4) uchar temp_des[4];
        alignas(4) uchar temp_src[12];
        *((uint3*)temp_src) = *((const uint3*)src);

#pragma unroll
        for (int k = 0; k < 4; ++k) {
            temp_des[k] = (temp_src[3 * k + 0] * 1868 +
                           temp_src[3 * k + 1] * 9617 +
                           temp_src[3 * k + 2] * 4899 + (1 << 13)) >>
                          14;
        }

        *((uint32_t*)dst) = *((uint32_t*)temp_des);
    } else if (t == (rows * cols) / U8_PROCESS_PER_THREADS_X) {
        size_t rest = (rows * cols) % U8_PROCESS_PER_THREADS_X;
        if (rest != 0) {
            size_t offset = t * U8_PROCESS_PER_THREADS_X;
            src += 3 * offset;
            dst += 1 * offset;

            // Scalar tail; the index type matches `rest` to avoid a
            // signed/unsigned comparison.
            for (size_t i = 0; i < rest; i++, src += 3, dst += 1)
                dst[0] = (src[0] * 1868 + src[1] * 9617 + src[2] * 4899 +
                          (1 << 13)) >>
                         14;
        }
    }
}
// Converts packed float BGR pixels to float gray values, one pixel per thread.
//
// Thread t (global index) reads the three floats of pixel t and writes
//   c0 * 0.114f + c1 * 0.587f + c2 * 0.299f
// to dst[t]; threads with t >= rows * cols do nothing.
// src_step and dst_step are not referenced by this kernel.
__global__ void cvt_bgr2gray_32f_kernel(const float* src, float* dst,
                                        const size_t rows, const size_t cols,
                                        const size_t src_step,
                                        const size_t dst_step) {
    const size_t pixel = blockIdx.x * blockDim.x + threadIdx.x;
    if (!(pixel < rows * cols))
        return;

    const float* in = src + pixel * 3;
    float* out = dst + pixel * 1;

    // Fetch the whole pixel with a single float3 copy.
    float channels[3];
    *((float3*)channels) = *((float3*)in);

    const float gray = channels[0] * 0.114f + channels[1] * 0.587f +
                       channels[2] * 0.299f;
    out[0] = gray;
}
__global__
void
cvt_gray2rgb_8u_kernel
(
const
uchar
*
src
,
uchar
*
dst
,
const
size_t
rows
,
const
size_t
cols
,
const
size_t
src_step
,
...
...
@@ -683,6 +750,9 @@ void cvt_color_8u_proxy(const uchar* src, uchar* dst, const size_t src_rows,
case
CvtColor
::
Mode
::
RGB2GRAY
:
CALL_CVT_OPR_8U_KERNEL
(
rgb2gray
)
break
;
case
CvtColor
::
Mode
::
BGR2GRAY
:
CALL_CVT_OPR_8U_KERNEL
(
bgr2gray
)
break
;
case
CvtColor
::
Mode
::
RGB2YUV
:
CALL_CVT_OPR_8U_KERNEL
(
rgb2yuv
)
break
;
...
...
@@ -739,6 +809,9 @@ void cvt_color_32f_proxy(const float* src, float* dst, const size_t src_rows,
case
CvtColor
::
Mode
::
RGB2GRAY
:
CALL_CVT_OPR_32F_KERNEL
(
rgb2gray
)
break
;
case
CvtColor
::
Mode
::
BGR2GRAY
:
CALL_CVT_OPR_32F_KERNEL
(
bgr2gray
)
break
;
case
CvtColor
::
Mode
::
RGB2YUV
:
CALL_CVT_OPR_32F_KERNEL
(
rgb2yuv
)
break
;
...
...
dnn/src/naive/cvt_color/opr_impl.cpp
浏览文件 @
177dec94
...
...
@@ -684,6 +684,26 @@ void cvt_bgr2gray<uchar>(const Mat8u& src, Mat8u& dst) {
}
}
// Reference float BGR -> gray conversion.
// After checking that src has 3 channels, dst has 1 channel and both share
// the same rows/cols, every output pixel is set to
//   R * 0.299f + G * 0.587f + B * 0.114f
// with B, G, R read from channels 0, 1, 2 of the source pixel.
template <>
void cvt_bgr2gray<float>(const Mat32f& src, Mat32f& dst) {
    megdnn_assert(src.channels() == 3);
    megdnn_assert(dst.channels() == 1);
    megdnn_assert(src.rows() == dst.rows());
    megdnn_assert(src.cols() == dst.cols());

    const float weight_r = 0.299f;
    const float weight_g = 0.587f;
    const float weight_b = 0.114f;

    const auto nr_rows = src.rows();
    const auto nr_cols = src.cols();
    for (size_t row = 0; row < nr_rows; ++row) {
        for (size_t col = 0; col < nr_cols; ++col) {
            const float blue = src.at(row, col, 0);
            const float green = src.at(row, col, 1);
            const float red = src.at(row, col, 2);
            dst.at(row, col, 0) =
                    red * weight_r + green * weight_g + blue * weight_b;
        }
    }
}
template
<
>
void
cvt_bgr2rgb
<
uchar
>
(
const
Mat8u
&
src
,
Mat8u
&
dst
)
{
return
cvt_rgb2bgr
<
uchar
>
(
src
,
dst
);
...
...
dnn/src/x86/cvt_color/opr_impl.cpp
浏览文件 @
177dec94
...
...
@@ -1311,6 +1311,41 @@ void cvt_rgb2gray_32f_SSE_4_2(const Mat32f& src, Mat32f& dst) {
}
}
// Float BGR -> gray conversion using SSE intrinsics.
// For each row, packed pixels (3 floats each) are converted four at a time:
// the B, G and R channels are gathered into separate vectors, scaled by
// 0.114f, 0.587f and 0.299f respectively, summed and stored unaligned.
// Pixels left over at the end of a row are converted with scalar code.
MEGDNN_ATTRIBUTE_TARGET("sse4.2")
void cvt_bgr2gray_32f_SSE_4_2(const Mat32f& src, Mat32f& dst) {
    const float coef_r = 0.299f, coef_g = 0.587f, coef_b = 0.114f;

    const __m128 weight_r = _mm_set1_ps(coef_r);
    const __m128 weight_g = _mm_set1_ps(coef_g);
    const __m128 weight_b = _mm_set1_ps(coef_b);

    for (size_t row = 0; row < src.rows(); ++row) {
        const float* in = src.ptr(row);
        float* out = dst.ptr(row);
        const float* const row_end = in + src.cols() * 3;

        // Vector body: at least 4 pixels (12 floats) remain.
        while (row_end - in >= 4 * 3) {
            const __m128 blue = _mm_mul_ps(
                    _mm_set_ps(in[9], in[6], in[3], in[0]), weight_b);
            const __m128 green = _mm_mul_ps(
                    _mm_set_ps(in[10], in[7], in[4], in[1]), weight_g);
            const __m128 red = _mm_mul_ps(
                    _mm_set_ps(in[11], in[8], in[5], in[2]), weight_r);

            _mm_storeu_ps(out, _mm_add_ps(red, _mm_add_ps(green, blue)));

            in += 4 * 3;
            out += 4;
        }

        // Scalar tail for the last (cols % 4) pixels of the row.
        while (in < row_end) {
            out[0] = in[1] * coef_g + in[0] * coef_b + in[2] * coef_r;
            in += 3;
            out += 1;
        }
    }
}
MEGDNN_ATTRIBUTE_TARGET
(
"sse4.2"
)
void
cvt_rgba2rgb_8u_SSE_4_2
(
const
Mat8u
&
src
,
Mat8u
&
dst
)
{
__m128i
dst_data0
,
dst_data1
,
dst_data2
;
...
...
@@ -1705,6 +1740,16 @@ void cvt_bgr2gray<uchar>(const Mat8u& src, Mat8u& dst) {
}
}
// Float BGR -> gray conversion.
// Verifies that src has 3 channels, dst has 1 channel, and that both images
// have the same number of rows and columns, then hands the work to
// cvt_bgr2gray_32f_SSE_4_2.
template <>
void cvt_bgr2gray<float>(const Mat32f& src, Mat32f& dst) {
    megdnn_assert(src.channels() == 3);
    megdnn_assert(dst.channels() == 1);
    megdnn_assert(src.rows() == dst.rows());
    megdnn_assert(src.cols() == dst.cols());

    cvt_bgr2gray_32f_SSE_4_2(src, dst);
}
template
<
>
void
cvt_bgr2rgb
<
uchar
>
(
const
Mat8u
&
src
,
Mat8u
&
dst
)
{
return
cvt_rgb2bgr
<
uchar
>
(
src
,
dst
);
...
...
dnn/test/common/cvt_color.h
浏览文件 @
177dec94
...
...
@@ -133,6 +133,9 @@ inline std::vector<TestArg> get_cuda_args() {
for
(
size_t
i
=
2
;
i
<=
10
;
++
i
)
{
for
(
size_t
j
=
2
;
j
<=
10
;
++
j
)
{
cur_param
.
mode
=
Mode
::
RGB2GRAY
;
args
.
emplace_back
(
cur_param
,
TensorShape
{
1
,
i
,
j
,
3
},
dtype
::
Uint8
());
cur_param
.
mode
=
Mode
::
BGR2GRAY
;
args
.
emplace_back
(
cur_param
,
TensorShape
{
1
,
i
,
j
,
3
},
dtype
::
Uint8
());
cur_param
.
mode
=
Mode
::
RGB2YUV
;
...
...
@@ -146,6 +149,9 @@ inline std::vector<TestArg> get_cuda_args() {
dtype
::
Uint8
());
// float32 test
cur_param
.
mode
=
Mode
::
RGB2GRAY
;
args
.
emplace_back
(
cur_param
,
TensorShape
{
1
,
i
,
j
,
3
},
dtype
::
Float32
());
cur_param
.
mode
=
Mode
::
BGR2GRAY
;
args
.
emplace_back
(
cur_param
,
TensorShape
{
1
,
i
,
j
,
3
},
dtype
::
Float32
());
cur_param
.
mode
=
Mode
::
RGB2YUV
;
...
...
imperative/python/test/unit/functional/test_functional.py
浏览文件 @
177dec94
...
...
@@ -1057,12 +1057,19 @@ def test_cvt_color():
def
rgb2gray
(
rgb
):
return
np
.
dot
(
rgb
[...,
:
3
],
[
0.299
,
0.587
,
0.114
])
def
bgr2gray
(
bgr
):
return
np
.
dot
(
bgr
[...,
:
3
],
[
0.114
,
0.587
,
0.299
])
inp
=
np
.
random
.
randn
(
3
,
3
,
3
,
3
).
astype
(
np
.
float32
)
out
=
np
.
expand_dims
(
rgb2gray
(
inp
),
3
).
astype
(
np
.
float32
)
x
=
tensor
(
inp
)
y
=
F
.
vision
.
cvt_color
(
x
,
mode
=
"RGB2GRAY"
)
np
.
testing
.
assert_allclose
(
y
.
numpy
(),
out
,
atol
=
1e-5
)
out1
=
np
.
expand_dims
(
bgr2gray
(
inp
),
3
).
astype
(
np
.
float32
)
y1
=
F
.
vision
.
cvt_color
(
x
,
mode
=
"BGR2GRAY"
)
np
.
testing
.
assert_allclose
(
y1
.
numpy
(),
out1
,
atol
=
1e-5
)
@
pytest
.
mark
.
parametrize
(
"val"
,
[
2
,
[
2
,],
[
2
,
3
]])
def
test_ones
(
val
):
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录