Commit c7b6ef35

Authored July 08, 2020 by Megvii Engine Team
Committed by Xinran Xu on August 25, 2020

feat(dnn/cuda): add warp perspective backward mat idx

GitOrigin-RevId: b4b494bb69eeffed564fb86e587e59fa409f7426
Parent: a773d076

Showing 17 changed files with 965 additions and 580 deletions.
dnn/include/megdnn/oprs/imgproc.h                +36   -0
dnn/src/common/warp_perspective.cpp              +6    -4
dnn/src/cuda/warp_perspective/backward_data.cpp  +38   -16
dnn/src/cuda/warp_perspective/backward_data.cu   +54   -33
dnn/src/cuda/warp_perspective/backward_mat.cpp   +24   -8
dnn/src/cuda/warp_perspective/backward_mat.cu    +44   -36
dnn/src/cuda/warp_perspective/common.h           +29   -34
dnn/src/cuda/warp_perspective/opr_impl.h         +12   -6
dnn/src/naive/warp_perspective/opr_impl.cpp      +35   -14
dnn/src/naive/warp_perspective/opr_impl.h        +141  -114
dnn/test/common/warp_perspective.cpp             +34   -5
dnn/test/common/warp_perspective.h               +4    -0
dnn/test/cuda/warp_perspective.cpp               +128  -104
src/opr/impl/imgproc.cpp                         +153  -39
src/opr/impl/imgproc.sereg.h                     +48   -2
src/opr/include/megbrain/opr/imgproc.h           +176  -165
src/opr/test/imgproc.cpp                         +3    -0
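What the change does, as read from the diffs below: the optional mat_idx tensor that WarpPerspectiveForward already accepts is threaded through both backward operators (BackwardData and BackwardMat). When mat_idx is present, batch entry n of mat/diff is associated with source image mat_idx[n], so several transformation matrices can sample one shared source image in the forward pass, and the backward pass must accumulate their gradients into that shared slot. A minimal sketch of that indexing contract, with hypothetical names (this is not the MegDNN API):

#include <cstddef>
#include <vector>

// grad: N_SRC slots of src-shaped gradient, zero-initialized beforehand.
// diff_slot_contrib: the contribution computed from transform n's output grad.
// midx: N_MAT entries, each in [0, N_SRC); may be null (dense case).
void scatter_grad_per_batch(const std::vector<float>& diff_slot_contrib,
                            std::vector<float>& grad, const int* midx,
                            std::size_t n, std::size_t slot_elems) {
    // destination slot: midx[n] if an index tensor is given, else n itself
    std::size_t dst = midx ? static_cast<std::size_t>(midx[n]) : n;
    for (std::size_t i = 0; i < slot_elems; ++i)
        grad[dst * slot_elems + i] += diff_slot_contrib[i];  // accumulate
}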
dnn/include/megdnn/oprs/imgproc.h
@@ -105,15 +105,32 @@ class WarpPerspectiveBackwardData: public WarpPerspectiveBase {
      * \param[out] grad the backpropagated gradient wrt. src
      * \param[out] workspace temporary workspace to perform backward
      */
+    void exec(_megdnn_tensor_in mat, _megdnn_tensor_in diff,
+              _megdnn_tensor_out grad, _megdnn_workspace workspace) {
+        exec(mat, {}, diff, grad, workspace);
+    }
     virtual void exec(_megdnn_tensor_in mat,
+                      _megdnn_tensor_in mat_idx,
                       _megdnn_tensor_in diff,
                       _megdnn_tensor_out grad,
                       _megdnn_workspace workspace) = 0;
+    size_t get_workspace_in_bytes(const TensorLayout& mat,
+                                  const TensorLayout& diff,
+                                  const TensorLayout& grad) {
+        return get_workspace_in_bytes(mat, {}, diff, grad);
+    }
     virtual size_t get_workspace_in_bytes(const TensorLayout& mat,
+                                          const TensorLayout& mat_idx,
                                           const TensorLayout& diff,
                                           const TensorLayout& grad) = 0;

 protected:
     void check_exec(const TensorLayout& mat,
+                    const TensorLayout& mat_idx,
                     const TensorLayout& diff,
                     const TensorLayout& grad,
                     size_t workspace_in_bytes);
@@ -129,18 +146,37 @@ class WarpPerspectiveBackwardMat: public WarpPerspectiveBase {
      * \param[out] grad the backpropagated gradient wrt. mat
      * \param[out] workspace temporary workspace to perform backward
      */
+    void exec(_megdnn_tensor_in src, _megdnn_tensor_in mat,
+              _megdnn_tensor_in diff, _megdnn_tensor_out grad,
+              _megdnn_workspace workspace) {
+        exec(src, mat, {}, diff, grad, workspace);
+    }
     virtual void exec(_megdnn_tensor_in src,
                       _megdnn_tensor_in mat,
+                      _megdnn_tensor_in mat_idx,
                       _megdnn_tensor_in diff,
                       _megdnn_tensor_out grad,
                       _megdnn_workspace workspace) = 0;
+    size_t get_workspace_in_bytes(const TensorLayout& src,
+                                  const TensorLayout& mat,
+                                  const TensorLayout& diff,
+                                  const TensorLayout& grad) {
+        return get_workspace_in_bytes(src, mat, {}, diff, grad);
+    }
     virtual size_t get_workspace_in_bytes(const TensorLayout& src,
                                           const TensorLayout& mat,
+                                          const TensorLayout& mat_idx,
                                           const TensorLayout& diff,
                                           const TensorLayout& grad) = 0;

 protected:
     void check_exec(const TensorLayout& src,
                     const TensorLayout& mat,
+                    const TensorLayout& mat_idx,
                     const TensorLayout& diff,
                     const TensorLayout& grad,
                     size_t workspace_in_bytes);
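The header change keeps the old entry points as non-virtual wrappers that forward an empty tensor ({}) as mat_idx to the new pure-virtual overloads, so existing call sites compile unchanged while every backend is forced to implement the mat_idx-aware version. A minimal sketch of the pattern, assuming hypothetical names:

struct TensorND { void* raw_ptr = nullptr; };  // stand-in, not megdnn::TensorND

struct BackwardOp {
    // legacy signature: a thin non-virtual wrapper, kept for compatibility
    void exec(const TensorND& mat, const TensorND& diff) {
        exec(mat, /*mat_idx=*/{}, diff);  // empty tensor means "no index"
    }
    // new entry point every backend must implement
    virtual void exec(const TensorND& mat, const TensorND& mat_idx,
                      const TensorND& diff) = 0;
    virtual ~BackwardOp() = default;
};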
dnn/src/common/warp_perspective.cpp
@@ -255,29 +255,31 @@ void WarpPerspectiveForward::check_exec_allow_nhwc_mat_idx(
 }

 void WarpPerspectiveBackwardData::check_exec(const TensorLayout& mat,
+                                             const TensorLayout& mat_idx,
                                              const TensorLayout& diff,
                                              const TensorLayout& grad,
                                              size_t workspace_in_bytes) {
-    check_layout_fwd(grad, mat, diff);
+    check_layout_fwd(grad, mat, mat_idx, diff);
     megdnn_assert(grad.dtype == dtype::Float32() MEGDNN_INC_FLOAT16(
                           || grad.dtype == dtype::BFloat16()),
                   "Backward WarpPerspective only supports Float32/BFloat16.");
-    auto required_workspace_in_bytes = get_workspace_in_bytes(mat, diff, grad);
+    auto required_workspace_in_bytes =
+            get_workspace_in_bytes(mat, mat_idx, diff, grad);
     megdnn_assert(workspace_in_bytes >= required_workspace_in_bytes);
 }

 void WarpPerspectiveBackwardMat::check_exec(const TensorLayout& src,
                                             const TensorLayout& mat,
+                                            const TensorLayout& mat_idx,
                                             const TensorLayout& diff,
                                             const TensorLayout& grad,
                                             size_t workspace_in_bytes) {
-    check_layout_fwd(src, mat, diff);
+    check_layout_fwd(src, mat, mat_idx, diff);
     megdnn_assert_eq_layout(mat, grad);
     megdnn_assert(grad.dtype == dtype::Float32() MEGDNN_INC_FLOAT16(
                           || grad.dtype == dtype::BFloat16()),
                   "Backward WarpPerspective only supports Float32/BFloat16.");
     auto required_workspace_in_bytes =
-            get_workspace_in_bytes(src, mat, diff, grad);
+            get_workspace_in_bytes(src, mat, mat_idx, diff, grad);
     megdnn_assert(workspace_in_bytes >= required_workspace_in_bytes);
 }
dnn/src/cuda/warp_perspective/backward_data.cpp
@@ -6,7 +6,8 @@
  *
  * Unless required by applicable law or agreed to in writing,
  * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+ * implied.
  */
 #include "src/cuda/warp_perspective/opr_impl.h"
@@ -18,8 +19,8 @@ namespace megdnn {
 namespace cuda {

 WorkspaceBundle WarpPerspectiveBackwardDataImpl::get_workspace_bundle(
-        void* ptr, const TensorLayout& mat, const TensorLayout& diff,
-        const TensorLayout& grad) const {
+        void* ptr, const TensorLayout& mat, const TensorLayout& mat_idx,
+        const TensorLayout& diff, const TensorLayout& grad) const {
     SmallVector<size_t> sizes;
     TensorLayout fmat = mat;
     TensorLayout fdiff = diff;
@@ -33,20 +34,24 @@ WorkspaceBundle WarpPerspectiveBackwardDataImpl::get_workspace_bundle(
     get_workspace(fmat);
     get_workspace(fdiff);
     get_workspace(fgrad);
-    sizes.push_back(get_float32_workspace_in_bytes(fmat, fdiff, fgrad));
+    sizes.push_back(get_float32_workspace_in_bytes(fmat, mat_idx, fdiff, fgrad));
     return {ptr, std::move(sizes)};
 }

 void WarpPerspectiveBackwardDataImpl::exec(_megdnn_tensor_in smat,
+                                           _megdnn_tensor_in mat_idx,
                                            _megdnn_tensor_in sdiff,
                                            _megdnn_tensor_out sgrad,
                                            _megdnn_workspace sworkspace) {
-    check_exec(smat.layout, sdiff.layout, sgrad.layout, sworkspace.size);
+    check_exec(smat.layout, mat_idx.layout, sdiff.layout, sgrad.layout,
+               sworkspace.size);
     TensorND mat = smat;
     TensorND diff = sdiff;
     TensorND grad = sgrad;
-    auto bundle = get_workspace_bundle(sworkspace.raw_ptr, smat.layout,
-                                       sdiff.layout, sgrad.layout);
+    auto bundle =
+            get_workspace_bundle(sworkspace.raw_ptr, smat.layout,
+                                 mat_idx.layout, sdiff.layout, sgrad.layout);
     auto ctypecvt = CompTypeCvter<dtype::BFloat16, dtype::Float32>(
             concrete_handle(this->handle()), &bundle);
     if (sgrad.layout.dtype.enumv() == DTypeTrait<dtype::BFloat16>::enumv) {
@@ -60,6 +65,15 @@ void WarpPerspectiveBackwardDataImpl::exec(_megdnn_tensor_in smat,
     auto N = grad.layout.shape[0], C = grad.layout.shape[1],
          IH = grad.layout.shape[2], IW = grad.layout.shape[3],
          OH = diff.layout.shape[2], OW = diff.layout.shape[3];
+    int* midx_ptr = nullptr;
+    if (mat_idx.raw_ptr) {
+        megdnn_assert(mat_idx.layout.ndim == 1);
+        N = mat_idx.layout.shape[0];
+        midx_ptr = mat_idx.ptr<int>();
+    } else {
+        megdnn_assert(mat_idx.layout.ndim == 0);
+    }
     auto bval = param().border_val;
     auto bmode = warp_perspective::get_bmode(param().bmode);
@@ -67,10 +81,11 @@ void WarpPerspectiveBackwardDataImpl::exec(_megdnn_tensor_in smat,
     size_t max_batch_x_channel = max_batch_x_channel_size();
     if (batch_x_channel_size <= max_batch_x_channel) {
         warp_perspective::backward_data_proxy(
-                mat.ptr<dt_float32>(), diff.ptr<dt_float32>(),
-                grad.ptr<dt_float32>(),
-                reinterpret_cast<float*>(workspace.raw_ptr), N, C, IH, IW,
-                OH, OW, bval, bmode, stream);
+                mat.ptr<dt_float32>(), midx_ptr, diff.ptr<dt_float32>(),
+                grad.ptr<dt_float32>(),
+                reinterpret_cast<float*>(workspace.raw_ptr), N,
+                grad.layout.shape[0], C, IH, IW, OH, OW, bval, bmode, stream);
     } else {
         dt_float32* mat_ptr = mat.ptr<dt_float32>();
         dt_float32* diff_ptr = diff.ptr<dt_float32>();
@@ -80,10 +95,10 @@ void WarpPerspectiveBackwardDataImpl::exec(_megdnn_tensor_in smat,
             size_t curr_batch_size =
                     N > max_batch_size ? max_batch_size : N;
             warp_perspective::backward_data_proxy(
-                    mat_ptr, diff_ptr, grad_ptr,
+                    mat_ptr, midx_ptr, diff_ptr, grad_ptr,
                     reinterpret_cast<float*>(workspace.raw_ptr),
-                    curr_batch_size, C, IH, IW, OH, OW, bval, bmode,
-                    stream);
+                    curr_batch_size, grad.layout.shape[0], C, IH, IW, OH,
+                    OW, bval, bmode, stream);
             if (N <= max_batch_size) {
                 break;
@@ -91,7 +106,11 @@ void WarpPerspectiveBackwardDataImpl::exec(_megdnn_tensor_in smat,
                 N -= max_batch_size;
                 mat_ptr += curr_batch_size * mat.layout.stride[0];
                 diff_ptr += curr_batch_size * diff.layout.stride[0];
-                grad_ptr += curr_batch_size * grad.layout.stride[0];
+                if (midx_ptr == nullptr) {
+                    grad_ptr += curr_batch_size * grad.layout.stride[0];
+                } else {
+                    midx_ptr += curr_batch_size;
+                }
             }
         }
     }
@@ -102,8 +121,8 @@ void WarpPerspectiveBackwardDataImpl::exec(_megdnn_tensor_in smat,
 }

 size_t WarpPerspectiveBackwardDataImpl::get_float32_workspace_in_bytes(
-        const TensorLayout& /* mat */, const TensorLayout& diff,
-        const TensorLayout& grad) const {
+        const TensorLayout& /* mat */, const TensorLayout& mat_idx,
+        const TensorLayout& diff, const TensorLayout& grad) const {
     auto N = grad.shape[0], C = grad.shape[1], IH = grad.shape[2],
         IW = grad.shape[3];
     auto OH = diff.shape[2], OW = diff.shape[3];
@@ -112,6 +131,9 @@ size_t WarpPerspectiveBackwardDataImpl::get_float32_workspace_in_bytes(
     size_t max_batch_size = N;
     size_t max_batch_x_channel = max_batch_x_channel_size();
     if (N * C > max_batch_x_channel) {
+        /* when batch size is too large, the workspace only contains part of
+           grad, this will cause out of range with mat idx */
+        megdnn_assert(mat_idx.ndim == 0,
+                      "batch size is too large, it's unsupported with mat idx backward.");
         max_batch_size = max_batch_x_channel / C;
     }
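Note the asymmetry this wrapper introduces once the batch must be split: mat and diff still advance chunk by chunk, but with mat_idx present the gradient base pointer stays fixed, because every chunk may scatter into arbitrary source slots; only the index pointer moves. A simplified restatement of that loop, assuming hypothetical helper names (not the MegDNN code):

#include <algorithm>
#include <cstddef>

// Hypothetical per-chunk launcher; stands in for backward_data_proxy.
void run_chunk(const float*, const int*, const float*, float*, std::size_t);

void split_batches(const float* mat_ptr, const int* midx_ptr,
                   const float* diff_ptr, float* grad_ptr, std::size_t n,
                   std::size_t max_batch, std::size_t mat_stride,
                   std::size_t diff_stride, std::size_t grad_stride) {
    while (n > 0) {
        std::size_t cur = std::min(n, max_batch);
        run_chunk(mat_ptr, midx_ptr, diff_ptr, grad_ptr, cur);
        mat_ptr += cur * mat_stride;
        diff_ptr += cur * diff_stride;
        if (midx_ptr)
            midx_ptr += cur;  // indexed: consume indices, grad base stays fixed
        else
            grad_ptr += cur * grad_stride;  // dense: grad advances with the batch
        n -= cur;
    }
}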
dnn/src/cuda/warp_perspective/backward_data.cu
@@ -6,7 +6,8 @@
  *
  * Unless required by applicable law or agreed to in writing,
  * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+ * implied.
  */
 #include "src/cuda/warp_perspective/common.h"
@@ -20,16 +21,21 @@ namespace warp_perspective {
 const int factor = 4;

 template <typename Getter, int factor>
 __global__ void warp_perspective_bwd_data_kernel(const float* hidden,
-                                                 const float* mat, float* dst,
-                                                 int N, int C, int IH, int IW,
-                                                 int OH, int OW)
-{
+                                                 const float* mat,
+                                                 const int* midx, float* dst,
+                                                 int N, int C, int IH, int IW,
+                                                 int OH, int OW) {
     Getter getter;
     int n = blockIdx.z;
     int ow = blockIdx.x * blockDim.x + threadIdx.x;
     int oh = blockIdx.y * blockDim.y + threadIdx.y;
     hidden += n * C * OH * OW;
-    dst += n * C * factor * IH * IW;
+    if (midx) {
+        dst += midx[n] * C * factor * IH * IW;
+    } else {
+        dst += n * C * factor * IH * IW;
+    }
     mat += n * 3 * 3;
     if (ow < OW && oh < OH) {
         float denominator = mat[6] * ow + mat[7] * oh + mat[8];
@@ -72,15 +78,19 @@ __global__ void add_up_kernel(const float *src, float *dst,
 }

 template <int factor>
-__global__ void warp_perspective_bwd_data_constant_kernel(const float* hidden,
-                                                          const float* mat,
-                                                          float* dst, int N,
-                                                          int C, int IH, int IW,
-                                                          int OH, int OW)
+__global__ void warp_perspective_bwd_data_constant_kernel(
+        const float* hidden, const float* mat, const int* midx, float* dst,
+        int N, int C, int IH, int IW, int OH, int OW)
 {
+    int n = blockIdx.z;
     int ow = blockIdx.x * blockDim.x + threadIdx.x;
     int oh = blockIdx.y * blockDim.y + threadIdx.y;
-    hidden += blockIdx.z * C * OH * OW;
-    dst += blockIdx.z * C * factor * IH * IW;
-    mat += blockIdx.z * 3 * 3;
+    hidden += n * C * OH * OW;
+    if (midx) {
+        dst += midx[n] * C * factor * IH * IW;
+    } else {
+        dst += n * C * factor * IH * IW;
+    }
+    mat += n * 3 * 3;
     if (ow < OW && oh < OH) {
         float denominator = mat[6] * ow + mat[7] * oh + mat[8];
         float iw = (mat[0] * ow + mat[1] * oh + mat[2]) / denominator;
@@ -119,30 +129,35 @@ __global__ void warp_perspective_bwd_data_constant_kernel(const float *hidden,
 }

 size_t get_backward_data_workspace_in_bytes(int N, int C, int IH, int IW,
                                             int /* OH */, int /* OW */,
                                             BorderMode /* bmode */) {
     return N * C * IH * IW * factor * sizeof(float);
 }

-void backward_data_proxy(const float* mat, const float* diff, float* grad,
-                         float* workspace, int N, int C, int IH, int IW,
-                         int OH, int OW, float bval, BorderMode mode,
-                         cudaStream_t stream) {
+void backward_data_proxy(const float* mat, const int* midx, const float* diff,
+                         float* grad, float* workspace, int N, int N_SRC,
+                         int C, int IH, int IW, int OH, int OW, float bval,
+                         BorderMode mode, cudaStream_t stream) {
     (void)bval;
     (void)grad;
     const int BY = 16, BX = 32;
     {
         dim3 threads(BX, BY);
         dim3 blocks((OW + BX - 1) / BX, (OH + BY - 1) / BY, N);
-        cuda_check(cudaMemsetAsync(workspace, 0,
-                                   sizeof(float) * factor * N * C * IH * IW,
-                                   stream));
-#define DISPATCH(Getter)                                                \
-    warp_perspective_bwd_data_kernel<Getter, factor><<<blocks, threads, \
-        0, stream>>>(diff, mat, workspace, N, C, IH, IW, OH, OW);
+        if (midx) {
+            cuda_check(cudaMemsetAsync(
+                    workspace, 0,
+                    sizeof(float) * factor * N_SRC * C * IH * IW, stream));
+        } else {
+            cuda_check(cudaMemsetAsync(
+                    workspace, 0, sizeof(float) * factor * N * C * IH * IW,
+                    stream));
+        }
+#define DISPATCH(Getter)                                                    \
+    warp_perspective_bwd_data_kernel<Getter, factor>                        \
+            <<<blocks, threads, 0, stream>>>(diff, mat, midx, workspace, N, \
+                                             C, IH, IW, OH, OW);
         switch (mode) {
             case BORDER_REPLICATE:
                 DISPATCH(ReplicateGetter);
@@ -158,8 +173,9 @@ void backward_data_proxy(const float *mat, const float *diff,
                 break;
             case BORDER_CONSTANT:
                 warp_perspective_bwd_data_constant_kernel<factor>
-                        <<<blocks, threads, 0, stream>>>
-                        (diff, mat, workspace, N, C, IH, IW, OH, OW);
+                        <<<blocks, threads, 0, stream>>>(diff, mat, midx,
+                                                         workspace, N, C, IH,
+                                                         IW, OH, OW);
                 break;
             default:
                 break;
@@ -169,9 +185,15 @@ void backward_data_proxy(const float *mat, const float *diff,
     {
         int THREADS = 512;
         dim3 threads(THREADS);
-        dim3 blocks((IH * IW + THREADS - 1) / THREADS, N * C);
-        add_up_kernel<factor><<<blocks, threads, 0, stream>>>(workspace, grad,
-                                                              IH * IW);
+        if (midx) {
+            dim3 blocks((IH * IW + THREADS - 1) / THREADS, N_SRC * C);
+            add_up_kernel<factor>
+                    <<<blocks, threads, 0, stream>>>(workspace, grad, IH * IW);
+        } else {
+            dim3 blocks((IH * IW + THREADS - 1) / THREADS, N * C);
+            add_up_kernel<factor>
+                    <<<blocks, threads, 0, stream>>>(workspace, grad, IH * IW);
+        }
     }
     after_kernel_launch();
 }
@@ -181,4 +203,3 @@ void backward_data_proxy(const float *mat, const float *diff,
 }  // namespace megdnn

 // vim: syntax=cpp.doxygen
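The backward-data path avoids atomics by giving concurrent writers factor (= 4) replicated, src-shaped gradient buffers in workspace, which add_up_kernel then folds into grad; with mat_idx the buffer must cover all N_SRC source images rather than the N transform entries. The sizing rule from get_backward_data_workspace_in_bytes, restated as a small helper:

#include <cstddef>

// Mirrors the sizing rule above: `factor` replicated float copies of the
// src-shaped gradient. With mat_idx in play, pass the source batch size
// (N_SRC), since gradients may scatter over every source image.
std::size_t bwd_data_workspace_bytes(std::size_t n, std::size_t c,
                                     std::size_t ih, std::size_t iw) {
    const std::size_t factor = 4;  // replication count used by the kernels
    return n * c * ih * iw * factor * sizeof(float);
}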
dnn/src/cuda/warp_perspective/backward_mat.cpp
@@ -6,7 +6,8 @@
  *
  * Unless required by applicable law or agreed to in writing,
  * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+ * implied.
  */
 #include "src/cuda/warp_perspective/opr_impl.h"
@@ -40,15 +41,17 @@ WorkspaceBundle WarpPerspectiveBackwardMatImpl::get_workspace_bundle(
 void WarpPerspectiveBackwardMatImpl::exec(_megdnn_tensor_in ssrc,
                                           _megdnn_tensor_in smat,
+                                          _megdnn_tensor_in smat_idx,
                                           _megdnn_tensor_in sdiff,
                                           _megdnn_tensor_out sgrad,
                                           _megdnn_workspace sworkspace) {
-    check_exec(ssrc.layout, smat.layout, sdiff.layout, sgrad.layout,
-               sworkspace.size);
+    check_exec(ssrc.layout, smat.layout, smat_idx.layout, sdiff.layout,
+               sgrad.layout, sworkspace.size);
     TensorND src = ssrc;
     TensorND mat = smat;
     TensorND diff = sdiff;
     TensorND grad = sgrad;
+    TensorND mat_idx = smat_idx;
     auto bundle = get_workspace_bundle(sworkspace.raw_ptr, ssrc.layout,
                                        smat.layout, sdiff.layout, sgrad.layout);
     auto ctypecvt = CompTypeCvter<dtype::BFloat16, dtype::Float32>(
@@ -64,6 +67,15 @@ void WarpPerspectiveBackwardMatImpl::exec(_megdnn_tensor_in ssrc,
     auto N = src.layout.shape[0], C = src.layout.shape[1],
          IH = src.layout.shape[2], IW = src.layout.shape[3],
          OH = diff.layout.shape[2], OW = diff.layout.shape[3];
+    int* midx_ptr = nullptr;
+    if (mat_idx.raw_ptr) {
+        megdnn_assert(mat_idx.layout.ndim == 1);
+        N = mat_idx.layout.shape[0];
+        midx_ptr = mat_idx.ptr<int>();
+    } else {
+        megdnn_assert(mat_idx.layout.ndim == 0);
+    }
     auto bval = param().border_val;
     auto bmode = warp_perspective::get_bmode(param().bmode);
@@ -71,7 +83,7 @@ void WarpPerspectiveBackwardMatImpl::exec(_megdnn_tensor_in ssrc,
     size_t max_batch_x_channel = max_batch_x_channel_size();
     if (batch_x_channel_size <= max_batch_x_channel) {
         warp_perspective::backward_mat_proxy(
-                src.ptr<dt_float32>(), mat.ptr<dt_float32>(),
+                src.ptr<dt_float32>(), mat.ptr<dt_float32>(), midx_ptr,
                 diff.ptr<dt_float32>(), grad.ptr<dt_float32>(), N, C, IH,
                 IW, OH, OW, bval, bmode, stream);
     } else {
@@ -84,14 +96,19 @@ void WarpPerspectiveBackwardMatImpl::exec(_megdnn_tensor_in ssrc,
             size_t curr_batch_size =
                     N > max_batch_size ? max_batch_size : N;
             warp_perspective::backward_mat_proxy(
-                    src_ptr, mat_ptr, diff_ptr, grad_ptr, curr_batch_size,
-                    C, IH, IW, OH, OW, bval, bmode, stream);
+                    src_ptr, mat_ptr, midx_ptr, diff_ptr, grad_ptr,
+                    curr_batch_size, C, IH, IW, OH, OW, bval, bmode, stream);
             if (N <= max_batch_size) {
                 break;
             } else {
                 N -= max_batch_size;
-                src_ptr += curr_batch_size * src.layout.stride[0];
+                if (midx_ptr == nullptr) {
+                    src_ptr += curr_batch_size * src.layout.stride[0];
+                } else {
+                    midx_ptr += curr_batch_size;
+                }
                 mat_ptr += curr_batch_size * mat.layout.stride[0];
                 diff_ptr += curr_batch_size * diff.layout.stride[0];
                 grad_ptr += curr_batch_size * grad.layout.stride[0];
@@ -109,4 +126,3 @@ void WarpPerspectiveBackwardMatImpl::exec(_megdnn_tensor_in ssrc,
 }  // namespace megdnn

 // vim: syntax=cpp.doxygen
dnn/src/cuda/warp_perspective/backward_mat.cu
@@ -6,7 +6,8 @@
  *
  * Unless required by applicable law or agreed to in writing,
  * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+ * implied.
  */
 #include "src/cuda/warp_perspective/common.h"
@@ -20,17 +21,21 @@ namespace cuda {
 namespace warp_perspective {

 template <typename Getter>
-__global__ void warp_perspective_bwd_mat_kernel(const float* hidden,
-                                                const float* in,
-                                                const float* mat, float* grad,
-                                                int N, int C, int IH, int IW,
-                                                int OH, int OW) {
+__global__ void warp_perspective_bwd_mat_kernel(
+        const float* hidden, const float* in, const float* mat,
+        const int* midx, float* grad, int N, int C, int IH, int IW, int OH,
+        int OW) {
     Getter getter;
+    int n = blockIdx.z;
     int ow = blockIdx.x * blockDim.x + threadIdx.x;
     int oh = blockIdx.y * blockDim.y + threadIdx.y;
     hidden += blockIdx.z * C * OH * OW;
-    in += blockIdx.z * C * IH * IW;
-    mat += blockIdx.z * 3 * 3;
-    grad += blockIdx.z * 3 * 3;
+    if (midx) {
+        in += midx[n] * C * IH * IW;
+    } else {
+        in += n * C * IH * IW;
+    }
+    mat += n * 3 * 3;
+    grad += n * 3 * 3;
     float grad_local[3 * 3];
     memset(grad_local, 0, sizeof(grad_local));
     if (ow < OW && oh < OH) {
@@ -83,9 +88,8 @@ __global__ void warp_perspective_bwd_mat_kernel(const float *hidden,
             dh[8] = 1.0f * ddenominatorh;
 #pragma unroll
             for (int i = 0; i < 9; ++i) {
-                grad_local[i] +=
-                        hidden[oh * OW + ow] * dalpha * dh[i] +
-                        hidden[oh * OW + ow] * dbeta * dw[i];
+                grad_local[i] += hidden[oh * OW + ow] * dalpha * dh[i] +
+                                 hidden[oh * OW + ow] * dbeta * dw[i];
             }
             hidden += OH * OW;
             in += IH * IW;
@@ -125,17 +129,21 @@ __global__ void warp_perspective_bwd_mat_kernel(const float *hidden,
 }

-__global__ void warp_perspective_bwd_mat_constant_kernel(const float* hidden,
-                                                         const float* in,
-                                                         const float* mat,
-                                                         float* grad, int N,
-                                                         int C, int IH, int IW,
-                                                         int OH, int OW,
-                                                         float bval) {
+__global__ void warp_perspective_bwd_mat_constant_kernel(
+        const float* hidden, const float* in, const float* mat,
+        const int* midx, float* grad, int N, int C, int IH, int IW, int OH,
+        int OW, float bval) {
+    int n = blockIdx.z;
     int ow = blockIdx.x * blockDim.x + threadIdx.x;
     int oh = blockIdx.y * blockDim.y + threadIdx.y;
     hidden += blockIdx.z * C * OH * OW;
-    in += blockIdx.z * C * IH * IW;
-    mat += blockIdx.z * 3 * 3;
-    grad += blockIdx.z * 3 * 3;
+    if (midx) {
+        in += midx[n] * C * IH * IW;
+    } else {
+        in += n * C * IH * IW;
+    }
+    mat += n * 3 * 3;
+    grad += n * 3 * 3;
     float grad_local[3 * 3];
     memset(grad_local, 0, sizeof(grad_local));
     if (ow < OW && oh < OH) {
         float numeratorw = mat[0] * ow + mat[1] * oh + mat[2];
@@ -199,10 +207,10 @@ __global__ void warp_perspective_bwd_mat_constant_kernel(const float *hidden,
             dh[8] = 1.0f * ddenominatorh;
 #pragma unroll
             for (int i = 0; i < 9; ++i) {
                 float delta = hidden[oh * OW + ow] * dalpha * dh[i] +
                               hidden[oh * OW + ow] * dbeta * dw[i];
                 if (isfinite(delta))
                     grad_local[i] += delta;
             }
             hidden += OH * OW;
             in += IH * IW;
@@ -227,8 +235,9 @@ __global__ void warp_perspective_bwd_mat_constant_kernel(const float *hidden,
     for (int k = 16; k >= 1; k >>= 1) {
         if (tidx < k) {
 #pragma unroll
             for (int i = 0; i < 9; ++i)
                 grad_shared[tidy][tidx][i] += grad_shared[tidy][tidx + k][i];
         }
         cub::WARP_SYNC(0xffffffff);
     }
@@ -240,18 +249,17 @@ __global__ void warp_perspective_bwd_mat_constant_kernel(const float *hidden,
 }

-void backward_mat_proxy(const float* src, const float* mat,
-                        const float* diff, float* grad, int N, int C, int IH,
-                        int IW, int OH, int OW, float bval, BorderMode mode,
-                        cudaStream_t stream) {
+void backward_mat_proxy(const float* src, const float* mat, const int* midx,
+                        const float* diff, float* grad, int N, int C, int IH,
+                        int IW, int OH, int OW, float bval, BorderMode mode,
+                        cudaStream_t stream) {
     const int BY = 16, BX = 32;
     dim3 threads(BX, BY);
     dim3 blocks((OW + BX - 1) / BX, (OH + BY - 1) / BY, N);
     cuda_check(cudaMemsetAsync(grad, 0, sizeof(float) * N * 3 * 3, stream));
-#define DISPATCH(Getter)                                                     \
-    warp_perspective_bwd_mat_kernel<Getter><<<blocks, threads, 0, stream>>>( \
-            diff, src, mat, grad, N, C, IH, IW, OH, OW);
+#define DISPATCH(Getter)                                                     \
+    warp_perspective_bwd_mat_kernel<Getter><<<blocks, threads, 0, stream>>>( \
+            diff, src, mat, midx, grad, N, C, IH, IW, OH, OW);
     switch (mode) {
         case BORDER_REPLICATE:
             DISPATCH(ReplicateGetter);
@@ -266,8 +274,9 @@ void backward_mat_proxy(const float *src, const float *mat,
             DISPATCH(WrapGetter);
             break;
         case BORDER_CONSTANT:
-            warp_perspective_bwd_mat_constant_kernel<<<blocks, threads, 0, stream>>>(
-                    diff, src, mat, grad, N, C, IH, IW, OH, OW, bval);
+            warp_perspective_bwd_mat_constant_kernel<<<blocks, threads, 0,
+                                                       stream>>>(
+                    diff, src, mat, midx, grad, N, C, IH, IW, OH, OW, bval);
             break;
         default:
             break;
@@ -281,4 +290,3 @@ void backward_mat_proxy(const float *src, const float *mat,
 }  // namespace megdnn

 // vim: syntax=cpp.doxygen
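Both backward-mat kernels use the same base-pointer arithmetic: the 3x3 matrix and its gradient are per-transform (indexed by n), while the input image may be shared and is selected through midx[n] when an index tensor is present. Restated as a host-side sketch (a hypothetical helper, not the kernel code):

// n indexes the transform; midx (optional) redirects the image lookup.
inline void bind_bases(const int* midx, int n, int C, int IH, int IW,
                       const float*& in, const float*& mat, float*& grad) {
    in += (midx ? midx[n] : n) * C * IH * IW;  // possibly shared source image
    mat += n * 3 * 3;                          // per-transform matrix
    grad += n * 3 * 3;                         // per-transform matrix gradient
}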
dnn/src/cuda/warp_perspective/common.h
@@ -6,7 +6,8 @@
  *
  * Unless required by applicable law or agreed to in writing,
  * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+ * implied.
  */
 #pragma once
 #include <cuda_runtime_api.h>
@@ -19,40 +20,34 @@ namespace warp_perspective {
 // all these kernels use bilinear interpolation

 template <typename ctype>
 void forward_proxy(bool is_nhwc, const ctype* src, const float* mat,
                    const int* mat_idx, ctype* dst, int N_SRC, int N_MAT,
                    int C, int IH, int IW, int OH, int OW, ctype bval,
                    BorderMode bmode, megcore::AsyncErrorInfo* error_info,
                    void* error_tracker, cudaStream_t stream);

 template <typename ctype>
 void forward_proxy_nchw4(const ctype* src, const float* mat,
                          const int* mat_idx, ctype* dst, int N_SRC, int N_MAT,
                          int C, int IH, int IW, int OH, int OW, ctype bval,
                          BorderMode bmode,
                          megcore::AsyncErrorInfo* error_info,
                          void* error_tracker, cudaStream_t stream);

-void backward_data_proxy(const float* mat, const float* diff, float* grad,
-                         float* workspace, int N, int C, int IH, int IW,
-                         int OH, int OW, float bval, BorderMode bmode,
-                         cudaStream_t stream);
+void backward_data_proxy(const float* mat, const int* midx, const float* diff,
+                         float* grad, float* workspace, int N, int N_SRC,
+                         int C, int IH, int IW, int OH, int OW, float bval,
+                         BorderMode bmode, cudaStream_t stream);

 size_t get_backward_data_workspace_in_bytes(int N, int C, int IH, int IW,
                                             int OH, int OW, BorderMode bmode);

-void backward_mat_proxy(const float* src, const float* mat, const float* diff,
-                        float* grad, int N, int C, int IH, int IW, int OH,
-                        int OW, float bval, BorderMode bmode,
-                        cudaStream_t stream);
+void backward_mat_proxy(const float* src, const float* mat, const int* midx,
+                        const float* diff, float* grad, int N, int C, int IH,
+                        int IW, int OH, int OW, float bval, BorderMode bmode,
+                        cudaStream_t stream);

 }  // namespace warp_perspective
 }  // namespace cuda
 }  // namespace megdnn

 // vim: syntax=cpp.doxygen
dnn/src/cuda/warp_perspective/opr_impl.h
@@ -6,7 +6,8 @@
  *
  * Unless required by applicable law or agreed to in writing,
  * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+ * implied.
  */
 #pragma once
 #include "megdnn/oprs.h"
@@ -48,20 +49,24 @@ class WarpPerspectiveBackwardDataImpl final
         : public WarpPerspectiveBackwardData {
 public:
     using WarpPerspectiveBackwardData::WarpPerspectiveBackwardData;
-    void exec(_megdnn_tensor_in mat, _megdnn_tensor_in diff,
-              _megdnn_tensor_out grad, _megdnn_workspace workspace) override;
+    void exec(_megdnn_tensor_in mat, _megdnn_tensor_in mat_idx,
+              _megdnn_tensor_in diff, _megdnn_tensor_out grad,
+              _megdnn_workspace workspace) override;
     size_t get_workspace_in_bytes(const TensorLayout& mat,
+                                  const TensorLayout& mat_idx,
                                   const TensorLayout& diff,
                                   const TensorLayout& grad) override {
-        return get_workspace_bundle(nullptr, mat, diff, grad)
+        return get_workspace_bundle(nullptr, mat, mat_idx, diff, grad)
                 .total_size_in_bytes();
     }

 private:
     WorkspaceBundle get_workspace_bundle(void* ptr, const TensorLayout& mat,
+                                         const TensorLayout& mat_idx,
                                          const TensorLayout& diff,
                                          const TensorLayout& grad) const;
     size_t get_float32_workspace_in_bytes(const TensorLayout& mat,
+                                          const TensorLayout& mat_idx,
                                           const TensorLayout& diff,
                                           const TensorLayout& grad) const;
 };
@@ -70,10 +75,11 @@ class WarpPerspectiveBackwardMatImpl final : public WarpPerspectiveBackwardMat {
 public:
     using WarpPerspectiveBackwardMat::WarpPerspectiveBackwardMat;
-    void exec(_megdnn_tensor_in src, _megdnn_tensor_in mat,
-              _megdnn_tensor_in diff, _megdnn_tensor_out grad,
-              _megdnn_workspace workspace) override;
+    void exec(_megdnn_tensor_in src, _megdnn_tensor_in mat,
+              _megdnn_tensor_in mat_idx, _megdnn_tensor_in diff,
+              _megdnn_tensor_out grad, _megdnn_workspace workspace) override;
     size_t get_workspace_in_bytes(const TensorLayout& src,
                                   const TensorLayout& mat,
+                                  const TensorLayout& /* mat_idx */,
                                   const TensorLayout& diff,
                                   const TensorLayout& grad) override {
         return get_workspace_bundle(nullptr, src, mat, diff, grad)
dnn/src/naive/warp_perspective/opr_impl.cpp
@@ -6,7 +6,8 @@
  *
  * Unless required by applicable law or agreed to in writing,
  * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+ * implied.
  */
 #include "src/naive/warp_perspective/opr_impl.h"
 #include "src/naive/warp_perspective/warp_perspective_cv.h"
@@ -358,18 +359,29 @@ void WarpPerspectiveForwardImpl::exec(_megdnn_tensor_in src,
 }

 template <typename ctype, typename mtype>
-void WarpPerspectiveBackwardDataImpl::kern_naive(const KernParam<ctype, mtype>& kern_param) {
-    const int N = kern_param.n, C = kern_param.c,
-              IH = kern_param.ih, IW = kern_param.iw;
+void WarpPerspectiveBackwardDataImpl::kern_naive(
+        const KernParam<ctype, mtype>& kern_param) {
+    const int N = kern_param.n_mat, C = kern_param.c,
+              IH = kern_param.ih, IW = kern_param.iw;
     const int OH = kern_param.oh, OW = kern_param.ow;
     const ctype* hptr_ = kern_param.hptr;
     const mtype* mptr_ = kern_param.mptr;
     ctype* sptr_ = kern_param.sptr;
+    int* midx_ptr = kern_param.midx_ptr;
     auto hptr = hptr_;
     auto mptr = mptr_;
     auto sptr = sptr_;
-    std::memset(sptr, 0, sizeof(ctype) * N * C * IH * IW);
+    if (midx_ptr) {
+        std::memset(sptr, 0, sizeof(ctype) * kern_param.n_src * C * IH * IW);
+    } else {
+        std::memset(sptr, 0, sizeof(ctype) * N * C * IH * IW);
+    }
     rep(n, N) {
+        if (midx_ptr) {
+            sptr = sptr_ + midx_ptr[n] * C * IH * IW;
+        } else {
+            sptr = sptr_ + n * C * IH * IW;
+        }
         rep(oh, OH) rep(ow, OW) {
             float numeratorw = mptr[0] * ow + mptr[1] * oh + mptr[2];
             float numeratorh = mptr[3] * ow + mptr[4] * oh + mptr[5];
@@ -404,27 +416,30 @@ void WarpPerspectiveBackwardDataImpl::kern_naive(const KernParam<ctype, mtype>&
             }
         }
-        sptr += C * IH * IW;
         hptr += C * OH * OW;
         mptr += 3 * 3;
     }
 }

 void WarpPerspectiveBackwardDataImpl::exec(_megdnn_tensor_in mat,
+                                           _megdnn_tensor_in mat_idx,
                                            _megdnn_tensor_in diff,
                                            _megdnn_tensor_out grad,
                                            _megdnn_workspace workspace) {
-    check_exec(mat.layout, diff.layout, grad.layout, workspace.size);
+    check_exec(mat.layout, mat_idx.layout, diff.layout, grad.layout,
+               workspace.size);
     megdnn_assert(param().format == param::WarpPerspective::Format::NCHW,
                   "invalid warp_perspective format");
 #define DISPATCH_ST_MT(dt, ct)                                                \
     if (diff.layout.dtype.enumv() == DTypeTrait<dt>::enumv) {                 \
         if (mat.layout.dtype.enumv() == DTypeTrait<dtype::Float32>::enumv) {  \
-            auto kparam = KernParam<ct, float>::from_tensors(mat, diff, grad);\
+            auto kparam = KernParam<ct, float>::from_tensors(mat, mat_idx,    \
+                                                             diff, grad);     \
             MEGDNN_DISPATCH_CPU_KERN_OPR(kern_naive(kparam));                 \
             return;                                                           \
         } else {                                                              \
-            auto kparam = KernParam<ct, ct>::from_tensors(mat, diff, grad);   \
+            auto kparam =                                                     \
+                    KernParam<ct, ct>::from_tensors(mat, mat_idx, diff, grad);\
             MEGDNN_DISPATCH_CPU_KERN_OPR(kern_naive(kparam));                 \
             return;                                                           \
         }                                                                     \
@@ -441,7 +456,7 @@ void WarpPerspectiveBackwardDataImpl::exec(_megdnn_tensor_in mat,
 template <typename ctype, typename mtype>
 void WarpPerspectiveBackwardMatImpl::kern_naive(
         const KernParam<ctype, mtype>& kern_param) {
-    const int N = kern_param.n, C = kern_param.c, IH = kern_param.ih,
+    const int N = kern_param.n_mat, C = kern_param.c, IH = kern_param.ih,
               IW = kern_param.iw;
     const int OH = kern_param.oh, OW = kern_param.ow;
@@ -449,9 +464,15 @@ void WarpPerspectiveBackwardMatImpl::kern_naive(
     auto sptr = kern_param.sptr;
     auto mptr = kern_param.mptr;
     auto res = kern_param.res;
+    auto midx_ptr = kern_param.midx_ptr;
     auto border_val = kern_param.border_val;
     std::memset(res, 0, sizeof(float) * N * 3 * 3);
     rep(n, N) {
+        if (midx_ptr) {
+            sptr = kern_param.sptr + midx_ptr[n] * C * IH * IW;
+        } else {
+            sptr = kern_param.sptr + n * C * IH * IW;
+        }
         rep(oh, OH) rep(ow, OW) {
             float numeratorw = mptr[0] * ow + mptr[1] * oh + mptr[2];
             float numeratorh = mptr[3] * ow + mptr[4] * oh + mptr[5];
@@ -537,7 +558,6 @@ void WarpPerspectiveBackwardMatImpl::kern_naive(
             }
         }
         hptr += C * OH * OW;
-        sptr += C * IH * IW;
         mptr += 3 * 3;
         res += 3 * 3;
     }
@@ -545,21 +565,22 @@ void WarpPerspectiveBackwardMatImpl::kern_naive(
 void WarpPerspectiveBackwardMatImpl::exec(_megdnn_tensor_in src,
                                           _megdnn_tensor_in mat,
+                                          _megdnn_tensor_in mat_idx,
                                           _megdnn_tensor_in diff,
                                           _megdnn_tensor_out grad,
                                           _megdnn_workspace workspace) {
-    check_exec(src.layout, mat.layout, diff.layout, grad.layout,
-               workspace.size);
+    check_exec(src.layout, mat.layout, mat_idx.layout, diff.layout,
+               grad.layout, workspace.size);
 #define DISPATCH_ST_MT(dt, ct)                                                \
     if (src.layout.dtype.enumv() == DTypeTrait<dt>::enumv) {                  \
         if (mat.layout.dtype.enumv() == DTypeTrait<dtype::Float32>::enumv) {  \
             auto kparam = KernParam<ct, float>::from_tensors(                 \
-                    param().border_val, src, mat, diff, grad);                \
+                    param().border_val, src, mat, mat_idx, diff, grad);       \
             MEGDNN_DISPATCH_CPU_KERN_OPR(kern_naive(kparam));                 \
             return;                                                           \
         } else {                                                              \
             auto kparam = KernParam<ct, ct>::from_tensors(                    \
-                    param().border_val, src, mat, diff, grad);                \
+                    param().border_val, src, mat, mat_idx, diff, grad);       \
             MEGDNN_DISPATCH_CPU_KERN_OPR(kern_naive(kparam));                 \
             return;                                                           \
         }                                                                     \
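A tiny worked example of the scatter semantics the naive backward-data kernel implements above: when two transforms share one source image, their gradients add into the same slot instead of overwriting it (numbers made up for illustration).

#include <cstdio>

int main() {
    float grad[2] = {0.f, 0.f};     // N_SRC = 2 one-element "images"
    float diff[2] = {0.5f, 0.25f};  // N_MAT = 2 incoming gradients
    int midx[2] = {0, 0};           // both transforms map to source image 0
    for (int n = 0; n < 2; ++n)
        grad[midx[n]] += diff[n];   // accumulate, never overwrite
    std::printf("%g %g\n", grad[0], grad[1]);  // prints: 0.75 0
    return 0;
}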
dnn/src/naive/warp_perspective/opr_impl.h
@@ -6,7 +6,8 @@
  *
  * Unless required by applicable law or agreed to in writing,
  * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+ * implied.
  */
 #pragma once
 #include "megdnn/oprs.h"
@@ -15,144 +16,158 @@
 namespace megdnn {
 namespace naive {

 class WarpPerspectiveForwardImpl : public WarpPerspectiveForward {
 protected:
     using Format = Param::Format;
     template <typename ctype, typename mtype>
     struct KernParam {
         Format format;
         BorderMode bmode;
         float border_val;
         size_t n_src, n_mat, c, ih, iw, oh, ow;
         ctype *sptr, *dptr;
         mtype* mptr;
         int* midx_ptr;  //!< can be null
         Workspace workspace;

         static KernParam from_tensors(Format format, BorderMode bmode,
                                       float border_val, _megdnn_tensor_in src,
                                       _megdnn_tensor_in mat,
                                       _megdnn_tensor_in mat_idx,
                                       _megdnn_tensor_out dst,
                                       _megdnn_workspace workspace) {
             KernParam ret;
             ret.format = format;
             ret.bmode = bmode;
             ret.border_val = border_val;
             ret.n_src = src.layout.shape[0];
             if (mat_idx.raw_ptr) {
                 megdnn_assert(mat_idx.layout.ndim == 1);
                 ret.n_mat = mat_idx.layout.shape[0];
                 ret.midx_ptr = mat_idx.ptr<int>();
             } else {
                 megdnn_assert(mat_idx.layout.ndim == 0);
                 ret.n_mat = ret.n_src;
                 ret.midx_ptr = nullptr;
             }
             if (format == Format::NCHW) {
                 ret.c = src.layout.shape[1];
                 ret.ih = src.layout.shape[2];
                 ret.iw = src.layout.shape[3];
                 ret.oh = dst.layout.shape[2];
                 ret.ow = dst.layout.shape[3];
             } else if (format == Format::NHWC) {
                 ret.c = src.layout.shape[3];
                 ret.ih = src.layout.shape[1];
                 ret.iw = src.layout.shape[2];
                 ret.oh = dst.layout.shape[1];
                 ret.ow = dst.layout.shape[2];
             } else if (format == Format::NCHW4) {
                 ret.c = src.layout.shape[1] * 4;
                 ret.ih = src.layout.shape[2];
                 ret.iw = src.layout.shape[3];
                 ret.oh = dst.layout.shape[2];
                 ret.ow = dst.layout.shape[3];
             } else {
                 megdnn_assert(format == Format::NHWCD4);
                 ret.c = src.layout.shape[2] * 4;
                 ret.ih = src.layout.shape[1];
                 ret.iw = src.layout.shape[3];
                 ret.oh = dst.layout.shape[1];
                 ret.ow = dst.layout.shape[3];
             }
             if (src.layout.dtype.enumv() == DTypeEnum::Float32 ||
                 MEGDNN_FLOAT16_SELECT(
                         (src.layout.dtype.enumv() == DTypeEnum::Float16 ||
                          src.layout.dtype.enumv() == DTypeEnum::BFloat16),
                         false) ||
                 src.layout.dtype.enumv() == DTypeEnum::Int8 ||
                 src.layout.dtype.enumv() == DTypeEnum::Uint8 ||
                 src.layout.dtype.enumv() == DTypeEnum::QuantizedS8 ||
                 src.layout.dtype.enumv() == DTypeEnum::Quantized8Asymm) {
                 ret.sptr = src.compatible_ptr<ctype>();
                 ret.mptr = mat.ptr<mtype>();
                 ret.dptr = dst.compatible_ptr<ctype>();
             } else {
                 ret.sptr = nullptr;
                 ret.mptr = nullptr;
                 ret.dptr = nullptr;
             }
             ret.workspace = workspace;
             return ret;
         }
     };

     // ctype: C type of input data type.
     // mtype: C type of transformation matrix data type.
     template <typename ctype, typename mtype>
     void kern_naive(const KernParam<ctype, mtype>& kern_param,
                     size_t task_id);

 public:
     using WarpPerspectiveForward::WarpPerspectiveForward;
     void exec(_megdnn_tensor_in src, _megdnn_tensor_in mat,
               _megdnn_tensor_in mat_idx, _megdnn_tensor_out dst,
               _megdnn_workspace workspace) override;
     size_t get_workspace_in_bytes(const TensorLayout&, const TensorLayout&,
                                   const TensorLayout&,
                                   const TensorLayout&) override {
         return 0;
     }

 private:
     // ctype: C type of input data type.
     // mtype: C type of transformation matrix data type.
     template <typename ctype, typename mtype>
     void kern_naive_nhwcd4(const KernParam<ctype, mtype>& kern_param,
<
typename
ctype
,
typename
mtype
>
size_t
task_id
);
void
kern_naive
(
const
KernParam
<
ctype
,
mtype
>&
kern_param
,
size_t
task_id
);
public:
using
WarpPerspectiveForward
::
WarpPerspectiveForward
;
void
exec
(
_megdnn_tensor_in
src
,
_megdnn_tensor_in
mat
,
_megdnn_tensor_in
mat_idx
,
_megdnn_tensor_out
dst
,
_megdnn_workspace
workspace
)
override
;
size_t
get_workspace_in_bytes
(
const
TensorLayout
&
,
const
TensorLayout
&
,
const
TensorLayout
&
,
const
TensorLayout
&
)
override
{
return
0
;
}
private:
template
<
typename
ctype
,
typename
mtype
>
void
kern_naive_nhwcd4
(
const
KernParam
<
ctype
,
mtype
>&
kern_param
,
size_t
task_id
);
};
};
class
WarpPerspectiveBackwardDataImpl
:
public
WarpPerspectiveBackwardData
{
class
WarpPerspectiveBackwardDataImpl
:
public
WarpPerspectiveBackwardData
{
protected:
protected:
template
<
typename
ctype
,
typename
mtype
>
template
<
typename
ctype
,
typename
mtype
>
struct
KernParam
{
struct
KernParam
{
size_t
n
,
c
,
ih
,
iw
,
oh
,
ow
;
size_t
n
_src
,
n_mat
,
c
,
ih
,
iw
,
oh
,
ow
;
ctype
*
sptr
,
*
hptr
;
ctype
*
sptr
,
*
hptr
;
mtype
*
mptr
;
mtype
*
mptr
;
int
*
midx_ptr
;
//!< can be null
static
KernParam
from_tensors
(
_megdnn_tensor_in
mat
,
static
KernParam
from_tensors
(
_megdnn_tensor_in
mat
,
_megdnn_tensor_in
mat_idx
,
_megdnn_tensor_in
diff
,
_megdnn_tensor_in
diff
,
_megdnn_tensor_out
grad
)
{
_megdnn_tensor_out
grad
)
{
KernParam
ret
;
KernParam
ret
;
ret
.
n
=
grad
.
layout
.
shape
[
0
],
ret
.
c
=
grad
.
layout
.
shape
[
1
],
ret
.
n
_src
=
grad
.
layout
.
shape
[
0
],
ret
.
c
=
grad
.
layout
.
shape
[
1
];
ret
.
ih
=
grad
.
layout
.
shape
[
2
],
ret
.
iw
=
grad
.
layout
.
shape
[
3
];
ret
.
ih
=
grad
.
layout
.
shape
[
2
],
ret
.
iw
=
grad
.
layout
.
shape
[
3
];
ret
.
oh
=
diff
.
layout
.
shape
[
2
],
ret
.
ow
=
diff
.
layout
.
shape
[
3
];
ret
.
oh
=
diff
.
layout
.
shape
[
2
],
ret
.
ow
=
diff
.
layout
.
shape
[
3
];
ret
.
hptr
=
diff
.
ptr
<
ctype
>
();
ret
.
hptr
=
diff
.
ptr
<
ctype
>
();
ret
.
mptr
=
mat
.
ptr
<
mtype
>
();
ret
.
mptr
=
mat
.
ptr
<
mtype
>
();
ret
.
sptr
=
grad
.
ptr
<
ctype
>
();
ret
.
sptr
=
grad
.
ptr
<
ctype
>
();
if
(
mat_idx
.
raw_ptr
)
{
megdnn_assert
(
mat_idx
.
layout
.
ndim
==
1
);
ret
.
n_mat
=
mat_idx
.
layout
.
shape
[
0
];
ret
.
midx_ptr
=
mat_idx
.
ptr
<
int
>
();
}
else
{
megdnn_assert
(
mat_idx
.
layout
.
ndim
==
0
);
ret
.
n_mat
=
ret
.
n_src
;
ret
.
midx_ptr
=
nullptr
;
}
return
ret
;
return
ret
;
}
}
};
};
public:
public:
using
WarpPerspectiveBackwardData
::
WarpPerspectiveBackwardData
;
using
WarpPerspectiveBackwardData
::
WarpPerspectiveBackwardData
;
void
exec
(
_megdnn_tensor_in
mat
,
_megdnn_tensor_in
diff
,
void
exec
(
_megdnn_tensor_in
mat
,
_megdnn_tensor_in
mat_idx
,
_megdnn_tensor_out
grad
,
_megdnn_workspace
workspace
)
override
;
_megdnn_tensor_in
diff
,
_megdnn_tensor_out
grad
,
_megdnn_workspace
workspace
)
override
;
size_t
get_workspace_in_bytes
(
const
TensorLayout
&
,
const
TensorLayout
&
,
size_t
get_workspace_in_bytes
(
const
TensorLayout
&
,
const
TensorLayout
&
,
const
TensorLayout
&
,
const
TensorLayout
&
)
override
{
const
TensorLayout
&
)
override
{
return
0
;
return
0
;
}
}
private:
private:
template
<
typename
ctype
,
typename
mtype
>
template
<
typename
ctype
,
typename
mtype
>
void
kern_naive
(
const
KernParam
<
ctype
,
mtype
>&
kern_param
);
void
kern_naive
(
const
KernParam
<
ctype
,
mtype
>&
kern_param
);
...
@@ -162,23 +177,35 @@ class WarpPerspectiveBackwardMatImpl : public WarpPerspectiveBackwardMat {
...
@@ -162,23 +177,35 @@ class WarpPerspectiveBackwardMatImpl : public WarpPerspectiveBackwardMat {
protected:
protected:
template
<
typename
ctype
,
typename
mtype
>
template
<
typename
ctype
,
typename
mtype
>
struct
KernParam
{
struct
KernParam
{
size_t
n
,
c
,
ih
,
iw
,
oh
,
ow
;
size_t
n
_src
,
n_mat
,
c
,
ih
,
iw
,
oh
,
ow
;
ctype
*
sptr
,
*
hptr
;
ctype
*
sptr
,
*
hptr
;
mtype
*
mptr
,
*
res
;
mtype
*
mptr
,
*
res
;
int
*
midx_ptr
;
//!< can be null
float
border_val
;
float
border_val
;
static
KernParam
from_tensors
(
float
border_val_
,
_megdnn_tensor_in
src
,
static
KernParam
from_tensors
(
float
border_val_
,
_megdnn_tensor_in
src
,
_megdnn_tensor_in
mat
,
_megdnn_tensor_in
mat
,
_megdnn_tensor_in
mat_idx
,
_megdnn_tensor_in
diff
,
_megdnn_tensor_in
diff
,
_megdnn_tensor_out
grad
)
{
_megdnn_tensor_out
grad
)
{
KernParam
ret
;
KernParam
ret
;
ret
.
border_val
=
border_val_
;
ret
.
border_val
=
border_val_
;
ret
.
n
=
src
.
layout
.
shape
[
0
],
ret
.
c
=
src
.
layout
.
shape
[
1
],
ret
.
n
_src
=
src
.
layout
.
shape
[
0
],
ret
.
c
=
src
.
layout
.
shape
[
1
];
ret
.
ih
=
src
.
layout
.
shape
[
2
],
ret
.
iw
=
src
.
layout
.
shape
[
3
];
ret
.
ih
=
src
.
layout
.
shape
[
2
],
ret
.
iw
=
src
.
layout
.
shape
[
3
];
ret
.
oh
=
diff
.
layout
.
shape
[
2
],
ret
.
ow
=
diff
.
layout
.
shape
[
3
];
ret
.
oh
=
diff
.
layout
.
shape
[
2
],
ret
.
ow
=
diff
.
layout
.
shape
[
3
];
ret
.
hptr
=
diff
.
ptr
<
ctype
>
();
ret
.
hptr
=
diff
.
ptr
<
ctype
>
();
ret
.
mptr
=
mat
.
ptr
<
mtype
>
();
ret
.
mptr
=
mat
.
ptr
<
mtype
>
();
ret
.
sptr
=
src
.
ptr
<
ctype
>
();
ret
.
sptr
=
src
.
ptr
<
ctype
>
();
ret
.
res
=
grad
.
ptr
<
mtype
>
();
ret
.
res
=
grad
.
ptr
<
mtype
>
();
if
(
mat_idx
.
raw_ptr
)
{
megdnn_assert
(
mat_idx
.
layout
.
ndim
==
1
);
ret
.
n_mat
=
mat_idx
.
layout
.
shape
[
0
];
ret
.
midx_ptr
=
mat_idx
.
ptr
<
int
>
();
}
else
{
megdnn_assert
(
mat_idx
.
layout
.
ndim
==
0
);
ret
.
n_mat
=
ret
.
n_src
;
ret
.
midx_ptr
=
nullptr
;
}
return
ret
;
return
ret
;
}
}
};
};
...
@@ -186,10 +213,10 @@ protected:
...
@@ -186,10 +213,10 @@ protected:
public:
public:
using
WarpPerspectiveBackwardMat
::
WarpPerspectiveBackwardMat
;
using
WarpPerspectiveBackwardMat
::
WarpPerspectiveBackwardMat
;
void
exec
(
_megdnn_tensor_in
src
,
_megdnn_tensor_in
mat
,
void
exec
(
_megdnn_tensor_in
src
,
_megdnn_tensor_in
mat
,
_megdnn_tensor_in
diff
,
_megdnn_tensor_out
grad
,
_megdnn_tensor_in
mat_idx
,
_megdnn_tensor_in
diff
,
_megdnn_workspace
workspace
)
override
;
_megdnn_
tensor_out
grad
,
_megdnn_
workspace
workspace
)
override
;
size_t
get_workspace_in_bytes
(
const
TensorLayout
&
,
const
TensorLayout
&
,
size_t
get_workspace_in_bytes
(
const
TensorLayout
&
,
const
TensorLayout
&
,
const
TensorLayout
&
,
const
TensorLayout
&
,
const
TensorLayout
&
,
const
TensorLayout
&
)
override
{
const
TensorLayout
&
)
override
{
return
0
;
return
0
;
}
}
...
...
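The contract these KernParam structs establish is a batch gather: src/grad carry n_src batches while mat, diff and mat_idx carry n_mat batches, with midx_ptr (when non-null) mapping each transform batch to a source batch. The fragment below illustrates that indexing only and is not the actual megdnn kernel: the bilinear accumulation is elided, the function name is made up, and it assumes <cstring> for std::memset.

// Illustration of the n_src/n_mat/midx_ptr contract for backward data.
template <typename ctype, typename mtype>
void kern_backward_data_sketch(const KernParam<ctype, mtype>& p) {
    // grad (p.sptr) must start from zero: several diff batches may map to
    // one source batch when mat_idx contains duplicate indices.
    std::memset(p.sptr, 0, sizeof(ctype) * p.n_src * p.c * p.ih * p.iw);
    for (size_t i = 0; i < p.n_mat; ++i) {
        size_t b = p.midx_ptr ? size_t(p.midx_ptr[i]) : i;   // gather
        const ctype* diff = p.hptr + i * p.c * p.oh * p.ow;  // i-th output
        const mtype* mat = p.mptr + i * 3 * 3;               // i-th matrix
        ctype* grad = p.sptr + b * p.c * p.ih * p.iw;        // target batch
        // ... accumulate bilinear-sampling gradients of diff through mat
        // into grad (omitted) ...
        (void)diff; (void)mat; (void)grad;
    }
}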
dnn/test/common/warp_perspective.cpp
@@ -6,7 +6,8 @@
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or
* implied.
*/
#include "test/common/warp_perspective.h"

@@ -19,6 +20,10 @@ using namespace warp_perspective;
void WarpPerspectiveMatIdxProxy::deduce_layout(WarpPerspective*,
                                               TensorLayoutArray&) {}

void WarpPerspectiveMatIdxProxy::deduce_layout(WarpPerspectiveBackwardData*,
                                               TensorLayoutArray&) {}

void WarpPerspectiveMatIdxProxy::deduce_layout(WarpPerspectiveBackwardMat*,
                                               TensorLayoutArray&) {}

void WarpPerspectiveMatIdxProxy::exec(WarpPerspective* opr,
                                      const TensorNDArray& tensors) {

@@ -31,6 +36,30 @@ void WarpPerspectiveMatIdxProxy::exec(WarpPerspective* opr,
    opr->exec(tensors[0], tensors[1], tensors[2], tensors[3], W.workspace());
}

void WarpPerspectiveMatIdxProxy::exec(WarpPerspectiveBackwardData* opr,
                                      const TensorNDArray& tensors) {
    if (!W.valid()) {
        W = WorkspaceWrapper(opr->handle(), 0);
    }
    megdnn_assert(tensors.size() == 4);
    W.update(opr->get_workspace_in_bytes(tensors[0].layout, tensors[1].layout,
                                         tensors[2].layout,
                                         tensors[3].layout));
    opr->exec(tensors[0], tensors[1], tensors[2], tensors[3], W.workspace());
}

void WarpPerspectiveMatIdxProxy::exec(WarpPerspectiveBackwardMat* opr,
                                      const TensorNDArray& tensors) {
    if (!W.valid()) {
        W = WorkspaceWrapper(opr->handle(), 0);
    }
    megdnn_assert(tensors.size() == 5);
    W.update(opr->get_workspace_in_bytes(tensors[0].layout, tensors[1].layout,
                                         tensors[2].layout, tensors[3].layout,
                                         tensors[4].layout));
    opr->exec(tensors[0], tensors[1], tensors[2], tensors[3], tensors[4],
              W.workspace());
}

std::vector<TestArg> warp_perspective::get_cv_args() {
    std::vector<TestArg> args;

@@ -101,10 +130,10 @@ void warp_perspective::run_mat_idx_test(Handle* handle) {
    // test NHWC
    param.format = WarpPerspective::Param::Format::NHWC;
    checker.set_param(param)
            .set_rng(2, &mat_idx_rng)
            .set_epsilon(1e-1)
            .set_dtype(2, dtype::Int32());
    checker.execs({{N_SRC, 10, 11, 3}, {2, 3, 3}, {2}, {2, 11, 12, 3}});
}
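A usage note on the new overloads: the proxies pass the test tensors through positionally, so a backward-data case supplies {mat, mat_idx, diff, grad} and a backward-mat case {src, mat, mat_idx, diff, grad}. A sketch of wiring one up, mirroring the CUDA tests added later in this commit (shapes are illustrative):

// Exercising WarpPerspectiveBackwardData through the mat_idx proxy.
Checker<WarpPerspectiveBackwardData, WarpPerspectiveMatIdxProxy> checker(
        handle_cuda());
constexpr int N_SRC = 5;                  // source batch count
UniformIntRNG mat_idx_rng{0, N_SRC - 1};  // indices must stay in [0, N_SRC)
checker.set_dtype(1, dtype::Int32());     // tensor 1 is mat_idx
checker.set_rng(1, &mat_idx_rng);
// {mat, mat_idx, diff, grad}: 2 warped views gathered from N_SRC images
checker.execs({{2, 3, 3}, {2}, {2, 12, 11, 12}, {N_SRC, 12, 10, 11}});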
dnn/test/common/warp_perspective.h
@@ -22,7 +22,11 @@ namespace test {
struct WarpPerspectiveMatIdxProxy {
    WorkspaceWrapper W;
    static void deduce_layout(WarpPerspective*, TensorLayoutArray&);
    static void deduce_layout(WarpPerspectiveBackwardData*,
                              TensorLayoutArray&);
    static void deduce_layout(WarpPerspectiveBackwardMat*, TensorLayoutArray&);
    void exec(WarpPerspective* opr, const TensorNDArray& tensors);
    void exec(WarpPerspectiveBackwardData* opr, const TensorNDArray& tensors);
    void exec(WarpPerspectiveBackwardMat* opr, const TensorNDArray& tensors);
};

class WarpPerspectiveMatRNG final : public IIDRNG {
dnn/test/cuda/warp_perspective.cpp
@@ -6,7 +6,8 @@
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or
* implied.
*/
#include "test/cuda/fixture.h"

@@ -21,10 +22,10 @@ namespace {
using namespace megdnn;
using namespace test;

class NanMatRNG : public RNG {
    void gen(const TensorND& tensor_) override {
        auto& gen = RandomState::generator();
        std::uniform_real_distribution<dt_float32> pdist3(1.9f, 2.1f);
        std::uniform_real_distribution<dt_float32> pdist(0.9f, 1.1f);
        std::uniform_real_distribution<dt_float32> pdisth(0.4f, 0.6f);

@@ -32,7 +33,7 @@ class NanMatRNG: public RNG {
        std::uniform_real_distribution<dt_float32> ndist3(-2.1f, -1.9f);
        std::uniform_real_distribution<dt_float32> ndisth(-0.6f, -0.4f);
        std::uniform_int_distribution<int> dice(0, 5);
        float* ptr = tensor_.ptr<dt_float32>();
        auto N = tensor_.layout.shape[0];
        for (size_t n = 0; n < N; ++n) {
            for (size_t i = 0; i < 9; ++i) {

@@ -65,7 +66,7 @@ class NanMatRNG: public RNG {
    }
};
}  // anonymous namespace

namespace megdnn {
namespace test {

@@ -171,17 +172,15 @@ TEST_F(CUDA, WARP_PERSPECTIVE_CV) {
}
#endif

TEST_F(CUDA, WARP_PERSPECTIVE_FORWARD) {
    using Param = WarpPerspective::Param;
    Checker<WarpPerspectiveForward> checker(handle_cuda());
    WarpPerspectiveMatRNG rng;
    checker.set_rng(1, &rng);
    for (auto bmode : {WarpPerspective::BorderMode::WRAP,
                       WarpPerspective::BorderMode::REFLECT,
                       WarpPerspective::BorderMode::REPLICATE,
                       WarpPerspective::BorderMode::CONSTANT}) {
        WarpPerspective::Param param;
        param.border_val = 0.3f;
        param.bmode = bmode;

@@ -204,8 +203,7 @@ TEST_F(CUDA, WARP_PERSPECTIVE_FORWARD)
    // nan case
    NanMatRNG rng_nan;
    UniformFloatRNG rng_zero(0, 0);
    for (auto rng : std::vector<RNG*>{&rng_nan, &rng_zero}) {
        param::WarpPerspective param;
        param.bmode = param::WarpPerspective::BorderMode::CONSTANT;
        param.imode = param::WarpPerspective::InterpolationMode::LINEAR;

@@ -213,20 +211,18 @@ TEST_F(CUDA, WARP_PERSPECTIVE_FORWARD)
        param.border_val = 1.737;
        checker.set_param(param);
        // no invalid mem access is enough; no need to check value
        checker.set_expect_exec_fail([]() {});
        checker.exec({{1000, 2, 10, 11}, {1000, 3, 3}, {1000, 2, 12, 13}});
    }
}

TEST_F(CUDA, WARP_PERSPECTIVE_FORWARD_INTMAX) {
    require_compute_capability(6, 0);
    using Param = WarpPerspective::Param;
    Checker<WarpPerspectiveForward> checker(handle_cuda());
    WarpPerspectiveMatRNG rng;
    checker.set_rng(1, &rng);
    for (auto bmode : {WarpPerspective::BorderMode::REPLICATE}) {
        WarpPerspective::Param param;
        param.border_val = 0.3f;
        param.bmode = bmode;

@@ -235,27 +231,24 @@ TEST_F(CUDA, WARP_PERSPECTIVE_FORWARD_INTMAX)
        param.format = Param::Format::NHWC;
        checker.set_param(param);
        checker.set_epsilon(0.15).set_max_avg_error(4e-2);
        size_t n = (INT_MAX) / (512 * 512 * 3);
        checker.execs(
                {{n + 1, 512, 512, 3}, {n + 1, 3, 3}, {n + 1, 25, 25, 3}});
    }
}

TEST_F(CUDA, WARP_PERSPECTIVE_FORWARD_FP16) {
    using Param = WarpPerspective::Param;
    Checker<WarpPerspectiveForward> checker(handle_cuda());
    WarpPerspectiveMatRNG rng;
    checker.set_rng(1, &rng);
    checker.set_dtype(0, dtype::Float16())
            .set_dtype(1, dtype::Float32())
            .set_dtype(2, dtype::Float16());
    for (auto bmode : {WarpPerspective::BorderMode::WRAP,
                       WarpPerspective::BorderMode::REFLECT,
                       WarpPerspective::BorderMode::REPLICATE,
                       WarpPerspective::BorderMode::CONSTANT}) {
        WarpPerspective::Param param;
        param.border_val = 0.3f;
        param.bmode = bmode;

@@ -278,8 +271,7 @@ TEST_F(CUDA, WARP_PERSPECTIVE_FORWARD_FP16)
    // nan case
    NanMatRNG rng_nan;
    UniformFloatRNG rng_zero(0, 0);
    for (auto rng : std::vector<RNG*>{&rng_nan, &rng_zero}) {
        param::WarpPerspective param;
        param.bmode = param::WarpPerspective::BorderMode::CONSTANT;
        param.imode = param::WarpPerspective::InterpolationMode::LINEAR;

@@ -287,13 +279,12 @@ TEST_F(CUDA, WARP_PERSPECTIVE_FORWARD_FP16)
        param.border_val = 1.737;
        checker.set_param(param);
        // no invalid mem access is enough; no need to check value
        checker.set_expect_exec_fail([]() {});
        checker.exec({{1000, 2, 10, 11}, {1000, 3, 3}, {1000, 2, 12, 13}});
    }
}

TEST_F(CUDA, WARP_PERSPECTIVE_NCHW4) {
    using Param = WarpPerspective::Param;
    WarpPerspective::Param param;
    Checker<WarpPerspectiveForward> checker(handle_cuda());

@@ -348,31 +339,29 @@ TEST_F(CUDA, WARP_PERSPECTIVE_FORWARD_NCHW_INT8) {
    warp_perspective::run_int8_test(handle_cuda());
}

TEST_F(CUDA, WARP_PERSPECTIVE_BACKWARD_DATA) {
    Checker<WarpPerspectiveBackwardData> checker(handle_cuda());
    WarpPerspectiveMatRNG rng;
    checker.set_rng(0, &rng);
    for (int i = 0; i < 1; ++i) {
        for (auto bmode : {WarpPerspective::BorderMode::WRAP,
                           WarpPerspective::BorderMode::REFLECT,
                           WarpPerspective::BorderMode::REPLICATE,
                           WarpPerspective::BorderMode::CONSTANT}) {
            WarpPerspective::Param param;
            param.border_val = 0.3f;
            param.bmode = bmode;
            param.imode = param::WarpPerspective::InterpolationMode::LINEAR;
            checker.set_param(param);
            checker.execs({{2, 3, 3}, {2, 3, 11, 12}, {2, 3, 10, 11}});
            checker.execs(
                    {{22000, 3, 3}, {22000, 3, 11, 12}, {22000, 3, 10, 11}});
        }
    }

    // nan case
    NanMatRNG rng_nan;
    UniformFloatRNG rng_zero(0, 0);
    for (auto rng : std::vector<RNG*>{&rng_nan, &rng_zero}) {
        param::WarpPerspective param;
        param.bmode = param::WarpPerspective::BorderMode::CONSTANT;
        param.imode = param::WarpPerspective::InterpolationMode::LINEAR;

@@ -380,39 +369,54 @@ TEST_F(CUDA, WARP_PERSPECTIVE_BACKWARD_DATA)
        param.border_val = 1.737;
        checker.set_param(param);
        // no invalid mem access is enough; no need to check value
        checker.set_expect_exec_fail([]() {});
        checker.exec({{1000, 3, 3}, {1000, 2, 10, 11}, {1000, 2, 12, 13}});
    }

    {
        Checker<WarpPerspectiveBackwardData, WarpPerspectiveMatIdxProxy>
                checker(handle_cuda());
        constexpr int N_SRC = 5;
        UniformIntRNG mat_idx_rng{0, N_SRC - 1};
        checker.set_rng(0, &rng);
        checker.set_dtype(1, dtype::Int32());
        checker.set_rng(1, &mat_idx_rng);
        param::WarpPerspective param;
        param.bmode = param::WarpPerspective::BorderMode::REFLECT;
        param.imode = param::WarpPerspective::InterpolationMode::LINEAR;
        checker.set_param(param);
        checker.set_epsilon(1 + 1e-3);
        checker.execs({{2, 3, 3}, {2}, {2, 12, 11, 12}, {N_SRC, 12, 10, 11}});
        checker.execs(
                {{123, 3, 3}, {123}, {123, 56, 16, 15}, {N_SRC, 56, 17, 13}});
    }
}
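The loose set_epsilon(1 + 1e-3) in the block above is worth a remark: with duplicate entries in mat_idx, several diff batches accumulate into one grad batch, and on the GPU that accumulation order is presumably not fixed, so an exact comparison would be too strict. The standalone CPU sketch below models the bookkeeping these checks rely on (names are illustrative, not from the test suite):

#include <cstdio>
#include <vector>

// CPU model of backward-data batch bookkeeping under mat_idx:
// grad has n_src batches, mat/diff have n_mat, and duplicates add up.
int main() {
    const int n_src = 5, n_mat = 3;
    std::vector<int> mat_idx = {4, 0, 4};  // batches 0 and 2 both hit src 4
    std::vector<float> diff(n_mat, 1.f);   // one scalar per diff batch
    std::vector<float> grad(n_src, 0.f);
    for (int i = 0; i < n_mat; ++i)
        grad[mat_idx[i]] += diff[i];       // scatter-add, as the opr does
    for (int b = 0; b < n_src; ++b)
        std::printf("grad[%d] = %g\n", b, grad[b]);  // 1 0 0 0 2
    return 0;
}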
TEST_F(CUDA, WARP_PERSPECTIVE_BACKWARD_MAT) {
    Checker<WarpPerspectiveBackwardMat> checker(handle_cuda());
    WarpPerspectiveMatRNG rng;
    checker.set_rng(1, &rng);
    for (int i = 0; i < 1; ++i) {
        for (auto bmode : {WarpPerspective::BorderMode::WRAP,
                           WarpPerspective::BorderMode::REFLECT,
                           WarpPerspective::BorderMode::REPLICATE,
                           WarpPerspective::BorderMode::CONSTANT}) {
            WarpPerspective::Param param;
            param.border_val = 0.3f;
            param.imode = param::WarpPerspective::InterpolationMode::LINEAR;
            param.bmode = bmode;
            checker.set_param(param);
            checker.set_epsilon(1e-2);
            checker.execs({{1000, 3, 11, 12},
                           {1000, 3, 3},
                           {1000, 3, 10, 11},
                           {1000, 3, 3}});
        }
    }

    // nan case
    NanMatRNG rng_nan;
    UniformFloatRNG rng_zero(0, 0);
    for (auto rng : std::vector<RNG*>{&rng_nan, &rng_zero}) {
        param::WarpPerspective param;
        param.bmode = param::WarpPerspective::BorderMode::CONSTANT;
        param.imode = param::WarpPerspective::InterpolationMode::LINEAR;

@@ -420,26 +424,50 @@ TEST_F(CUDA, WARP_PERSPECTIVE_BACKWARD_MAT)
        param.border_val = 1.737;
        checker.set_param(param);
        // no invalid mem access is enough; no need to check value
        checker.set_expect_exec_fail([]() {});
        checker.exec({{1000, 2, 10, 11},
                      {1000, 3, 3},
                      {1000, 2, 12, 13},
                      {1000, 3, 3}});
    }
    {
        Checker<WarpPerspectiveBackwardMat, WarpPerspectiveMatIdxProxy>
                checker(handle_cuda());
        constexpr int N_SRC = 5;
        UniformIntRNG mat_idx_rng{0, N_SRC - 1};
        checker.set_rng(1, &rng);
        checker.set_dtype(2, dtype::Int32());
        checker.set_rng(2, &mat_idx_rng);
        param::WarpPerspective param;
        param.bmode = param::WarpPerspective::BorderMode::REFLECT;
        param.imode = param::WarpPerspective::InterpolationMode::LINEAR;
        checker.set_param(param);
        checker.set_epsilon(1 + 1e-3);
        checker.execs({{N_SRC, 12, 10, 11},
                       {2, 3, 3},
                       {2},
                       {2, 12, 11, 12},
                       {2, 3, 3}});
        checker.execs({{N_SRC, 56, 17, 13},
                       {123, 3, 3},
                       {123},
                       {123, 56, 16, 15},
                       {123, 3, 3}});
    }
}

TEST_F(CUDA, WARP_PERSPECTIVE_FORWARD_BFLOAT16) {
    using Param = WarpPerspective::Param;
    Checker<WarpPerspectiveForward> checker(handle_cuda());
    WarpPerspectiveMatRNG rng;
    checker.set_rng(1, &rng);
    checker.set_dtype(0, dtype::BFloat16())
            .set_dtype(1, dtype::Float32())
            .set_dtype(2, dtype::BFloat16());
    for (auto bmode : {WarpPerspective::BorderMode::WRAP,
                       WarpPerspective::BorderMode::REFLECT,
                       WarpPerspective::BorderMode::REPLICATE,
                       WarpPerspective::BorderMode::CONSTANT}) {
        WarpPerspective::Param param;
        param.border_val = 0.3f;
        param.bmode = bmode;

@@ -457,21 +485,19 @@ TEST_F(CUDA, WARP_PERSPECTIVE_FORWARD_BFLOAT16)
    }
}

TEST_F(CUDA, WARP_PERSPECTIVE_BACKWARD_DATA_BFLOAT16) {
    Checker<WarpPerspectiveBackwardData> checker(handle_cuda());
    WarpPerspectiveMatRNG rng;
    checker.set_rng(0, &rng)
            .set_epsilon(1e-1)
            .set_dtype(0, dtype::Float32())
            .set_dtype(1, dtype::BFloat16())
            .set_dtype(2, dtype::BFloat16());
    for (int i = 0; i < 1; ++i) {
        for (auto bmode : {WarpPerspective::BorderMode::WRAP,
                           WarpPerspective::BorderMode::REFLECT,
                           WarpPerspective::BorderMode::REPLICATE,
                           WarpPerspective::BorderMode::CONSTANT}) {
            WarpPerspective::Param param;
            param.border_val = 0.3f;
            param.bmode = bmode;

@@ -482,31 +508,29 @@ TEST_F(CUDA, WARP_PERSPECTIVE_BACKWARD_DATA_BFLOAT16)
    }
}

TEST_F(CUDA, WARP_PERSPECTIVE_BACKWARD_MAT_BFLOAT16) {
    Checker<WarpPerspectiveBackwardMat> checker(handle_cuda());
    WarpPerspectiveMatRNG rng;
    checker.set_rng(1, &rng)
            .set_epsilon(1e-2)
            .set_dtype(0, dtype::BFloat16())
            .set_dtype(1, dtype::Float32())
            .set_dtype(2, dtype::BFloat16())
            .set_dtype(3, dtype::Float32());
    for (int i = 0; i < 1; ++i) {
        for (auto bmode : {WarpPerspective::BorderMode::WRAP,
                           WarpPerspective::BorderMode::REFLECT,
                           WarpPerspective::BorderMode::REPLICATE,
                           WarpPerspective::BorderMode::CONSTANT}) {
            WarpPerspective::Param param;
            param.border_val = 0.3f;
            param.imode = param::WarpPerspective::InterpolationMode::LINEAR;
            param.bmode = bmode;
            checker.set_param(param);
            checker.execs({{1000, 3, 11, 12},
                           {1000, 3, 3},
                           {1000, 3, 10, 11},
                           {1000, 3, 3}});
        }
    }
}

@@ -549,14 +573,14 @@ TEST_F(CUDA, BENCHMARK_WARP_PERSPECTIVE_NCHW4) {
    benchmarker.set_dtype(0, dtype::QuantizedS8(1.0f));
    benchmarker.set_dtype(2, dtype::QuantizedS8(1.0f));
    run({TensorShape{1, 25, 256, 256, 4}, {1, 3, 3}, {1, 25, 256, 5120, 4}});
    run({TensorShape{1, 25, 256, 5120, 4}, {1, 3, 3}, {1, 25, 256, 256, 4}});
    run({TensorShape{1, 25, 256, 256, 4}, {1, 3, 3}, {1, 25, 512, 512, 4}});
    run({TensorShape{1, 25, 512, 512, 4}, {1, 3, 3}, {1, 25, 256, 256, 4}});
}
#endif

}  // namespace test
}  // namespace megdnn

// vim: syntax=cpp.doxygen
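All of these tests drive the same coordinate transform: each 3x3 matrix maps an output pixel to a source location by homogeneous division, and NanMatRNG above appears designed to push that division toward degenerate denominators (hence the CONSTANT-border "nan case" blocks). Below is a self-contained sketch of the mapping in the usual convention, with sampling and border handling omitted; treat the exact row layout as an assumption rather than megdnn's documented contract.

#include <cstdio>

// Homography: dst(oy, ox) samples src at (ix, iy); mat is row-major 3x3.
static void map_coord(const float mat[9], float ox, float oy, float& ix,
                      float& iy) {
    float w = mat[6] * ox + mat[7] * oy + mat[8];  // may approach 0 -> inf/nan
    ix = (mat[0] * ox + mat[1] * oy + mat[2]) / w;
    iy = (mat[3] * ox + mat[4] * oy + mat[5]) / w;
}

int main() {
    const float identity[9] = {1, 0, 0, 0, 1, 0, 0, 0, 1};
    float ix, iy;
    map_coord(identity, 3.f, 7.f, ix, iy);
    std::printf("(3, 7) -> (%g, %g)\n", ix, iy);  // identity: (3, 7)
    return 0;
}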
src/opr/impl/imgproc.cpp
@@ -6,18 +6,18 @@
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or
* implied.
*/

#include "megbrain/opr/imgproc.h"
#include "./internal/megdnn_opr_wrapper.inl"
#include "megbrain/graph/grad_impl.h"
#include "megbrain/opr/utility.h"

using namespace mgb;
using namespace opr;

/* ======================= WarpPerspectiveForward ======================= */
MGB_DYN_TYPE_OBJ_FINAL_IMPL(WarpPerspectiveForward);

@@ -54,8 +54,7 @@ void WarpPerspectiveForward::add_input_layout_constraint() {
}

void WarpPerspectiveForward::outshape_by_symvar_do_get_output_shape(
        TensorShape& dest, const ShapeInferInfo& shpinfo) {
    TensorShape oshp2d;
    cg::copy_tensor_value_to_shape(oshp2d, *shpinfo.shpval_inp_val.at(0));
    auto imgshp = shpinfo.shape_inp_shp.at(0),

@@ -112,8 +111,8 @@ void WarpPerspectiveForward::scn_do_execute() {
}

size_t WarpPerspectiveForward::get_workspace_size_bytes(
        const TensorShapeArray& input_shapes,
        const TensorShapeArray& output_shapes) const {
    if (input().size() == 3) {
        return intl::_MegDNNOprMethInvoker<2, 1>::get_workspace_in_bytes(
                megdnn_opr(), this, input_shapes, output_shapes);

@@ -129,19 +128,34 @@ void WarpPerspectiveForward::record_execute_deps(ExecDependencyArray& deps) {

#ifdef MGB_ENABLE_GRAD
MGB_IMPL_OPR_GRAD(WarpPerspectiveForward) {
    if (opr.input().size() == 4) {
        if (wrt_idx == 0) {
            // wrt data
            SymbolVar grad = WarpPerspectiveBackwardData::make(
                    opr.input(1), opr.input(2), out_grad[0], opr.input(0),
                    opr.param());
            return grad.node();
        } else if (wrt_idx == 1) {
            // wrt mat
            SymbolVar grad = WarpPerspectiveBackwardMat::make(
                    opr.input(0), opr.input(1), opr.input(2), out_grad[0],
                    opr.param());
            return grad.node();
        } else {
            return InvalidGrad::make(opr, wrt_idx);
        }
    }
    mgb_assert(opr.input().size() == 3);
    if (wrt_idx == 0) {
        // wrt data
        SymbolVar grad = WarpPerspectiveBackwardData::make(
                opr.input(1), out_grad[0], opr.input(0), opr.param());
        return grad.node();
    } else if (wrt_idx == 1) {
        // wrt mat
        SymbolVar grad = WarpPerspectiveBackwardMat::make(
                opr.input(0), opr.input(1), out_grad[0], opr.param());
        return grad.node();
    } else
        return InvalidGrad::make(opr, wrt_idx);

@@ -151,14 +165,116 @@ MGB_IMPL_OPR_GRAD(WarpPerspectiveForward) {

/* ====================== WarpPerspectiveBackwardData ====================== */
MGB_DYN_TYPE_OBJ_FINAL_IMPL(WarpPerspectiveBackwardData);

WarpPerspectiveBackwardData::WarpPerspectiveBackwardData(
        VarNode* mat, VarNode* out_diff, VarNode* in_for_shape,
        const Param& param, const OperatorNodeConfig& config)
        : Super(OperatorNodeBaseCtorParam{mat->owner_graph(), config,
                                          "warp_perspective_bwd_data", {mat}},
                2, false) {
    init_megdnn_opr(*this, param);
    add_input({mat, out_diff, in_for_shape});
    intl::MegDNNOprInitPostCtor<WarpPerspectiveBackwardData>::apply(*this);
}

WarpPerspectiveBackwardData::WarpPerspectiveBackwardData(
        VarNode* mat, VarNode* mat_idx, VarNode* out_diff,
        VarNode* in_for_shape, const Param& param,
        const OperatorNodeConfig& config)
        : Super(OperatorNodeBaseCtorParam{mat->owner_graph(), config,
                                          "warp_perspective_bwd_data",
                                          {mat, mat_idx}},
                3, false) {
    init_megdnn_opr(*this, param);
    add_input({mat, mat_idx, out_diff, in_for_shape});
    intl::MegDNNOprInitPostCtor<WarpPerspectiveBackwardData>::apply(*this);
}

SymbolVar WarpPerspectiveBackwardData::make(SymbolVar i0, SymbolVar i1,
                                            SymbolVar i2, const Param& param,
                                            const OperatorNodeConfig& config) {
    intl::MegDNNOprInitInputsModifier<WarpPerspectiveBackwardData>::apply(
            param, {&i0, &i1, &i2});
    return i0.insert_single_output_opr<WarpPerspectiveBackwardData>(
            i0.node(), i1.node(), i2.node(), param, config);
}

SymbolVar WarpPerspectiveBackwardData::make(SymbolVar i0, SymbolVar i1,
                                            SymbolVar i2, SymbolVar i3,
                                            const Param& param,
                                            const OperatorNodeConfig& config) {
    intl::MegDNNOprInitInputsModifier<WarpPerspectiveBackwardData>::apply(
            param, {&i0, &i1, &i2, &i3});
    return i0.insert_single_output_opr<WarpPerspectiveBackwardData>(
            i0.node(), i1.node(), i2.node(), i3.node(), param, config);
}

void WarpPerspectiveBackwardData::scn_do_execute() {
    if (input().size() == 3) {
        megdnn_opr()->exec(input(0)->dev_tensor().as_megdnn(),
                           input(1)->dev_tensor().as_megdnn(),
                           output(0)->dev_tensor().as_megdnn(),
                           intl::get_megdnn_workspace_from_var(output(1)));
    } else {
        mgb_assert(input().size() == 4);
        megdnn_opr()->exec(input(0)->dev_tensor().as_megdnn(),
                           input(1)->dev_tensor().as_megdnn(),
                           input(2)->dev_tensor().as_megdnn(),
                           output(0)->dev_tensor().as_megdnn(),
                           intl::get_megdnn_workspace_from_var(output(1)));
    }
}

/* ====================== WarpPerspectiveBackwardMat ====================== */
MGB_DYN_TYPE_OBJ_FINAL_IMPL(WarpPerspectiveBackwardMat);

WarpPerspectiveBackwardMat::WarpPerspectiveBackwardMat(
        VarNode* src, VarNode* mat, VarNode* mat_idx, VarNode* out_diff,
        const Param& param, const OperatorNodeConfig& config)
        : Super(OperatorNodeBaseCtorParam{src->owner_graph(), config,
                                          "warp_perspective_bwd_mat",
                                          {src, mat, mat_idx}},
                1, true) {
    init_megdnn_opr(*this, param);
    if (mat_idx) {
        add_input({src, mat, mat_idx, out_diff});
    } else {
        add_input({src, mat, out_diff});
    }
    intl::MegDNNOprInitPostCtor<WarpPerspectiveBackwardMat>::apply(*this);
}

void WarpPerspectiveBackwardMat::scn_do_execute() {
    if (input().size() == 3) {
        megdnn_opr()->exec(input(0)->dev_tensor().as_megdnn(),
                           input(1)->dev_tensor().as_megdnn(),
                           input(2)->dev_tensor().as_megdnn(),
                           output(0)->dev_tensor().as_megdnn(),
                           intl::get_megdnn_workspace_from_var(output(1)));
    } else {
        mgb_assert(input().size() == 4);
        megdnn_opr()->exec(input(0)->dev_tensor().as_megdnn(),
                           input(1)->dev_tensor().as_megdnn(),
                           input(2)->dev_tensor().as_megdnn(),
                           input(3)->dev_tensor().as_megdnn(),
                           output(0)->dev_tensor().as_megdnn(),
                           intl::get_megdnn_workspace_from_var(output(1)));
    }
}

SymbolVar WarpPerspectiveBackwardMat::make(SymbolVar i0, SymbolVar i1,
                                           SymbolVar i2, SymbolVar i3,
                                           const Param& param,
                                           const OperatorNodeConfig& config) {
    intl::MegDNNOprInitInputsModifier<WarpPerspectiveBackwardMat>::apply(
            param, {&i0, &i1, &i2, &i3});
    return i0.insert_single_output_opr<WarpPerspectiveBackwardMat>(
            i0.node(), i1.node(), i2.node(), i3.node(), param, config);
}

/* ====================== Cv operator ====================== */

@@ -188,8 +304,7 @@ void ResizeForward::add_input_layout_constraint() {
}

void ResizeForward::outshape_by_symvar_do_get_output_shape(
        TensorShape& dest, const ShapeInferInfo& shpinfo) {
    TensorShape oshp2d;
    cg::copy_tensor_value_to_shape(oshp2d, *shpinfo.shpval_inp_val.at(0));
    auto imgshp = shpinfo.shape_inp_shp.at(0);

@@ -232,7 +347,7 @@ size_t ResizeForward::get_workspace_size_bytes(
            megdnn_opr(), this, input_shapes, output_shapes);
}

void ResizeForward::record_execute_deps(ExecDependencyArray& deps) {
    record_megdnn_opr(deps);
}

@@ -268,19 +383,17 @@ void WarpAffineForward::add_input_layout_constraint() {
}

void WarpAffineForward::outshape_by_symvar_do_get_output_shape(
        TensorShape& dest, const ShapeInferInfo& shpinfo) {
    TensorShape oshp2d;
    cg::copy_tensor_value_to_shape(oshp2d, *shpinfo.shpval_inp_val.at(0));
    auto imgshp = shpinfo.shape_inp_shp.at(0),
         matshp = shpinfo.shape_inp_shp.at(1);
    mgb_assert((imgshp.ndim == 4 || imgshp.ndim == 5) && matshp.ndim == 3 &&
                       oshp2d.ndim == 2 &&
                       matshp.shape[0] == imgshp.shape[0] &&
                       matshp.shape[1] == 2 && matshp.shape[2] == 3,
               "shape mismatch for WarpAffineForward: img=%s mat=%s out2d=%s",
               imgshp.to_string().c_str(), matshp.to_string().c_str(),
               oshp2d.to_string().c_str());

    size_t height_idx = 0;
    if (param().format == Param::Format::NCHW) {

@@ -305,18 +418,19 @@ void WarpAffineForward::init_output_static_infer_desc() {
}

void WarpAffineForward::scn_do_execute() {
    intl::MegDNNOprMethInvoker<megdnn::WarpAffine>::exec(megdnn_opr(), this);
}

size_t WarpAffineForward::get_workspace_size_bytes(
        const TensorShapeArray& input_shapes,
        const TensorShapeArray& output_shapes) const {
    return intl::MegDNNOprMethInvoker<
            megdnn::WarpAffine>::get_workspace_in_bytes(megdnn_opr(), this,
                                                        input_shapes,
                                                        output_shapes);
}

void WarpAffineForward::record_execute_deps(ExecDependencyArray& deps) {
    record_megdnn_opr(deps);
}

@@ -325,7 +439,7 @@ void WarpAffineForward::record_execute_deps(ExecDependencyArray &deps) {
MGB_DYN_TYPE_OBJ_FINAL_IMPL(RemapForward);
MEGDNN_OPR_INIT2(RemapForward, "remap")

void RemapForward::init_output_dtype() {
    output(0)->dtype(input(0)->dtype());
}
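With the grad rule above, a forward node built with mat_idx now differentiates through the new 4-input backward operators, and no gradient is defined for mat_idx itself (any wrt_idx past 1 returns InvalidGrad). For reference, this is roughly what the 4-input path constructs via the make() overloads added here; `src`, `mat`, `mat_idx`, `out_grad` and `fwd_param` stand for pre-built values:

// Gradient wrt the source image: shaped like src, gathered through mat_idx.
SymbolVar d_src = opr::WarpPerspectiveBackwardData::make(
        mat,        // (N, 3, 3): one transform per output batch
        mat_idx,    // (N,) int32: output batch -> source batch
        out_grad,   // gradient wrt the forward output
        src,        // supplies the shape of the result
        fwd_param);
// Gradient wrt the matrices: one 3x3 per entry of mat_idx.
SymbolVar d_mat = opr::WarpPerspectiveBackwardMat::make(
        src, mat, mat_idx, out_grad, fwd_param);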
src/opr/impl/imgproc.sereg.h
@@ -37,13 +37,59 @@ namespace serialization {
        }
    }
};

template <>
struct OprMaker<opr::WarpPerspectiveBackwardData, 0> {
    using Opr = opr::WarpPerspectiveBackwardData;
    using Param = Opr::Param;
    static cg::OperatorNodeBase* make(const Param& param,
                                      const cg::VarNodeArray& inputs,
                                      ComputingGraph& graph,
                                      const OperatorNodeConfig& config) {
        MGB_MARK_USED_VAR(graph);
        if (inputs.size() == 3) {
            return Opr::make(inputs[0], inputs[1], inputs[2], param, config)
                    .node()
                    ->owner_opr();
        } else {
            mgb_assert(inputs.size() == 4);
            return Opr::make(inputs[0], inputs[1], inputs[2], inputs[3],
                             param, config)
                    .node()
                    ->owner_opr();
        }
    }
};

template <>
struct OprMaker<opr::WarpPerspectiveBackwardMat, 0> {
    using Opr = opr::WarpPerspectiveBackwardMat;
    using Param = Opr::Param;
    static cg::OperatorNodeBase* make(const Param& param,
                                      const cg::VarNodeArray& inputs,
                                      ComputingGraph& graph,
                                      const OperatorNodeConfig& config) {
        MGB_MARK_USED_VAR(graph);
        if (inputs.size() == 3) {
            return Opr::make(inputs[0], inputs[1], inputs[2], param, config)
                    .node()
                    ->owner_opr();
        } else {
            mgb_assert(inputs.size() == 4);
            return Opr::make(inputs[0], inputs[1], inputs[2], inputs[3],
                             param, config)
                    .node()
                    ->owner_opr();
        }
    }
};

}  // namespace serialization

namespace opr {
MGB_SEREG_OPR(WarpPerspective, 0);
MGB_SEREG_OPR(WarpPerspectiveBackwardData, 0);
MGB_SEREG_OPR(WarpPerspectiveBackwardMat, 0);
MGB_SEREG_OPR(Rotate, 1);
MGB_SEREG_OPR(CvtColor, 1);
src/opr/include/megbrain/opr/imgproc.h
...
@@ -6,7 +6,8 @@
...
@@ -6,7 +6,8 @@
*
*
* Unless required by applicable law or agreed to in writing,
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or
* implied.
*/
*/
#pragma once
#pragma once
...
@@ -33,77 +34,93 @@ namespace opr {
 * Impl note: this operator might have 3 or 4 inputs depending on whether
 * \p mat_idx is given
 */
MGB_DEFINE_OPR_CLASS(WarpPerspectiveForward,
        intl::WorkspaceSizeInfer<intl::OutshapeBySymvarSCNOpr<
                mixin::MegDNNOprHolderImpl<megdnn::WarpPerspectiveForward>>>) // {
public:
    WarpPerspectiveForward(VarNode* in_tensor, VarNode* mat, VarNode* mat_idx,
                           VarNode* out_shape, const Param& param,
                           const OperatorNodeConfig& config);

    static SymbolVar make(SymbolVar in_tensor, SymbolVar mat, SymbolVar mat_idx,
                          SymbolVar out_shape, const Param& param = {},
                          const OperatorNodeConfig& config = {});

    static SymbolVar make(SymbolVar in_tensor, SymbolVar mat,
                          SymbolVar out_shape, const Param& param = {},
                          const OperatorNodeConfig& config = {}) {
        return make(in_tensor, mat, SymbolVar{}, out_shape, param, config);
    }

    static SymbolVar make(SymbolVar in_tensor, SymbolVar mat,
                          const TensorShape& out_shape, const Param& param = {},
                          const OperatorNodeConfig& config = {}) {
        return make(in_tensor, mat,
                    cg::var_from_tensor_shape(in_tensor, out_shape), param,
                    config);
    }

private:
    void init_output_dtype() override;
    void add_input_layout_constraint() override;
    void init_output_static_infer_desc() override;
    void outshape_by_symvar_do_get_output_shape(
            TensorShape& dest, const ShapeInferInfo& shpinfo) override;
    void scn_do_execute() override;
    size_t get_workspace_size_bytes(
            const TensorShapeArray& input_shapes,
            const TensorShapeArray& output_shapes) const override;
    void record_execute_deps(ExecDependencyArray& deps) override;
};
using WarpPerspective = WarpPerspectiveForward;

MGB_DEFINE_OPR_CLASS(WarpPerspectiveBackwardData,
        intl::MegDNNOprWrapperBwd<megdnn::WarpPerspectiveBackwardData>) // {
public:
    WarpPerspectiveBackwardData(VarNode* mat, VarNode* out_diff,
                                VarNode* in_for_shape, const Param& param,
                                const OperatorNodeConfig& config);

    WarpPerspectiveBackwardData(VarNode* mat, VarNode* mat_idx,
                                VarNode* out_diff, VarNode* in_for_shape,
                                const Param& param,
                                const OperatorNodeConfig& config);

    static SymbolVar make(SymbolVar mat, SymbolVar out_diff,
                          SymbolVar in_for_shape, const Param& param = {},
                          const OperatorNodeConfig& config = {});

    static SymbolVar make(SymbolVar mat, SymbolVar mat_idx, SymbolVar out_diff,
                          SymbolVar in_for_shape, const Param& param = {},
                          const OperatorNodeConfig& config = {});

    void scn_do_execute() override;
};

MGB_DEFINE_OPR_CLASS(WarpPerspectiveBackwardMat,
        intl::MegDNNOprWrapperBwd<megdnn::WarpPerspectiveBackwardMat>) // {
public:
    WarpPerspectiveBackwardMat(VarNode* src, VarNode* mat, VarNode* mat_idx,
                               VarNode* out_diff, const Param& param,
                               const OperatorNodeConfig& config);

    static SymbolVar make(SymbolVar src, SymbolVar mat, SymbolVar out_diff,
                          const Param& param = {},
                          const OperatorNodeConfig& config = {}) {
        return make(src, mat, {}, out_diff, param, config);
    }

    static SymbolVar make(SymbolVar src, SymbolVar mat, SymbolVar mat_idx,
                          SymbolVar out_diff, const Param& param = {},
                          const OperatorNodeConfig& config = {});

    void scn_do_execute() override;
};
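The optional mat_idx input described in the impl note above is the heart of this change: it lets many output images be warped out of one shared source batch. A hedged usage sketch of the two make overloads (the SymbolVars src, mat, mat_idx, out_shape and the concrete shapes are illustrative assumptions, not taken from this commit):

// Hedged sketch, not code from this commit: `src`, `mat`, `mat_idx` and
// `out_shape` are assumed to be SymbolVars already built on the same graph.

// Plain batched warp: one 3x3 matrix per source image.
auto dst = opr::WarpPerspective::make(src, mat, TensorShape{32, 32});

// Shared-source warp: mat carries N_MAT matrices and mat_idx (int32, shape
// {N_MAT}) selects the source image each matrix samples, so the output
// batch size may differ from the source batch size.
auto dst2 = opr::WarpPerspective::make(src, mat, mat_idx, out_shape);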
/* ============================= shape infer ============================== */
//! param: src, dst
...
@@ -116,68 +133,67 @@ using CvtColor = CvtColorForward;
using GaussianBlur = GaussianBlurForward;

/* ============================= user set shape =========================== */
MGB_DEFINE_OPR_CLASS(ResizeForward,
        intl::WorkspaceSizeInfer<intl::OutshapeBySymvarSCNOpr<
                mixin::MegDNNOprHolderImpl<megdnn::ResizeForward>>>) // {
public:
    ResizeForward(VarNode* in_tensor, VarNode* out_shape, const Param& param,
                  const OperatorNodeConfig& config);

    static SymbolVar make(SymbolVar in_tensor, SymbolVar out_shape,
                          const Param& param = {},
                          const OperatorNodeConfig& config = {});

    static SymbolVar make(SymbolVar in_tensor, const TensorShape& out_shape,
                          const Param& param = {},
                          const OperatorNodeConfig& config = {}) {
        return make(in_tensor,
                    cg::var_from_tensor_shape(in_tensor, out_shape), param,
                    config);
    }

private:
    void init_output_dtype() override;
    void add_input_layout_constraint() override;
    void init_output_static_infer_desc() override;
    void outshape_by_symvar_do_get_output_shape(
            TensorShape& dest, const ShapeInferInfo& shpinfo) override;
    void scn_do_execute() override;
    size_t get_workspace_size_bytes(
            const TensorShapeArray& input_shapes,
            const TensorShapeArray& output_shapes) const override;
    void record_execute_deps(ExecDependencyArray& deps) override;
};
using Resize = ResizeForward;

MGB_DEFINE_OPR_CLASS(ResizeBackward,
        intl::MegDNNOprWrapperBwd<megdnn::ResizeBackward>) // {
public:
    ResizeBackward(VarNode* out_diff, VarNode* in_for_shape,
                   const Param& param, const OperatorNodeConfig& config);

    static SymbolVar make(SymbolVar out_diff, SymbolVar in_for_shape,
                          const Param& param = {},
                          const OperatorNodeConfig& config = {});
};

MGB_DEFINE_OPR_CLASS(RemapForward,
        intl::MegDNNOprWrapperFwd<megdnn::RemapForward>) // {
public:
    RemapForward(VarNode* in_tensor, VarNode* map, const Param& param,
                 const OperatorNodeConfig& config);

    static SymbolVar make(SymbolVar in_tensor, SymbolVar map,
                          const Param& param = {},
                          const OperatorNodeConfig& config = {});

private:
    void init_output_dtype() override;
};
using Remap = RemapForward;
/*!
...
@@ -191,47 +207,42 @@ using Remap = RemapForward;
 * Input mat shape: batch, 2, 3; note that the mat is used to translate output
 * coordinate onto input coordinate, so it is not inversed.
 */
MGB_DEFINE_OPR_CLASS(WarpAffineForward,
        intl::WorkspaceSizeInfer<intl::OutshapeBySymvarSCNOpr<
                mixin::MegDNNOprHolderImpl<megdnn::WarpAffineForward>>>) // {
public:
    WarpAffineForward(VarNode* in_tensor, VarNode* mat, VarNode* out_shape,
                      const Param& param, const OperatorNodeConfig& config);

    static SymbolVar make(SymbolVar in_tensor, SymbolVar mat,
                          SymbolVar out_shape, const Param& param = {},
                          const OperatorNodeConfig& config = {});

    static SymbolVar make(SymbolVar in_tensor, SymbolVar mat,
                          const TensorShape& out_shape, const Param& param = {},
                          const OperatorNodeConfig& config = {}) {
        return make(in_tensor, mat,
                    cg::var_from_tensor_shape(in_tensor, out_shape), param,
                    config);
    }

private:
    void init_output_dtype() override;
    void add_input_layout_constraint() override;
    void init_output_static_infer_desc() override;
    void outshape_by_symvar_do_get_output_shape(
            TensorShape& dest, const ShapeInferInfo& shpinfo) override;
    void scn_do_execute() override;
    size_t get_workspace_size_bytes(
            const TensorShapeArray& input_shapes,
            const TensorShapeArray& output_shapes) const override;
    void record_execute_deps(ExecDependencyArray& deps) override;
};
using WarpAffine = WarpAffineForward;

}  // namespace opr
}  // namespace mgb

// vim: syntax=cpp.doxygen foldmethod=marker foldmarker=f{{{,f}}}
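The doc comment on WarpAffineForward above, saying that mat translates output coordinates onto input coordinates (and so is not inversed), deserves a small worked example. A hedged sketch, assuming the usual batch x 2 x 3 row-major layout; none of these values come from the diff:

// To render a 2x *upscale*, each output pixel (x, y) must sample the input
// at (0.5 * x, 0.5 * y): the matrix encodes output->input, i.e. the inverse
// of the intuitive "scale by 2" transform.
float mat[1][2][3] = {{
        {0.5f, 0.0f, 0.0f},  // in_x = 0.5 * out_x + 0.0 * out_y + 0.0
        {0.0f, 0.5f, 0.0f},  // in_y = 0.0 * out_x + 0.5 * out_y + 0.0
}};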
src/opr/test/imgproc.cpp
View file @ c7b6ef35
...
@@ -216,7 +216,10 @@ TEST(TestOprImgproc, WarpPerspectiveWithMatIdx) {
             .set_input_generator(1, gen_mat)
             .set_input_generator(2, gen_mat_idx)
             .set_input_dtype(2, dtype::Int32{})
+            /*! it's hard to make the grad check succeed,
+              since the cuda implementation sums the grad */
             .disable_grad_check()
+            .set_input_allow_grad(2, false)
             .run({TensorShape{N_SRC, C, 4, 5}, {N_MAT, 3, 3}, {N_MAT}})
             .run({TensorShape{N_SRC, C, 6, 5}, {N_MAT, 3, 3}, {N_MAT}})
             .run({TensorShape{N_SRC, C, 22, 19}, {N_MAT, 3, 3}, {N_MAT}});
...
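The comment added to the test names the underlying property: when several matrices reference the same source image through mat_idx, backward-data sums all of their gradient contributions into that image, which makes a per-element numeric grad check unstable; mat_idx itself is an integer index with no gradient at all, hence set_input_allow_grad(2, false). A standalone toy model of the accumulation (hedged: scalar "images" and made-up numbers, not the real kernel):

#include <cstdio>
#include <vector>

int main() {
    // Two source "images" (scalars here); outputs 0 and 1 both read src[0].
    std::vector<float> grad_src(2, 0.f);
    std::vector<int> mat_idx = {0, 0, 1};
    std::vector<float> out_diff = {1.f, 2.f, 4.f};
    // Backward-data scatter-adds each output gradient into its source slot,
    // so a source shared by several matrices receives the *sum*.
    for (size_t i = 0; i < mat_idx.size(); ++i)
        grad_src[mat_idx[i]] += out_diff[i];
    std::printf("grad_src = {%g, %g}\n", grad_src[0], grad_src[1]);  // {3, 4}
    return 0;
}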