Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
MegEngine 天元
MegEngine
提交
78fff72a
MegEngine
项目概览
MegEngine 天元
/
MegEngine
1 年多 前同步成功
通知
411
Star
4707
Fork
583
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
DevOps
流水线
流水线任务
计划
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
MegEngine
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
DevOps
DevOps
流水线
流水线任务
计划
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
流水线任务
提交
Issue看板
提交
78fff72a
编写于
11月 18, 2020
作者:
M
Megvii Engine Team
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
feat(dnn): add param_pack for rocm
GitOrigin-RevId: 2180504c714a52974f9d96fe49597415a9f1c8fe
上级
6de3e4ba
变更
6
隐藏空白更改
内联
并排
Showing
6 changed files
with
355 additions
and
0 deletions
+355
-0
dnn/src/rocm/handle.cpp
dnn/src/rocm/handle.cpp
+2
-0
dnn/src/rocm/param_pack/opr_impl.cpp
dnn/src/rocm/param_pack/opr_impl.cpp
+65
-0
dnn/src/rocm/param_pack/opr_impl.h
dnn/src/rocm/param_pack/opr_impl.h
+35
-0
dnn/src/rocm/param_pack/param_pack.cpp.hip
dnn/src/rocm/param_pack/param_pack.cpp.hip
+67
-0
dnn/src/rocm/param_pack/param_pack.h.hip
dnn/src/rocm/param_pack/param_pack.h.hip
+30
-0
dnn/test/rocm/param_pack.cpp
dnn/test/rocm/param_pack.cpp
+156
-0
未找到文件。
dnn/src/rocm/handle.cpp
浏览文件 @
78fff72a
...
...
@@ -36,6 +36,7 @@
#include "src/rocm/argmxx/opr_impl.h"
#include "src/rocm/sleep/opr_impl.h"
#include "src/rocm/batch_normalization/opr_impl.h"
#include "src/rocm/param_pack/opr_impl.h"
#include <miopen/version.h>
#include <hip/hip_version.h>
...
...
@@ -174,6 +175,7 @@ MEGDNN_SPECIALIZE_CREATE_OPERATOR(ArgminForward);
// Specialized operator factories for the ROCm handle.
MEGDNN_SPECIALIZE_CREATE_OPERATOR(SleepForward);
MEGDNN_SPECIALIZE_CREATE_OPERATOR(BNForward);
MEGDNN_SPECIALIZE_CREATE_OPERATOR(BNBackward);
// Added in this commit: dispatch ParamPackConcat to the ROCm implementation.
MEGDNN_SPECIALIZE_CREATE_OPERATOR(ParamPackConcat);
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wpragmas"
...
...
dnn/src/rocm/param_pack/opr_impl.cpp
0 → 100644
浏览文件 @
78fff72a
/**
* \file dnn/src/rocm/param_pack/opr_impl.cpp
* MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
*
* Copyright (c) 2014-2020 Megvii Inc. All rights reserved.
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
*/
#include "hcc_detail/hcc_defs_prologue.h"
#include "src/rocm/param_pack/opr_impl.h"
#include "src/rocm/param_pack/param_pack.h.hip"
#include "src/rocm/utils.h"
namespace
megdnn
{
namespace
rocm
{
// The workspace holds a device-side copy of the host pointer table: one
// pointer-sized slot per source tensor.
// NOTE(review): slots are sized with sizeof(size_t) while they actually store
// const T* values — equal on common LP64/LLP64 targets, but confirm.
size_t ParamPackConcatImpl::get_workspace_in_bytes(
        const TensorShapeArray& srcs, const TensorShape&, const TensorShape&) {
    const size_t nr_slots = srcs.size();
    return nr_slots * sizeof(size_t);
}
// Copies the host-side table of source pointers into the device workspace,
// then launches the concat kernel over the flattened destination buffer.
template <typename T>
void ParamPackConcatImpl::exec_internal(_megdnn_tensor_in srcs,
                                        _megdnn_tensor_in offsets,
                                        _megdnn_tensor_out dst,
                                        _megdnn_workspace workspace) {
    const size_t nr_srcs = srcs.layout.shape[0];
    const size_t total_elems = dst.layout.total_nr_elems();
    auto stream = hip_stream(this->handle());

    // `srcs` carries an array of host-resident pointers to the parameters.
    auto host_ptrs = static_cast<const T**>(srcs.raw_ptr);
    megdnn_assert_internal(host_ptrs);

    // The workspace doubles as device storage for that pointer table.
    auto dev_ptrs = reinterpret_cast<const T**>(workspace.raw_ptr);
    auto offsets_dev = offsets.ptr<int32_t>();

    hip_check(hipMemcpyAsync(dev_ptrs, host_ptrs, sizeof(const T*) * nr_srcs,
                             hipMemcpyHostToDevice, stream));
    param_pack::concat_proxy<T>(dev_ptrs, dst.ptr<T>(), nr_srcs, total_elems,
                                offsets_dev, stream);
}
// Entry point: validates layouts, then dispatches to the typed implementation
// based on the destination dtype.
void ParamPackConcatImpl::exec(_megdnn_tensor_in srcs, _megdnn_tensor_in offsets,
                               _megdnn_tensor_out dst,
                               _megdnn_workspace workspace) {
    check_exec(dst.layout, offsets.layout, srcs.layout);
    // For each computing dtype, expand to a runtime dtype test that forwards
    // to exec_internal<ctype> and returns on a match.
#define cb(DType) \
    if (dst.layout.dtype == DType()) { \
        using ctype = typename DTypeTrait<DType>::ctype; \
        exec_internal<ctype>(srcs, offsets, dst, workspace); \
        return; \
    }
    MEGDNN_FOREACH_COMPUTING_DTYPE(cb)
    // No dtype matched: unsupported type for this operator.
    megdnn_throw("bad type");
#undef cb
}
}
// namespace rocm
}
// namespace megdnn
dnn/src/rocm/param_pack/opr_impl.h
0 → 100644
浏览文件 @
78fff72a
/**
* \file dnn/src/rocm/param_pack/opr_impl.h
* MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
*
* Copyright (c) 2014-2020 Megvii Inc. All rights reserved.
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
*/
#pragma once
#include "megdnn/oprs.h"
namespace
megdnn
{
namespace
rocm
{
// ROCm implementation of ParamPackConcat: packs a set of parameter tensors
// into one contiguous device buffer.
class ParamPackConcatImpl final : public ParamPackConcat {
public:
    using ParamPackConcat::ParamPackConcat;
    // srcs: tensor wrapping a host array of device pointers (one per source);
    // table: offset table (presumably interleaved [start, end) int32 pairs —
    // confirm against the .cpp); dst: the packed output tensor.
    void exec(_megdnn_tensor_in srcs, _megdnn_tensor_in table,
              _megdnn_tensor_out dst, _megdnn_workspace workspace) override;
    // Workspace size: one pointer-sized slot per source tensor.
    size_t get_workspace_in_bytes(const TensorShapeArray& srcs,
                                  const TensorShape& table,
                                  const TensorShape& dst) override;

private:
    // Typed worker invoked by exec() after dtype dispatch.
    template <typename T>
    void exec_internal(_megdnn_tensor_in srcs, _megdnn_tensor_in table,
                       _megdnn_tensor_out dst, _megdnn_workspace workspace);
};
}
// namespace rocm
}
// namespace megdnn
dnn/src/rocm/param_pack/param_pack.cpp.hip
0 → 100644
浏览文件 @
78fff72a
/**
* \file dnn/src/rocm/param_pack/param_pack.cpp.hip
* MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
*
* Copyright (c) 2014-2020 Megvii Inc. All rights reserved.
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
*/
#include "hcc_detail/hcc_defs_prologue.h"
#include "src/rocm/param_pack/param_pack.h.hip"
#include "megdnn/dtype.h"
#include "src/rocm/utils.h.hip"
namespace
megdnn
{
namespace
rocm
{
namespace
param_pack
{
// One thread per element of the packed output. `offsets` holds interleaved
// [start, end) element offsets, i.e. 2 * srcs_size entries; gaps between
// segments are alignment padding.
template <typename T>
__global__ void concat_kernel(const T** srcs, T* dst, const int32_t* offsets,
                              size_t srcs_size, size_t total_size) {
    size_t addr = threadIdx.x + blockIdx.x * blockDim.x;
    if (addr < total_size) {
        // Binary search for the smallest l with offsets[2*l + 1] > addr,
        // i.e. the segment whose end lies past this output index.
        size_t l = 0, r = srcs_size - 1, mid;
        while (l < r) {
            mid = (l + r) >> 1;
            if (offsets[(mid << 1) + 1] > addr) {
                r = mid;
            } else {
                l = mid + 1;
            }
        }
        if (addr < offsets[l << 1])
            // addr falls in the padding before segment l: zero-fill.
            dst[addr] = 0;
        else
            // Copy from source l at its local offset.
            dst[addr] = srcs[l][addr - offsets[l << 1]];
    }
}
template
<
typename
T
>
void
concat_proxy
(
const
T
**
srcs
,
T
*
dst
,
size_t
srcs_size
,
size_t
total_size
,
const
int32_t
*
offsets
,
hipStream_t
stream
)
{
size_t
NR_BLOCKS
=
DIVUP
(
total_size
,
NR_THREADS
);
hipLaunchKernelGGL
(
concat_kernel
,
NR_BLOCKS
,
NR_THREADS
,
0
,
stream
,
srcs
,
dst
,
offsets
,
srcs_size
,
total_size
);
after_kernel_launch
();
}
// Explicit instantiations of concat_proxy for every computing dtype, so the
// .cpp translation unit can link against this HIP translation unit.
#define INST(T) \
    template void concat_proxy<T>(const T**, T*, size_t, size_t, \
                                  const int32_t*, \
                                  hipStream_t);
#define cb(DType) INST(typename DTypeTrait<DType>::ctype)
MEGDNN_FOREACH_COMPUTING_DTYPE(cb)
#undef cb
#undef INST
}
// namespace param_pack
}
// namespace rocm
}
// namespace megdnn
// vim: syntax=cpp.doxygen
dnn/src/rocm/param_pack/param_pack.h.hip
0 → 100644
浏览文件 @
78fff72a
/**
* \file dnn/src/rocm/param_pack/param_pack.h.hip
* MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
*
* Copyright (c) 2014-2020 Megvii Inc. All rights reserved.
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
*/
#pragma once
#include "hip_header.h"
#include <stdint.h>
#include <stdio.h>
namespace
megdnn
{
namespace
rocm
{
namespace
param_pack
{
//! Packs srcs_size device tensors into dst (total_size elements on device).
//! `offsets` holds interleaved [start, end) element offsets per tensor;
//! the launch is asynchronous on `stream`.
template <typename T>
void concat_proxy(const T** srcs, T* dst, size_t srcs_size, size_t total_size,
                  const int32_t* offsets, hipStream_t stream);
}
// namespace param_pack
}
// namespace rocm
}
// namespace megdnn
// vim: syntax=cpp.doxygen
dnn/test/rocm/param_pack.cpp
0 → 100644
浏览文件 @
78fff72a
/**
* \file dnn/test/rocm/param_pack.cpp
* MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
*
* Copyright (c) 2014-2020 Megvii Inc. All rights reserved.
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
*/
#include "hcc_detail/hcc_defs_prologue.h"
#include "test/rocm/fixture.h"
#include "test/common/checker.h"
#include "test/common/utils.h"
using
namespace
megdnn
;
using
namespace
test
;
namespace
{
template
<
class
T
>
std
::
vector
<
int32_t
>
create_offsets
(
const
TensorShapeArray
&
shapes
,
size_t
alignment
)
{
size_t
dtype_size
=
sizeof
(
T
);
if
(
alignment
<
dtype_size
)
alignment
=
dtype_size
;
alignment
/=
dtype_size
;
auto
get_aligned
=
[
alignment
](
size_t
v
)
{
auto
mod
=
v
&
(
alignment
-
1
);
return
v
+
((
alignment
-
mod
)
&
(
alignment
-
1
));
};
std
::
vector
<
dt_int32
>
offsets
(
shapes
.
size
()
<<
1
);
size_t
offset
=
0
;
for
(
size_t
i
=
0
;
i
<
shapes
.
size
();
i
++
)
{
offset
=
get_aligned
(
offset
);
offsets
[
i
<<
1
]
=
offset
;
offset
+=
shapes
[
i
].
total_nr_elems
();
offsets
[(
i
<<
1
)
+
1
]
=
offset
;
}
return
offsets
;
}
// Builds the expected packed buffer on the host: zero-initialized, with each
// parameter's data copied into its [start, end) slot from `offsets`.
template <class T>
std::vector<T> create_pack(size_t pack_size, const std::vector<int32_t>& offsets,
                           const std::vector<std::vector<T>>& ptr) {
    // The last end-offset must equal the total pack size.
    megdnn_assert(pack_size == static_cast<size_t>(offsets.back()));
    std::vector<T> packed(pack_size, 0);
    const size_t nr_tensors = offsets.size() / 2;
    for (size_t i = 0; i < nr_tensors; ++i) {
        const size_t begin = offsets[2 * i];
        const size_t len = offsets[2 * i + 1] - begin;
        for (size_t j = 0; j < len; ++j)
            packed[begin + j] = ptr[i][j];
    }
    return packed;
}
// Generates nr_params random host-side parameter vectors, each sized by the
// matching entry of `shapes`.
template <class T>
std::vector<std::vector<T>> create_params(size_t nr_params,
                                          const TensorShapeArray& shapes) {
    std::vector<std::vector<T>> result;
    result.reserve(nr_params);
    for (size_t i = 0; i < nr_params; ++i) {
        const size_t nr_elems = shapes[i].total_nr_elems();
        std::vector<T> buf;
        buf.reserve(nr_elems);
        for (size_t j = 0; j < nr_elems; ++j)
            buf.push_back(rand());
        result.push_back(std::move(buf));
    }
    return result;
}
// Allocates `size` elements of T on the device owned by `handle`; when `data`
// is non-null its contents are copied host-to-device. Caller releases the
// buffer with test::megdnn_free.
template <class T>
T* create_device_data(Handle* handle, const T* data, size_t size) {
    const size_t nr_bytes = size * sizeof(T);
    T* dev_buf = static_cast<T*>(test::megdnn_malloc(handle, nr_bytes));
    if (data)
        test::megdnn_memcpy_H2D(handle, dev_buf, data, nr_bytes);
    return dev_buf;
}
// End-to-end check of ParamPackConcat on ROCm: builds random parameters,
// packs them on the device, and compares against a host-computed reference.
template <class T>
void test_param_pack_concat(Handle* handle, const TensorShapeArray& shapes,
                            DType type) {
    auto concat = handle->create_operator<ParamPackConcat>();
    size_t nr_params = shapes.size();

    // Random host data, mirrored onto the device one tensor at a time.
    std::vector<T*> param_ptrs;
    std::vector<std::vector<T>> params = create_params<T>(nr_params, shapes);
    for (size_t i = 0; i < nr_params; ++i) {
        param_ptrs.push_back(create_device_data<T>(
                handle, params[i].data(), shapes[i].total_nr_elems()));
    }

    // Aligned [start, end) offsets and their device copy.
    std::vector<int32_t> offsets =
            create_offsets<T>(shapes, handle->alignment_requirement());
    size_t pack_size = offsets.back();
    int32_t* offsets_gpu = create_device_data<int32_t>(handle, offsets.data(),
                                                       offsets.size());

    // Host reference result and an uninitialized device output buffer.
    std::vector<T> expected_pack = create_pack<T>(pack_size, offsets, params);
    T* pack_gpu = create_device_data<T>(handle, nullptr, expected_pack.size());

    TensorLayout dst_layout({pack_size}, type);
    TensorND dst_tensor(pack_gpu, dst_layout);

    TensorLayout offsets_layout({offsets.size()}, dtype::Int32());
    TensorND offsets_tensor(offsets_gpu, offsets_layout);

    test::WorkspaceWrapper workspace(
            handle,
            concat->get_workspace_in_bytes(shapes, offsets_layout, {pack_size}));
    // srcs tensor wraps the host array of device pointers.
    // NOTE(review): its layout dtype is Int32 although it carries T* values —
    // presumably only the element count matters here; confirm.
    TensorND src_tensor(param_ptrs.data(),
                        TensorLayout({nr_params}, dtype::Int32()));

    concat->exec(src_tensor, offsets_tensor, dst_tensor, workspace.workspace());

    // check: copy the packed result back and compare element-wise.
    T* actual_pack = static_cast<T*>(malloc(pack_size * sizeof(T)));
    test::megdnn_memcpy_D2H(handle, actual_pack, pack_gpu,
                            sizeof(T) * pack_size);
    for (size_t i = 0; i < pack_size; ++i) {
        ASSERT_EQ(actual_pack[i], expected_pack[i]);
    }

    // Release everything allocated above.
    free(actual_pack);
    test::megdnn_free(handle, pack_gpu);
    test::megdnn_free(handle, offsets_gpu);
    for (auto ptr : param_ptrs) {
        test::megdnn_free(handle, ptr);
    }
}
}
// namespace
// Runs the param-pack concat test over a range of shape sets and dtypes.
TEST_F(ROCM, PARAM_PACK) {
    SmallVector<TensorShapeArray> shapes_vec;
    shapes_vec.push_back({{1}});
    shapes_vec.push_back({{129}, {21}});
    shapes_vec.push_back({{15}, {21}, {34}});
    shapes_vec.push_back({{1, 2}, {3, 5}, {5, 8}, {7, 11}, {9, 14}});
    shapes_vec.push_back({{1, 2},
                          {3, 5},
                          {1},
                          {3, 3, 3, 4},
                          {71},
                          {9, 14},
                          {111, 111, 111},
                          {128, 128, 128}});
    // Iterate by const reference: the original `for (auto shapes : ...)`
    // copied a whole TensorShapeArray on every iteration.
    for (const auto& shapes : shapes_vec) {
        test_param_pack_concat<int32_t>(handle_rocm(), shapes, dtype::Int32());
        test_param_pack_concat<int16_t>(handle_rocm(), shapes, dtype::Int16());
        test_param_pack_concat<float>(handle_rocm(), shapes, dtype::Float32());
    }
}
// vim: syntax=cpp.doxygen
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录