Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
MegEngine 天元
MegEngine
提交
81065cf0
MegEngine
项目概览
MegEngine 天元
/
MegEngine
大约 1 年 前同步成功
通知
399
Star
4705
Fork
582
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
DevOps
流水线
流水线任务
计划
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
MegEngine
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
DevOps
DevOps
流水线
流水线任务
计划
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
流水线任务
提交
Issue看板
体验新版 GitCode,发现更多精彩内容 >>
提交
81065cf0
编写于
4月 30, 2022
作者:
M
Megvii Engine Team
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
build(mgb/cutlass): merge partial headers
GitOrigin-RevId: 1bc2af604bea52159f8bfed7adcd2049bb900287
上级
d610c987
变更
6
显示空白变更内容
内联
并排
Showing
6 changed files
with
146 additions
and
63 deletions
+146
-63
dnn/scripts/cutlass_generator/conv2d_operation.py
dnn/scripts/cutlass_generator/conv2d_operation.py
+4
-18
dnn/scripts/cutlass_generator/gemm_operation.py
dnn/scripts/cutlass_generator/gemm_operation.py
+0
-29
dnn/scripts/cutlass_generator/gen_list.py
dnn/scripts/cutlass_generator/gen_list.py
+22
-15
dnn/scripts/cutlass_generator/generator.py
dnn/scripts/cutlass_generator/generator.py
+118
-1
dnn/scripts/cutlass_generator/list.bzl
dnn/scripts/cutlass_generator/list.bzl
+0
-0
dnn/src/cuda/matrix_mul/cutlass_matrix_mul_wrapper_batched_gemv_strided.cuinl
...mul/cutlass_matrix_mul_wrapper_batched_gemv_strided.cuinl
+2
-0
未找到文件。
dnn/scripts/cutlass_generator/conv2d_operation.py
浏览文件 @
81065cf0
...
...
@@ -213,7 +213,7 @@ class EmitConv2dInstance:
def
__init__
(
self
):
self
.
template
=
"""
// kernel instance "${operation_name}" generated by cutlass generator
using Convolution =
using Convolution
_${operation_name}
=
typename cutlass::conv::device::Convolution<
${element_src},
${layout_src},
...
...
@@ -317,7 +317,7 @@ class EmitDeconvInstance:
def
__init__
(
self
):
self
.
template
=
"""
// kernel instance "${operation_name}" generated by cutlass generator
using Convolution =
using Convolution
_${operation_name}
=
typename cutlass::conv::device::Deconvolution<
${element_src},
${layout_src},
...
...
@@ -419,7 +419,7 @@ class EmitConvolutionBackwardFilterInstance:
def
__init__
(
self
):
self
.
template
=
"""
// kernel instance "${operation_name}" generated by cutlass generator
using Convolution =
using Convolution
_${operation_name}
=
typename cutlass::conv::device::ConvolutionBackwardFilter<
${element_src},
${layout_src},
...
...
@@ -905,7 +905,7 @@ namespace cutlass {
namespace library {
void initialize_${operation_name}(Manifest &manifest) {
manifest.append(new ${convolution_name}<Convolution>(
manifest.append(new ${convolution_name}<Convolution
_${operation_name}
>(
"${operation_name}"
));
}
...
...
@@ -929,19 +929,6 @@ void initialize_${operation_name}(Manifest &manifest) {
self
.
kernel_path
,
"%s.cu"
%
self
.
operation
.
procedural_name
()
)
self
.
kernel_file
=
open
(
self
.
kernel_path
,
"w"
)
self
.
kernel_file
.
write
(
SubstituteTemplate
(
self
.
header_template
,
{
"required_cuda_ver_major"
:
str
(
self
.
operation
.
required_cuda_ver_major
),
"required_cuda_ver_minor"
:
str
(
self
.
operation
.
required_cuda_ver_minor
),
},
)
)
return
self
#
...
...
@@ -965,7 +952,6 @@ void initialize_${operation_name}(Manifest &manifest) {
#
def
__exit__
(
self
,
exception_type
,
exception_value
,
traceback
):
self
.
kernel_file
.
write
(
self
.
epilogue_template
)
self
.
kernel_file
.
close
()
...
...
dnn/scripts/cutlass_generator/gemm_operation.py
浏览文件 @
81065cf0
...
...
@@ -1347,19 +1347,6 @@ void initialize_${operation_name}(Manifest &manifest) {
self
.
kernel_path
,
"%s.cu"
%
self
.
operation
.
procedural_name
()
)
self
.
kernel_file
=
open
(
self
.
kernel_path
,
"w"
)
self
.
kernel_file
.
write
(
SubstituteTemplate
(
self
.
header_template
,
{
"required_cuda_ver_major"
:
str
(
self
.
operation
.
required_cuda_ver_major
),
"required_cuda_ver_minor"
:
str
(
self
.
operation
.
required_cuda_ver_minor
),
},
)
)
return
self
#
...
...
@@ -1379,7 +1366,6 @@ void initialize_${operation_name}(Manifest &manifest) {
#
def
__exit__
(
self
,
exception_type
,
exception_value
,
traceback
):
self
.
kernel_file
.
write
(
self
.
epilogue_template
)
self
.
kernel_file
.
close
()
...
...
@@ -1435,20 +1421,6 @@ ${operation_instance}
self
.
kernel_path
,
"%s.cu"
%
self
.
operation
.
procedural_name
()
)
self
.
kernel_file
=
open
(
self
.
kernel_path
,
"w"
)
self
.
kernel_file
.
write
(
SubstituteTemplate
(
self
.
header_template
,
{
"wrapper_path"
:
self
.
wrapper_path
,
"required_cuda_ver_major"
:
str
(
self
.
operation
.
required_cuda_ver_major
),
"required_cuda_ver_minor"
:
str
(
self
.
operation
.
required_cuda_ver_minor
),
},
)
)
return
self
#
...
...
@@ -1468,7 +1440,6 @@ ${operation_instance}
#
def
__exit__
(
self
,
exception_type
,
exception_value
,
traceback
):
self
.
kernel_file
.
write
(
self
.
epilogue_template
)
self
.
kernel_file
.
close
()
...
...
dnn/scripts/cutlass_generator/gen_list.py
浏览文件 @
81065cf0
...
...
@@ -35,24 +35,31 @@ def write_op_list(f, gen_op, gen_type):
if
gen_op
!=
"gemv"
:
f
.
write
(
' "all_%s_%s_operations.cu",
\n
'
%
(
gen_op
,
gen_type
))
# Emit the list entries that name the merged kernel sources of one
# operation/type pair.
def write_merge_file_name(f, gen_op, gen_type):
    """Write the merged-source file names for ``gen_op``/``gen_type`` to ``f``.

    Two entries, ``<gen_op>_<gen_type>_1.cu`` and ``<gen_op>_<gen_type>_2.cu``,
    are always written. Every operation except ``"gemv"`` additionally gets an
    ``all_<gen_op>_<gen_type>_operations.cu`` entry.

    Args:
        f: Writable text stream; only its ``write`` method is used.
        gen_op: Operation name substituted into each entry.
        gen_type: Kernel type substituted into each entry.
    """
    entry_templates = [' "{}_{}_1.cu",\n', ' "{}_{}_2.cu",\n']
    if gen_op != "gemv":
        entry_templates.append(' "all_{}_{}_operations.cu",\n')
    # One write per entry, in the same order as the templates above.
    for entry_template in entry_templates:
        f.write(entry_template.format(gen_op, gen_type))
if __name__ == "__main__":
    # (operation, kernel type) pairs, listed in the order their entries are
    # written to list.bzl.
    kernel_variants = (
        ("gemm", "simt"),
        ("gemm", "tensorop1688"),
        ("gemm", "tensorop884"),
        ("gemv", "simt"),
        ("deconv", "simt"),
        ("deconv", "tensorop8816"),
        ("conv2d", "simt"),
        ("conv2d", "tensorop8816"),
        ("conv2d", "tensorop8832"),
        ("dwconv2d_fprop", "simt"),
        ("dwconv2d_fprop", "tensorop884"),
        ("dwconv2d_dgrad", "simt"),
        ("dwconv2d_dgrad", "tensorop884"),
        ("dwconv2d_wgrad", "simt"),
        ("dwconv2d_wgrad", "tensorop884"),
    )

    with open("list.bzl", "w") as f:
        f.write("# Generated by dnn/scripts/cutlass_generator/gen_list.py\n\n")
        f.write("cutlass_gen_list = [\n")
        # NOTE(review): this listing was recovered from a rendered diff
        # (hunk "-35,24 +35,31"), so the write_op_list pass below may be the
        # removed side of that hunk -- confirm against the real file before
        # relying on both passes being present.
        for variant_op, variant_type in kernel_variants:
            write_op_list(f, variant_op, variant_type)
        for variant_op, variant_type in kernel_variants:
            write_merge_file_name(f, variant_op, variant_type)
        f.write("]")
dnn/scripts/cutlass_generator/generator.py
浏览文件 @
81065cf0
...
...
@@ -9,7 +9,7 @@ import os.path
import
shutil
import
argparse
import
platform
import
string
from
library
import
*
from
manifest
import
*
...
...
@@ -1657,6 +1657,108 @@ def GenerateGemvOperations(args):
return
GenerateGemv_Simt
(
args
)
def concat_file(
    file_path: str,
    file_name_first: str,
    file_name_last: str,
    head: str,
    required_cuda_ver_major: str,
    required_cuda_ver_minor: str,
    epilogue: str,
    wrapper_path=None,
):
    """Merge generated kernel sources in ``file_path`` into two ``.cu`` files.

    The outputs are ``<file_name_first>_<file_name_last>_1.cu`` and
    ``<file_name_first>_<file_name_last>_2.cu`` inside ``file_path``. Each one
    starts with ``head`` (after template substitution) and ends with
    ``epilogue``. Every directory entry selected for merging is appended to
    one of the two outputs and then deleted from disk.

    An entry is selected when its name does not contain ``"all_"`` and either
    (a) contains the operation prefix and both kernel-type tags derived from
    the arguments, or (b) starts with a digit.

    Args:
        file_path: Directory that holds the sources and receives the outputs.
        file_name_first: Operation name; the callers visible in this file
            pass ``args.operations``.
        file_name_last: Kernel type; the callers visible in this file pass
            ``args.type``.
        head: Header template handed to ``SubstituteTemplate``.
        required_cuda_ver_major: Substituted (via ``str()``) for
            ``required_cuda_ver_major`` in ``head``.
        required_cuda_ver_minor: Substituted (via ``str()``) for
            ``required_cuda_ver_minor`` in ``head``.
        epilogue: Text written verbatim at the end of both outputs.
        wrapper_path: When not ``None``, additionally substituted for
            ``wrapper_path`` in ``head``.
    """
    import os

    # Snapshot the directory before the outputs are created. os.listdir()
    # returns entries in arbitrary order, so sort them to make the split
    # between the two outputs reproducible from run to run.
    filenames = sorted(os.listdir(file_path))
    # The threshold intentionally counts every directory entry (not only the
    # merged ones), exactly as before.
    half = len(filenames) / 2

    output_names = (
        "{}_{}_1.cu".format(file_name_first, file_name_last),
        "{}_{}_2.cu".format(file_name_first, file_name_last),
    )

    # Same substitution keys, in the same order, as the per-file dictionaries
    # this replaces.
    values = {}
    if wrapper_path is not None:
        values["wrapper_path"] = wrapper_path
    values["required_cuda_ver_major"] = str(required_cuda_ver_major)
    values["required_cuda_ver_minor"] = str(required_cuda_ver_minor)
    # NOTE(review): rendered once and reused for both outputs; assumes
    # SubstituteTemplate is a side-effect-free string substitution -- confirm.
    header = SubstituteTemplate(head, values)

    # Tags that a kernel file name must contain to belong to this kernel type.
    if "tensorop" in file_name_last:
        arch_tag = "tensorop"
        shape_tag = file_name_last[8:]  # text after the 8 chars of "tensorop"
    else:
        arch_tag = shape_tag = "simt"

    # Prefix that a kernel file name must contain for this operation.
    if "dwconv2d_" in file_name_first:
        # e.g. "dwconv2d_fprop" -> "dwfprop"
        name_prefix = file_name_first[:2] + file_name_first[9:]
    elif "conv2d" in file_name_first or "deconv" in file_name_first:
        name_prefix = "cutlass"
    else:
        name_prefix = file_name_first

    with open(os.path.join(file_path, output_names[0]), "w") as file1, \
            open(os.path.join(file_path, output_names[1]), "w") as file2:
        file1.write(header)
        file2.write(header)

        merged = 0
        for filename in filenames:
            # A stale output from an earlier run can match the filter below;
            # merging it would delete the file that is currently being written.
            if filename in output_names:
                continue
            if "all_" in filename:
                continue
            is_kernel_source = (
                name_prefix in filename
                and arch_tag in filename
                and shape_tag in filename
            )
            if not (is_kernel_source or filename[0].isdigit()):
                continue

            merged += 1
            source_path = os.path.join(file_path, filename)
            target = file1 if merged <= half else file2
            # Close each input promptly instead of leaking its handle.
            with open(source_path) as source:
                target.writelines(source)
            os.remove(source_path)
            # Both outputs receive a newline after every merged file.
            file1.write("\n")
            file2.write("\n")

        file1.write(epilogue)
        file2.write(epilogue)
###################################################################################################
###################################################################################################
...
...
@@ -1727,18 +1829,33 @@ if __name__ == "__main__":
args
.
output
,
operation
,
short_path
)
as
emitter
:
emitter
.
emit
()
head
=
EmitConvSingleKernelWrapper
(
args
.
output
,
operations
[
0
],
short_path
).
header_template
required_cuda_ver_major
=
operations
[
0
].
required_cuda_ver_major
required_cuda_ver_minor
=
operations
[
0
].
required_cuda_ver_minor
epilogue
=
EmitConvSingleKernelWrapper
(
args
.
output
,
operations
[
0
],
short_path
).
epilogue_template
concat_file
(
args
.
output
,
args
.
operations
,
args
.
type
,
head
,
required_cuda_ver_major
,
required_cuda_ver_minor
,
epilogue
)
elif
args
.
operations
==
"gemm"
:
for
operation
in
operations
:
with
EmitGemmSingleKernelWrapper
(
args
.
output
,
operation
,
short_path
)
as
emitter
:
emitter
.
emit
()
head
=
EmitGemmSingleKernelWrapper
(
args
.
output
,
operations
[
0
],
short_path
).
header_template
required_cuda_ver_major
=
operations
[
0
].
required_cuda_ver_major
required_cuda_ver_minor
=
operations
[
0
].
required_cuda_ver_minor
epilogue
=
EmitGemmSingleKernelWrapper
(
args
.
output
,
operations
[
0
],
short_path
).
epilogue_template
concat_file
(
args
.
output
,
args
.
operations
,
args
.
type
,
head
,
required_cuda_ver_major
,
required_cuda_ver_minor
,
epilogue
)
elif
args
.
operations
==
"gemv"
:
for
operation
in
operations
:
with
EmitGemvSingleKernelWrapper
(
args
.
output
,
operation
,
gemv_wrapper_path
,
short_path
)
as
emitter
:
emitter
.
emit
()
head
=
EmitGemvSingleKernelWrapper
(
args
.
output
,
operations
[
0
],
gemv_wrapper_path
,
short_path
).
header_template
required_cuda_ver_major
=
operations
[
0
].
required_cuda_ver_major
required_cuda_ver_minor
=
operations
[
0
].
required_cuda_ver_minor
epilogue
=
EmitGemvSingleKernelWrapper
(
args
.
output
,
operations
[
0
],
gemv_wrapper_path
,
short_path
).
epilogue_template
concat_file
(
args
.
output
,
args
.
operations
,
args
.
type
,
head
,
required_cuda_ver_major
,
required_cuda_ver_minor
,
epilogue
,
wrapper_path
=
gemv_wrapper_path
)
if
args
.
operations
!=
"gemv"
:
GenerateManifest
(
args
,
operations
,
args
.
output
)
...
...
dnn/scripts/cutlass_generator/list.bzl
浏览文件 @
81065cf0
此差异由.gitattributes 抑制。
dnn/src/cuda/matrix_mul/cutlass_matrix_mul_wrapper_batched_gemv_strided.cuinl
浏览文件 @
81065cf0
#pragma once
#include "cutlass/gemm/kernel/default_gemv.h"
#include "cutlass/gemm/kernel/gemv_batched_strided.h"
#include "src/cuda/matrix_mul/cutlass_matrix_mul_wrapper.cuh"
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录