Commit 81065cf0 authored by Megvii Engine Team

build(mgb/cutlass): merge partial headers

GitOrigin-RevId: 1bc2af604bea52159f8bfed7adcd2049bb900287
Parent: d610c987
@@ -213,7 +213,7 @@ class EmitConv2dInstance:
     def __init__(self):
         self.template = """
 // kernel instance "${operation_name}" generated by cutlass generator
-using Convolution =
+using Convolution_${operation_name} =
     typename cutlass::conv::device::Convolution<
         ${element_src},
         ${layout_src},
@@ -317,7 +317,7 @@ class EmitDeconvInstance:
     def __init__(self):
         self.template = """
 // kernel instance "${operation_name}" generated by cutlass generator
-using Convolution =
+using Convolution_${operation_name} =
     typename cutlass::conv::device::Deconvolution<
         ${element_src},
         ${layout_src},
@@ -419,7 +419,7 @@ class EmitConvolutionBackwardFilterInstance:
     def __init__(self):
         self.template = """
 // kernel instance "${operation_name}" generated by cutlass generator
-using Convolution =
+using Convolution_${operation_name} =
     typename cutlass::conv::device::ConvolutionBackwardFilter<
         ${element_src},
         ${layout_src},
@@ -905,7 +905,7 @@ namespace cutlass {
 namespace library {

 void initialize_${operation_name}(Manifest &manifest) {
-    manifest.append(new ${convolution_name}<Convolution>(
+    manifest.append(new ${convolution_name}<Convolution_${operation_name}>(
         "${operation_name}"
     ));
 }
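The rename in these templates is what makes header merging possible: once many generated kernels are concatenated into one .cu file, a bare "using Convolution = ..." alias would be redefined by every kernel after the first. Below is a minimal sketch of the substitution, using the stdlib string.Template as a stand-in for the generator's own SubstituteTemplate helper (the operation name is made up):

    from string import Template

    # Expand the suffixed alias for one kernel instance.
    instance = Template(
        "using Convolution_${operation_name} =\n"
        "    typename cutlass::conv::device::Convolution<...>;"
    )
    print(instance.substitute(operation_name="cutlass_simt_s8_ifprop_64x64"))
    # Kernels merged into one file now declare Convolution_<name_a>,
    # Convolution_<name_b>, ... instead of colliding on "Convolution".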
@@ -929,19 +929,6 @@ void initialize_${operation_name}(Manifest &manifest) {
             self.kernel_path, "%s.cu" % self.operation.procedural_name()
         )
         self.kernel_file = open(self.kernel_path, "w")
-        self.kernel_file.write(
-            SubstituteTemplate(
-                self.header_template,
-                {
-                    "required_cuda_ver_major": str(
-                        self.operation.required_cuda_ver_major
-                    ),
-                    "required_cuda_ver_minor": str(
-                        self.operation.required_cuda_ver_minor
-                    ),
-                },
-            )
-        )
         return self

     #
@@ -965,7 +952,6 @@ void initialize_${operation_name}(Manifest &manifest) {
     #
     def __exit__(self, exception_type, exception_value, traceback):
-        self.kernel_file.write(self.epilogue_template)
         self.kernel_file.close()
@@ -1347,19 +1347,6 @@ void initialize_${operation_name}(Manifest &manifest) {
             self.kernel_path, "%s.cu" % self.operation.procedural_name()
         )
         self.kernel_file = open(self.kernel_path, "w")
-        self.kernel_file.write(
-            SubstituteTemplate(
-                self.header_template,
-                {
-                    "required_cuda_ver_major": str(
-                        self.operation.required_cuda_ver_major
-                    ),
-                    "required_cuda_ver_minor": str(
-                        self.operation.required_cuda_ver_minor
-                    ),
-                },
-            )
-        )
         return self

     #
@@ -1379,7 +1366,6 @@ void initialize_${operation_name}(Manifest &manifest) {
     #
     def __exit__(self, exception_type, exception_value, traceback):
-        self.kernel_file.write(self.epilogue_template)
         self.kernel_file.close()
@@ -1435,20 +1421,6 @@ ${operation_instance}
             self.kernel_path, "%s.cu" % self.operation.procedural_name()
         )
         self.kernel_file = open(self.kernel_path, "w")
-        self.kernel_file.write(
-            SubstituteTemplate(
-                self.header_template,
-                {
-                    "wrapper_path": self.wrapper_path,
-                    "required_cuda_ver_major": str(
-                        self.operation.required_cuda_ver_major
-                    ),
-                    "required_cuda_ver_minor": str(
-                        self.operation.required_cuda_ver_minor
-                    ),
-                },
-            )
-        )
         return self

     #
@@ -1468,7 +1440,6 @@ ${operation_instance}
     #
     def __exit__(self, exception_type, exception_value, traceback):
-        self.kernel_file.write(self.epilogue_template)
         self.kernel_file.close()
@@ -35,24 +35,31 @@ def write_op_list(f, gen_op, gen_type):
     if gen_op != "gemv":
         f.write(' "all_%s_%s_operations.cu",\n' % (gen_op, gen_type))


+# Write down a list of merged filenames
+def write_merge_file_name(f, gen_op, gen_type):
+    f.write(' "{}_{}_1.cu",\n'.format(gen_op, gen_type))
+    f.write(' "{}_{}_2.cu",\n'.format(gen_op, gen_type))
+    if gen_op != "gemv":
+        f.write(' "all_{}_{}_operations.cu",\n'.format(gen_op, gen_type))
+
+
 if __name__ == "__main__":
     with open("list.bzl", "w") as f:
         f.write("# Generated by dnn/scripts/cutlass_generator/gen_list.py\n\n")
         f.write("cutlass_gen_list = [\n")
         write_op_list(f, "gemm", "simt")
         write_op_list(f, "gemm", "tensorop1688")
         write_op_list(f, "gemm", "tensorop884")
         write_op_list(f, "gemv", "simt")
         write_op_list(f, "deconv", "simt")
         write_op_list(f, "deconv", "tensorop8816")
         write_op_list(f, "conv2d", "simt")
         write_op_list(f, "conv2d", "tensorop8816")
         write_op_list(f, "conv2d", "tensorop8832")
         write_op_list(f, "dwconv2d_fprop", "simt")
         write_op_list(f, "dwconv2d_fprop", "tensorop884")
         write_op_list(f, "dwconv2d_dgrad", "simt")
         write_op_list(f, "dwconv2d_dgrad", "tensorop884")
         write_op_list(f, "dwconv2d_wgrad", "simt")
         write_op_list(f, "dwconv2d_wgrad", "tensorop884")
+        write_merge_file_name(f, "gemm", "simt")
+        write_merge_file_name(f, "gemm", "tensorop1688")
+        write_merge_file_name(f, "gemm", "tensorop884")
+        write_merge_file_name(f, "gemv", "simt")
+        write_merge_file_name(f, "deconv", "simt")
+        write_merge_file_name(f, "deconv", "tensorop8816")
+        write_merge_file_name(f, "conv2d", "simt")
+        write_merge_file_name(f, "conv2d", "tensorop8816")
+        write_merge_file_name(f, "conv2d", "tensorop8832")
+        write_merge_file_name(f, "dwconv2d_fprop", "simt")
+        write_merge_file_name(f, "dwconv2d_fprop", "tensorop884")
+        write_merge_file_name(f, "dwconv2d_dgrad", "simt")
+        write_merge_file_name(f, "dwconv2d_dgrad", "tensorop884")
+        write_merge_file_name(f, "dwconv2d_wgrad", "simt")
+        write_merge_file_name(f, "dwconv2d_wgrad", "tensorop884")
         f.write("]")
@@ -9,7 +9,7 @@
 import os.path
 import shutil
 import argparse
 import platform
 import string

 from library import *
 from manifest import *
@@ -1657,6 +1657,108 @@ def GenerateGemvOperations(args):
     return GenerateGemv_Simt(args)


+# Merge all per-kernel .cu files for one (operation, type) pair, e.g.
+# ("conv2d", "tensorop8816"), into two partial files
+# <operation>_<type>_1.cu and <operation>_<type>_2.cu, each carrying the
+# shared header exactly once.
+def concat_file(
+    file_path: str,
+    file_name_first: str,
+    file_name_last: str,
+    head: str,
+    required_cuda_ver_major: str,
+    required_cuda_ver_minor: str,
+    epilogue: str,
+    wrapper_path=None,
+):
+    filenames = os.listdir(file_path)
+    file1 = open("{}/{}_{}_1.cu".format(file_path, file_name_first, file_name_last), "w")
+    file2 = open("{}/{}_{}_2.cu".format(file_path, file_name_first, file_name_last), "w")
+
+    # Write the merged header once per output file.
+    values = {
+        "required_cuda_ver_major": str(required_cuda_ver_major),
+        "required_cuda_ver_minor": str(required_cuda_ver_minor),
+    }
+    if wrapper_path is not None:
+        values["wrapper_path"] = wrapper_path
+    header = SubstituteTemplate(head, values)
+    file1.write(header)
+    file2.write(header)
+
+    # Derive the substrings used to match the generated filenames,
+    # e.g. "tensorop8816" -> "tensorop" and "8816".
+    if "tensorop" in file_name_last:
+        sub_string_1 = "tensorop"
+        sub_string_2 = file_name_last[8:]
+    else:
+        sub_string_1 = sub_string_2 = "simt"
+    # Generated filenames do not spell out the operation verbatim:
+    # "dwconv2d_fprop" kernels use the short prefix "dwfprop", while plain
+    # conv2d and deconv kernels are emitted as "cutlass_*" files.
+    if "dwconv2d_" in file_name_first:
+        file_name_first = file_name_first[:2] + file_name_first[9:]
+    elif ("conv2d" in file_name_first) or ("deconv" in file_name_first):
+        file_name_first = "cutlass"
+
+    flag = 0
+    for filename in filenames:
+        name_match = (
+            file_name_first in filename
+            and sub_string_1 in filename
+            and sub_string_2 in filename
+        )
+        if "all_" in filename or not (name_match or filename[0].isdigit()):
+            continue
+        # Split: files up to len(filenames)/2 go to file1, the rest to file2.
+        flag += 1
+        filepath = file_path + "/" + filename
+        target = file1 if flag <= len(filenames) / 2 else file2
+        with open(filepath) as src:
+            target.write(src.read())
+        os.remove(filepath)
+        file1.write("\n")
+        file2.write("\n")
+
+    file1.write(epilogue)
+    file2.write(epilogue)
+    file1.close()
+    file2.close()


 ###################################################################################################
 ###################################################################################################
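The string slicing in concat_file is terse; a short sketch of the values it computes, with example inputs:

    # "tensorop8816" matches files containing both "tensorop" and "8816".
    file_name_last = "tensorop8816"
    assert ("tensorop", file_name_last[8:]) == ("tensorop", "8816")

    # "dwconv2d_fprop" -> "dw" + "fprop" == "dwfprop", the short prefix of
    # emitted depthwise kernel files; conv2d/deconv files start with "cutlass".
    name = "dwconv2d_fprop"
    assert name[:2] + name[9:] == "dwfprop"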
@@ -1727,18 +1829,33 @@ if __name__ == "__main__":
                 args.output, operation, short_path
             ) as emitter:
                 emitter.emit()
+        head = EmitConvSingleKernelWrapper(args.output, operations[0], short_path).header_template
+        required_cuda_ver_major = operations[0].required_cuda_ver_major
+        required_cuda_ver_minor = operations[0].required_cuda_ver_minor
+        epilogue = EmitConvSingleKernelWrapper(args.output, operations[0], short_path).epilogue_template
+        concat_file(args.output, args.operations, args.type, head, required_cuda_ver_major, required_cuda_ver_minor, epilogue)
     elif args.operations == "gemm":
         for operation in operations:
             with EmitGemmSingleKernelWrapper(
                 args.output, operation, short_path
             ) as emitter:
                 emitter.emit()
+        head = EmitGemmSingleKernelWrapper(args.output, operations[0], short_path).header_template
+        required_cuda_ver_major = operations[0].required_cuda_ver_major
+        required_cuda_ver_minor = operations[0].required_cuda_ver_minor
+        epilogue = EmitGemmSingleKernelWrapper(args.output, operations[0], short_path).epilogue_template
+        concat_file(args.output, args.operations, args.type, head, required_cuda_ver_major, required_cuda_ver_minor, epilogue)
     elif args.operations == "gemv":
         for operation in operations:
             with EmitGemvSingleKernelWrapper(
                 args.output, operation, gemv_wrapper_path, short_path
             ) as emitter:
                 emitter.emit()
+        head = EmitGemvSingleKernelWrapper(args.output, operations[0], gemv_wrapper_path, short_path).header_template
+        required_cuda_ver_major = operations[0].required_cuda_ver_major
+        required_cuda_ver_minor = operations[0].required_cuda_ver_minor
+        epilogue = EmitGemvSingleKernelWrapper(args.output, operations[0], gemv_wrapper_path, short_path).epilogue_template
+        concat_file(args.output, args.operations, args.type, head, required_cuda_ver_major, required_cuda_ver_minor, epilogue, wrapper_path=gemv_wrapper_path)

     if args.operations != "gemv":
         GenerateManifest(args, operations, args.output)
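Tying it together: after the per-kernel emitters run, the driver calls concat_file once per invocation. A hedged sketch of the equivalent direct call for the conv2d branch (argument values are illustrative; args, operations, and short_path come from the surrounding driver):

    wrapper = EmitConvSingleKernelWrapper(args.output, operations[0], short_path)
    concat_file(
        args.output,              # directory holding the per-kernel .cu files
        "conv2d",                 # args.operations
        "tensorop8816",           # args.type
        wrapper.header_template,
        operations[0].required_cuda_ver_major,
        operations[0].required_cuda_ver_minor,
        wrapper.epilogue_template,
    )
    # Produces conv2d_tensorop8816_1.cu and conv2d_tensorop8816_2.cu in args.output.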
#pragma once
#include "cutlass/gemm/kernel/default_gemv.h"
#include "cutlass/gemm/kernel/gemv_batched_strided.h"
#include "src/cuda/matrix_mul/cutlass_matrix_mul_wrapper.cuh"