From 586e51f6a06fa1b923212111667a34dadf527bc8 Mon Sep 17 00:00:00 2001 From: lingyunli63 Date: Mon, 20 Jul 2020 14:45:03 +0800 Subject: [PATCH] compile gpu ops and use op_build from kernel_exec --- python/akg/ms/gpu/__init__.py | 1 - python/akg/ms/gpu/cast.py | 22 ++-------------------- python/akg/ms/message.py | 17 ++++++++++++++--- python/akg/ms/op_build.py | 10 +++++----- python/akg/utils/dump_cuda_meta.py | 4 ++-- 5 files changed, 23 insertions(+), 31 deletions(-) diff --git a/python/akg/ms/gpu/__init__.py b/python/akg/ms/gpu/__init__.py index 1931881..0cf7937 100644 --- a/python/akg/ms/gpu/__init__.py +++ b/python/akg/ms/gpu/__init__.py @@ -20,7 +20,6 @@ from .equal import gpu_schedule_Equal from .tile import Tile from .tile import gpu_schedule_Tile from .cast import Cast -from .cast import gpu_schedule_Cast from .relu6 import ReLU6, gpu_schedule_ReLU6 from .relu6_grad import ReLU6Grad, gpu_schedule_ReLU6Grad from .squeeze import Squeeze, gpu_schedule_Squeeze diff --git a/python/akg/ms/gpu/cast.py b/python/akg/ms/gpu/cast.py index f6f545f..4ef744d 100644 --- a/python/akg/ms/gpu/cast.py +++ b/python/akg/ms/gpu/cast.py @@ -19,27 +19,9 @@ import logging import akg.tvm from akg.ops.math import cast from akg.topi.generic import schedule_elemwise +import akg.topi as topi +@akg.schedule(topi.cuda.schedule_injective) def Cast(x, dst_type): """cast.""" return cast.cast(x, dst_type) - - -def gpu_schedule_Cast(outs): - """ - gpu schedule for cast. - - Args: - outs (tvm.tensor.Tensor): outputs of compute. - - Returns: - sch (schedule.Schedule): The created schedule. - """ - device = 'cuda' - ctx = akg.tvm.context(device, 0) - if not ctx.exist: - logging.info("Skip because %s is not enabled", device) - return None - with akg.tvm.target.create(device): - sch = schedule_elemwise(outs) - return sch diff --git a/python/akg/ms/message.py b/python/akg/ms/message.py index 4ac057b..89f7276 100644 --- a/python/akg/ms/message.py +++ b/python/akg/ms/message.py @@ -29,8 +29,8 @@ from akg.utils import validation_check as vc_util from akg import composite from akg.tvm import _api_internal from . import cce -from . import op_build_to_func - +from . import gpu +from . import op_build @vc_util.check_input_type(str) def compilewithjson_to_func(json_str): @@ -68,6 +68,17 @@ def compilewithjson_to_func(json_str): if op_func is None: if processor == 'cuda': op_func = getattr(gpu, op_name, None) + input_shapes = [] + input_types = [] + for input_desc in kernel_info['input_desc']: + input_shapes.append(input_desc[0]['shape']) + input_types.append(input_desc[0]['data_type']) + op_attrs = [] + if kernel_info['attr']: + for ext_arg in kernel_info['attr']: + op_attrs.append(ext_arg['value']) + mod = utils.op_build(op_func, input_shapes, input_types, op_attrs, kernel_info['op']) + return True else: op_func = getattr(cce, op_name, None) @@ -121,7 +132,7 @@ def compilewithjson_to_func(json_str): output = [output] tsr = tsr + [i for i in output if utils.TensorUtils.is_output_value(i)] - return op_build_to_func([op_name], output, tsr, schedule_func, processor, kernel_info['op'], attrs) + return op_build([op_name], output, tsr, schedule_func, processor, kernel_info['op'], attrs) def compilewithjson(json_str): tmp_rst = compilewithjson_to_func(json_str) diff --git a/python/akg/ms/op_build.py b/python/akg/ms/op_build.py index 50b1174..1bb70b0 100644 --- a/python/akg/ms/op_build.py +++ b/python/akg/ms/op_build.py @@ -33,7 +33,6 @@ BINDS = "binds" MS_AKG_DUMP_IR = "MS_AKG_DUMP_IR" MS_AKG_DUMP_CCE = "MS_AKG_DUMP_CCE" MS_DAVINCI_KERNEL_PATH = "./kernel_meta/" -MS_CUDA_KERNEL_PATH = "./cuda_meta/" @vc_util.check_input_type(list, (list, tuple), (list, tuple), (types.FunctionType, type(None)), str, str, dict) @@ -72,10 +71,11 @@ def op_build(opnames, computes, args, custom_schedule, device, kernel_name, attr """op_build""" if device in ("aicore", "aicpu"): tmp_rst = op_build_to_func(opnames, computes, args, custom_schedule, device, kernel_name, attrs) - return _api_internal._BuildToModule(tmp_rst) + return tmp_rst if device == "cuda": - cuda_path = os.path.realpath(MS_CUDA_KERNEL_PATH) + kernel_meta_path = "./cuda_meta_" + str(os.getpid()) + "/" + cuda_path = os.path.realpath(kernel_meta_path) if not os.path.isdir(cuda_path): os.makedirs(cuda_path) if not opnames: @@ -88,7 +88,7 @@ def op_build(opnames, computes, args, custom_schedule, device, kernel_name, attr logging.error("no schedule func found %s", str(schedule_name)) return None - ptx_file = os.path.realpath(MS_CUDA_KERNEL_PATH + kernel_name + ".ptx") + ptx_file = os.path.realpath(kernel_meta_path + kernel_name + ".ptx") if os.path.exists(ptx_file): os.remove(ptx_file) try: @@ -100,7 +100,7 @@ def op_build(opnames, computes, args, custom_schedule, device, kernel_name, attr foo = akg.tvm.build(s, args, device, name=kernel_name) ptx_code = foo.imported_modules[0].get_source("ptx") file.write(ptx_code) - json_file = os.path.realpath(MS_CUDA_KERNEL_PATH + kernel_name + ".json") + json_file = os.path.realpath(kernel_meta_path + kernel_name + ".json") kernel_info = (ptx_code, json_file, kernel_name) gpu_utils.save_gpu_params(s, args, kernel_info) os.chmod(ptx_file, 0o400) diff --git a/python/akg/utils/dump_cuda_meta.py b/python/akg/utils/dump_cuda_meta.py index 5bdc590..659cb88 100644 --- a/python/akg/utils/dump_cuda_meta.py +++ b/python/akg/utils/dump_cuda_meta.py @@ -85,7 +85,7 @@ def save_gpu_params(s, args, kernel_info): fo.write("}\n") def dump(mod, kernel_name, sch, args): - meta_path = "./cuda_meta/" + meta_path = "./cuda_meta_/" + str(os.getpid()) + "/" cuda_path = os.path.realpath(meta_path) if not os.path.isdir(cuda_path): os.makedirs(cuda_path) @@ -97,4 +97,4 @@ def dump(mod, kernel_name, sch, args): ptx_code = mod.imported_modules[0].get_source('ptx') f.write(ptx_code) param_path = os.path.realpath(meta_path + kernel_name + '.json') - save_gpu_params(sch, args, (ptx_code, param_path, kernel_name)) \ No newline at end of file + save_gpu_params(sch, args, (ptx_code, param_path, kernel_name)) -- GitLab