# Copyright (c) 2021 CINN Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. import os import numpy as np import tvm import tvm.contrib.graph_runtime as runtime import tvm.relay.testing from tvm import autotvm, relay, te from tvm.autotvm.tuner import GATuner, GridSearchTuner, RandomTuner, XGBTuner from tvm.contrib.utils import tempdir # To test different ops, change this single-op network. # See https://github.com/apache/incubator-tvm/blob/main/docs/langref/relay_op.rst to get the op list. 
def _declare_inputs(input_names, input_shape):
    """Create one relay variable per (name, shape) pair.

    Each variable is typed with ``tvm.relay.TensorType(shape)``; no dtype is
    passed, so the TensorType default dtype applies.
    """
    return [
        relay.Var(name, tvm.relay.TensorType(shape))
        for name, shape in zip(input_names, input_shape)
    ]


def _single_op_network(
    op_label, input_names, input_shape, output_shape, build_op
):
    """Wrap a single relay op in a ``relay.Function``.

    Args:
        op_label: Text appended to the "[Test]Begin building graph with op "
            log line.
        input_names: Names of the function inputs, in call order.
        input_shape: One shape per entry of ``input_names``.
        output_shape: Declared output shape; returned as-is, not verified.
        build_op: Callable receiving the relay input variables positionally
            and returning the relay expression used as the function body.

    Returns:
        ``(mod, params, input_shape, output_shape, input_names)`` where
        ``mod`` is the ``relay.Function`` and ``params`` is an empty list.
    """
    inputs = _declare_inputs(input_names, input_shape)
    print("[Test]Begin building graph with op " + op_label)
    mod = relay.Function(inputs, build_op(*inputs))
    params = []
    return mod, params, input_shape, output_shape, input_names


def _conv2d_network(
    op_label, input_shape, output_shape, kernel_size, padding, strides
):
    """Build a two-input (data ``x``, weight ``y``) relay.nn.conv2d network."""

    def build_op(x, y):
        return relay.nn.conv2d(
            x, y, kernel_size=kernel_size, padding=padding, strides=strides
        )

    return _single_op_network(
        op_label, ["x", "y"], input_shape, output_shape, build_op
    )


def get_network_conv2d():
    """3x3 conv2d, stride 1, padding 1, on a (2, 512, 7, 7) input."""
    return _conv2d_network(
        "relay.nn.conv2d",
        input_shape=[(2, 512, 7, 7), (512, 512, 3, 3)],
        output_shape=(2, 512, 7, 7),
        kernel_size=(3, 3),
        padding=(1, 1),
        strides=(1, 1),
    )


def get_network_conv2d_resnet1():
    """7x7 conv2d, stride 2, padding 3, on a (2, 3, 224, 224) input."""
    return _conv2d_network(
        "relay.nn.conv2d resnet1",
        input_shape=[(2, 3, 224, 224), (64, 3, 7, 7)],
        output_shape=(2, 64, 112, 112),
        kernel_size=(7, 7),
        padding=(3, 3),
        strides=(2, 2),
    )


def get_network_conv2d_resnet2():
    """3x3 conv2d, stride 1, padding 1, on a (2, 64, 56, 56) input."""
    return _conv2d_network(
        "relay.nn.conv2d resnet2",
        input_shape=[(2, 64, 56, 56), (64, 64, 3, 3)],
        output_shape=(2, 64, 56, 56),
        kernel_size=(3, 3),
        padding=(1, 1),
        strides=(1, 1),
    )


def get_network_conv2d_resnet3():
    """1x1 conv2d, stride 1, no padding, on a (2, 64, 56, 56) input."""
    # The log label used to say "resnet2" (copy-paste slip); it now names the
    # configuration that is actually being built.
    return _conv2d_network(
        "relay.nn.conv2d resnet3",
        input_shape=[(2, 64, 56, 56), (64, 64, 1, 1)],
        output_shape=(2, 64, 56, 56),
        kernel_size=(1, 1),
        padding=(0, 0),
        strides=(1, 1),
    )


def get_network_conv2d_resnet4():
    """1x1 conv2d, stride 2, no padding, on a (2, 64, 56, 56) input."""
    # Log label corrected from "resnet2" to "resnet4".
    return _conv2d_network(
        "relay.nn.conv2d resnet4",
        input_shape=[(2, 64, 56, 56), (128, 64, 1, 1)],
        output_shape=(2, 128, 28, 28),
        kernel_size=(1, 1),
        padding=(0, 0),
        strides=(2, 2),
    )


def get_network_conv2d_resnet5():
    """3x3 conv2d, stride 2, padding 1, on a (2, 128, 28, 28) input."""
    # Log label corrected from "resnet2" to "resnet5".
    return _conv2d_network(
        "relay.nn.conv2d resnet5",
        input_shape=[(2, 128, 28, 28), (256, 128, 3, 3)],
        output_shape=(2, 256, 14, 14),
        kernel_size=(3, 3),
        padding=(1, 1),
        strides=(2, 2),
    )


def get_network_relu():
    """relay.nn.relu on a single (2, 512, 112, 112) input."""
    return _single_op_network(
        "relay.nn.relu",
        input_names=["x"],
        input_shape=[(2, 512, 112, 112)],
        output_shape=(2, 512, 112, 112),
        build_op=relay.nn.relu,
    )


def get_network_elementwise():
    """relay.multiply of two (64, 64) inputs."""
    return _single_op_network(
        "relay.multiply",
        input_names=["x", "y"],
        input_shape=[(64, 64), (64, 64)],
        output_shape=(64, 64),
        build_op=relay.multiply,
    )


def get_network_matmul():
    """relay.nn.dense of two (32, 32) inputs."""
    return _single_op_network(
        "relay.nn.dense (matmul)",
        input_names=["x", "y"],
        input_shape=[(32, 32), (32, 32)],
        output_shape=(32, 32),
        build_op=relay.nn.dense,
    )


def get_network_softmax():
    """relay.nn.softmax on a single (1024, 2048) input."""
    return _single_op_network(
        "relay.nn.softmax",
        input_names=["x"],
        input_shape=[(1024, 2048)],
        output_shape=(1024, 2048),
        build_op=relay.nn.softmax,
    )


def get_network_pool2d():
    """3x3 relay.nn.max_pool2d, stride 2, padding 1, on (2, 64, 112, 112)."""

    def build_op(x):
        return relay.nn.max_pool2d(
            x, pool_size=(3, 3), strides=(2, 2), padding=(1, 1)
        )

    return _single_op_network(
        "relay.nn.max_pool2d",
        input_names=["x"],
        input_shape=[(2, 64, 112, 112)],
        output_shape=(2, 64, 56, 56),
        build_op=build_op,
    )


def get_network_batchnorm():
    """relay.nn.batch_norm over a (2, 512, 32, 32) float32 input.

    Only element ``[0]`` of the batch_norm result is used as the function
    body.

    Returns:
        ``(mod, params, input_shape, output_shape, input_names)`` where
        ``mod`` is the ``relay.Function`` and ``params`` is an empty list.
    """
    data_shape = (2, 512, 32, 32)
    # One-element tuples: a bare ``(512)`` is just the int 512, which made the
    # per-channel entries inconsistent with every other shape in this file.
    channel_shape = (512,)

    input_names = ["data0", "bn_gamma1", "bn_beta1", "bn_mean1", "bn_var1"]
    input_shape = [data_shape] + [channel_shape] * 4
    output_shape = (2, 512, 32, 32)

    data0, bn_gamma, bn_beta, bn_mmean, bn_mvar = [
        relay.var(name, relay.TensorType(shape, "float32"))
        for name, shape in zip(input_names, input_shape)
    ]
    bn = relay.nn.batch_norm(data0, bn_gamma, bn_beta, bn_mmean, bn_mvar)[0]

    print("[Test]Begin building graph with op relay.nn.batch_norm")
    mod = relay.Function([data0, bn_gamma, bn_beta, bn_mmean, bn_mvar], bn)
    params = []
    return mod, params, input_shape, output_shape, input_names
##################################################################
# For CUDA backends, use
# :code:`target = "cuda"`
# For X86 backends, use
# :code:`target = "llvm"`
target = "cuda"
dtype = "float32"


def tune_and_evaluate(func):
    """Build a single-op network and print its mean run time.

    Despite the name, no tuning is performed here: the network is built once
    for the module-level ``target`` and timed.

    Args:
        func: Zero-argument callable returning
            ``(mod, params, input_shape, output_shape, input_names)``, such as
            the ``get_network_*`` builders in this file. Only ``mod``,
            ``input_shape`` and ``input_names`` are used.

    Side effects:
        Prints the generated device source (when the build has a device
        module) followed by the "[PreHeat]" and "[Benchmark]" timing lines.
    """
    # extract workloads from relay program
    mod, _params, input_shape, _out_shape, input_names = func()

    runtime_mod = relay.build_module.build(mod, target=target)

    # Host-only builds (e.g. target = "llvm") are expected to carry no
    # imported device module, so guard the [0] lookup instead of raising
    # IndexError. Output for GPU targets is unchanged.
    device_modules = runtime_mod.get_lib().imported_modules
    if device_modules:
        print("-----GPU code-----")
        print(device_modules[0].get_source())

    # load parameters
    ctx = tvm.context(str(target), 0)
    module = runtime.GraphModule(runtime_mod["default"](ctx))
    # Feed every input with uniform random data of the declared shape.
    for name, shape in zip(input_names, input_shape):
        data_temp = tvm.nd.array(
            np.random.uniform(size=shape).astype(dtype)
        )
        module.set_input(name, data_temp)

    # evaluate
    evaluator_preheat = module.module.time_evaluator(
        "run", ctx, number=10, repeat=10
    )
    evaluator = module.module.time_evaluator("run", ctx, number=100, repeat=10)

    # The short pre-heat run goes first; its numbers are reported separately
    # from the benchmark run.
    prof_res1 = (
        np.array(evaluator_preheat().results) * 1000
    )  # convert to millisecond
    print(
        "[PreHeat]Mean inference time (std dev): {:.4f} ms ({:.4f} ms)".format(
            np.mean(prof_res1), np.std(prof_res1)
        )
    )

    prof_res2 = np.array(evaluator().results) * 1000  # convert to millisecond
    print(
        "[Benchmark]Mean inference time (std dev): {:.4f} ms ({:.4f} ms)".format(
            np.mean(prof_res2), np.std(prof_res2)
        )
    )


# Uncomment exactly the network you want to benchmark.
# tune_and_evaluate(get_network_pool2d)
# tune_and_evaluate(get_network_softmax)
# tune_and_evaluate(get_network_matmul)
# tune_and_evaluate(get_network_batchnorm)
tune_and_evaluate(get_network_relu)
# tune_and_evaluate(get_network_elementwise)
# tune_and_evaluate(get_network_conv2d_resnet1)
# tune_and_evaluate(get_network_conv2d_resnet2)
# tune_and_evaluate(get_network_conv2d_resnet3)
# tune_and_evaluate(get_network_conv2d_resnet4)
# tune_and_evaluate(get_network_conv2d_resnet5)
# tune_and_evaluate(get_network_conv2d)