# mace_tools.py

# Copyright 2018 Xiaomi, Inc.  All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# python tools/mace_tools.py \
#     --config=tools/example.yaml \
#     --round=100 \
#     --mode=all

import argparse
import enum
import filelock
import hashlib
import os
import sh
import subprocess
import sys
import urllib
import yaml
import re

import common
import sh_commands

from ConfigParser import ConfigParser


def get_target_socs(configs):
    """Return the SoCs that models should be built for / run on.

    For a host build (``"host"`` listed in ``configs["target_abis"]``) no
    device is involved and ``[""]`` is returned.  Otherwise the SoCs of the
    connected devices are narrowed first by the optional ``target_socs``
    entry of the config and then by the ``--target_socs`` command line flag.

    Args:
        configs: dict parsed from the YAML config file.

    Returns:
        ``[""]`` for host builds, otherwise a non-empty set of SoC names.

    Exits the process with status 1 when a SoC requested on the command
    line is not available, or when no SoC is left to run on.
    """
    if "host" in configs["target_abis"]:
        return [""]

    # Normalise to a set so the intersections below are well defined.
    available_socs = set(sh_commands.adb_get_all_socs())
    target_socs = available_socs
    # ``configs`` is a dict, so the key has to be tested with ``in``;
    # ``hasattr`` only looks at attributes and never sees dictionary keys.
    if "target_socs" in configs:
        target_socs = set(configs["target_socs"]) & available_socs

    if FLAGS.target_socs != "all":
        requested_socs = set(FLAGS.target_socs.split(','))
        target_socs = target_socs & requested_socs
        missing_socs = requested_socs.difference(target_socs)
        if missing_socs:
            print(
                "Error: devices with SoCs are not connected %s" %
                missing_socs)
            sys.exit(1)

    if not target_socs:
        print("Error: no device to run")
        sys.exit(1)

    return target_socs


def get_data_and_device_type(runtime):
    """Map a runtime name to its ``(data_type, device_type)`` pair.

    Recognised runtimes are ``"dsp"``, ``"gpu"`` and ``"cpu"``; any other
    value yields a pair of empty strings.
    """
    known_runtimes = (
        ("dsp", "DT_UINT8", "HEXAGON"),
        ("gpu", "DT_HALF", "GPU"),
        ("cpu", "DT_FLOAT", "CPU"),
    )
    for name, data_type, device_type in known_runtimes:
        if runtime == name:
            return data_type, device_type

    return "", ""


def get_hexagon_mode(configs):
    """Return True when at least one configured model uses the DSP runtime.

    Args:
        configs: parsed config dict; every entry of ``configs["models"]``
            must provide a ``"runtime"`` string.

    Returns:
        bool: True if any model's runtime equals ``"dsp"`` (compared
        case-insensitively), False otherwise.
    """
    # Every runtime is lower-cased up front (rather than short-circuiting)
    # so a malformed model entry still fails loudly.
    runtimes = [model_config["runtime"].lower()
                for model_config in configs["models"].values()]
    return "dsp" in runtimes


def gen_opencl_and_tuning_code(target_abi,
                               serialno,
                               model_output_dirs,
                               pull_or_not):
    """Generate the OpenCL binary code and the tuning parameter code.

    Args:
        target_abi: ABI forwarded to ``sh_commands.pull_binaries``.
        serialno: device serial number forwarded to
            ``sh_commands.pull_binaries``.
        model_output_dirs: list of model output directories handed to the
            code generators (may be empty).
        pull_or_not: when true, binaries are first pulled via
            ``sh_commands.pull_binaries`` before the code is generated.
    """
    if pull_or_not:
        sh_commands.pull_binaries(target_abi, serialno, model_output_dirs)

    # generate opencl binary code
    sh_commands.gen_opencl_binary_code(model_output_dirs)

    sh_commands.gen_tuning_param_code(model_output_dirs)


def model_benchmark_stdout_processor(stdout,
                                     abi,
                                     serialno,
                                     model_name,
                                     runtime):
    """Extract timing metrics from a run's stdout and append them to the
    CSV report.

    The first line made of exactly six whitespace-separated fields whose
    first field starts with ``time`` supplies the five metrics, written to
    the report in the order create_net, engine_ctor, init, warmup, run_avg.
    When no such line exists every metric is reported as 0.

    The report lives at ``<FLAGS.output_dir>/report.csv``; a header line is
    written when the file does not exist yet.  For non-host ABIs the device
    name and SoC are read from the device properties, otherwise both are
    left empty.
    """
    timings = [0] * 5
    for raw_line in stdout.split('\n'):
        fields = raw_line.strip().split()
        if len(fields) != 6 or not fields[0].startswith("time"):
            continue
        # Round-trip through float to validate and normalise each value.
        timings = [str(float(field)) for field in fields[1:]]
        break

    if abi != "host":
        device_props = sh_commands.adb_getprop_by_serialno(serialno)
        device_name = device_props.get("ro.product.model", "")
        target_soc = device_props.get("ro.board.platform", "")
    else:
        device_name = ""
        target_soc = ""

    report_filename = FLAGS.output_dir + "/report.csv"
    if not os.path.exists(report_filename):
        with open(report_filename, 'w') as report:
            report.write("model_name,device_name,soc,abi,runtime,create_net,"
                         "engine_ctor,init,warmup,run_avg\n")

    row_template = ("{model_name},{device_name},{soc},{abi},{runtime},"
                    "{create_net},{engine_ctor},{init},{warmup},{run_avg}\n")
    create_net, engine_ctor, init, warmup, run_avg = timings
    row = row_template.format(
        model_name=model_name,
        device_name=device_name,
        soc=target_soc,
        abi=abi,
        runtime=runtime,
        create_net=create_net,
        engine_ctor=engine_ctor,
        init=init,
        warmup=warmup,
        run_avg=run_avg
    )
    with open(report_filename, 'a') as report:
        report.write(row)


def tuning_run(runtime,
               target_abi,
               serialno,
               vlog_level,
               embed_model_data,
               model_output_dir,
               input_nodes,
               output_nodes,
               input_shapes,
               output_shapes,
               model_name,
               device_type,
               running_round,
               restart_round,
               out_of_range_check,
               phone_data_dir,
               tuning=False,
               limit_opencl_kernel_time=0,
               omp_num_threads=-1,
               cpu_affinity_policy=1,
               gpu_perf_hint=3,
               gpu_priority_hint=3):
    """Run the model through ``sh_commands.tuning_run`` and optionally
    record its benchmark figures.

    All arguments except ``runtime`` are forwarded to
    ``sh_commands.tuning_run`` together with the valgrind settings taken
    from the command line flags.  ``runtime`` is only used to label the
    report row.

    A report row is written when ``running_round`` is positive and
    ``--collect_report`` is enabled.
    """
    # The positional order below is dictated by sh_commands.tuning_run and
    # differs from this function's own parameter order.
    run_output = sh_commands.tuning_run(
        target_abi,
        serialno,
        vlog_level,
        embed_model_data,
        model_output_dir,
        input_nodes,
        output_nodes,
        input_shapes,
        output_shapes,
        model_name,
        device_type,
        running_round,
        restart_round,
        limit_opencl_kernel_time,
        tuning,
        out_of_range_check,
        phone_data_dir,
        omp_num_threads,
        cpu_affinity_policy,
        gpu_perf_hint,
        gpu_priority_hint,
        valgrind=FLAGS.valgrind,
        valgrind_path=FLAGS.valgrind_path,
        valgrind_args=FLAGS.valgrind_args
    )

    if not (running_round > 0 and FLAGS.collect_report):
        return

    model_benchmark_stdout_processor(
        run_output, target_abi, serialno, model_name, runtime)


def build_mace_run_prod(hexagon_mode, runtime, target_abi,
                        serialno, vlog_level, embed_model_data,
                        model_output_dir, input_nodes, output_nodes,
                        input_shapes, output_shapes, model_name, device_type,
                        running_round, restart_round, tuning,
                        limit_opencl_kernel_time, phone_data_dir,
                        enable_openmp):
    """Build the production ``mace_run`` binary for one model.

    For the ``gpu`` runtime a non-production binary is built first and run
    twice on the device (once with the requested tuning settings, once with
    the out-of-range check enabled and tuning disabled); the results are
    pulled back and fed into the code generators before the production
    build.  Other runtimes go straight to the production build.

    With ``--valgrind`` the production binary is built unstripped and in
    debug mode.

    ``running_round`` and ``restart_round`` are accepted but not used here;
    the on-device runs use fixed values.
    """
    mace_run_target = "//mace/tools/validation:mace_run"
    use_valgrind = FLAGS.valgrind
    strip = "never" if use_valgrind else "always"
    debug = True if use_valgrind else False

    # Both paths start from freshly generated (empty) OpenCL/tuning code.
    gen_opencl_and_tuning_code(target_abi, serialno, [], False)

    if runtime == "gpu":
        # Intermediate, non-production build used only for the on-device runs.
        sh_commands.bazel_build(
            mace_run_target,
            abi=target_abi,
            model_tag=model_name,
            production_mode=False,
            hexagon_mode=hexagon_mode,
            enable_openmp=enable_openmp
        )
        sh_commands.update_mace_run_lib(model_output_dir, target_abi,
                                        model_name, embed_model_data)

        # First pass: run with the requested tuning settings.
        tuning_run(runtime, target_abi, serialno, vlog_level, embed_model_data,
                   model_output_dir, input_nodes, output_nodes, input_shapes,
                   output_shapes, model_name, device_type, running_round=0,
                   restart_round=1, out_of_range_check=False,
                   phone_data_dir=phone_data_dir, tuning=tuning,
                   limit_opencl_kernel_time=limit_opencl_kernel_time)

        # Second pass: out-of-range check enabled, tuning disabled.
        tuning_run(runtime, target_abi, serialno, vlog_level, embed_model_data,
                   model_output_dir, input_nodes, output_nodes, input_shapes,
                   output_shapes, model_name, device_type, running_round=0,
                   restart_round=1, out_of_range_check=True,
                   phone_data_dir=phone_data_dir, tuning=False)

        # Pull the results from the device and regenerate the code.
        gen_opencl_and_tuning_code(target_abi, serialno, [model_output_dir],
                                   True)

    sh_commands.bazel_build(
        mace_run_target,
        strip,
        abi=target_abi,
        model_tag=model_name,
        production_mode=True,
        hexagon_mode=hexagon_mode,
        debug=debug,
        enable_openmp=enable_openmp
    )
    sh_commands.update_mace_run_lib(model_output_dir, target_abi,
                                    model_name, embed_model_data)


def merge_libs_and_tuning_results(target_soc,
                                  target_abi,
                                  serialno,
                                  project_name,
                                  output_dir,
                                  model_output_dirs,
                                  hexagon_mode,
                                  embed_model_data):
    """Regenerate OpenCL/tuning code, build the production code and merge
    the resulting libraries.

    The code generators are run over ``model_output_dirs`` without pulling
    anything from the device; the production code is then built for
    ``target_abi`` and ``sh_commands.merge_libs`` is invoked with the
    remaining arguments.
    """
    pull_from_device = False
    gen_opencl_and_tuning_code(
        target_abi, serialno, model_output_dirs, pull_from_device)

    sh_commands.build_production_code(target_abi)

    sh_commands.merge_libs(
        target_soc, target_abi, project_name, output_dir,
        model_output_dirs, hexagon_mode, embed_model_data)


def _fetch_if_remote(path, output_dir, local_name):
    """Return a local path for ``path``, downloading it when it is a URL.

    ``path`` values starting with ``http://`` or ``https://`` are downloaded
    to ``<output_dir>/<local_name>`` and that location is returned; anything
    else is returned unchanged.
    """
    if not path.startswith(("http://", "https://")):
        return path

    # urlretrieve lives in different modules on Python 2 and Python 3.
    try:
        from urllib.request import urlretrieve
    except ImportError:
        from urllib import urlretrieve

    local_path = output_dir + "/" + local_name
    urlretrieve(path, local_path)
    return local_path


def get_model_files(model_file_path,
                    model_output_dir,
                    weight_file_path=""):
    """Resolve the model and weight files to local paths.

    Args:
        model_file_path: local path or HTTP(S) URL of the model file.
        model_output_dir: directory that receives downloaded files.
        weight_file_path: local path or HTTP(S) URL of the weight file;
            may be empty.

    Returns:
        Tuple ``(model_file, weight_file)``.  URLs are downloaded to
        ``<model_output_dir>/model.pb`` and
        ``<model_output_dir>/model.caffemodel`` respectively; local paths
        (including the empty string) are returned as given.
    """
    model_file = _fetch_if_remote(
        model_file_path, model_output_dir, "model.pb")
    weight_file = _fetch_if_remote(
        weight_file_path, model_output_dir, "model.caffemodel")
    return model_file, weight_file


def md5sum(str):
    """Return the hexadecimal MD5 digest of ``str``.

    Args:
        str: byte string or text.  Text is encoded as UTF-8 before hashing,
            so ASCII input hashes the same whether it arrives as bytes or
            as text.  (The parameter name shadows the builtin; it is kept
            for compatibility with existing callers.)

    Returns:
        The 32-character lowercase hex digest.
    """
    data = str
    # hashlib only accepts bytes-like input; ``type(u"")`` is the text type
    # on both Python 2 (unicode) and Python 3 (str).
    if isinstance(data, type(u"")):
        data = data.encode("utf-8")
    return hashlib.md5(data).hexdigest()


################################
# Parsing arguments
################################
def str2bool(v):
    """argparse ``type`` converter for boolean flags.

    Accepts (case-insensitively) yes/true/t/y/1 as True and no/false/f/n/0
    as False; anything else raises ``argparse.ArgumentTypeError``.
    """
    truthy = ('yes', 'true', 't', 'y', '1')
    falsy = ('no', 'false', 'f', 'n', '0')

    token = v.lower()
    if token in truthy:
        return True
    if token in falsy:
        return False
    raise argparse.ArgumentTypeError('Boolean value expected.')


def str_to_caffe_env_type(v):
    """argparse ``type`` converter for the ``--caffe_env`` flag.

    Maps ``docker`` / ``local`` (case-insensitively) to the corresponding
    ``common.CaffeEnvType`` member; anything else raises
    ``argparse.ArgumentTypeError``.
    """
    env_name = v.lower()
    if env_name == 'docker':
        return common.CaffeEnvType.DOCKER
    if env_name == 'local':
        return common.CaffeEnvType.LOCAL
    raise argparse.ArgumentTypeError('[docker | local] expected.')


def _config_error(*messages):
    """Print a ``CONFIG ERROR:`` banner followed by ``messages``, then exit
    with status 1."""
    print("CONFIG ERROR:")
    for message in messages:
        print(message)
    sys.exit(1)


def parse_model_configs():
    """Load the YAML file named by ``--config``, validate it and fill in
    defaults.

    Validation / normalisation performed:
      * ``target_abis`` must be a non-empty list.
      * ``embed_model_data`` must be an integer in [0, 1].
      * ``models`` must be present and non-empty.
      * every model needs ``platform`` (``tensorflow`` or ``caffe``),
        ``model_file_path``, ``model_sha256_checksum``, ``runtime``,
        ``input_nodes``, ``input_shapes``, ``output_nodes`` and
        ``output_shapes``; the four node/shape entries are wrapped in a
        list when given as a single value.
      * ``limit_opencl_kernel_time``, ``dsp_mode``, ``obfuscate`` and
        ``fast_conv`` default to 0.
      * ``validation_inputs_data`` defaults to ``[]`` (a single value is
        wrapped in a list) and ``weight_file_path`` defaults to ``""``.

    Returns:
        The parsed config dict, updated in place with the defaults above.

    Any validation failure prints a ``CONFIG ERROR:`` message and exits the
    process with status 1.
    """
    print("============== Load and Parse configs ==============")
    with open(FLAGS.config) as f:
        # safe_load only builds plain Python objects.  A bare yaml.load()
        # can instantiate arbitrary objects from tagged input and is
        # rejected outright (missing Loader) by recent PyYAML releases.
        configs = yaml.safe_load(f)

    # An empty file parses to None; anything but a mapping cannot be a
    # valid config.
    if not isinstance(configs, dict):
        _config_error("config file must contain a top-level mapping")

    target_abis = configs.get("target_abis", [])
    if not isinstance(target_abis, list) or not target_abis:
        _config_error(
            "target_abis list is needed!",
            "For example: 'target_abis: [armeabi-v7a, arm64-v8a]'")

    embed_model_data = configs.get("embed_model_data", "")
    if embed_model_data == "" or not isinstance(embed_model_data, int) or \
            embed_model_data < 0 or embed_model_data > 1:
        _config_error("embed_model_data must be integer in range [0, 1]")

    model_names = configs.get("models", "")
    if not model_names:
        _config_error("models attribute not found in config file")

    for model_name in model_names:
        model_config = configs["models"][model_name]
        platform = model_config.get("platform", "")
        if platform == "" or platform not in ["tensorflow", "caffe"]:
            _config_error("'platform' must be 'tensorflow' or 'caffe'")

        # Mandatory scalar entries.
        for key in ["model_file_path", "model_sha256_checksum",
                    "runtime"]:
            if model_config.get(key, "") == "":
                _config_error("'%s' is necessary" % key)

        # Mandatory entries that downstream code always treats as lists.
        for key in ["input_nodes", "input_shapes", "output_nodes",
                    "output_shapes"]:
            value = model_config.get(key, "")
            if value == "":
                _config_error("'%s' is necessary" % key)
            if not isinstance(value, list):
                model_config[key] = [value]

        # Optional numeric switches.
        for key in ["limit_opencl_kernel_time", "dsp_mode", "obfuscate",
                    "fast_conv"]:
            if model_config.get(key, "") == "":
                model_config[key] = 0
                print("'%s' for %s is set to default value: 0" %
                      (key, model_name))

        validation_inputs_data = model_config.get("validation_inputs_data",
                                                  [])
        if not isinstance(validation_inputs_data, list):
            validation_inputs_data = [validation_inputs_data]
        model_config["validation_inputs_data"] = validation_inputs_data

        model_config["weight_file_path"] = model_config.get(
            "weight_file_path", "")

    print("Parse model configs successfully!\n")
    return configs


def parse_args():
    """Parse the command line arguments.

    Returns:
        Tuple ``(flags, unparsed)`` as produced by
        ``ArgumentParser.parse_known_args``: the namespace of recognised
        options and the list of arguments that were not recognised.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--config",
        type=str,
        default="./tool/config",
        required=True,
        help="The global config file of models.")
    parser.add_argument(
        "--output_dir", type=str, default="build", help="The output dir.")
    parser.add_argument(
        "--round", type=int, default=1, help="The model running round.")
    parser.add_argument(
        "--run_seconds",
        type=int,
        default=10,
        help="The model throughput test running seconds.")
    parser.add_argument(
        "--restart_round",
        type=int,
        default=1,
        help="The model restart round.")
    parser.add_argument(
        "--tuning",
        type=str2bool,
        default=True,
        help="Tune opencl params.")
    parser.add_argument(
        "--mode",
        type=str,
        default="all",
        help="[build|run|validate|benchmark|merge|all|throughput_test].")
    parser.add_argument(
        "--target_socs",
        type=str,
        default="all",
        help="SoCs to build, comma seperated list (getprop ro.board.platform)")
    parser.add_argument(
        "--out_of_range_check",
        type=str2bool,
        default=False,
        help="Enable out of range check for opencl.")
    parser.add_argument(
        "--enable_openmp",
        type=str2bool,
        default=True,
        help="Enable openmp.")
    parser.add_argument(
        "--omp_num_threads",
        type=int,
        default=-1,
        help="num of openmp threads")
    parser.add_argument(
        "--cpu_affinity_policy",
        type=int,
        default=1,
        help="0:AFFINITY_NONE/1:AFFINITY_BIG_ONLY/2:AFFINITY_LITTLE_ONLY")
    parser.add_argument(
        "--gpu_perf_hint",
        type=int,
        default=3,
        help="0:DEFAULT/1:LOW/2:NORMAL/3:HIGH")
    parser.add_argument(
        "--gpu_priority_hint",
        type=int,
        default=3,
        help="0:DEFAULT/1:LOW/2:NORMAL/3:HIGH")
    parser.add_argument(
        "--collect_report",
        type=str2bool,
        default=False,
        help="Collect report.")
    parser.add_argument(
        "--vlog_level",
        type=int,
        default=0,
        help="VLOG level.")
    parser.add_argument(
        "--caffe_env",
        type=str_to_caffe_env_type,
        default='docker',
        help="[docker | local] caffe environment.")
    # str2bool rather than bool: bool("False") is True, so with type=bool
    # any non-empty value -- including "False" -- would enable valgrind.
    parser.add_argument(
        "--valgrind",
        type=str2bool,
        default=False,
        help="Whether to use valgrind to check memory error.")
    parser.add_argument(
        "--valgrind_path",
        type=str,
        default="/data/local/tmp/valgrind",
        help="Valgrind install path.")
    parser.add_argument(
        "--valgrind_args",
        type=str,
        default="",
        help="Valgrind command args.")
    return parser.parse_known_args()


def process_models(project_name, configs, embed_model_data, vlog_level,
                   target_abi, phone_data_dir, target_soc="", serialno=""):
    """Run the steps selected by ``--mode`` for every configured model on
    one ABI / device combination.

    Per model, depending on the mode: recreate the model output directory,
    clear the on-device run data, fetch the model files, generate input
    data and model code, build ``mace_run``, run it, benchmark it and
    validate it.  After the per-model loop the libraries are merged
    (build/merge/all) or the throughput test is built and run
    (throughput_test).

    Args:
        project_name: name used in the output directory layout and in the
            merged library file name.
        configs: validated config dict.
        embed_model_data: ``embed_model_data`` setting from the config.
        vlog_level: VLOG level forwarded to the run commands.
        target_abi: ABI being processed; ``"host"`` selects the host
            directory layout.
        phone_data_dir: on-device working directory.
        target_soc: SoC name of the device (empty for host).
        serialno: device serial number (empty for host).
    """
    mode = FLAGS.mode
    hexagon_mode = get_hexagon_mode(configs)
    model_output_dirs = []
    for model_name in configs["models"]:
        print("=================== %s ===================" % (model_name,))
        model_config = configs["models"][model_name]
        input_file_list = model_config["validation_inputs_data"]
        data_type, device_type = get_data_and_device_type(
                model_config["runtime"])

        # Create model build directory
        model_path_digest = md5sum(model_config["model_file_path"])

        if target_abi == "host":
            model_output_dir = "%s/%s/%s/%s/%s/%s" % (
                FLAGS.output_dir, project_name, "build",
                model_name, model_path_digest, target_abi)
        else:
            device_name = sh_commands.adb_get_device_name_by_serialno(serialno)
            model_output_dir = "%s/%s/%s/%s/%s/%s_%s/%s" % (
                FLAGS.output_dir, project_name, "build",
                model_name, model_path_digest, device_name.replace(' ', ''),
                target_soc, target_abi)
        model_output_dirs.append(model_output_dir)

        if mode in ("build", "all"):
            if os.path.exists(model_output_dir):
                sh.rm("-rf", model_output_dir)
            os.makedirs(model_output_dir)

        if mode in ("build", "benchmark", "all"):
            sh_commands.clear_mace_run_data(
                    target_abi, serialno, phone_data_dir)

        model_file_path, weight_file_path = get_model_files(
                model_config["model_file_path"],
                model_output_dir,
                model_config["weight_file_path"])

        if mode in ("build", "run", "validate", "benchmark", "all"):
            sh_commands.gen_random_input(model_output_dir,
                                         model_config["input_nodes"],
                                         model_config["input_shapes"],
                                         input_file_list)

        if mode in ("build", "benchmark", "all"):
            sh_commands.gen_model_code(
                    "mace/codegen/models/%s" % model_name,
                    model_config["platform"],
                    model_file_path,
                    weight_file_path,
                    model_config["model_sha256_checksum"],
                    ",".join(model_config["input_nodes"]),
                    ",".join(model_config["output_nodes"]),
                    data_type,
                    model_config["runtime"],
                    model_name,
                    ":".join(model_config["input_shapes"]),
                    model_config["dsp_mode"],
                    embed_model_data,
                    model_config["fast_conv"],
                    model_config["obfuscate"])

        if mode in ("build", "all"):
            build_mace_run_prod(hexagon_mode,
                                model_config["runtime"],
                                target_abi,
                                serialno,
                                vlog_level,
                                embed_model_data,
                                model_output_dir,
                                model_config["input_nodes"],
                                model_config["output_nodes"],
                                model_config["input_shapes"],
                                model_config["output_shapes"],
                                model_name,
                                device_type,
                                FLAGS.round,
                                FLAGS.restart_round,
                                FLAGS.tuning,
                                model_config["limit_opencl_kernel_time"],
                                phone_data_dir,
                                FLAGS.enable_openmp)

        if mode in ("run", "validate", "all"):
            tuning_run(model_config["runtime"],
                       target_abi,
                       serialno,
                       vlog_level,
                       embed_model_data,
                       model_output_dir,
                       model_config["input_nodes"],
                       model_config["output_nodes"],
                       model_config["input_shapes"],
                       model_config["output_shapes"],
                       model_name,
                       device_type,
                       FLAGS.round,
                       FLAGS.restart_round,
                       FLAGS.out_of_range_check,
                       phone_data_dir,
                       omp_num_threads=FLAGS.omp_num_threads,
                       cpu_affinity_policy=FLAGS.cpu_affinity_policy,
                       gpu_perf_hint=FLAGS.gpu_perf_hint,
                       gpu_priority_hint=FLAGS.gpu_priority_hint)

        if mode == "benchmark":
            gen_opencl_and_tuning_code(
                    target_abi, serialno, [model_output_dir], False)
            sh_commands.benchmark_model(target_abi,
                                        serialno,
                                        vlog_level,
                                        embed_model_data,
                                        model_output_dir,
                                        model_config["input_nodes"],
                                        model_config["output_nodes"],
                                        model_config["input_shapes"],
                                        model_config["output_shapes"],
                                        model_name,
                                        device_type,
                                        hexagon_mode,
                                        phone_data_dir,
                                        FLAGS.omp_num_threads,
                                        FLAGS.cpu_affinity_policy,
                                        FLAGS.gpu_perf_hint,
                                        FLAGS.gpu_priority_hint)

        if mode in ("validate", "all"):
            sh_commands.validate_model(target_abi,
                                       serialno,
                                       model_file_path,
                                       weight_file_path,
                                       model_config["platform"],
                                       model_config["runtime"],
                                       model_config["input_nodes"],
                                       model_config["output_nodes"],
                                       model_config["input_shapes"],
                                       model_config["output_shapes"],
                                       model_output_dir,
                                       phone_data_dir,
                                       FLAGS.caffe_env)

    if mode in ("build", "merge", "all"):
        merge_libs_and_tuning_results(
            target_soc,
            target_abi,
            serialno,
            project_name,
            FLAGS.output_dir,
            model_output_dirs,
            hexagon_mode,
            embed_model_data)

    if mode == "throughput_test":
        merged_lib_file = FLAGS.output_dir + \
                "/%s/%s/libmace_%s.%s.a" % \
                (project_name, target_abi, project_name, target_soc)
        # list(...) keeps this working where dict.values() is a view and
        # cannot be indexed.
        first_model = list(configs["models"].values())[0]
        throughput_test_output_dir = "%s/%s/%s/%s" % (
                FLAGS.output_dir, project_name, "build",
                "throughput_test")
        if os.path.exists(throughput_test_output_dir):
            sh.rm("-rf", throughput_test_output_dir)
        os.makedirs(throughput_test_output_dir)
        # Everything below describes the inputs/outputs through the same
        # model entry (first_model) instead of whichever model the loop
        # above happened to visit last, so the generated input data and the
        # node/shape arguments always agree.
        input_file_list = first_model["validation_inputs_data"]
        sh_commands.gen_random_input(throughput_test_output_dir,
                                     first_model["input_nodes"],
                                     first_model["input_shapes"],
                                     input_file_list)
        # One model tag per runtime; a later model with the same runtime
        # replaces an earlier one.
        model_tag_dict = {}
        for model_name in configs["models"]:
            runtime = configs["models"][model_name]["runtime"]
            model_tag_dict[runtime] = model_name
        sh_commands.build_run_throughput_test(target_abi,
                                              serialno,
                                              vlog_level,
                                              FLAGS.run_seconds,
                                              merged_lib_file,
                                              throughput_test_output_dir,
                                              embed_model_data,
                                              first_model["input_nodes"],
                                              first_model["output_nodes"],
                                              first_model["input_shapes"],
                                              first_model["output_shapes"],
                                              model_tag_dict.get("cpu", ""),
                                              model_tag_dict.get("gpu", ""),
                                              model_tag_dict.get("dsp", ""),
                                              phone_data_dir)


def main(unused_args):
    """Entry point: load the config and process every ABI / SoC / device
    combination according to ``--mode``.

    In ``validate`` mode the round and restart-round flags are forced to 1.
    In ``build`` / ``all`` mode the output directory is prepared and the
    version and encrypted OpenCL sources are generated up front, and the
    libraries are packaged at the end.  Device work is done while holding
    the per-device lock.
    """
    common.init_logging()
    configs = parse_model_configs()

    if FLAGS.mode == "validate":
        FLAGS.round = 1
        FLAGS.restart_round = 1

    project_name = os.path.splitext(os.path.basename(FLAGS.config))[0]
    building = FLAGS.mode in ("build", "all")
    if building:
        # Remove previous output dirs
        # NOTE(review): the existence check looks at "<output_dir>/libmace"
        # while the directory removed is "<output_dir>/<project_name>" --
        # confirm this is intended.
        project_dir = os.path.join(FLAGS.output_dir, project_name)
        if not os.path.exists(FLAGS.output_dir):
            os.makedirs(FLAGS.output_dir)
        elif os.path.exists(os.path.join(FLAGS.output_dir, "libmace")):
            sh.rm("-rf", project_dir)
            os.makedirs(project_dir)

        # generate source
        sh_commands.gen_mace_version()
        sh_commands.gen_encrypted_opencl_source()

    target_socs = get_target_socs(configs)

    embed_model_data = configs["embed_model_data"]
    vlog_level = FLAGS.vlog_level
    phone_data_dir = "/data/local/tmp/mace_run/"
    for target_abi in configs["target_abis"]:
        for target_soc in target_socs:
            if target_abi == 'host':
                print("====================================================")
                print("Run on host")
                process_models(project_name, configs, embed_model_data,
                               vlog_level, target_abi, phone_data_dir)
                continue

            serialnos = sh_commands.get_target_socs_serialnos([target_soc])
            for serialno in serialnos:
                props = sh_commands.adb_getprop_by_serialno(serialno)
                print(
                    "===================================================="
                )
                print("Trying to lock device %s" % serialno)
                with sh_commands.device_lock(serialno):
                    print("Run on device: %s, %s, %s" % (
                        serialno, props["ro.board.platform"],
                        props["ro.product.model"]))
                    process_models(project_name, configs, embed_model_data,
                                   vlog_level, target_abi, phone_data_dir,
                                   target_soc, serialno)

    if building:
        sh_commands.packaging_lib(FLAGS.output_dir, project_name)


if __name__ == "__main__":
    # FLAGS is bound at module level here and read as a global by the
    # functions above, so it only exists when the file is run as a script.
    FLAGS, unparsed = parse_args()
    # Pass on the program name plus any arguments argparse did not recognise.
    main(unused_args=[sys.argv[0]] + unparsed)