diff --git a/sdk/c-opr-loaders/mace/README.md b/sdk/c-opr-loaders/mace/README.md index 60ab8e5a11d16b95e603f1ebea3d53520007bcc5..dd215ffe2dbc445f9e3ba389302cf1f4388fed4e 100644 --- a/sdk/c-opr-loaders/mace/README.md +++ b/sdk/c-opr-loaders/mace/README.md @@ -45,10 +45,9 @@ First of all, send all files to the executed device: - load_and_run - resnet_50.mdl - libmace_loader.so -- opencl library(something like libOpenCL.so, libmali.so or libEGL.so ...) if you want to run it on GPU ``` -MGB_MACE_RUNTIME=GPU MGB_MACE_OPENCL_PATH=/path/to/opencl MGB_MACE_LOADER_FORMAT=NCHW /path/to/load_and_run /path/to/resnet_50.mdl --c-opr-lib /path/to/libmace_loader.so +MGB_MACE_RUNTIME=GPU MGB_MACE_OPENCL_CACHE_PATH=/path/to/opencl MGB_MACE_LOADER_FORMAT=NCHW /path/to/load_and_run /path/to/resnet_50.mdl --c-opr-lib /path/to/libmace_loader.so ``` RUNTIME candidates: @@ -56,10 +55,26 @@ RUNTIME candidates: - CPU - GPU -Running with GPU runtime on android needs opencl library, one can set `MGB_MACE_OPENCL_PATH` by using environment variable +`MGB_MACE_OPENCL_CACHE_PATH` is the directory path where OpenCL binary cache writes to (the cache file name is always `mace_cl_compiled_program.bin`), if the cache file does not exist then it will be created. We mainly use NCHW data format, if you have NHWC model, use environment `MGB_MACE_LOADER_FORMAT=NHWC` For CPU runtime, default running thread is 1, could be specified with `MGB_MACE_NR_THREADS=n` if you want to run with HEXAGON runtime, more efforts should be made, please check [here](https://mace.readthedocs.io/en/latest/faq.html#why-is-mace-not-working-on-dsp). + +### Tuning on specific OpenCL device + +MACE supports tuning on specific SoC to optimize the performance on GPU, see [doc](https://mace.readthedocs.io/en/latest/user_guide/advanced_usage.html#tuning-for-specific-soc-s-gpu). + +To enable this feature, use `MGB_MACE_TUNING_PARAM_PATH` env to give the path to the tuning param file. 
+ +To generate the tuning param file, give `MACE_TUNING=1` env and set the `MACE_RUN_PARAMETER_PATH` to the file name you want. + + ```bash + # search for tuning param + MACE_TUNING=1 MACE_RUN_PARAMETER_PATH=opencl/vgg16.tune_param MGB_MACE_RUNTIME=GPU MGB_MACE_OPENCL_CACHE_PATH=opencl MGB_MACE_LOADER_FORMAT=NCHW ./load_and_run mace/vgg16.mdl --c-opr-lib libmace_loader.so --input 4d.npy + + # then run test using the param + MGB_MACE_TUNING_PARAM_PATH=opencl/vgg16.tune_param MGB_MACE_RUNTIME=GPU MGB_MACE_OPENCL_CACHE_PATH=opencl MGB_MACE_LOADER_FORMAT=NCHW ./load_and_run mace/vgg16.mdl --c-opr-lib libmace_loader.so --input 4d.npy + ``` \ No newline at end of file diff --git a/sdk/c-opr-loaders/mace/dump_model.py b/sdk/c-opr-loaders/mace/dump_model.py index 21e41022694c4fdf8a0c680f24f108fed038c97b..f61ab83219a4481d6816a19dfc4ca883513c9da7 100644 --- a/sdk/c-opr-loaders/mace/dump_model.py +++ b/sdk/c-opr-loaders/mace/dump_model.py @@ -100,8 +100,7 @@ def main(): inputs = [ np.random.random(isizes[i]).astype(np.float32) for i in range(len(isizes)) ] - - inference.trace(inputs) + inference.trace(*inputs) inference.dump(args.output) diff --git a/sdk/c-opr-loaders/mace/mace_loader.cpp b/sdk/c-opr-loaders/mace/mace_loader.cpp index 97bd88303f31c33554e4ff2e6c1a283218b457e7..e5b35673ba1505c56ff363715929db583e4dea8c 100644 --- a/sdk/c-opr-loaders/mace/mace_loader.cpp +++ b/sdk/c-opr-loaders/mace/mace_loader.cpp @@ -129,7 +129,7 @@ class MGBOprDescImpl { std::map mace_outputs; auto mace_data_format = mace::DataFormat::NCHW; - char *data_format = getenv("MGB_MACE_LOADER_FROAMT"); + char *data_format = getenv("MGB_MACE_LOADER_FORMAT"); if (data_format != nullptr && !strcmp(data_format, "NHWC")) { mace_data_format = mace::DataFormat::NHWC; } @@ -206,22 +206,19 @@ public: if (device_type == mace::DeviceType::GPU) { std::shared_ptr gpu_context; - char *opencl_path = getenv("MGB_MACE_OPENCL_PATH"); + char *cache_path = getenv("MGB_MACE_OPENCL_CACHE_PATH"); + ASSERT(cache_path, "there must be an 
opencl cache file path"); - // check default opencl paths - if (opencl_path == nullptr) { - for (size_t i = 0; i < (sizeof(default_so_paths) / sizeof(char*)); i++) { - if (file_exists(default_so_paths[i])) { - opencl_path = const_cast(default_so_paths[i]); - break; - } - } + char *param_path = getenv("MGB_MACE_TUNING_PARAM_PATH"); + std::string opencl_param_path(""); + if (param_path != nullptr) { + opencl_param_path = std::string(param_path); } - ASSERT(opencl_path, "Please set opencl library path"); - std::string storage_path(opencl_path); + std::string storage_path(cache_path); gpu_context = mace::GPUContextBuilder() .SetStoragePath(storage_path) + .SetOpenCLParameterPath(opencl_param_path) .Finalize(); config.SetGPUContext(gpu_context);