fix(sdk/c-opr-loaders): fix mace dump model and env params and add tuning

GitOrigin-RevId: 5e9a045410df19bbe7d3425c7f696180570a9d7d

fix(sdk/c-opr-loaders): fix mace dump model and env params and add tuning
GitOrigin-RevId: 5e9a045410df19bbe7d3425c7f696180570a9d7d
a9e7a670 · Megvii Engine Team · c1fb3c8c · a9e7a670 · a9e7a670 · a9e7a670
3 changed file
--- a/sdk/c-opr-loaders/mace/README.md
+++ b/sdk/c-opr-loaders/mace/README.md
@@ -45,10 +45,9 @@ First of all, send all files to the executed device:
 - load_and_run
 - resnet_50.mdl
 - libmace_loader.so
- opencl library(something like libOpenCL.so, libmali.so or libEGL.so ...) if you want to run it on GPU

 ```
-MGB_MACE_RUNTIME=GPU MGB_MACE_OPENCL_PATH=/path/to/opencl MGB_MACE_LOADER_FORMAT=NCHW /path/to/load_and_run /path/to/resnet_50.mdl --c-opr-lib /path/to/libmace_loader.so
+MGB_MACE_RUNTIME=GPU MGB_MACE_OPENCL_CACHE_PATH=/path/to/opencl MGB_MACE_LOADER_FORMAT=NCHW /path/to/load_and_run /path/to/resnet_50.mdl --c-opr-lib /path/to/libmace_loader.so
 ```

 RUNTIME candidates:
@@ -56,10 +55,26 @@ RUNTIME candidates:
 - CPU
 - GPU

-Running with GPU runtime on android needs opencl library, one can set `MGB_MACE_OPENCL_PATH` by using environment variable
+`MGB_MACE_OPENCL_CACHE_PATH` is the directory where the OpenCL binary cache is written (the cache file name is always `mace_cl_compiled_program.bin`); if the cache file does not exist, it will be created.

 We mainly use NCHW data format, if you have NHWC model, use environment `MGB_MACE_LOADER_FORMAT=NHWC`

 For CPU runtime, default running thread is 1, could be specified with `MGB_MACE_NR_THREADS=n`

 if you want to run with HEXAGON runtime, more efforts should be made, please check [here](https://mace.readthedocs.io/en/latest/faq.html#why-is-mace-not-working-on-dsp).
+
+### Tuning on specific OpenCL device
+
+MACE supports tuning for a specific SoC to optimize performance on GPU, see [doc](https://mace.readthedocs.io/en/latest/user_guide/advanced_usage.html#tuning-for-specific-soc-s-gpu).
+
+To enable this feature, use `MGB_MACE_TUNING_PARAM_PATH` env to give the path to the tuning param file.
+
+To generate the tuning param file, set the `MACE_TUNING=1` env and set `MACE_RUN_PARAMETER_PATH` to the file name you want.
+
+ ```bash
+ # search for tuning param
+ MACE_TUNING=1 MACE_RUN_PARAMETER_PATH=opencl/vgg16.tune_param MGB_MACE_RUNTIME=GPU MGB_MACE_OPENCL_CACHE_PATH=opencl MGB_MACE_LOADER_FORMAT=NCHW ./load_and_run mace/vgg16.mdl --c-opr-lib libmace_loader.so --input 4d.npy
+
+ # then run test using the param
+ MGB_MACE_TUNING_PARAM_PATH=opencl/vgg16.tune_param MGB_MACE_RUNTIME=GPU MGB_MACE_OPENCL_CACHE_PATH=opencl MGB_MACE_LOADER_FORMAT=NCHW ./load_and_run mace/vgg16.mdl --c-opr-lib libmace_loader.so --input 4d.npy
+ ```
\ No newline at end of file
--- a/sdk/c-opr-loaders/mace/dump_model.py
+++ b/sdk/c-opr-loaders/mace/dump_model.py
@@ -100,8 +100,7 @@ def main():
        inputs = [
            np.random.random(isizes[i]).astype(np.float32) for i in range(len(isizes))
        ]
-
-        inference.trace(inputs)
+        inference.trace(*inputs)
        inference.dump(args.output)



--- a/sdk/c-opr-loaders/mace/mace_loader.cpp
+++ b/sdk/c-opr-loaders/mace/mace_loader.cpp
@@ -129,7 +129,7 @@ class MGBOprDescImpl {
        std::map<std::string, mace::MaceTensor> mace_outputs;

        auto mace_data_format = mace::DataFormat::NCHW;
-        char *data_format = getenv("MGB_MACE_LOADER_FROAMT");
+        char *data_format = getenv("MGB_MACE_LOADER_FORMAT");
        if (data_format != nullptr && !strcmp(data_format, "NHWC")) {
            mace_data_format = mace::DataFormat::NHWC;
        }
@@ -206,22 +206,19 @@ public:
        if (device_type == mace::DeviceType::GPU) {
            std::shared_ptr<mace::GPUContext> gpu_context;

-            char *opencl_path = getenv("MGB_MACE_OPENCL_PATH");
+            char *cache_path = getenv("MGB_MACE_OPENCL_CACHE_PATH");
+            ASSERT(cache_path, "there must be an opencl cache file path");

-            // check default opencl paths
-            if (opencl_path == nullptr) {
-                for (size_t i = 0; i < (sizeof(default_so_paths) / sizeof(char*)); i++) {
-                    if (file_exists(default_so_paths[i])) {
-                        opencl_path = const_cast<char *>(default_so_paths[i]);
-                        break;
-                    }
-                }
+            char *param_path = getenv("MGB_MACE_TUNING_PARAM_PATH");
+            std::string opencl_param_path("");
+            if (param_path != nullptr) {
+                opencl_param_path = std::string(param_path);
            }

-            ASSERT(opencl_path, "Please set opencl library path");
-            std::string storage_path(opencl_path);
+            std::string storage_path(cache_path);
            gpu_context = mace::GPUContextBuilder()
                            .SetStoragePath(storage_path)
+                            .SetOpenCLParameterPath(opencl_param_path)
                            .Finalize();

            config.SetGPUContext(gpu_context);