From 3078d7e02c53c81aff4c4c56bfd49329db24387d Mon Sep 17 00:00:00 2001
From: liutuo
Date: Tue, 3 Jul 2018 17:44:13 +0800
Subject: [PATCH] update basic usage doc

---
 docs/user_guide/basic_usage.rst             | 315 +++++++++++++++++++-
 docs/user_guide/models/demo_app_models.yaml |  39 ---
 2 files changed, 314 insertions(+), 40 deletions(-)
 delete mode 100644 docs/user_guide/models/demo_app_models.yaml

diff --git a/docs/user_guide/basic_usage.rst b/docs/user_guide/basic_usage.rst
index d9b60a87..1ce17469 100644
--- a/docs/user_guide/basic_usage.rst
+++ b/docs/user_guide/basic_usage.rst
@@ -1,3 +1,316 @@
 Basic usage
-============
+=============

Build and run an example model
--------------------------------

Make sure the environment has been set up correctly (refer to `installation`).

Clone the MACE Model Zoo project.

.. code:: sh

    git clone https://github.com/XiaoMi/mace-models.git

Here we use the mobilenet-v2 model provided in the MACE Model Zoo as an example.
Plug an Android phone into your PC and enable the phone's Developer Mode, then build:

.. code:: sh

    cd /path/to/mace
    python tools/converter.py build --config=/path/to/mace-models/mobilenet-v2/mobilenet-v2.yml

Validate and benchmark the model.

.. code:: sh

    # Validate the model.
    python tools/converter.py run --config=/path/to/mace-models/mobilenet-v2/mobilenet-v2.yml --validate
    # Benchmark
    python tools/converter.py benchmark --config=/path/to/mace-models/mobilenet-v2/mobilenet-v2.yml

.. note::

    If you want to build and run the model on a PC, just use the
    ``mobilenet-v2-host.yml`` file instead.


Build your own model
----------------------------

==================================
1. Prepare your model
==================================

MACE now supports models from TensorFlow and Caffe.

- TensorFlow

  Prepare your TensorFlow ``model.pb`` file.

  Use the `Graph Transform Tool <https://github.com/tensorflow/tensorflow/blob/master/tensorflow/tools/graph_transforms/README.md>`__
  to optimize your model for inference. This tool improves inference efficiency
  through several optimizations such as operator folding and redundant-node
  removal. We strongly recommend using it before building.

  The following command shows how to use it for CPU/GPU:

  .. code:: bash

      # CPU/GPU:
      ./transform_graph \
          --in_graph=tf_model.pb \
          --out_graph=tf_model_opt.pb \
          --inputs='input' \
          --outputs='output' \
          --transforms='strip_unused_nodes(type=float, shape="1,64,64,3")
              remove_nodes(op=Identity, op=CheckNumerics)
              fold_constants(ignore_errors=true)
              flatten_atrous_conv
              fold_batch_norms
              fold_old_batch_norms
              strip_unused_nodes
              sort_by_execution_order'

- Caffe

  The MACE converter only supports Caffe 1.0+; upgrade your models with Caffe's
  built-in tools if necessary.

  .. code:: bash

      # Upgrade prototxt
      $CAFFE_ROOT/build/tools/upgrade_net_proto_text MODEL.prototxt MODEL.new.prototxt

      # Upgrade caffemodel
      $CAFFE_ROOT/build/tools/upgrade_net_proto_binary MODEL.caffemodel MODEL.new.caffemodel

============================================
2. Create a deployment file for your model
============================================

The following are example deployment files for TensorFlow and Caffe models.
Modify one of them for your own case.

- TensorFlow

  .. literalinclude:: models/demo_app_models_tf.yml
     :language: yaml

- Caffe

  .. literalinclude:: models/demo_app_models_caffe.yml
     :language: yaml
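A condensed TensorFlow example is sketched below to show the overall shape of a
deployment file. The field values (model URL, checksum, tensor names and shapes)
are those of the MobileNet V1 sample and are only illustrative; replace them
with your own model's values.

.. code:: yaml

    # The name of the generated library
    library_name: mobilenet
    target_abis: [arm64-v8a]
    embed_model_data: 1
    # 'code' transfers the model into C++ code, 'proto' keeps it in a protobuf file
    build_type: code
    linkshared: 0
    models:
      mobilenet_v1: # model tag, used in model loading; must be unique
        platform: tensorflow
        # local paths as well as http:// and https:// URLs are supported
        model_file_path: https://cnbj1.fds.api.xiaomi.com/mace/miai-models/mobilenet-v1/mobilenet-v1-1.0.pb
        model_sha256_checksum: 71b10f540ece33c49a7b51f5d4095fc9bd78ce46ebf0300487b2ee23d71294e6
        subgraphs:
          - input_tensors: input
            input_shapes: 1,224,224,3
            output_tensors: MobilenetV1/Predictions/Reshape_1
            output_shapes: 1,1001
        runtime: cpu+gpu
        obfuscate: 0
        winograd: 0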
+ + +====================================== +3. Build a library for your model +====================================== + +MACE provides a python tool (``tools/converter.py``) for +model conversion, compiling, test run, benchmark and correctness validation. + +MACE can build either static or shared link library (which is +specified by ``linkshared`` in YAML model deployment file). + +**Commands** + + * **build** + + build library. + + .. code:: sh + + cd path/to/mace + # Build library + python tools/converter.py build --config=path/to/your/model_deployment_file.yml + + * **run** + + run the model. + + .. code:: sh + + # Test model run time + python tools/converter.py run --config=path/to/your/model_deployment_file.yml --round=100 + + # Validate the correctness by comparing the results against the + # original model and framework, measured with cosine distance for similarity. + python tools/converter.py run --config=path/to/your/model_deployment_file.yml --validate + + * **benchmark** + + benchmark and profile the model. + + .. code:: sh + + # Benchmark model, get detailed statistics of each Op. + python tools/converter.py benchmark --config=path/to/your/model_deployment_file.yml + + .. warning:: + + 1. Plug an android phone into your pc and enable Developer Mode before building. + 2. Please ``build`` your model before ``run`` or ``benchmark`` it. + + +============================================ +4. Deploy generated library in your project +============================================ + +``build`` command will generate the static/shared library, model files and +header files. All of these files have been packaged into +``path/to/mace/build/${library_name}/libmace_${library_name}.tar.gz``. + +``${library_name}`` is the name you defined in the first line of your demployment yaml file. + +- The generated ``static`` library files are organized as follows, + +.. code:: + + build/ + └── mobilenet-v2-gpu + ├── include + │   └── mace + │   └── public + │   ├── mace.h + │   └── mace_runtime.h + ├── libmace_mobilenet-v2-gpu.tar.gz + ├── lib + │   ├── arm64-v8a + │   │   └── libmace_mobilenet-v2-gpu.MI6.msm8998.a + │   └── armeabi-v7a + │   └── libmace_mobilenet-v2-gpu.MI6.msm8998.a + ├── model + │   ├── mobilenet_v2.data + │   └── mobilenet_v2.pb + └── opencl + ├── arm64-v8a + │   └── mobilenet-v2-gpu_compiled_opencl_kernel.MI6.msm8998.bin + └── armeabi-v7a + └── mobilenet-v2-gpu_compiled_opencl_kernel.MI6.msm8998.bin + +- The generated ``shared`` library files are organized as follows, + +.. code:: + + build + └── mobilenet-v2-gpu + ├── include + │   └── mace + │   └── public + │   ├── mace.h + │   └── mace_runtime.h + ├── lib + │   ├── arm64-v8a + │   │   ├── libgnustl_shared.so + │   │   └── libmace.so + │   └── armeabi-v7a + │   ├── libgnustl_shared.so + │   └── libmace.so + ├── model + │   ├── mobilenet_v2.data + │   └── mobilenet_v2.pb + └── opencl + ├── arm64-v8a + │   └── mobilenet-v2-gpu_compiled_opencl_kernel.MI6.msm8998.bin + └── armeabi-v7a + └── mobilenet-v2-gpu_compiled_opencl_kernel.MI6.msm8998.bin + +.. note:: + + 1. ``${MODEL_TAG}.pb`` file will be generated only when ``build_type`` is ``proto``. + 2. ``${library_name}_compiled_opencl_kernel.${device_name}.${soc}.bin`` will + be generated only when ``target_socs`` and ``gpu`` runtime are specified. + 3. Generated shared library depends on ``libgnustl_shared.so``. + +.. 
Please refer to ``mace/examples/example.cc`` for full usage. The following
lists the key steps.

.. code:: cpp

    // Include the MACE headers
    #include "mace/public/mace.h"
    #include "mace/public/mace_runtime.h"
    // If the build_type is code
    #include "mace/public/mace_engine_factory.h"

    // Standard headers used below
    #include <functional>
    #include <map>
    #include <memory>
    #include <numeric>
    #include <string>
    #include <vector>

    // 0. Set pre-compiled OpenCL binary program file paths when available
    if (device_type == DeviceType::GPU) {
      mace::SetOpenCLBinaryPaths(opencl_binary_paths);
    }

    // 1. Configure a compiled OpenCL kernel cache to reduce the
    //    initialization time.
    const std::string file_path = "path/to/opencl_cache_file";
    std::shared_ptr<KVStorageFactory> storage_factory(
        new FileStorageFactory(file_path));
    ConfigKVStorageFactory(storage_factory);

    // 2. Declare the device type (must match ``runtime`` in the deployment file)
    DeviceType device_type = DeviceType::GPU;

    // 3. Define the input and output tensor names.
    std::vector<std::string> input_names = {...};
    std::vector<std::string> output_names = {...};

    // 4. Create the MaceEngine instance
    std::shared_ptr<mace::MaceEngine> engine;
    MaceStatus create_engine_status;
    // If the build_type is code, create the engine from compiled code
    create_engine_status =
        CreateMaceEngineFromCode(model_name.c_str(),
                                 nullptr,
                                 input_names,
                                 output_names,
                                 device_type,
                                 &engine);
    // If the build_type is proto, create the engine from the model file
    create_engine_status =
        CreateMaceEngineFromProto(model_pb_data,
                                  model_data_file.c_str(),
                                  input_names,
                                  output_names,
                                  device_type,
                                  &engine);
    if (create_engine_status != MaceStatus::MACE_SUCCESS) {
      // Report error
    }

    // 5. Create the input and output tensor buffers
    std::map<std::string, mace::MaceTensor> inputs;
    std::map<std::string, mace::MaceTensor> outputs;
    for (size_t i = 0; i < input_count; ++i) {
      // Allocate an input buffer
      int64_t input_size =
          std::accumulate(input_shapes[i].begin(), input_shapes[i].end(), 1,
                          std::multiplies<int64_t>());
      auto buffer_in = std::shared_ptr<float>(new float[input_size],
                                              std::default_delete<float[]>());
      // Load input here
      // ...

      inputs[input_names[i]] = mace::MaceTensor(input_shapes[i], buffer_in);
    }

    for (size_t i = 0; i < output_count; ++i) {
      // Allocate an output buffer
      int64_t output_size =
          std::accumulate(output_shapes[i].begin(), output_shapes[i].end(), 1,
                          std::multiplies<int64_t>());
      auto buffer_out = std::shared_ptr<float>(new float[output_size],
                                               std::default_delete<float[]>());
      outputs[output_names[i]] = mace::MaceTensor(output_shapes[i], buffer_out);
    }

    // 6. Run the model
    MaceStatus status = engine->Run(inputs, &outputs);
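After ``Run`` returns ``MACE_SUCCESS``, the results are in the output buffers
allocated in step 5 and can be read back through ``MaceTensor::data()``. The
sketch below is illustrative: the top-1 computation assumes a classification
output, and ``<algorithm>`` must also be included for ``std::max_element``.

.. code:: cpp

    // 7. Read the results from the output buffers.
    for (size_t i = 0; i < output_count; ++i) {
      const float *output_data = outputs[output_names[i]].data().get();
      int64_t output_size =
          std::accumulate(output_shapes[i].begin(), output_shapes[i].end(), 1,
                          std::multiplies<int64_t>());
      // Illustrative post-processing: top-1 index of a classification output.
      const float *max_it =
          std::max_element(output_data, output_data + output_size);
      int64_t top1_index = max_it - output_data;
      // ...
    }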
More details in `advanced_usage`.
\ No newline at end of file
diff --git a/docs/user_guide/models/demo_app_models.yaml b/docs/user_guide/models/demo_app_models.yaml
deleted file mode 100644
index df130c4b..00000000
--- a/docs/user_guide/models/demo_app_models.yaml
+++ /dev/null
@@ -1,39 +0,0 @@
-# The name of library
-library_name: mobilenet
-target_abis: [arm64-v8a]
-embed_model_data: 1
-# The build mode for model(s).
-# 'code' stand for transfer model(s) into cpp code, 'proto' for model(s) in protobuf file(s).
-build_type: code
-linkshared: 0
-# One yaml config file can contain multi models' config message.
-models:
-  mobilenet_v1: # model tag, which will be used in model loading and must be specific.
-    platform: tensorflow
-    # support local path, http:// and https://
-    model_file_path: https://cnbj1.fds.api.xiaomi.com/mace/miai-models/mobilenet-v1/mobilenet-v1-1.0.pb
-    model_sha256_checksum: 71b10f540ece33c49a7b51f5d4095fc9bd78ce46ebf0300487b2ee23d71294e6
-    subgraphs:
-      - input_tensors: input
-        input_shapes: 1,224,224,3
-        output_tensors: MobilenetV1/Predictions/Reshape_1
-        output_shapes: 1,1001
-    runtime: cpu+gpu
-    limit_opencl_kernel_time: 0
-    nnlib_graph_mode: 0
-    obfuscate: 0
-    winograd: 0
-  mobilenet_v2:
-    platform: tensorflow
-    model_file_path: https://cnbj1.fds.api.xiaomi.com/mace/miai-models/mobilenet-v2/mobilenet-v2-1.0.pb
-    model_sha256_checksum: 369f9a5f38f3c15b4311c1c84c032ce868da9f371b5f78c13d3ea3c537389bb4
-    subgraphs:
-      - input_tensors: input
-        input_shapes: 1,224,224,3
-        output_tensors: MobilenetV2/Predictions/Reshape_1
-        output_shapes: 1,1001
-    runtime: cpu+gpu
-    limit_opencl_kernel_time: 0
-    nnlib_graph_mode: 0
-    obfuscate: 0
-    winograd: 0