From 7cd78d1e81bb3ae2ad5e1d367070fd13311ad710 Mon Sep 17 00:00:00 2001 From: yejianwu Date: Wed, 4 Jul 2018 19:30:43 +0800 Subject: [PATCH] refactor dockerfiles and update installation docs --- docs/index.rst | 9 + docs/installation/env_requirement.rst | 68 ++++ docs/installation/manual_setup.rst | 68 ++++ docs/installation/using_docker.rst | 49 +++ docs/user_guide/how_to_build.rst | 486 -------------------------- 5 files changed, 194 insertions(+), 486 deletions(-) create mode 100644 docs/installation/env_requirement.rst create mode 100644 docs/installation/manual_setup.rst create mode 100644 docs/installation/using_docker.rst delete mode 100644 docs/user_guide/how_to_build.rst diff --git a/docs/index.rst b/docs/index.rst index a2529f05..b293ce45 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -11,6 +11,15 @@ The main documentation is organized into the following sections: introduction +.. toctree:: + :maxdepth: 1 + :caption: Installation + :name: sec-install + + installation/env_requirement + installation/using_docker + installation/manual_setup + .. toctree:: :maxdepth: 1 :caption: User guide diff --git a/docs/installation/env_requirement.rst b/docs/installation/env_requirement.rst new file mode 100644 index 00000000..c0202de6 --- /dev/null +++ b/docs/installation/env_requirement.rst @@ -0,0 +1,68 @@ +Environment Requirement +======================= + +MACE requires the following dependencies: + +Necessary Dependencies: +----------------------- + +.. 
list-table:: + :widths: auto + :header-rows: 1 + :align: left + + * - software + - version + - install command + * - bazel + - >= 0.13.0 + - `bazel installation guide `__ + * - android-ndk + - r15c/r16b + - `NDK installation guide `__ or refers to the docker file + * - adb + - >= 1.0.32 + - apt-get install android-tools-adb + * - cmake + - >= 3.11.3 + - apt-get install cmake + * - numpy + - >= 1.14.0 + - pip install -I numpy==1.14.0 + * - scipy + - >= 1.0.0 + - pip install -I scipy==1.0.0 + * - jinja2 + - >= 2.10 + - pip install -I jinja2==2.10 + * - PyYaml + - >= 3.12.0 + - pip install -I pyyaml==3.12 + * - sh + - >= 1.12.14 + - pip install -I sh==1.12.14 + * - filelock + - >= 3.0.0 + - pip install -I filelock==3.0.0 + * - docker (for caffe) + - >= 17.09.0-ce + - `docker installation guide `__ + +.. note:: + + ``export ANDROID_NDK_HOME=/path/to/ndk`` to specify ANDROID_NDK_HOME + +Optional Dependencies: +--------------------- + +.. list-table:: + :widths: auto + :header-rows: 1 + :align: left + + * - software + - version + - install command + * - tensorflow + - >= 1.6.0 + - pip install -I tensorflow==1.6.0 (if you use tensorflow model) diff --git a/docs/installation/manual_setup.rst b/docs/installation/manual_setup.rst new file mode 100644 index 00000000..c6aa39d2 --- /dev/null +++ b/docs/installation/manual_setup.rst @@ -0,0 +1,68 @@ +Manual setup +============ + +The setup steps are based on ``Ubuntu``. And dependencies to install can refer to :doc:`env_requirement`. + +Install Necessary Dependencies +------------------------------ + +Install Bazel +~~~~~~~~~~~~~ + +Recommend bazel with version larger than ``0.13.0`` (Refer to `Bazel documentation `__). + +.. 
code:: sh + + export BAZEL_VERSION=0.13.1 + mkdir /bazel && \ + cd /bazel && \ + wget https://github.com/bazelbuild/bazel/releases/download/$BAZEL_VERSION/bazel-$BAZEL_VERSION-installer-linux-x86_64.sh && \ + chmod +x bazel-*.sh && \ + ./bazel-$BAZEL_VERSION-installer-linux-x86_64.sh && \ + cd / && \ + rm -f /bazel/bazel-$BAZEL_VERSION-installer-linux-x86_64.sh + +Install NDK +~~~~~~~~~~~ + +Recommend NDK with version r15c or r16 (Refer to `NDK installation guide `__). + +.. code:: sh + + # Download NDK r15c + cd /opt/ && \ + wget -q https://dl.google.com/android/repository/android-ndk-r15c-linux-x86_64.zip && \ + unzip -q android-ndk-r15c-linux-x86_64.zip && \ + rm -f android-ndk-r15c-linux-x86_64.zip + + export ANDROID_NDK_VERSION=r15c + export ANDROID_NDK=/opt/android-ndk-${ANDROID_NDK_VERSION} + export ANDROID_NDK_HOME=${ANDROID_NDK} + + # add to PATH + export PATH=${PATH}:${ANDROID_NDK_HOME} + +Install extra tools +~~~~~~~~~~~~~~~~~~~ + +.. code:: sh + + apt-get install -y --no-install-recommends \ + cmake \ + android-tools-adb + pip install -i http://pypi.douban.com/simple/ --trusted-host pypi.douban.com setuptools + pip install -i http://pypi.douban.com/simple/ --trusted-host pypi.douban.com \ + "numpy>=1.14.0" \ + scipy \ + jinja2 \ + pyyaml \ + sh==1.12.14 \ + pycodestyle==2.4.0 \ + filelock + +Install Optional Dependencies +----------------------------- + +.. code:: sh + + pip install -i http://pypi.douban.com/simple/ --trusted-host pypi.douban.com tensorflow==1.8.0 diff --git a/docs/installation/using_docker.rst b/docs/installation/using_docker.rst new file mode 100644 index 00000000..b6f3ea49 --- /dev/null +++ b/docs/installation/using_docker.rst @@ -0,0 +1,49 @@ +Using docker +============ + +Pull or Build docker image +-------------------------- + +MACE provides docker images with necessary dependencies installed and also Dockerfiles for images building, +you can pull the existing ones directly or build them from the Dockerfiles. 
+In most cases, the ``lite edition`` image can satisfy developer's basic needs. .. note:: It's highly recommended to pull built images. - ``lite edition`` docker image. .. code:: sh # Pull lite edition docker image docker pull registry.cn-hangzhou.aliyuncs.com/xiaomimace/mace-dev-lite # Build lite edition docker image docker build -t registry.cn-hangzhou.aliyuncs.com/xiaomimace/mace-dev-lite ./docker/mace-dev-lite - ``full edition`` docker image (which contains multiple NDK versions and other dev tools). .. code:: sh # Pull full edition docker image docker pull registry.cn-hangzhou.aliyuncs.com/xiaomimace/mace-dev # Build full edition docker image docker build -t registry.cn-hangzhou.aliyuncs.com/xiaomimace/mace-dev ./docker/mace-dev .. note:: We will show steps with lite edition later. Using the image --------------- Create container with the following command .. code:: sh # Create a container named `mace-dev` docker run -it --privileged -d --name mace-dev \ -v /dev/bus/usb:/dev/bus/usb --net=host \ -v /local/path:/container/path \ registry.cn-hangzhou.aliyuncs.com/xiaomimace/mace-dev-lite # Execute an interactive bash shell on the container docker exec -it mace-dev /bin/bash diff --git a/docs/user_guide/how_to_build.rst b/docs/user_guide/how_to_build.rst deleted file mode 100644 index 3bb6e2b9..00000000 --- a/docs/user_guide/how_to_build.rst +++ /dev/null @@ -1,486 +0,0 @@ -How to build -============ - -Supported Platforms ------------------- - -.. list-table:: - :widths: auto - :header-rows: 1 - :align: left - - * - Platform - - Explanation - * - TensorFlow - - >= 1.6.0. - * - Caffe - - >= 1.0. - -Environment Requirement ------------------------- - -MACE requires the following dependencies: - -.. 
list-table:: - :widths: auto - :header-rows: 1 - :align: left - - * - software - - version - - install command - * - bazel - - >= 0.13.0 - - `bazel installation guide `__ - * - android-ndk - - r15c/r16b - - `NDK installation guide `__ or refers to the docker file - * - adb - - >= 1.0.32 - - apt-get install android-tools-adb - * - tensorflow - - >= 1.6.0 - - pip install -I tensorflow==1.6.0 (if you use tensorflow model) - * - numpy - - >= 1.14.0 - - pip install -I numpy==1.14.0 - * - scipy - - >= 1.0.0 - - pip install -I scipy==1.0.0 - * - jinja2 - - >= 2.10 - - pip install -I jinja2==2.10 - * - PyYaml - - >= 3.12.0 - - pip install -I pyyaml==3.12 - * - sh - - >= 1.12.14 - - pip install -I sh==1.12.14 - * - filelock - - >= 3.0.0 - - pip install -I filelock==3.0.0 - * - docker (for caffe) - - >= 17.09.0-ce - - `docker installation guide `__ - -.. note:: - - ``export ANDROID_NDK_HOME=/path/to/ndk`` to specify ANDROID_NDK_HOME - -MACE provides a Dockerfile with these dependencies installed, -you can build the image from it, - -.. code:: sh - - docker build -t registry.cn-hangzhou.aliyuncs.com/xiaomimace/mace-dev-lite ./docker/mace-dev-lite - -or pull the pre-built image from Docker Hub, - -.. code:: sh - - docker pull registry.cn-hangzhou.aliyuncs.com/xiaomimace/mace-dev-lite - -and then run the container with the following command. - -.. code:: sh - - # Create container - # Set 'host' network to use ADB - docker run -it --privileged -v /dev/bus/usb:/dev/bus/usb --net=host \ - -v /local/path:/container/path \ - registry.cn-hangzhou.aliyuncs.com/xiaomimace/mace-dev-lite \ - /bin/bash - - -Usage --------- - -======================================= -1. Pull MACE source code -======================================= - -.. code:: sh - - git clone https://github.com/XiaoMi/mace.git - git fetch --all --tags --prune - - # Checkout the latest tag (i.e. release version) - tag_name=`git describe --abbrev=0 --tags` - git checkout tags/${tag_name} - -.. 
note:: - - It's highly recommanded to use a release version instead of master branch. - -============================ -2. Model Preprocessing -============================ - -- TensorFlow - -TensorFlow provides -`Graph Transform Tool `__ -to improve inference efficiency by making various optimizations like Ops -folding, redundant node removal etc. It's strongly recommended to make these -optimizations before graph conversion step. - -The following commands show the suggested graph transformations and -optimizations for different runtimes, - -.. code:: sh - - # CPU/GPU: - ./transform_graph \ - --in_graph=tf_model.pb \ - --out_graph=tf_model_opt.pb \ - --inputs='input' \ - --outputs='output' \ - --transforms='strip_unused_nodes(type=float, shape="1,64,64,3") - strip_unused_nodes(type=float, shape="1,64,64,3") - remove_nodes(op=Identity, op=CheckNumerics) - fold_constants(ignore_errors=true) - flatten_atrous_conv - fold_batch_norms - fold_old_batch_norms - strip_unused_nodes - sort_by_execution_order' - -.. code:: sh - - # DSP: - ./transform_graph \ - --in_graph=tf_model.pb \ - --out_graph=tf_model_opt.pb \ - --inputs='input' \ - --outputs='output' \ - --transforms='strip_unused_nodes(type=float, shape="1,64,64,3") - strip_unused_nodes(type=float, shape="1,64,64,3") - remove_nodes(op=Identity, op=CheckNumerics) - fold_constants(ignore_errors=true) - fold_batch_norms - fold_old_batch_norms - backport_concatv2 - quantize_weights(minimum_size=2) - quantize_nodes - strip_unused_nodes - sort_by_execution_order' - -- Caffe - -MACE converter only supports Caffe 1.0+, you need to upgrade -your models with Caffe built-in tool when necessary, - -.. code:: bash - - # Upgrade prototxt - $CAFFE_ROOT/build/tools/upgrade_net_proto_text MODEL.prototxt MODEL.new.prototxt - - # Upgrade caffemodel - $CAFFE_ROOT/build/tools/upgrade_net_proto_binary MODEL.caffemodel MODEL.new.caffemodel - -============================== -3. 
Build static/shared library -============================== - ------------------ -3.1 Overview ------------------ -MACE can build either static or shared library (which is -specified by ``linkshared`` in YAML model deployment file). -The followings are two use cases. - -* **Build well tuned library for specific SoCs** - - When ``target_socs`` is specified in YAML model deployment file, the build - tool will enable automatic tuning for GPU kernels. This usually takes some - time to finish depending on the complexity of your model. - - .. note:: - - You should plug in device(s) with the correspoding SoC(s). - -* **Build generic library for all SoCs** - - When ``target_socs`` is not specified, the generated library is compatible - with general devices. - - .. note:: - - There will be around of 1 ~ 10% performance drop for GPU - runtime compared to the well tuned library. - -MACE provide command line tool (``tools/converter.py``) for -model conversion, compiling, test run, benchmark and correctness validation. - -.. note:: - - 1. ``tools/converter.py`` should be run at the root directory of this project. - 2. When ``linkshared`` is set to ``1``, ``build_type`` should be ``proto``. - And currently only android devices supported. - - ------------------------------------------- -3.2 \ ``tools/converter.py``\ usage ------------------------------------------- - -**Commands** - - * **build** - - build library and test tools. - - .. code:: sh - - # Build library - python tools/converter.py build --config=models/config.yaml - - - - * **run** - - run the model(s). - - .. code:: sh - - # Test model run time - python tools/converter.py run --config=models/config.yaml --round=100 - - # Validate the correctness by comparing the results against the - # original model and framework, measured with cosine distance for similarity. 
- python tools/converter.py run --config=models/config.yaml --validate - - # Check the memory usage of the model(**Just keep only one model in configuration file**) - python tools/converter.py run --config=models/config.yaml --round=10000 & - sleep 5 - adb shell dumpsys meminfo | grep mace_run - kill %1 - - - .. warning:: - - ``run`` rely on ``build`` command, you should ``run`` after ``build``. - - * **benchmark** - - benchmark and profiling model. - - .. code:: sh - - # Benchmark model, get detailed statistics of each Op. - python tools/converter.py benchmark --config=models/config.yaml - - - .. warning:: - - ``benchmark`` rely on ``build`` command, you should ``benchmark`` after ``build``. - -**Common arguments** - - .. list-table:: - :widths: auto - :header-rows: 1 - :align: left - - * - option - - type - - default - - commands - - explanation - * - --omp_num_threads - - int - - -1 - - ``run``/``benchmark`` - - number of threads - * - --cpu_affinity_policy - - int - - 1 - - ``run``/``benchmark`` - - 0:AFFINITY_NONE/1:AFFINITY_BIG_ONLY/2:AFFINITY_LITTLE_ONLY - * - --gpu_perf_hint - - int - - 3 - - ``run``/``benchmark`` - - 0:DEFAULT/1:LOW/2:NORMAL/3:HIGH - * - --gpu_perf_hint - - int - - 3 - - ``run``/``benchmark`` - - 0:DEFAULT/1:LOW/2:NORMAL/3:HIGH - * - --gpu_priority_hint - - int - - 3 - - ``run``/``benchmark`` - - 0:DEFAULT/1:LOW/2:NORMAL/3:HIGH - -Using ``-h`` to get detailed help. - -.. code:: sh - - python tools/converter.py -h - python tools/converter.py build -h - python tools/converter.py run -h - python tools/converter.py benchmark -h - - -============= -4. Deployment -============= - -``build`` command will generate the static/shared library, model files and -header files and package them as -``build/${library_name}/libmace_${library_name}.tar.gz``. - -- The generated ``static`` libraries are organized as follows, - -.. 
code:: - - build/ - └── mobilenet-v2-gpu - ├── include - │   └── mace - │   └── public - │   ├── mace.h - │   └── mace_runtime.h - ├── libmace_mobilenet-v2-gpu.tar.gz - ├── lib - │   ├── arm64-v8a - │   │   └── libmace_mobilenet-v2-gpu.MI6.msm8998.a - │   └── armeabi-v7a - │   └── libmace_mobilenet-v2-gpu.MI6.msm8998.a - ├── model - │   ├── mobilenet_v2.data - │   └── mobilenet_v2.pb - └── opencl - ├── arm64-v8a - │   └── mobilenet-v2-gpu_compiled_opencl_kernel.MI6.msm8998.bin - └── armeabi-v7a - └── mobilenet-v2-gpu_compiled_opencl_kernel.MI6.msm8998.bin - -- The generated ``shared`` libraries are organized as follows, - -.. code:: - - build - └── mobilenet-v2-gpu - ├── include - │   └── mace - │   └── public - │   ├── mace.h - │   └── mace_runtime.h - ├── lib - │   ├── arm64-v8a - │   │   ├── libgnustl_shared.so - │   │   └── libmace.so - │   └── armeabi-v7a - │   ├── libgnustl_shared.so - │   └── libmace.so - ├── model - │   ├── mobilenet_v2.data - │   └── mobilenet_v2.pb - └── opencl - ├── arm64-v8a - │   └── mobilenet-v2-gpu_compiled_opencl_kernel.MI6.msm8998.bin - └── armeabi-v7a - └── mobilenet-v2-gpu_compiled_opencl_kernel.MI6.msm8998.bin - -.. note:: - - 1. DSP runtime depends on ``libhexagon_controller.so``. - 2. ``${MODEL_TAG}.pb`` file will be generated only when ``build_type`` is ``proto``. - 3. ``${library_name}_compiled_opencl_kernel.${device_name}.${soc}.bin`` will - be generated only when ``target_socs`` and ``gpu`` runtime are specified. - 4. Generated shared library depends on ``libgnustl_shared.so``. - -.. warning:: - - ``${library_name}_compiled_opencl_kernel.${device_name}.${soc}.bin`` depends - on the OpenCL version of the device, you should maintan the compatibility or - configure compiling cache store with ``ConfigKVStorageFactory``. - -========================================= -5. How to use the library in your project -========================================= - -Please refer to \ ``mace/examples/example.cc``\ for full usage. 
The following list the key steps. - -.. code:: cpp - - // Include the headers - #include "mace/public/mace.h" - #include "mace/public/mace_runtime.h" - // If the build_type is code - #include "mace/public/mace_engine_factory.h" - - // 0. Set pre-compiled OpenCL binary program file paths when available - if (device_type == DeviceType::GPU) { - mace::SetOpenCLBinaryPaths(opencl_binary_paths); - } - - // 1. Set compiled OpenCL kernel cache, this is used to reduce the - // initialization time since the compiling is too slow. It's suggested - // to set this even when pre-compiled OpenCL program file is provided - // because the OpenCL version upgrade may also leads to kernel - // recompilations. - const std::string file_path ="path/to/opencl_cache_file"; - std::shared_ptr storage_factory( - new FileStorageFactory(file_path)); - ConfigKVStorageFactory(storage_factory); - - // 2. Declare the device type (must be same with ``runtime`` in configuration file) - DeviceType device_type = DeviceType::GPU; - - // 3. Define the input and output tensor names. - std::vector input_names = {...}; - std::vector output_names = {...}; - - // 4. Create MaceEngine instance - std::shared_ptr engine; - MaceStatus create_engine_status; - // Create Engine from compiled code - create_engine_status = - CreateMaceEngineFromCode(model_name.c_str(), - nullptr, - input_names, - output_names, - device_type, - &engine); - // Create Engine from model file - create_engine_status = - CreateMaceEngineFromProto(model_pb_data, - model_data_file.c_str(), - input_names, - output_names, - device_type, - &engine); - if (create_engine_status != MaceStatus::MACE_SUCCESS) { - // Report error - } - - // 5. 
Create Input and Output tensor buffers - std::map inputs; - std::map outputs; - for (size_t i = 0; i < input_count; ++i) { - // Allocate input and output - int64_t input_size = - std::accumulate(input_shapes[i].begin(), input_shapes[i].end(), 1, - std::multiplies()); - auto buffer_in = std::shared_ptr(new float[input_size], - std::default_delete()); - // Load input here - // ... - - inputs[input_names[i]] = mace::MaceTensor(input_shapes[i], buffer_in); - } - - for (size_t i = 0; i < output_count; ++i) { - int64_t output_size = - std::accumulate(output_shapes[i].begin(), output_shapes[i].end(), 1, - std::multiplies()); - auto buffer_out = std::shared_ptr(new float[output_size], - std::default_delete()); - outputs[output_names[i]] = mace::MaceTensor(output_shapes[i], buffer_out); - } - - // 6. Run the model - MaceStatus status = engine.Run(inputs, &outputs); - -- GitLab