Merge branch 'update-docs' into 'master'

Replace how_to_build doc with english. See merge request !545

Merge branch 'update-docs' into 'master'
Replace how_to_build doc with english. See merge request !545
ac426fb2 · Liangliang He · f24d6cd0 · 5dd99908 · ac426fb2 · ac426fb2
4 changed file
--- a/docs/conf.py
+++ b/docs/conf.py
@@ -24,3 +24,4 @@ pygments_style = 'sphinx'
 html_theme = "sphinx_rtd_theme"
 html_theme_path = [sphinx_rtd_theme.get_html_theme_path()]
 html_static_path = ['_static']
+smartquotes = False
--- a/docs/getting_started/how_to_build.rst
+++ b/docs/getting_started/how_to_build.rst
 How to build
 ============

-模型格式支持
-------------
-
-+--------------+------------------------------------------------------------------------------------------+
-| 框架格式     | 支持情况                                                                                 |
-+==============+==========================================================================================+
-| TensorFlow   | 推荐使用1.4以上版本，否则可能达不到最佳性能 (考虑到后续Android NN，建议首选TensorFLow)   |
-+--------------+------------------------------------------------------------------------------------------+
-| Caffe        | 推荐使用1.0以上版本，低版本可能不支持，建议改用TensorFlow                                |
-+--------------+------------------------------------------------------------------------------------------+
-| MXNet        | 尚未支持                                                                                 |
-+--------------+------------------------------------------------------------------------------------------+
-| ONNX         | 尚未支持                                                                                 |
-+--------------+------------------------------------------------------------------------------------------+
-
-环境要求
---------
-
-``mace``\ 提供了包含开发运行所需环境的docker镜像，镜像文件可以参考\ ``./docker/``\ 。启动命令：
+Supported Platforms
+-------------------
+
+.. list-table::
+    :widths: auto
+    :header-rows: 1
+    :align: left
+
+    * - Platform
+      - Explanation
+    * - Tensorflow
+      - >= 1.6.0. (first choice, convenient for Android NN API in the future)
+    * - Caffe
+      - >= 1.0.
+
+Environment Requirement
+-------------------------
+
+``mace`` provides a docker image which contains all the required environment; see the ``Dockerfile`` under the ``./docker`` directory.
+The following are the start commands:

 .. code:: sh

    sudo docker pull cr.d.xiaomi.net/mace/mace-dev
    sudo docker run -it --rm --privileged -v /dev/bus/usb:/dev/bus/usb --net=host -v /local/path:/container/path cr.d.xiaomi.net/mace/mace-dev /bin/bash

-如果用户希望配置开发机上的环境，可以参考如下环境要求：
-
-+---------------------+-----------------+---------------------------------------------------------------------------------------------------+
-| 软件                | 版本号          | 安装命令                                                                                          |
-+=====================+=================+===================================================================================================+
-| bazel               | >= 0.5.4        | -                                                                                                 |
-+---------------------+-----------------+---------------------------------------------------------------------------------------------------+
-| android-ndk         | r15c,r16b       | -                                                                                                 |
-+---------------------+-----------------+---------------------------------------------------------------------------------------------------+
-| adb                 | >= 1.0.32       | apt install -y android-tools-adb                                                                  |
-+---------------------+-----------------+---------------------------------------------------------------------------------------------------+
-| tensorflow          | 1.7.0           | pip install tensorflow==1.7.0                                                                     |
-+---------------------+-----------------+---------------------------------------------------------------------------------------------------+
-| numpy               | >= 1.14.0       | pip install numpy                                                                                 |
-+---------------------+-----------------+---------------------------------------------------------------------------------------------------+
-| scipy               | >= 1.0.0        | pip install scipy                                                                                 |
-+---------------------+-----------------+---------------------------------------------------------------------------------------------------+
-| jinja2              | >= 2.10         | pip install jinja2                                                                                |
-+---------------------+-----------------+---------------------------------------------------------------------------------------------------+
-| PyYaml              | >= 3.12         | pip install pyyaml                                                                                |
-+---------------------+-----------------+---------------------------------------------------------------------------------------------------+
-| sh                  | >= 1.12.14      | pip install sh                                                                                    |
-+---------------------+-----------------+---------------------------------------------------------------------------------------------------+
-| filelock            | >= 3.0.0        | pip install filelock                                                                              |
-+---------------------+-----------------+---------------------------------------------------------------------------------------------------+
-| docker(for caffe)   | >= 17.09.0-ce   | `install doc <https://docs.docker.com/install/linux/docker-ce/ubuntu/#set-up-the-repository>`__   |
-+---------------------+-----------------+---------------------------------------------------------------------------------------------------+
+If you want to run on your local computer, you have to install the following software.
+
+.. list-table::
+    :widths: auto
+    :header-rows: 1
+    :align: left
+
+    * - software
+      - version
+      - install command
+    * - bazel
+      - >= 0.13.0
+      - `bazel installation <https://docs.bazel.build/versions/master/install.html>`__
+    * - android-ndk
+      - r15c/r16b
+      - reference the docker file
+    * - adb
+      - >= 1.0.32
+      - apt-get install android-tools-adb
+    * - tensorflow
+      - >= 1.6.0
+      - pip install -I tensorflow==1.6.0 (if you use tensorflow model)
+    * - numpy
+      - >= 1.14.0
+      - pip install -I numpy==1.14.0
+    * - scipy
+      - >= 1.0.0
+      - pip install -I scipy==1.0.0
+    * - jinja2
+      - >= 2.10
+      - pip install -I jinja2==2.10
+    * - PyYaml
+      - >= 3.12.0
+      - pip install -I pyyaml==3.12
+    * - sh
+      - >= 1.12.14
+      - pip install -I sh==1.12.14
+    * - filelock
+      - >= 3.0.0
+      - pip install -I filelock==3.0.0
+    * - docker (for caffe)
+      - >= 17.09.0-ce
+      - `install doc <https://docs.docker.com/install/linux/docker-ce/ubuntu/#set-up-the-repository>`__
+

 Docker Images
 ----------------
@@ -84,12 +102,16 @@ Docker Images
    docker run -it --rm -v /local/path:/container/path --net=host cr.d.xiaomi.net/mace/mace-dev /bin/bash


-使用简介
+Usage
 --------

-1. 获取最新tag的代码
+============================
+1. Pull code with latest tag
+============================
+
+.. warning::

-**建议尽可能使用最新tag下的代码，以及不要直接使用master分支的最新代码。**
+    please do not use master branch for deployment.

 .. code:: sh

@@ -104,19 +126,23 @@ Docker Images
    # checkout to latest tag branch
    git checkout -b ${tag_name} tags/${tag_name}

-2. 模型优化
+============================
+2. Model Optimization
+============================

 -  Tensorflow

-TensorFlow训练得到的模型进行一系列的转换，可以提升设备上的运行速度。TensorFlow提供了官方工具
-`TensorFlow Graph Transform
-Tool <https://github.com/tensorflow/tensorflow/blob/master/tensorflow/tools/graph_transforms/README.md>`__
-来进行模型优化
-(此工具Docker镜像中已经提供，也可以直接点击`下载 <http://cnbj1-inner-fds.api.xiaomi.net/mace/tool/transform_graph>`__\ 这个工具，用户亦可从官方源码编译\`)。以下分别是GPU模型和DSP模型的优化命令：
+TensorFlow provides a
+`model optimization tool <https://github.com/tensorflow/tensorflow/blob/master/tensorflow/tools/graph_transforms/README.md>`__
+to speed up inference. The docker image contains the tool;
+alternatively, you can download it from `transform_graph <http://cnbj1-inner-fds.api.xiaomi.net/mace/tool/transform_graph>`__
+or compile it from the TensorFlow source code.
+
+The following commands optimize the model for CPU, GPU and DSP.

 .. code:: sh

-    # GPU模型:
+    # CPU/GPU:
    ./transform_graph \
        --in_graph=tf_model.pb \
        --out_graph=tf_model_opt.pb \
@@ -132,7 +158,7 @@ Tool <https://github.com/tensorflow/tensorflow/blob/master/tensorflow/tools/grap
            strip_unused_nodes
            sort_by_execution_order'

-    # DSP模型:
+    # DSP:
    ./transform_graph \
        --in_graph=tf_model.pb \
        --out_graph=tf_model_opt.pb \
@@ -152,7 +178,7 @@ Tool <https://github.com/tensorflow/tensorflow/blob/master/tensorflow/tools/grap

 -  Caffe

-Caffe目前只支持最新版本，旧版本请使用Caffe的工具进行升级。
+Only versions greater than 1.0 are supported; please use the tools Caffe supplies to upgrade older models.

 .. code:: bash

@@ -162,54 +188,81 @@ Caffe目前只支持最新版本，旧版本请使用Caffe的工具进行升级
    # Upgrade caffemodel
    $CAFFE_ROOT/build/tools/upgrade_net_proto_binary MODEL.caffemodel MODEL.new.caffemodel

-3. 生成模型静态库
+============================
+3. Build static library
+============================
+
+-----------------
+3.1 Overview
+-----------------
+Mace only builds static libraries. The following are the two use cases.
+
+* **build for specified SOC**
+
+    You must assign ``target_socs`` in the yaml configuration file.
+    If you want to use the GPU of the SOC, mace will automatically tune the parameters for better performance.
+
+    .. warning::

-模型静态库的生成需要使用目标机型，\ ***并且要求必须在目标SOC的机型上编译生成静态库。***
+         You should plug in a phone with that SOC.

-我们提供了\ ``converter.py``\ 工具，可以将模型文件转换成静态库。\ ``tools/converter.py``\ 使用步骤：
+* **build for all SOC**

+    When no ``target_socs`` is specified, the library is suitable for all SOCs.

+    .. warning::

-3.2 运行\ ``tools/converter.py``\ 脚本
+         The performance will be a little poorer than the first case.
+
+We supply a python script ``tools/converter.py`` to build the library and run the model on the command line.
+
+.. warning::
+
+     The script must be run from the root directory of the mace code.
+
+
+------------------------------------------
+3.2 \ ``tools/converter.py``\  explanation
+------------------------------------------

 **Commands**

-    **build**
+    * **build**

        .. note::

-            build模型静态库以及测试工具。
+            build static library and test tools.

-        * *--config* (type=str,  default="",  required)：模型配置yaml文件路径.
-        * *--tuning* (default=false, optional)：是否为特定SOC调制GPU参数.
-        * *--enable_openmp* (default=true, optional)：是否启用openmp.
+        * *--config* (type=str,  default="",  required): the path of the model yaml configuration file.
+        * *--tuning* (default=false, optional): whether to tune the parameters for the GPU of the specified SOC.
+        * *--enable_openmp* (default=true, optional): whether to use openmp.

-    **run**
+    * **run**

        .. note::

-            命令行运行模型
+           run the models in command line

-        * *--config* (type=str,  default="",  required)：模型配置yaml文件路径.
-        * *--round* (type=int, default=1,  optional)：模型运行次数。
-        * *--validate* (default=false, optional): 是否需要验证运行结果与框架运行结果是否一致。
-        * *--caffe_env* (type=local/docker, default=docker,  optional)：当vaildate时，可以选择指定caffe环境,local表示本地，docker表示使用docker容器.
-        * *--restart_round* (type=int, default=1,  optional)：模型重启次数。
-        * *--check_gpu_out_of_memory* (default=false, optional): 是否需要检查gpu内存越界。
-        * *--vlog_level* (type=int[0-5], default=0,  optional)：详细日志级别.
+        * *--config* (type=str,  default="",  required): the path of the model yaml configuration file.
+        * *--round* (type=int, default=1,  optional): number of times to run the model.
+        * *--validate* (default=false, optional): whether to verify that the results of mace are consistent with those of the framework.
+        * *--caffe_env* (type=local/docker, default=docker,  optional): the caffe environment used for validation, either the local environment or the caffe docker image.
+        * *--restart_round* (type=int, default=1,  optional): number of times to restart the model.
+        * *--check_gpu_out_of_memory* (default=false, optional): whether to check for out of memory on the GPU.
+        * *--vlog_level* (type=int[0-5], default=0,  optional): verbose log level for debugging.

        .. warning::

-            run依赖于build命令.build完成以后才可以执行run命令
+            ``run`` relies on the ``build`` command; you should ``run`` after ``build``.

-    **benchmark**
-        * *--config* (type=str,  default="",  required)：模型配置yaml文件路径.
+    * **benchmark**
+        * *--config* (type=str,  default="",  required): the path of the model yaml configuration file.

        .. warning::

-            benchmark依赖于build命令.
+            ``benchmark`` relies on the ``build`` command; you should ``benchmark`` after ``build``.

-    **通用参数**
+    **common arguments**

    .. list-table::
        :widths: auto
@@ -226,33 +279,37 @@ Caffe目前只支持最新版本，旧版本请使用Caffe的工具进行升级
          - int
          - -1
          - N
-          - run/benchmark
+          - ``run``/``benchmark``
          - number of threads
        * - --cpu_affinity_policy
          - int
          - 1
          - N
-          - run/benchmark
+          - ``run``/``benchmark``
          - 0:AFFINITY_NONE/1:AFFINITY_BIG_ONLY/2:AFFINITY_LITTLE_ONLY
        * - --gpu_perf_hint
          - int
          - 3
          - N
-          - run/benchmark
+          - ``run``/``benchmark``
          - 0:DEFAULT/1:LOW/2:NORMAL/3:HIGH
        * - --gpu_perf_hint
          - int
          - 3
          - N
-          - run/benchmark
+          - ``run``/``benchmark``
          - 0:DEFAULT/1:LOW/2:NORMAL/3:HIGH
        * - --gpu_priority_hint
          - int
          - 3
          - N
-          - run/benchmark
+          - ``run``/``benchmark``
          - 0:DEFAULT/1:LOW/2:NORMAL/3:HIGH

+---------------------------------------------
+3.3 ``tools/converter.py`` usage examples
+---------------------------------------------
+
 .. code:: sh

    # print help message
@@ -261,107 +318,104 @@ Caffe目前只支持最新版本，旧版本请使用Caffe的工具进行升级
    python tools/converter.py run -h
    python tools/converter.py benchmark -h

-    # 仅编译模型和生成静态库
+    # Build the static library
    python tools/converter.py build --config=models/config.yaml

-    # 测试模型的运行时间
+    # Test model run time
    python tools/converter.py run --config=models/config.yaml --round=100

-    # 对比编译好的模型在mace上与直接使用tensorflow或者caffe运行的结果，相似度使用`余弦距离表示`
-    # 其中使用OpenCL设备，默认相似度大于等于`0.995`为通过；DSP设备下，相似度需要达到`0.930`。
+    # Compare the results of mace with those of the original platform (TensorFlow/Caffe), using cosine distance to represent similarity.
    python tools/converter.py run --config=models/config.yaml --validate

-    # 模型Benchmark：查看每个Op的运行时间
+    # Benchmark Model: check the execution time of each Op.
    python tools/converter.py benchmark --config=models/config.yaml

-    # 查看模型运行时占用内存（如果有多个模型，可能需要注释掉一部分配置，只剩一个模型的配置）
+    # Check the memory usage of the model (**keep only one model in the configuration file**)
    python tools/converter.py run --config=models/config.yaml --round=10000 &
    adb shell dumpsys meminfo | grep mace_run
    sleep 10
    kill %1

-4. 发布
+=============
+4. Deployment
+=============

-通过前面的步骤，我们得到了包含业务模型的库文件。在业务代码中，我们只需要引入下面3组文件（\ ``./build/``\ 是默认的编译结果输出目录）：
+The ``build`` command will generate a package which contains the static library, model files and header files.
+The package is at ``./build/${library_name}/libmace_${library_name}.tar.gz``.
+The details are listed below.

-**头文件**
-    * ``./build/${library_name}/include/mace/public/*.h``
+**header files**
+    * ``include/mace/public/*.h``

-**静态库**
-    * ``./build/${library_name}/library/${target_abi}/*.a``
+**static libraries**
+    * ``library/${target_abi}/*.a``

-**动态库**
-    * ``./build/${library_name}/library/${target_abi}/libhexagon_controller.so``
+**dynamic libraries**
+    * ``library/libhexagon_controller.so``

    .. note::

-        仅编译的模型中包含dsp模式时用到
+        only used for DSP

-**模型文件**
-    * ``./build/${library_name}/model/${MODEL_TAG}.pb``
-    * ``./build/${library_name}/model/${MODEL_TAG}.data``
+**model files**
+    * ``model/${MODEL_TAG}.pb``
+    * ``model/${MODEL_TAG}.data``

    .. note::

-        pb文件紧当模型build_type设置为proto时才会产生。
+        ``.pb`` file will be generated only when build_type is ``proto``.


-**库文件tar包**
-    * ``./build/${library_name}/libmace_${library_name}.tar.gz``
+=============
+5. how to use
+=============

-    .. note::
-
-        该文件包含了上述所有文件，可以发布使用。
-
-5. 使用
-
-具体使用流程可参考\ ``mace/examples/mace_run.cc``\ ，下面列出关键步骤。
+Please refer to ``mace/examples/example.cc`` for full usage. The following lists the key steps.

 .. code:: cpp

-    // 引入头文件
+    // include the header files
    #include "mace/public/mace.h"
+    #include "mace/public/mace_runtime.h"
    #include "mace/public/mace_engine_factory.h"

-    // 0. 设置内部存储（设置一次即可）
+    // 0. Set the internal storage factory (**call once**)
    const std::string file_path ="/path/to/store/internel/files";
    std::shared_ptr<KVStorageFactory> storage_factory(
        new FileStorageFactory(file_path));
    ConfigKVStorageFactory(storage_factory);

-    //1. 声明设备类型(必须与build时指定的runtime一致）
+    //1. Declare the device type(must be same with ``runtime`` in configuration file)
    DeviceType device_type = DeviceType::GPU;

-    //2. 定义输入输出名称数组
+    //2. Define the input and output tensor names.
    std::vector<std::string> input_names = {...};
    std::vector<std::string> output_names = {...};

-    //3. 创建MaceEngine对象
+    //3. Create MaceEngine object
    std::shared_ptr<mace::MaceEngine> engine;
    MaceStatus create_engine_status;
-    // Create Engine
-    if (model_data_file.empty()) {
-      create_engine_status =
-          CreateMaceEngine(model_name.c_str(),
-                           nullptr,
-                           input_names,
-                           output_names,
-                           device_type,
-                           &engine);
-    } else {
-      create_engine_status =
-          CreateMaceEngine(model_name.c_str(),
-                           model_data_file.c_str(),
-                           input_names,
-                           output_names,
-                           device_type,
-                           &engine);
-    }
+    // Create Engine from code
+    create_engine_status =
+        CreateMaceEngineFromCode(model_name.c_str(),
+                                 nullptr,
+                                 input_names,
+                                 output_names,
+                                 device_type,
+                                 &engine);
+    // Create Engine from proto file
+    create_engine_status =
+        CreateMaceEngineFromProto(model_pb_data,
+                                  model_data_file.c_str(),
+                                  input_names,
+                                  output_names,
+                                  device_type,
+                                  &engine);
    if (create_engine_status != MaceStatus::MACE_SUCCESS) {
      // do something
    }

-    //4. 创建输入输出对象
+    //4. Create Input and Output objects
    std::map<std::string, mace::MaceTensor> inputs;
    std::map<std::string, mace::MaceTensor> outputs;
    for (size_t i = 0; i < input_count; ++i) {
@@ -386,6 +440,6 @@ Caffe目前只支持最新版本，旧版本请使用Caffe的工具进行升级
      outputs[output_names[i]] = mace::MaceTensor(output_shapes[i], buffer_out);
    }

-    //5. 执行模型，得到结果
-    engine.Run(inputs, &outputs);
+    //5. Run the model
+    MaceStatus status = engine->Run(inputs, &outputs);

--- a/docs/getting_started/how_to_build_zh.rst
+++ b/docs/getting_started/how_to_build_zh.rst
+使用介绍
+============
+
+模型格式支持
+-------------
+
+.. list-table::
+    :widths: auto
+    :header-rows: 1
+    :align: left
+
+    * - Platform
+      - Explanation
+    * - Tensorflow
+      - >= 1.6.0. (first choice, convenient for Android NN API in the future)
+    * - Caffe
+      - >= 1.0.
+
+环境要求
+---------
+
+``mace``\ 提供了包含开发运行所需环境的docker镜像，镜像文件可以参考\ ``./docker/``\ 。启动命令：
+
+.. code:: sh
+
+    sudo docker pull cr.d.xiaomi.net/mace/mace-dev
+    sudo docker run -it --rm --privileged -v /dev/bus/usb:/dev/bus/usb --net=host -v /local/path:/container/path cr.d.xiaomi.net/mace/mace-dev /bin/bash
+
+如果用户希望配置开发机上的环境，可以参考如下环境要求：
+
+.. list-table::
+    :widths: auto
+    :header-rows: 1
+    :align: left
+
+    * - software
+      - version
+      - install command
+    * - bazel
+      - >= 0.13.0
+      - `bazel installation <https://docs.bazel.build/versions/master/install.html>`__
+    * - android-ndk
+      - r15c/r16b
+      - reference the docker file
+    * - adb
+      - >= 1.0.32
+      - apt-get install android-tools-adb
+    * - tensorflow
+      - >= 1.6.0
+      - pip install -I tensorflow==1.6.0 (if you use tensorflow model)
+    * - numpy
+      - >= 1.14.0
+      - pip install -I numpy==1.14.0
+    * - scipy
+      - >= 1.0.0
+      - pip install -I scipy==1.0.0
+    * - jinja2
+      - >= 2.10
+      - pip install -I jinja2==2.10
+    * - PyYaml
+      - >= 3.12.0
+      - pip install -I pyyaml==3.12
+    * - sh
+      - >= 1.12.14
+      - pip install -I sh==1.12.14
+    * - filelock
+      - >= 3.0.0
+      - pip install -I filelock==3.0.0
+    * - docker (for caffe)
+      - >= 17.09.0-ce
+      - `install doc <https://docs.docker.com/install/linux/docker-ce/ubuntu/#set-up-the-repository>`__
+
+Docker Images
+----------------
+
+* Login in `Xiaomi Docker Registry <http://docs.api.xiaomi.net/docker-registry/>`__
+
+.. code:: sh
+
+    docker login cr.d.xiaomi.net
+
+* Build with Dockerfile
+
+.. code:: sh
+
+    docker build -t cr.d.xiaomi.net/mace/mace-dev
+
+
+* Pull image from docker registry
+
+.. code:: sh
+
+    docker pull cr.d.xiaomi.net/mace/mace-dev
+
+* Create container
+
+.. code:: sh
+
+    # Set 'host' network to use ADB
+    docker run -it --rm -v /local/path:/container/path --net=host cr.d.xiaomi.net/mace/mace-dev /bin/bash
+
+
+使用简介
+--------
+
+=======================
+1. 获取最新tag的代码
+=======================
+
+.. warning::
+
+    建议尽可能使用最新tag下的代码。
+
+.. code:: sh
+
+    git clone git@v9.git.n.xiaomi.com:deep-computing/mace.git
+
+    # update
+    git fetch --all --tags --prune
+
+    # get latest tag version
+    tag_name=`git describe --abbrev=0 --tags`
+
+    # checkout to latest tag branch
+    git checkout -b ${tag_name} tags/${tag_name}
+
+==================
+2. 模型优化
+==================
+
+-  Tensorflow
+
+TensorFlow训练得到的模型进行一系列的转换，可以提升设备上的运行速度。TensorFlow提供了官方工具
+`TensorFlow Graph Transform
+Tool <https://github.com/tensorflow/tensorflow/blob/master/tensorflow/tools/graph_transforms/README.md>`__
+来进行模型优化
+(此工具Docker镜像中已经提供，也可以直接点击
+`transform_graph <http://cnbj1-inner-fds.api.xiaomi.net/mace/tool/transform_graph>`__
+下载这个工具，用户亦可从官方源码编译)。以下分别是GPU模型和DSP模型的优化命令：
+
+.. code:: sh
+
+    # GPU模型:
+    ./transform_graph \
+        --in_graph=tf_model.pb \
+        --out_graph=tf_model_opt.pb \
+        --inputs='input' \
+        --outputs='output' \
+        --transforms='strip_unused_nodes(type=float, shape="1,64,64,3") 
+            strip_unused_nodes(type=float, shape="1,64,64,3")
+            remove_nodes(op=Identity, op=CheckNumerics)
+            fold_constants(ignore_errors=true)
+            flatten_atrous_conv
+            fold_batch_norms
+            fold_old_batch_norms
+            strip_unused_nodes
+            sort_by_execution_order'
+
+    # DSP模型:
+    ./transform_graph \
+        --in_graph=tf_model.pb \
+        --out_graph=tf_model_opt.pb \
+        --inputs='input' \
+        --outputs='output' \
+        --transforms='strip_unused_nodes(type=float, shape="1,64,64,3") 
+            strip_unused_nodes(type=float, shape="1,64,64,3")
+            remove_nodes(op=Identity, op=CheckNumerics)
+            fold_constants(ignore_errors=true)
+            fold_batch_norms
+            fold_old_batch_norms
+            backport_concatv2
+            quantize_weights(minimum_size=2)
+            quantize_nodes
+            strip_unused_nodes
+            sort_by_execution_order'
+
+-  Caffe
+
+Caffe目前只支持最新版本，旧版本请使用Caffe的工具进行升级。
+
+.. code:: bash
+
+    # Upgrade prototxt
+    $CAFFE_ROOT/build/tools/upgrade_net_proto_text MODEL.prototxt MODEL.new.prototxt
+
+    # Upgrade caffemodel
+    $CAFFE_ROOT/build/tools/upgrade_net_proto_binary MODEL.caffemodel MODEL.new.caffemodel
+
+==================
+3. 生成模型静态库
+==================
+
+---------------------------------------
+3.1 简介
+---------------------------------------
+
+Mace目前只提供静态库，有以下两种使用场景。
+
+**特定SOC库**
+
+    该使用场景要求在\ ``yaml``\ 文件中必须指定\ ``target_socs``\ 。主要用于编译适用于指定手机SOC的静态库。
+    如果希望使用GPU，那么编译过程会自动测试选择最佳的GPU相关参数以获得更好的性能。
+
+    .. warning::
+
+         该场景下，你必须插入符合SOC的手机。
+
+**通用库**
+
+    如果在\ ``yaml``\ 文件中没有指定\ ``target_socs``\ ，生成的静态库适用于所有手机。
+
+    .. warning::
+
+         该场景下，GPU性能会略逊于第一种场景。
+
+
+我们提供了\ ``tools/converter.py``\ 工具，用于编译和运行。
+
+.. warning::
+
+     必须在mace项目的根目录下运行\ ``tools/converter.py``\ 脚本。
+
+
+---------------------------------------
+3.2 \ ``tools/converter.py``\ 脚本
+---------------------------------------
+
+**Commands**
+
+    **build**
+
+        .. note::
+
+            build模型静态库以及测试工具。
+
+        * *--config* (type=str,  default="",  required)：模型配置yaml文件路径.
+        * *--tuning* (default=false, optional)：是否为特定SOC调制GPU参数.
+        * *--enable_openmp* (default=true, optional)：是否启用openmp.
+
+    **run**
+
+        .. note::
+
+            命令行运行模型
+
+        * *--config* (type=str,  default="",  required)：模型配置yaml文件路径.
+        * *--round* (type=int, default=1,  optional)：模型运行次数。
+        * *--validate* (default=false, optional): 是否需要验证运行结果与框架运行结果是否一致。
+        * *--caffe_env* (type=local/docker, default=docker,  optional)：当validate时，可以选择指定caffe环境,local表示本地，docker表示使用docker容器.
+        * *--restart_round* (type=int, default=1,  optional)：模型重启次数。
+        * *--check_gpu_out_of_memory* (default=false, optional): 是否需要检查gpu内存越界。
+        * *--vlog_level* (type=int[0-5], default=0,  optional)：详细日志级别.
+
+        .. warning::
+
+            run依赖于build命令.build完成以后才可以执行run命令
+
+    **benchmark**
+        * *--config* (type=str,  default="",  required)：模型配置yaml文件路径.
+
+        .. warning::
+
+            benchmark依赖于build命令.
+
+    **通用参数**
+
+    .. list-table::
+        :widths: auto
+        :header-rows: 1
+        :align: left
+
+        * - argument(key)
+          - argument(value)
+          - default
+          - required
+          - commands
+          - explanation
+        * - --omp_num_threads
+          - int
+          - -1
+          - N
+          - run/benchmark
+          - number of threads
+        * - --cpu_affinity_policy
+          - int
+          - 1
+          - N
+          - run/benchmark
+          - 0:AFFINITY_NONE/1:AFFINITY_BIG_ONLY/2:AFFINITY_LITTLE_ONLY
+        * - --gpu_perf_hint
+          - int
+          - 3
+          - N
+          - run/benchmark
+          - 0:DEFAULT/1:LOW/2:NORMAL/3:HIGH
+        * - --gpu_perf_hint
+          - int
+          - 3
+          - N
+          - run/benchmark
+          - 0:DEFAULT/1:LOW/2:NORMAL/3:HIGH
+        * - --gpu_priority_hint
+          - int
+          - 3
+          - N
+          - run/benchmark
+          - 0:DEFAULT/1:LOW/2:NORMAL/3:HIGH
+
+---------------------------------------
+3.3 \ ``tools/converter.py``\ 使用示例
+---------------------------------------
+.. code:: sh
+
+    # print help message
+    python tools/converter.py -h
+    python tools/converter.py build -h
+    python tools/converter.py run -h
+    python tools/converter.py benchmark -h
+
+    # 仅编译模型和生成静态库
+    python tools/converter.py build --config=models/config.yaml
+
+    # 测试模型的运行时间
+    python tools/converter.py run --config=models/config.yaml --round=100
+
+    # 对比编译好的模型在mace上与直接使用tensorflow或者caffe运行的结果，相似度使用`余弦距离表示`
+    # 其中使用OpenCL设备，默认相似度大于等于`0.995`为通过；DSP设备下，相似度需要达到`0.930`。
+    python tools/converter.py run --config=models/config.yaml --validate
+
+    # 模型Benchmark：查看每个Op的运行时间
+    python tools/converter.py benchmark --config=models/config.yaml
+
+    # 查看模型运行时占用内存（如果有多个模型，可能需要注释掉一部分配置，只剩一个模型的配置）
+    python tools/converter.py run --config=models/config.yaml --round=10000 &
+    adb shell dumpsys meminfo | grep mace_run
+    sleep 10
+    kill %1
+
+==========
+4. 发布
+==========
+
+``build``\ 命令会生成一个tar包，里面包含了发布所需要的所有文件，其位于\ ``./build/${library_name}/libmace_${library_name}.tar.gz``\ 。
+下面解释了该包中包含了哪些文件。
+
+**头文件**
+    * ``./build/${library_name}/include/mace/public/*.h``
+
+**静态库**
+    * ``./build/${library_name}/library/${target_abi}/*.a``
+
+**动态库**
+    * ``./build/${library_name}/library/${target_abi}/libhexagon_controller.so``
+
+    .. note::
+
+        仅编译的模型中包含dsp模式时用到
+
+**模型文件**
+    * ``./build/${library_name}/model/${MODEL_TAG}.pb``
+    * ``./build/${library_name}/model/${MODEL_TAG}.data``
+
+    .. note::
+
+        pb文件仅当模型build_type设置为proto时才会产生。
+
+
+**库文件tar包**
+    * ``./build/${library_name}/libmace_${library_name}.tar.gz``
+
+    .. note::
+
+        该文件包含了上述所有文件，可以发布使用。
+
+============
+5. 使用
+============
+
+具体使用流程可参考\ ``mace/examples/mace_run.cc``\ ，下面列出关键步骤。
+
+.. code:: cpp
+
+    // 引入头文件
+    #include "mace/public/mace.h"
+    #include "mace/public/mace_engine_factory.h"
+
+    // 0. 设置内部存储（设置一次即可）
+    const std::string file_path ="/path/to/store/internel/files";
+    std::shared_ptr<KVStorageFactory> storage_factory(
+        new FileStorageFactory(file_path));
+    ConfigKVStorageFactory(storage_factory);
+
+    //1. 声明设备类型(必须与build时指定的runtime一致）
+    DeviceType device_type = DeviceType::GPU;
+
+    //2. 定义输入输出名称数组
+    std::vector<std::string> input_names = {...};
+    std::vector<std::string> output_names = {...};
+
+    //3. 创建MaceEngine对象
+    std::shared_ptr<mace::MaceEngine> engine;
+    MaceStatus create_engine_status;
+    // Create Engine from code
+    create_engine_status =
+        CreateMaceEngineFromCode(model_name.c_str(),
+                                 nullptr,
+                                 input_names,
+                                 output_names,
+                                 device_type,
+                                 &engine);
+    // Create Engine from proto file
+    create_engine_status =
+        CreateMaceEngineFromProto(model_pb_data,
+                                  model_data_file.c_str(),
+                                  input_names,
+                                  output_names,
+                                  device_type,
+                                  &engine);
+    if (create_engine_status != MaceStatus::MACE_SUCCESS) {
+      // do something
+    }
+
+    //4. 创建输入输出对象
+    std::map<std::string, mace::MaceTensor> inputs;
+    std::map<std::string, mace::MaceTensor> outputs;
+    for (size_t i = 0; i < input_count; ++i) {
+      // Allocate input and output
+      int64_t input_size =
+          std::accumulate(input_shapes[i].begin(), input_shapes[i].end(), 1,
+                          std::multiplies<int64_t>());
+      auto buffer_in = std::shared_ptr<float>(new float[input_size],
+                                              std::default_delete<float[]>());
+      // load input
+      ...
+
+      inputs[input_names[i]] = mace::MaceTensor(input_shapes[i], buffer_in);
+    }
+
+    for (size_t i = 0; i < output_count; ++i) {
+      int64_t output_size =
+          std::accumulate(output_shapes[i].begin(), output_shapes[i].end(), 1,
+                          std::multiplies<int64_t>());
+      auto buffer_out = std::shared_ptr<float>(new float[output_size],
+                                               std::default_delete<float[]>());
+      outputs[output_names[i]] = mace::MaceTensor(output_shapes[i], buffer_out);
+    }
+
+    //5. 执行模型，得到结果
+    engine->Run(inputs, &outputs);
+
--- a/tools/sh_commands.py
+++ b/tools/sh_commands.py
@@ -847,7 +847,8 @@ def merge_libs(target_soc,
    project_output_dir = "%s/%s" % (build_output_dir, project_name)
    model_header_dir = "%s/include/mace/public" % project_output_dir
    hexagon_lib_file = "third_party/nnlib/libhexagon_controller.so"
-    model_bin_dir = "%s/%s/%s/" % (project_output_dir, library_output_dir, abi)
+    library_dir = "%s/%s" % (project_output_dir, library_output_dir)
+    model_bin_dir = "%s/%s/" % (library_dir, abi)

    if os.path.exists(model_bin_dir):
        sh.rm("-rf", model_bin_dir)
@@ -858,7 +859,7 @@ def merge_libs(target_soc,
    # copy header files
    sh.cp("-f", glob.glob("mace/public/*.h"), model_header_dir)
    if hexagon_mode:
-        sh.cp("-f", hexagon_lib_file, model_bin_dir)
+        sh.cp("-f", hexagon_lib_file, library_dir)

    if model_build_type == BuildType.code:
        sh.cp("-f", glob.glob("mace/codegen/engine/*.h"), model_header_dir)