diff --git a/.gitignore b/.gitignore index 91e5e303e1011a0185e2f0b04b4183ddd16285ae..075175a124d0a084b1e4f987353a6cc297dd576b 100644 --- a/.gitignore +++ b/.gitignore @@ -22,12 +22,14 @@ mace/codegen/version/ mace/codegen/engine/ mace/codegen/lib/ -micro/codegen/models/ -micro/codegen/engines/ - examples/android/macelibrary/src/main/cpp/mace/ examples/android/macelibrary/src/main/cpp/include/ examples/android/macelibrary/src/main/cpp/lib/arm64-v8a/ examples/android/macelibrary/src/main/jniLibs/arm64-v8a/ tools/python/py_proto/*_pb2.py + +micro/codegen/models/ +micro/codegen/engines/ +micro/examples/micro +micro/build \ No newline at end of file diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index 0b0e2436368096a3d72e571f8106e0666b22943b..659b7c9a0785ed3ba53e9474ba3981a9678a847d 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -19,7 +19,7 @@ cpplint: pylint: stage: linting script: - - pycodestyle $(find -name "*.py") + - pycodestyle . --filename=*.py --exclude=examples,third_party build_docs: stage: build @@ -111,14 +111,12 @@ mace_cc_test: DEVICE_CONF_FILE=generic-mobile-devices/devices.yml fi - python tools/bazel_adb_run.py --target="//test/ccunit:mace_cc_test" --device_yml=${DEVICE_CONF_FILE} --run_target=True --stdout_processor=unittest_stdout_processor --target_abis=armeabi-v7a,arm64-v8a --target_socs=$TARGET_SOCS - - python tools/bazel_adb_run.py --target="//micro/test/ccunit:micro_ops_test" --run_target=True --stdout_processor=ops_benchmark_stdout_processor --target_abis=arm64-v8a mace_cc_benchmark: stage: test script: - if [ -z "$TARGET_SOCS" ]; then TARGET_SOCS=random; fi - python tools/bazel_adb_run.py --target="//test/ccbenchmark:mace_cc_benchmark" --run_target=True --stdout_processor=ops_benchmark_stdout_processor --target_abis=armeabi-v7a,arm64-v8a --target_socs=$TARGET_SOCS --args="--filter=.*SIGMOID.*" - - python tools/bazel_adb_run.py --target="//micro/test/ccbenchmark:micro_cc_benchmark" --run_target=True 
--stdout_processor=ops_benchmark_stdout_processor --target_abis=arm64-v8a only: - triggers @@ -145,14 +143,6 @@ model_tests: - python tools/converter.py convert --config=${CONF_FILE} --target_socs=$TARGET_SOCS --model_graph_format=code --model_data_format=file - python tools/converter.py run --config=${CONF_FILE} --target_socs=$TARGET_SOCS --round=1 --validate --model_graph_format=code --model_data_format=file - python tools/converter.py run --config=${CONF_FILE} --target_socs=$TARGET_SOCS --round=5 --model_graph_format=code --model_data_format=file --benchmark - - CONF_FILE=mace-models/micro-models/har-cnn/har-cnn.yml - - python tools/converter.py convert --config=${CONF_FILE} --enable_micro - - python tools/python/run_micro.py --config $CONF_FILE --build --validate --model_name har_cnn - - python tools/python/run_micro.py --config $CONF_FILE --model_name har_cnn --build --benchmark - - CONF_FILE=mace-models/micro-models/har-cnn/har-cnn-bf16.yml - - python tools/converter.py convert --config=${CONF_FILE} --enable_micro - - python tools/python/run_micro.py --config $CONF_FILE --build --validate --model_name har_cnn - - rm -rf mace-models quantization_tests: stage: test @@ -206,3 +196,4 @@ micro-child: trigger: include: - 'micro/.gitlab-ci.yml' + strategy: depend diff --git a/.gitmodules b/.gitmodules index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..410fef8a41eb862c1d10be82b195eb35cde1ac18 100644 --- a/.gitmodules +++ b/.gitmodules @@ -0,0 +1,13 @@ +[submodule "micro/third_party/CMSIS_5"] + path = micro/third_party/CMSIS_5 + url = https://github.com/ARM-software/CMSIS_5.git + shallow = true +[submodule "micro/third_party/googletest"] + path = micro/third_party/googletest + url = https://github.com/google/googletest.git + shallow = true +[submodule "micro/third_party/gflags"] + path = micro/third_party/gflags + url = https://github.com/gflags/gflags.git + shallow = true + diff --git a/docker/mace-micro-dev/mace-micro-dev.dockerfile 
b/docker/mace-micro-dev/mace-micro-dev.dockerfile new file mode 100644 index 0000000000000000000000000000000000000000..abc1f132fbce20ccf42f2ee9e8eaf5ec38dfb4e3 --- /dev/null +++ b/docker/mace-micro-dev/mace-micro-dev.dockerfile @@ -0,0 +1,15 @@ +FROM ubuntu:18.04 + +RUN apt-get update +RUN apt-get install -y wget +RUN apt-get install -y g++ gcc +RUN apt-get install -y gcc-arm-none-eabi +RUN apt-get install -y python3 python3-pip git mercurial + +RUN wget https://cdn.cnbj1.fds.api.mi-img.com/mace/third-party/cmake-3.18.3-Linux-x86_64.sh +RUN chmod +x cmake-3.18.3-Linux-x86_64.sh && ./cmake-3.18.3-Linux-x86_64.sh --skip-license --prefix=/usr + +RUN python3 -m pip install -U pip +RUN python3 -m pip install jinja2 pyyaml sh numpy six filelock +RUN python3 -m pip install tensorflow==2.3.0 tensorflow_model_optimization +RUN python3 -m pip install mbed-cli diff --git a/docs/micro-controllers/basic_usage.rst b/docs/micro-controllers/basic_usage.rst index a1228ecced3696772b39f4d97d51077b8f203a86..853a3fc6e9396eea39ac42761126c2cdc8580de2 100644 --- a/docs/micro-controllers/basic_usage.rst +++ b/docs/micro-controllers/basic_usage.rst @@ -1,128 +1,217 @@ Basic usage for Micro Controllers ================================== +MACE Micro is a lightweight neural network inference engine for MCUs and low-power DSPs. +At now we support Cortex-M MCUs and Qualcomm Hexagon DSPs. You can get our projects from GitHub. -Build and run an example model -------------------------------- +Get MACE Micro Projects +----------------------- -At first, make sure the environment has been set up correctly already (refer to :doc:`../installation/env_requirement`). +MACE Micro is a sub project of MACE, so you can get it from MACE. -The followings are instructions about how to quickly build and run a provided model in -`MACE Model Zoo `__. +.. code-block:: sh -Here we use the har-cnn model as an example. 
+ git clone https://github.com/XiaoMi/mace.git + # Inits submodules by yourself + cd mace && git submodule update --init micro && cd .. -**Commands** +Environment Requirements +------------------------ - 1. Pull `MACE `__ project. +On a ubuntu18.04/20.04 PC, do the following steps. - .. code-block:: sh +.. code-block:: sh - git clone https://github.com/XiaoMi/mace.git - cd mace/ - git fetch --all --tags --prune + apt-get update + apt-get install -y wget - # Checkout the latest tag (i.e. release version) - tag_name=`git describe --abbrev=0 --tags` - git checkout tags/${tag_name} + apt-get install -y g++ + # Required for Cortex-M MCUs + apt-get install -y gcc-arm-none-eabi + apt-get install -y python3 python3-pip - .. note:: + python3 -m pip install jinja2 pyyaml sh numpy six filelock + # Installs cmake above 3.13.0 + wget https://cdn.cnbj1.fds.api.mi-img.com/mace/third-party/cmake-3.18.3-Linux-x86_64.sh + chmod +x cmake-3.18.3-Linux-x86_64.sh && ./cmake-3.18.3-Linux-x86_64.sh --skip-license --prefix=/usr - It's highly recommended to use a release version instead of master branch. + python3 -m pip install -U pip + # The Tensorflow version depends on your model + # The Tensroflow 1.x frozen model and Tensorflow 2.x Keras model are both supported + python3 -m pip install tensorflow==2.3.0 + python3 -m pip install tensorflow_model_optimization +You also can use a docker as the environment. - 2. Pull `MACE Model Zoo `__ project. +.. code-block:: sh - .. code-block:: sh + cd mace/docker/mace-micro-dev + docker build . -f mace-micro-dev.dockerfile --tag mace-micro-dev + cd ../../.. + # Maps your workspace to docker container + docker run -ti -v $(pwd):/workspace/ -w /workspace mace-micro-dev - git clone https://github.com/XiaoMi/mace-models.git +Convert a model to c++ code +---------------------------- - 3. Convert the pre-trained har-cnn model to c++ code. +Here we use a pre-trained model of the MNIST database, - .. code-block:: sh +.. 
code-block:: sh - cd path/to/mace - # output lib path: build/har-cnn/model/har_cnn_micro.tar.gz - CONF_FILE=/path/to/mace-models/micro-models/har-cnn/har-cnn.yml - python tools/python/convert.py --config=$CONF_FILE --enable_micro + cd mace + # Converts a tensorflow 2.x keras model, you need install python3 and tensorflow==2.x additional + python3 tools/python/convert.py --config=micro/pretrained_models/keras/mnist/mnist.yml --enable_micro - 4. Build Micro-Controllers engine and models to library on host. +Model config file +----------------- - .. code-block:: sh +The following is a completed model config file, - cd micro - ./tools/cmake/cmake-build-host.sh +.. code-block:: sh - .. note:: + library_name: har + target_abis: [host] + model_graph_format: file + model_data_format: file + models: + har_int8: + platform: keras + model_file_path: https://cdn.cnbj1.fds.api.mi-img.com/mace/miai-models/micro/keras/har/har.h5 + model_sha256_checksum: ec0477b8e489541bb34377c9cabc42ee6cefa8bdf0a9f726e06be1b967ea1dcd + subgraphs: + - input_tensors: + - "conv2d_1_input:0" + input_shapes: + - 1, 90, 3, 1 + input_ranges: + - -5, 15 + output_tensors: + - "dense_3/Softmax:0" + output_shapes: + - "1, 6" + runtime: cpu + data_type: fp32_fp32 + limit_opencl_kernel_time: 0 + nnlib_graph_mode: 0 + obfuscate: 0 + winograd: 0 + quantize: 1 + quantize_schema: int8 + quantize_range_file: /workspace/mace/micro/pretrained_models/keras/har/har.range - - The build result ``build/cmake-build/host/libmicro.a``'s abi is host, if you want to run the model on micro controllers, you should build the code with the right toolchain, for example +For the bfloat16 model, - .. code-block:: sh - cd micro - export HEXAGON_SDK_ROOT=/home/user/Qualcomm/Hexagon_SDK/3.4.1 - export HEXAGON_TOOLS=/home/user/Qualcomm/HEXAGON_Tools/6.4.06 - ./tools/cmake/cmake-build-hexagon6.sh +.. code-block:: yaml - 5. Run the model on host. + data_type: bf16_fp32 - .. 
code-block:: sh +For the int8 model, - CONF_FILE=/path/to/mace-models/micro-models/har-cnn/har-cnn.yml - # Run - python tools/python/run_micro.py --config $CONF_FILE --model_name har_cnn --build +.. code-block:: yaml - # Test model run time - python tools/python/run_micro.py --config $CONF_FILE --model_name har_cnn --build --round=100 + quantize: 1 + quantize_schema: int8 + # Required when your model has not quantize info + quantize_range_file: range_file_path - # Validate the correctness by comparing the results against the - # original model and framework, measured with cosine distance for similarity. - python tools/python/run_micro.py --config $CONF_FILE --model_name har_cnn --build --validate - # Validate the layers' correctness. - python tools/python/run_micro.py --config $CONF_FILE --model_name har_cnn --build --validate --layers 0:-1 +Build MACE Micro and models libraries +-------------------------------------- -Deploy your model into applications +Here, we build the MACE Micro engine and models to libraries on a linux host machine. The CMake build parameters depends on your model config file. + +For float32 model, + +.. code-block:: sh + + ./micro/tools/cmake/cmake-build-host.sh + +For bfloat16 model, + +.. code-block:: sh + + ./micro/tools/cmake/cmake-build-host.sh -DMACE_MICRO_ENABLE_BFLOAT16=ON + +.. note:: + + You can only use either float32 or bfloat16 + +For int8 model, + +.. code-block:: sh + + ./micro/tools/cmake/cmake-build-host.sh -DMACE_MICRO_ENABLE_CMSIS=ON + +Use libraries directly +----------------------- + +With these steps, we can find necessary libraries and headers in the "build/micro/host/install" directory, you can use the libraries directly. + +.. 
code-block:: sh + + # Builds example + g++ micro/examples/classifier/main.cc -DMICRO_MODEL_NAME=mnist -DMICRO_DATA_NAME=mnist -I build/micro/host/install/include/ -L build/micro/host/install/lib/ -lmicro -lmodels -lmicro -o mnist + # Runs the mnist example + ./mnist + + +Code example ------------------------------------ -Please refer to \ ``/mace/micro/tools/micro_run.cc`` for full usage. The following list the key steps. +The following code is the mnist example source files, which the main steps is annotated .. code-block:: cpp - // Include the headers - #include "micro/include/public/micro.h" - - // 1. Create MaceMicroEngine instance - MaceMicroEngine *micro_engine = nullptr; - MaceStatus status = har_cnn::GetMicroEngineSingleton(µ_engine); - - // 1. Create and register Input buffers - std::vector> inputs; - std::vector input_sizes; - for (size_t i = 0; i < input_shapes.size(); ++i) { - input_sizes.push_back(std::accumulate(input_shapes[i].begin(), - input_shapes[i].end(), sizeof(float), - std::multiplies())); - inputs.push_back(std::shared_ptr(new char[input_sizes[i]], - std::default_delete())); - } - // TODO: fill data into input buffers - for (size_t i = 0; i < input_names.size(); ++i) { - micro_engine->RegisterInputData(i, inputs[i].get(), - input_shapes[i].data()); + #include "data/mnist.h" + + #include + + // Include MACE Micro header + #include "micro.h" + + namespace micro { + namespace minst { + + // We use forward declaration to avoid include the special engine header + MaceStatus GetMicroEngineSingleton(MaceMicroEngine **engine); + } + } // namespace micro - // 3. Run the model - MaceStatus status = micro_engine->Run(); + int main() { + // Step 1, get the mnist micro engine + micro::MaceMicroEngine *micro_engine = NULL; + micro::MaceStatus status = + micro::mnist::GetMicroEngineSingleton(µ_engine); - // 4. 
Get the results - for (size_t i = 0; i < output_names.size(); ++i) { - void *output_buffer = nullptr; - const int32_t *output_dims = nullptr; + // Step 2, set input data + static float *input_data = data_mnist_4; + int32_t input_dims[4] = {1, 28, 28, 1}; + micro_engine->RegisterInputData(0, input_data, input_dims); + + // Step3, run the inference + micro_engine->Run(); + + // Step 4, get output data + float *output_buffer = NULL; + const int32_t *output_dims = NULL; uint32_t dim_size = 0; - MaceStatus status = - micro_engine->GetOutputData(i, &output_buffer, &output_dims, &dim_size); - // TODO: the result data is in output_buffer, you can not delete output_buffer. + micro_engine->GetOutputData( + 0, reinterpret_cast(&output_buffer), &output_dims, &dim_size); + + for (int32_t i = 0; i < output_dims[1]; ++i) { + printf("%d: %f\n", i, output_buffer[i]); + } + + return 0; } + +For more examples, goto the directory "micro/examples" + +Performance +----------- + +We deploy a `HAR-CNN `__ int8 model on the NUCLEO-F767ZI(Cortex-M7) board. Each inference of HAR CNN model takes 12 ms. \ No newline at end of file diff --git a/docs/micro-controllers/deploy.rst b/docs/micro-controllers/deploy.rst new file mode 100644 index 0000000000000000000000000000000000000000..fa480beb472fc0872cd80bb047ec7c4b5b7556dd --- /dev/null +++ b/docs/micro-controllers/deploy.rst @@ -0,0 +1,48 @@ +Deploy +====== + +MACE Micro module is written in c++98 and only depends on . +We can write a CMake toolchain file to build the program for the special platform. + +For Cortex-M MCU +---------------- + +Now we deploy the MNIST classifier example on a NUCLEO-F767ZI development with the Mbed OS. +Install a GCC Arm Embedded compiler by the terminal. + +.. code-block:: sh + + # Installs gcc arm + sudo apt-get install gcc-arm-none-eabi + +Refer to to install Mbed OS tools. + +Now we can convert the model and build the program, + +.. 
code-block:: sh + + python3 tools/python/convert.py --config=micro/pretrained_models/keras/mnist/mnist-int8.yml --enable_micro + ./micro/tools/cmake/cmake-build-gcc-arm-none-eabi.sh -DARM_CPU=cortex-m7 -DMACE_MICRO_ENABLE_CMSIS=ON -DMACE_MICRO_ENABLE_HARDFP=OFF + +The "-DARM_CPU=cortex-{m7|m4|..}" is a necessary CMake variable for different series of Arm MCUs. +You can use the Mace Micro install package("build/micro/gcc-arm-none-eabi/install") in yourself project. Here we use "mbed-cli" to compile it + +.. code-block:: sh + + # cp the MACE Micro libraries to the workspace directory + cp build/micro/gcc-arm-none-eabi/install micro/examples/classifier -r + cd micro/examples/classifier + # Compile the program + mbed compile -t GCC_ARM -m NUCLEO_F767ZI -D MICRO_MODEL_NAME=mnist_int8 -D MICRO_DATA_NAME=mnist + # Flash the program to the development board + cp BUILD/NUCLEO_F767ZI/GCC_ARM/classifier.bin /media/$USER/NODE_F767ZI + # Connet to the default COM port + sudo chown $USER:$USER /dev/ttyACM0 + mbed sterm + +Press the reset(black) button to run the example again. 
+ +For Hexagon DSP +--------------- + +In the micro/cmake/toolchain folder, there are two hexagon CMake toolchain files for reference, For more details, please goto \ No newline at end of file diff --git a/docs/micro-controllers/op_lists.rst b/docs/micro-controllers/op_lists.rst new file mode 100644 index 0000000000000000000000000000000000000000..e95be8aee87902bc85edbd731e131d4e5c8bba2c --- /dev/null +++ b/docs/micro-controllers/op_lists.rst @@ -0,0 +1,34 @@ +Operator lists +=============== + +Float32 and bfloat16 operators + +* batch_norm +* conv_2d +* depthwise_conv_2d +* pooling +* activation +* argmax +* bias_add +* cast +* concat +* eltwise +* expand_dims +* matmul +* reduce +* reshape +* softmax +* squeeze +* stack +* stride_slice + +Int8 operators + +* conv_2d +* depthwsie_conv_2d +* eltwise +* mat_mul +* pooling +* softmax +* quantize +* dequantize diff --git a/mace/proto/CMakeLists.txt b/mace/proto/CMakeLists.txt index 1fc025ee07ef9f7944e43e82800b3776c3a7870a..a2e70afc6323329564c18e1e787801ead67953d0 100644 --- a/mace/proto/CMakeLists.txt +++ b/mace/proto/CMakeLists.txt @@ -1,39 +1,42 @@ -set(MACE_PROTO_PROTOS mace.proto) -set(MACE_PROTO_SRCS) -set(MACE_PROTO_HDRS) -set(MACE_PROTO_PYTHON_DIR ${PROJECT_SOURCE_DIR}/tools/python/py_proto) - -foreach(proto_file ${MACE_PROTO_PROTOS}) +macro(generate_proto proto_file) get_filename_component(proto_file_abs ${proto_file} ABSOLUTE) get_filename_component(basename ${proto_file} NAME_WE) - set(PROTO_GENERATED_FILES ${basename}.pb.cc ${basename}.pb.h) - list(APPEND MACE_PROTO_SRCS ${basename}.pb.cc) - list(APPEND MACE_PROTO_HDRS ${basename}.pb.h) + set(${basename}_proto_files ${basename}.pb.cc ${basename}.pb.h) + set(${basename}_proto_srcs ${basename}.pb.cc) add_custom_command( - OUTPUT ${PROTO_GENERATED_FILES} + OUTPUT ${basename}_proto_files COMMAND ${PROTOC_BIN} --cpp_out ${CMAKE_CURRENT_BINARY_DIR} -I ${CMAKE_CURRENT_SOURCE_DIR} ${proto_file_abs} - COMMENT "Generating ${PROTO_GENERATED_FILES} from ${proto_file}" + 
COMMENT "Generating ${basename}_proto_files from ${proto_file}" DEPENDS protoc_bin VERBATIM ) - set(PROTO_GENERATED_PY_FILES ${MACE_PROTO_PYTHON_DIR}/${basename}_pb2.py) + set(PROTO_PYTHON_DIR ${PROJECT_SOURCE_DIR}/tools/python/py_proto) + set(PROTO_GENERATED_PY_FILES ${PROTO_PYTHON_DIR}/${basename}_pb2.py) add_custom_command( OUTPUT ${PROTO_GENERATED_PY_FILES} - COMMAND ${PROTOC_BIN} --python_out ${MACE_PROTO_PYTHON_DIR} -I ${CMAKE_CURRENT_SOURCE_DIR} ${proto_file_abs} + COMMAND ${PROTOC_BIN} --python_out ${PROTO_PYTHON_DIR} -I ${CMAKE_CURRENT_SOURCE_DIR} ${proto_file_abs} COMMENT "Generating ${PROTO_GENERATED_PY_FILES} from ${proto_file}" DEPENDS protoc_bin VERBATIM ) -endforeach() -add_custom_target(mace_proto_src DEPENDS ${PROTO_GENERATED_FILES} + add_custom_target(${basename}_proto_cpp DEPENDS ${basename}_proto_files COMMENT "Checking if re-generation is required") -add_custom_target(mace_proto_py ALL DEPENDS ${PROTO_GENERATED_PY_FILES}) + add_custom_target(${basename}_proto_py ALL DEPENDS ${PROTO_GENERATED_PY_FILES}) +endmacro() + +generate_proto(mace.proto) +generate_proto(micro_mem.proto) -add_library(proto ${MACE_PROTO_SRCS}) +add_library(proto ${mace_proto_srcs}) +add_dependencies(proto mace_proto_cpp) +set_source_files_properties( + ${mace_proto_srcs} + PROPERTIES GENERATED TRUE +) target_link_libraries(proto libprotobuf_lite) install(TARGETS proto ARCHIVE DESTINATION lib) diff --git a/mace/proto/mace.proto b/mace/proto/mace.proto index 0997046095be6325f70a70f4626f97ba32e81857..ca031951884529e9cc1b2cde22a841591ae8cb54 100644 --- a/mace/proto/mace.proto +++ b/mace/proto/mace.proto @@ -16,6 +16,7 @@ enum DataType { DT_FLOAT16 = 5; DT_BFLOAT16 = 6; DT_INT16 = 7; + DT_INT8 = 8; } enum MemoryType { diff --git a/micro/.gitignore b/micro/.gitignore deleted file mode 100644 index 7f1bd1122eeab9d7ebc6363eb1473b28b1088823..0000000000000000000000000000000000000000 --- a/micro/.gitignore +++ /dev/null @@ -1,2 +0,0 @@ -build -test/**/codegen diff --git 
a/micro/.gitlab-ci.yml b/micro/.gitlab-ci.yml index 6f244d5db207a4b47126b5d75834b7ed3406db68..2bd826c14c82e92cab84509bf59fa6b2da92f8b2 100644 --- a/micro/.gitlab-ci.yml +++ b/micro/.gitlab-ci.yml @@ -1,3 +1,8 @@ +before_script: + - git submodule deinit -f . + - git submodule sync + - git submodule update --init . + stages: - convert - build @@ -6,23 +11,18 @@ stages: model-convert: stage: convert script: - - rm -rf mace-models - - GIT_SSH_COMMAND="ssh -o UserKnownHostsFile=/dev/null -o StrictHostKeyChecking=no" git clone git@github.com:XiaoMi/mace-models.git - - > - - CONF_FILE=mace-models/micro-models/har-cnn/har-cnn.yml - - python tools/python/convert.py --config=${CONF_FILE} --enable_micro + - bash micro/tools/ci/model_convert.sh artifacts: paths: - mace-models untracked: true -host-build: +cross-build: stage: build script: - - cd micro && ./tools/cmake/cmake-build-host.sh -DMICRO_MODEL_NAME=har_cnn + - bash micro/tools/ci/cross_build.sh + - bash micro/tools/ci/host_build_and_run_examples.sh + - bash micro/tools/ci/host_build_and_run_tests.sh + # The mbed-cli protobuf version conflicts with others + # - bash micro/tools/ci/build_mbed_example.sh -host-test: - stage: test - script: - - CONF_FILE=mace-models/micro-models/har-cnn/har-cnn.yml - - python tools/python/run_micro.py --config $CONF_FILE --build --validate --model_name har_cnn diff --git a/micro/CMakeLists.txt b/micro/CMakeLists.txt index 72f2b509930a0f22fa0a78f30a6a5b361af23171..1914b98f12b726fab8a0010e58e028e7d54eb601 100644 --- a/micro/CMakeLists.txt +++ b/micro/CMakeLists.txt @@ -1,8 +1,48 @@ -cmake_minimum_required(VERSION 3.7 FATAL_ERROR) -message("CMAKE_VERSION: ${CMAKE_VERSION}") -project(micro C CXX) +cmake_minimum_required(VERSION 3.13 FATAL_ERROR) -set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++11") +project(micro C CXX ASM) + +# CMSIS_5 requires C99 +set(CMAKE_C_STANDARD 99) + +add_compile_options("-Wall;-Wextra") + +option(MACE_MICRO_ENABLE_CMSIS "Whether to enable cmsis driver" OFF) 
+option(MACE_MICRO_ENABLE_BFLOAT16 "Whether to enable bfloat16 support" OFF) +option(MACE_MICRO_ENABLE_TESTS "Whether to enable Mace Micro tests" OFF) +option(MACE_MICRO_ENABLE_EXAMPLES "Whether to enable Mace Micro examples" OFF) + +if(MACE_MICRO_GCC_ARM) + include(cmake/config_gcc_arm.cmake) +endif() + +#set CMAKE_BUILD_TYPE default value as Release +if(NOT CMAKE_BUILD_TYPE) + set(CMAKE_BUILD_TYPE "Release" + CACHE STRING "Choose the type of build, options are: Debug Release RelWithDebInfo MinSizeRel." + FORCE) +endif() + +if(MACE_MICRO_ENABLE_CMSIS) + function(compilerSpecificCompileOptions PROJECTNAME ROOT) + target_compile_options(${PROJECTNAME} + PRIVATE "-Wno-unused-parameter" + PRIVATE "-Wno-sign-compare" + PRIVATE "-Wno-strict-aliasing" + PRIVATE "-Wno-unused-variable" + ) + endfunction() + set(ROOT ${CMAKE_CURRENT_SOURCE_DIR}/third_party/CMSIS_5) + + include_directories(${ROOT}/CMSIS/Core/Include) + + add_subdirectory(${ROOT}/CMSIS/DSP/Source EXCLUDE_FROM_ALL) + add_subdirectory(${ROOT}/CMSIS/NN/Source EXCLUDE_FROM_ALL) + + target_include_directories(CMSISDSP INTERFACE ${ROOT}/CMSIS/Core/Include) + target_include_directories(CMSISNN INTERFACE ${ROOT}/CMSIS/Core/Include) + include_directories(third_party/CMSIS_5/CMSIS/Core/Include) +endif() if(HEXAGON6) # Does not work with "-O3" @@ -13,10 +53,7 @@ if(MACE_MICRO_ARM_NONE) add_definitions(-DMACE_MICRO_ARM_NONE) endif() -option(MACE_ENABLE_BFLOAT16 "Whether to enable bfloat16 support" OFF) -option(MACE_MICRO_ENABLE_TESTS "Whether to enable Mace Micro tests" ON) - -if(MACE_ENABLE_BFLOAT16) +if(MACE_MICRO_ENABLE_BFLOAT16) add_definitions(-DMACE_ENABLE_BFLOAT16) endif() @@ -24,15 +61,12 @@ if(MACE_MICRO_NDEBUG) add_definitions(-DMACE_MICRO_NDEBUG) endif() -include(third_party/third_party.cmake) - add_subdirectory(include) add_subdirectory(port) add_subdirectory(base) add_subdirectory(model) add_subdirectory(framework) add_subdirectory(ops) -add_subdirectory(codegen) file(GLOB micro_base_srcs base/*.cc) 
file(GLOB micro_codegen_models_srcs codegen/models/**/*.cc) @@ -41,15 +75,13 @@ file(GLOB micro_framework_srcs framework/*.cc) file(GLOB micro_models_srcs model/*.cc) file(GLOB micro_ops_nhwc_base_srcs ops/nhwc/base/*.cc) file(GLOB micro_ops_nhwc_srcs ops/nhwc/*.cc) +file(GLOB micro_ops_nhwc_cmsis_nn_srcs ops/nhwc/cmsis_nn/*.cc) file(GLOB micro_ops_srcs ops/*.cc) file(GLOB micro_ops_utils_srcs ops/utils/*.cc) file(GLOB micro_port_srcs port/*.cc) -# To build a single library -add_library(micro +list(APPEND micro_src ${micro_base_srcs} - ${micro_codegen_models_srcs} - ${micro_codegen_engines_srcs} ${micro_framework_srcs} ${micro_models_srcs} ${micro_ops_srcs} @@ -58,22 +90,56 @@ add_library(micro ${micro_ops_utils_srcs} ${micro_port_srcs} ) -target_include_directories(micro PUBLIC ..) + +if(MACE_MICRO_ENABLE_CMSIS) + list(APPEND micro_src ${micro_ops_nhwc_cmsis_nn_srcs}) +endif() + +add_library(micro ${micro_src}) +target_include_directories(micro PUBLIC .. PUBLIC include/public) install(TARGETS micro ARCHIVE DESTINATION lib LIBRARY DESTINATION lib RUNTIME DESTINATION bin ) +install(FILES include/public/micro.h DESTINATION include) + +if(MACE_MICRO_ENABLE_CMSIS) + target_link_libraries(micro PRIVATE CMSISNN) + + install(TARGETS + CMSISNNReshape + CMSISNNBasicMaths + CMSISNNConcatenation + CMSISNNFullyConnected + CMSISNNConvolutions + CMSISNNActivation + CMSISNNPooling + CMSISNNSoftmax + CMSISNNSupport + ARCHIVE DESTINATION lib + LIBRARY DESTINATION lib + RUNTIME DESTINATION bin + ) +endif() + +add_subdirectory(codegen) if(HEXAGON OR HEXAGON_STUB) include(cmake/find_hexagon_sdk.cmake) endif() -if(NOT HEXAGON) +if(NOT HEXAGON AND MICRO_MODEL_NAME) + add_subdirectory(third_party/gflags EXCLUDE_FROM_ALL) add_subdirectory(tools) endif() if(MACE_MICRO_ENABLE_TESTS) + add_subdirectory(third_party/googletest EXCLUDE_FROM_ALL) add_subdirectory(test) -endif(MACE_MICRO_ENABLE_TESTS) +endif() + +if(MACE_MICRO_ENABLE_EXAMPLES) + add_subdirectory(examples) +endif() diff --git 
a/micro/base/logger.cc b/micro/base/logger.cc index 4663e5741ef4458051e4e4260784aaf20cd8c319..005fe7c592694ef18ebe0f54b7b9f9ea79090b5e 100644 --- a/micro/base/logger.cc +++ b/micro/base/logger.cc @@ -30,7 +30,7 @@ const int32_t kInt8ValueBufferLength = 4; const int32_t kFloatValueBufferLength = 21; inline bool IsValidLogLevel(const LogLevel level) { - return level >= CLEAN && level < INVALID_MAX; + return level < INVALID_MAX; } char LogLevelToShortStr(LogLevel level) { diff --git a/micro/base/serialize_type.h b/micro/base/serialize_type.h index 3d26742856d2e54e0b4c22345984f9aeda47ca2d..258b27047895553f0b776d3f1fa687a8293ae479 100644 --- a/micro/base/serialize_type.h +++ b/micro/base/serialize_type.h @@ -18,6 +18,7 @@ #include #include "micro/include/public/micro.h" +#include "micro/include/port/define.h" namespace micro { diff --git a/micro/base/types.h b/micro/base/types.h index 6de264b4119e0ac68080a11df129d17f7b04a364..0f018d6a7348129e02a11bce31423a44540dcfa8 100644 --- a/micro/base/types.h +++ b/micro/base/types.h @@ -52,6 +52,35 @@ MACE_MAPPING_DATA_TYPE_AND_ENUM(int32_t, DT_INT32); MACE_MAPPING_DATA_TYPE_AND_ENUM(BFloat16, DT_BFLOAT16); #endif +struct QuantizeInfo { + float scale; + int32_t zero; +}; + +namespace ops { +namespace eltwise { // for redefine + +enum Type { + SUM = 0, + SUB = 1, + PROD = 2, + DIV = 3, + MIN = 4, + MAX = 5, + NEG = 6, + ABS = 7, + SQR_DIFF = 8, + POW = 9, + EQUAL = 10, + FLOOR_DIV = 11, + CLIP = 12, + SIGN = 13, + NONE = 14, +}; + +} // namespace eltwise +} // namespace ops + } // namespace micro #endif // MICRO_BASE_TYPES_H_ diff --git a/micro/base/utils.cc b/micro/base/utils.cc index 305e461f2411555063d0924fb185e1f3f2c6fcd4..5f8637da11fd1dbdb12593ba2767993c9e386b65 100644 --- a/micro/base/utils.cc +++ b/micro/base/utils.cc @@ -105,5 +105,25 @@ float log(float x) { return ::log(x); } + +template +const T &max(const T &a, const T &b) { + return (a < b) ? 
b : a; +} + +template +const T &min(const T &a, const T &b) { + return (a < b) ? a : b; +} + +bool ShapeIsEqual(const int32_t *dims0, + const int32_t *dims1, uint32_t dim_size) { + while (dim_size-- > 0) { + if (dims0[dim_size] != dims1[dim_size]) + return false; + } + return true; +} + } // namespace base } // namespace micro diff --git a/micro/base/utils.h b/micro/base/utils.h index 56eb955ebd7670e888527325e0bd5a142a0ade8f..d47394c2de901171f89bacaaa283855043861c64 100644 --- a/micro/base/utils.h +++ b/micro/base/utils.h @@ -26,6 +26,8 @@ uint32_t strlen(const char *str); int32_t strcmp(const char *str1, const char *str2); void memcpy(void *dst, const void *src, uint32_t bytes); int32_t GetShapeSize(uint32_t dim_size, const int32_t *dims); +bool ShapeIsEqual(const int32_t *dims0, + const int32_t *dims1, uint32_t dim_size); float sqrt(float x); int32_t ceil(float f); int32_t floor(float f); diff --git a/micro/cmake/config_gcc_arm.cmake b/micro/cmake/config_gcc_arm.cmake new file mode 100644 index 0000000000000000000000000000000000000000..ef626103eb7626e29039b494835b00d50137f2c8 --- /dev/null +++ b/micro/cmake/config_gcc_arm.cmake @@ -0,0 +1,36 @@ +if(NOT ARM_CPU) + message(FATAL_ERROR "please set ARM_CPU, such as: -DARM_CPU=cortex-m4. 
We set -mcpu=${ARM_CPU}") +endif() + +add_compile_options("-mcpu=${ARM_CPU};-mthumb") +add_compile_options("-ffunction-sections;-fdata-sections") + +# floating-point ABI +option(MACE_MICRO_ENABLE_HARDFP "Whether to use hard float-point ABI" ON) + +if(MACE_MICRO_ENABLE_HARDFP) + add_compile_options("-mfloat-abi=hard") +else() + add_compile_options("-mfloat-abi=softfp") +endif() + +# FPU +if (ARM_CPU STREQUAL "cortex-m55" ) + add_compile_options("-mfpu=fpv5-d16") + add_link_options("-mfpu=fpv5-d16") +endif() + +if (ARM_CPU STREQUAL "cortex-m33" ) + add_compile_options("-mfpu=fpv5-sp-d16") + add_link_options("-mfpu=fpv5-sp-d16") +endif() + +if (ARM_CPU STREQUAL "cortex-m7" ) + add_compile_options("-mfpu=fpv5-d16") + add_link_options("-mfpu=fpv5-d16") +endif() + +if (ARM_CPU STREQUAL "cortex-m4" ) + add_compile_options("-mfpu=fpv4-sp-d16") + add_link_options("-mfpu=fpv4-sp-d16") +endif() diff --git a/micro/cmake/toolchain/gcc-arm-none-eabi.cmake b/micro/cmake/toolchain/gcc-arm-none-eabi.cmake index a7610ae4f0ffa84340a43e2fffcceb254463bef6..6411be4b89e70be06ed712deafe9ad7f41de0a35 100644 --- a/micro/cmake/toolchain/gcc-arm-none-eabi.cmake +++ b/micro/cmake/toolchain/gcc-arm-none-eabi.cmake @@ -1,15 +1,25 @@ -set(CMAKE_SYSTEM_PROCESSOR arm) - -set(CMAKE_C_COMPILER "${GCC_ARM_ROOT}/arm-none-eabi-gcc") -set(CMAKE_CXX_COMPILER "${GCC_ARM_ROOT}/arm-none-eabi-g++") -set(CMAKE_AR "${GCC_ARM_ROOT}/arm-none-eabi-ar" CACHE FILEPATH "Archiver") -set(CMAKE_LINKER "${GCC_ARM_ROOT}/arm-none-eabi-ld") -set(CMAKE_EXE_LINKER_FLAGS "--specs=nosys.specs" CACHE INTERNAL "") -set(MACE_MICRO_ARM_NONE ON) +set(CMAKE_SYSTEM_NAME Generic) +set(CMAKE_SYSTEM_PROCESSOR arm) set(CMAKE_FIND_ROOT_PATH "${GCC_ARM_ROOT}") set(CMAKE_FIND_ROOT_PATH_MODE_PROGRAM NEVER) set(CMAKE_FIND_ROOT_PATH_MODE_LIBRARY ONLY) set(CMAKE_FIND_ROOT_PATH_MODE_INCLUDE ONLY) set(CMAKE_FIND_ROOT_PATH_MODE_PACKAGE ONLY) + +find_program(CMAKE_C_COMPILER NAMES arm-none-eabi-gcc arm-none-eabi-gcc.exe) 
+find_program(CMAKE_CXX_COMPILER NAMES arm-none-eabi-g++ arm-none-eabi-g++.exe) +find_program(CMAKE_ASM_COMPILER NAMES arm-none-eabi-gcc arm-none-eabi-gcc.exe) +find_program(CMAKE_AR NAMES arm-none-eabi-gcc-ar arm-none-eabi-gcc-ar.exe) +find_program(CMAKE_CXX_COMPILER_AR NAMES arm-none-eabi-gcc-ar arm-none-eabi-gcc-ar.exe) +find_program(CMAKE_C_COMPILER_AR NAMES arm-none-eabi-gcc-ar arm-none-eabi-gcc-ar.exe) +find_program(CMAKE_LINKER NAMES arm-none-eabi-g++ arm-none-eabi-g++.exe) + +find_program(ELF2BIN NAMES arm-none-eabi-objcopy arm-none-eabi-objcopy.exe) + +# Force compiler settings +SET(CMAKE_C_COMPILER_WORKS TRUE) +SET(CMAKE_CXX_COMPILER_WORKS TRUE) + +set(MACE_MICRO_GCC_ARM ON) diff --git a/micro/codegen/CMakeLists.txt b/micro/codegen/CMakeLists.txt index ee75d1a2d566a28e40fc4a86bf7600f8efdd8bdd..392d222f695664cbd8753ed05410c740c509062c 100644 --- a/micro/codegen/CMakeLists.txt +++ b/micro/codegen/CMakeLists.txt @@ -1,29 +1,17 @@ file(GLOB_RECURSE generated_models_srcs models *.cc) -add_library(generated_models - ${generated_models_srcs} -) -target_link_libraries(generated_models - micro_framework - micro_include - micro_model - micro_ops -) - file(GLOB_RECURSE micro_engine_srcs engines micro_engine_factory.cc) -add_library(micro_engine - ${micro_engine_srcs} -) -target_link_libraries(micro_engine - micro_framework - micro_model - micro_ops - generated_models -) - file(GLOB_RECURSE micro_engine_c_srcs engines micro_engine_c_interface.cc) -add_library(micro_engine_c + +# Use ".keep.cc" as a source file when there are no model source files in "models" directory +add_library(models + ${generated_models_srcs} + ${micro_engine_srcs} ${micro_engine_c_srcs} ) -target_link_libraries(micro_engine_c - micro_engine +target_link_libraries(models + micro ) + +install(TARGETS models + ARCHIVE DESTINATION lib +) \ No newline at end of file diff --git a/micro/codegen/engines/.keep.cc b/micro/codegen/engines/.keep.cc new file mode 100644 index 
0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/micro/codegen/models/.keep.cc b/micro/codegen/models/.keep.cc new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/micro/examples/CMakeLists.txt b/micro/examples/CMakeLists.txt new file mode 100644 index 0000000000000000000000000000000000000000..c5aff86ea6de7fc2a4e13ab371cc7d6604a46009 --- /dev/null +++ b/micro/examples/CMakeLists.txt @@ -0,0 +1 @@ +add_subdirectory(classifier) \ No newline at end of file diff --git a/micro/examples/classifier/.gitignore b/micro/examples/classifier/.gitignore new file mode 100644 index 0000000000000000000000000000000000000000..c6a0d80403faea040637a5e661839d4c5c08368e --- /dev/null +++ b/micro/examples/classifier/.gitignore @@ -0,0 +1,6 @@ +mbed-os +BUILD +install +mbed_app.json +__pycache__ +mbed_settings.py \ No newline at end of file diff --git a/micro/examples/classifier/.mbed b/micro/examples/classifier/.mbed new file mode 100644 index 0000000000000000000000000000000000000000..9c0edc955256fc417c5f1d340253bbb08ec16e2e --- /dev/null +++ b/micro/examples/classifier/.mbed @@ -0,0 +1,2 @@ +TARGET=NUCLEO_F767ZI +ROOT=. 
diff --git a/micro/examples/classifier/CMakeLists.txt b/micro/examples/classifier/CMakeLists.txt new file mode 100644 index 0000000000000000000000000000000000000000..c900585d7ad304e18455946b061602aa10b5f694 --- /dev/null +++ b/micro/examples/classifier/CMakeLists.txt @@ -0,0 +1,10 @@ + +if(NOT MICRO_MODEL_NAME OR NOT MICRO_DATA_NAME) + message(FATAL_ERROR "MICRO_MODEL_NAME or MICRO_DATA_NAME is undefined") +endif() + +add_executable(${MICRO_MODEL_NAME} main.cc) +target_compile_options(${MICRO_MODEL_NAME} PRIVATE "-Wno-error") +target_link_libraries(${MICRO_MODEL_NAME} micro models) +target_compile_definitions(${MICRO_MODEL_NAME} PRIVATE "-DMICRO_MODEL_NAME=${MICRO_MODEL_NAME}") +target_compile_definitions(${MICRO_MODEL_NAME} PRIVATE "-DMICRO_DATA_NAME=${MICRO_DATA_NAME}") diff --git a/micro/examples/classifier/data.h b/micro/examples/classifier/data.h new file mode 100644 index 0000000000000000000000000000000000000000..1f4eebcc5902fe13749320e112985dff9257fbb3 --- /dev/null +++ b/micro/examples/classifier/data.h @@ -0,0 +1,38 @@ +// Copyright 2020 The MACE Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#ifndef MICRO_EXAMPLES_CLASSIFIER_DATA_H_ +#define MICRO_EXAMPLES_CLASSIFIER_DATA_H_ + +#include "data/har.h" +#include "data/kws.h" +#include "data/mnist.h" +#include "stdint.h" + +namespace mnist { +const float *input = data_mnist_4; +const int32_t input_dims[4] = {1, 28, 28, 1}; +} // namespace mnist + +namespace har { +const float *input = data_har_standing; +const int32_t input_dims[4] = {1, 90, 3, 1}; +} // namespace har + +namespace kws { +const float *input = data_kws_yes; +const int32_t input_dims[4] = {1, 98, 40, 1}; +} // namespace kws + +#endif // MICRO_EXAMPLES_CLASSIFIER_DATA_H_ diff --git a/micro/examples/classifier/data/har.h b/micro/examples/classifier/data/har.h new file mode 100644 index 0000000000000000000000000000000000000000..515436cb7df3d612dbd2a1ffbb1f820a97ae4957 --- /dev/null +++ b/micro/examples/classifier/data/har.h @@ -0,0 +1,159 @@ +// Copyright 2020 The MACE Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#ifndef MICRO_EXAMPLES_DATA_HAR_H_ +#define MICRO_EXAMPLES_DATA_HAR_H_ + +static float data_har_jogging[270] = { + 5.012288, 11.264028, 0.95342433, -0.6946377, 12.680544, 0.50395286, + 4.903325, 10.882658, -0.08172209, -0.61291564, 18.496431, 3.0237172, + -1.1849703, 12.108489, 7.205164, 1.3756552, -2.4925237, -6.510526, + -0.61291564, 10.56939, 5.706926, -0.50395286, 13.947236, 7.0553403, + -8.430995, 11.413852, 5.134871, 0.95342433, 1.3756552, 1.6480621, + -8.19945, 19.57244, 2.7240696, 1.4165162, 5.7886477, 2.982856, + -1.879608, -2.982856, -0.29964766, -6.1291566, 6.851035, -8.158588, + 5.829509, 18.0061, 8.539958, 6.2789803, 2.982856, 2.9147544, + -1.56634, 8.308413, -1.4573772, 3.5276701, 13.593107, 9.425281, + -2.0294318, -5.706926, -10.18802, 2.7649305, 10.337844, -9.724928, + 3.568531, 13.6748295, 1.5390993, -0.50395286, 3.8681788, 3.718355, + -2.3018389, 1.6889231, 0.08172209, -3.568531, 19.57244, 6.510526, + -0.8036005, -3.2961242, -4.630918, 0.50395286, 10.841797, 13.525005, + 5.706926, 15.595298, 6.1700177, -8.662541, 7.273266, 4.0180025, + -1.334794, 1.2258313, 2.3699405, -4.5900574, 19.57244, 4.7126403, + 3.8681788, 3.759216, 0.84446156, -1.7978859, 1.5390993, 8.730643, + 7.668256, 11.264028, -1.3075534, -2.3699405, 14.2877445, 8.281172, + 2.7240696, 1.4573772, 0.88532263, -3.5957718, 18.659876, -0.6537767, + 3.9499009, 4.140586, 3.990762, 0.46309182, -2.4108016, 2.4108016, + 3.7864566, 14.137921, -3.1463003, 3.336985, 19.231932, 6.5513873, + 5.6660647, 3.7864566, 0.53119355, 0.23154591, 0.7627395, 0.7627395, + -4.8216033, 19.57244, 8.158588, 1.8387469, -1.1168685, -2.7921712, + -3.2961242, 10.079058, 13.824653, 11.604536, 17.079916, 1.334794, + -3.173541, 14.015338, 5.706926, 0.61291564, 1.1168685, 2.5606253, + -7.8861814, 19.57244, 1.9885708, 3.1463003, 5.243834, 4.671779, + -3.0237172, -4.3312707, -3.336985, -0.08172209, 11.917805, -7.8861814, + -1.0351465, 14.818938, 4.6036777, -2.4516625, 2.5333846, 3.486809, + -1.3756552, 2.070293, 
-0.19068487, -2.4925237, 19.57244, 6.469665, + 1.4573772, -5.243834, -4.372132, -1.4165162, 9.80665, 5.7477865, + -1.2666923, 14.709975, 6.2108784, -3.6774938, 3.173541, 3.7864566, + 1.8387469, 2.7649305, -1.7570249, -1.2666923, 19.313654, 6.3198414, + 2.4108016, -7.6546354, -6.1291566, -0.61291564, 16.358038, 4.944186, + 0.040861044, 17.502148, 2.5333846, -7.6546354, 7.8180795, 4.372132, + -1.2666923, 0.7218784, 0.8036005, -5.012288, 19.57244, 5.5162406, + 1.9477097, 2.7921712, 2.070293, -5.053149, 1.6480621, 7.6273947, + 9.384419, 13.443283, 1.0351465, -5.434519, 13.211738, 6.4424243, + -0.61291564, 1.879608, 1.4165162, 4.7126403, -6.5513873, -6.0201936, + -1.7570249, 9.302697, -6.428804, -0.9125633, 10.501288, -0.27240697, + 2.6014864, 19.381754, 4.440233, 5.7886477, 3.214402, 1.1441092, + -1.9885708, 12.4489975, -2.7240696, 1.4165162, 16.780268, 8.471856, + 0.42223078, -8.267551, -7.3549876, -3.568531, 10.95076, -0.8036005, + -4.671779, 11.727119, 0.38136974, -2.1383946, 1.6889231, 3.5276701, + -1.334794, 2.4925237, -0.3405087, -2.9147544, 19.57244, 7.5865335, + 3.5276701, -3.9499009, -1.920469, -4.0588636, 10.038197, 14.2877445}; + + +static float data_har_walking[270] = { + -0.99, 11.45, -3.0645783, 1.18, 14.94, -3.718355, + 1.27, 13.82, -1.2258313, -0.15, 11.14, -2.1111538, + -1.38, 8.05, -0.84446156, -1.99, 5.94, 0.14982383, + -0.08, 4.94, 0.88532263, -0.27, 4.14, 2.2609777, + -3.26, 6.44, 4.1814466, -5.75, 13.02, 7.273266, + -2.37, 10.65, 8.008764, -0.46, 15.94, 0.7218784, + 1.8, 6.13, -1.1168685, -4.75, 10.84, -3.0645783, + -1.46, 8.39, 0.88532263, 1.33, 7.78, -0.46309182, + -3.72, 8.47, -0.7218784, -3.72, 8.47, -0.7218784, + -1.88, 7.63, -0.08172209, -1.12, 9.3, -0.10896278, + -2.37, 10.95, -0.8036005, -4.06, 12.3, -0.7627395, + -3.41, 14.52, -0.7218784, 0.34, 12.22, -3.7864566, + 0.76, 15.32, -2.6014864, -0.04, 13.53, -1.1849703, + -0.53, 9.72, -2.1792557, 0.11, 5.52, -1.6480621, + 0.38, 4.06, 0.46309182, 0.04, 3.26, 0.14982383, + -3.34, 5.83, 4.862464, 
-6.05, 13.14, 7.668256, + -0.91, 11.14, 11.073342, -0.5, 16.13, -0.9125633, + -0.27, 7.7, -1.1849703, -3.45, 9.28, -2.1383946, + -2.03, 9.04, -0.53119355, 2.03, 6.89, -0.5720546, + -2.18, 7.5, -1.3756552, -1.8, 7.21, -0.0, + -1.57, 9.96, 0.08172209, -3.21, 12.07, -0.14982383, + -5.09, 12.22, -0.7627395, -2.68, 14.98, -3.173541, + 1.99, 12.79, -3.2961242, 0.84, 14.82, -2.2609777, + 0.69, 13.21, -2.2609777, -1.08, 9.15, -1.2258313, + -0.95, 4.9, -0.7627395, -0.11, 4.67, 0.19068487, + 0.61, 3.49, 0.08172209, -1.84, 5.48, 5.134871, + -5.6, 14.06, 7.3958488, -1.08, 12.03, 8.308413, + 1.73, 14.56, 2.9147544, -0.76, 5.94, -5.325556, + -5.6, 12.83, -0.0, 0.04, 6.66, -0.9942854, + 1.65, 7.89, -0.6537767, -2.3, 7.93, -2.3426998, + -1.92, 8.24, -0.040861044, -1.42, 9.96, -0.14982383, + -3.72, 11.5, 0.14982383, -4.59, 12.18, -0.5720546, + -2.79, 14.25, -3.2961242, 3.15, 13.02, -3.1054392, + 1.46, 14.94, -2.2201166, -2.22, 12.49, -2.1111538, + -1.42, 9.53, -1.607201, -0.11, 6.17, -0.8036005, + 0.34, 4.71, 0.10896278, 1.04, 3.49, 0.53119355, + -1.99, 5.05, 3.255263, -6.66, 14.29, 7.082581, + -3.87, 10.04, 9.765789, -1.5, 18.39, -0.6946377, + 2.37, 5.01, -0.5720546, -5.24, 10.76, -3.173541, + -1.46, 8.2, 0.53119355, 2.6, 6.97, -0.040861044, + -3.53, 8.85, -1.879608, -1.23, 7.06, -0.23154591, + -1.53, 11.3, 0.23154591, -2.53, 11.65, -0.6946377, + -3.83, 12.34, -0.50395286, -2.96, 13.25, -3.173541, + 2.83, 13.25, -3.173541, 0.65, 14.41, -1.1441092, + -0.89, 11.8, -2.6014864, -1.18, 7.21, -1.334794}; + +static float data_har_standing[270] = { + 3.17, 9.28, 1.1441092, 3.3, 9.23, 1.1168685, + 3.21, 9.3, 1.1849703, 3.17, 9.28, 1.0760075, + 3.17, 9.34, 1.1168685, 3.26, 9.28, 1.1168685, + 3.21, 9.3, 1.1168685, 3.21, 9.23, 1.1168685, + 3.17, 9.28, 1.1168685, 3.15, 9.28, 1.1849703, + 3.17, 9.34, 1.1168685, 3.21, 9.28, 1.1849703, + 3.21, 9.3, 1.0760075, 3.15, 9.34, 1.1168685, + 3.21, 9.28, 1.0760075, 3.21, 9.34, 1.1441092, + 3.26, 9.3, 1.1441092, 3.17, 9.34, 1.1168685, + 3.21, 9.3, 
1.1168685, 3.21, 9.28, 1.1168685, + 3.26, 9.28, 1.1849703, 3.17, 9.3, 1.1168685, + 3.21, 9.28, 1.1168685, -1.88, 9.85, -0.23154591, + -0.19, 9.92, -0.5720546, -0.61, 10.27, -0.88532263, + 0.76, 10.57, -1.7570249, 0.42, 9.47, -1.1168685, + 0.38, 9.47, -1.9477097, -1.04, 10.65, -1.525479, + -1.92, 9.51, -0.5720546, -1.31, 9.85, -0.53119355, + -0.08, 9.92, -1.7570249, 1.73, 9.77, -0.8036005, + 1.5, 9.92, -1.4573772, 1.27, 10.5, -1.879608, + 0.61, 10.12, -1.9885708, -0.53, 9.77, -1.879608, + -0.42, 9.62, -1.6480621, 0.65, 10.42, -2.2201166, + 0.65, 10.42, -2.2201166, 1.61, 9.38, -1.8387469, + 1.61, 9.43, -1.525479, 1.61, 9.43, -1.525479, + 0.95, 10.27, -1.3075534, 0.19, 10.38, -1.1849703, + 0.31, 9.81, -1.4165162, 1.12, 9.62, -1.6889231, + 1.23, 9.85, -1.6480621, 1.04, 9.7, -1.8387469, + 0.57, 9.89, -2.0294318, 0.65, 9.96, -1.9885708, + 0.95, 9.96, -1.7570249, 1.42, 10, -1.7297841, + 1.69, 9.89, -1.525479, 1.46, 10, -1.4165162, + 0.69, 9.77, -1.6889231, 0.08, 9.96, -1.9477097, + -0.08, 10.19, -2.1111538, 0.38, 9.72, -1.9885708, + 0.93, 10.12, -2.1111538, 1.33, 9.62, -1.9885708, + 1.08, 9.85, -1.9477097, 0.8, 9.77, -1.7570249, + 0.69, 10.34, -1.6889231, 0.72, 9.66, -1.3075534, + 0.69, 10, -1.3756552, 0.93, 9.62, -1.4573772, + 0.76, 10.12, -1.607201, 0.93, 9.72, -1.7978859, + 0.76, 10.23, -1.9885708, 0.76, 9.23, -1.920469, + 0.57, 10.34, -2.1383946, 0.99, 9.58, -1.879608, + 1.33, 10.04, -1.7978859, 1.61, 9.85, -1.4165162, + 0.61, 10.15, -0.88532263, 0.53, 9.58, -1.4573772, + 0.15, 10.19, -1.920469, 0.34, 9.85, -1.334794, + 0.8, 10.31, -1.7978859, 0.69, 9.53, -1.9477097, + 0.8, 9.92, -1.879608, 0.5, 10.04, -1.1849703, + 1.12, 9.43, -1.7978859, 1.31, 10.27, -1.2666923, + 1.5, 9.77, -1.607201, 0.46, 10.04, -0.9125633, + 0.31, 9.85, -1.0760075, 0.61, 10.19, -1.1849703}; + +#endif // MICRO_EXAMPLES_DATA_HAR_H_ diff --git a/micro/examples/classifier/data/kws.h b/micro/examples/classifier/data/kws.h new file mode 100644 index 
0000000000000000000000000000000000000000..c64a3a8b8c657829a41b80a4e94fd9767551f586 --- /dev/null +++ b/micro/examples/classifier/data/kws.h @@ -0,0 +1,122 @@ +// Copyright 2020 The MACE Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef MICRO_EXAMPLES_DATA_KWS_H_ +#define MICRO_EXAMPLES_DATA_KWS_H_ + +// speech_commands_dataset/google_speech_commands/splitted_data/test/yes/1b4c9b89_nohash_1.wav +// clang-format off +static float data_kws_yes[98 * 40] = { +-44.89, -1.35, 0.99, 0.13, -0.37, -0.9 , -1.43, -0.4 , -0.35, -0.64, 0.33, 0.17, -0.19, 0.23, -0.3 , -0.33, -0.17, 0.34, 0.6 , -0.03, -0.74, 0.39, -0.07, -0.05, 0.03, 0.3 , -0.39, -0.43, -0.33, -0.49, -0.1 , -0.31, -0.73, -0.28, -0.34, -0.4 , -0.54, -0.06, 0.19, 0.24, +-45.04, -1.31, 0.75, 0.35, 0.07, -0.86, -1.23, 0.39, -0.71, -0.42, 0.64, 0.4 , 0.45, 0.44, 0.5 , 0.36, -0.47, -0.63, -0.02, 0.18, -0.44, 0.78, 0.28, -0.13, 0.14, 0.11, -0.14, -0.09, -0.25, -0.08, 0.02, 0.15, -0.22, 0.29, -0.19, -0.39, -0.26, -0.06, 0.09, -0.14, +-44.73, -0.97, 1.66, -0.16, -0.5 , -1.7 , -1.91, 0.2 , -0.92, -0.64, 0.09, 0.66, -0.36, -0.12, 0.65, 0.01, -0.55, -0.78, -0.7 , -0.45, -0.41, -0.04, 0.83, 0.05, 0.32, 0.16, -0.61, -0.28, -0.87, -0.55, -0.23, 0.26, -0.14, 0.11, -0.49, -0.7 , -0.75, -0.47, 0.22, 0.13, +-44.85, -0.92, 0.63, -0.89, -1.39, -1.89, -1.43, 0.38, 0.06, -0.01, 0.06, 0.67, -0.31, -0.12, 0.86, 0.32, 0.12, -0.48, -1.72, -1.9 , -1.21, -0.01, 0.57, 0.31, 0.41, 0.34, -0.38, 
-0.26, -1.1 , -0.02, 0.15, 0.29, 0.43, 0.3 , 0.05, -0.07, -0.63, -0.01, -0.32, -0.12, +-44.73, -0.27, 1.15, -1.17, -1.25, -2.07, -1.58, 0.33, 0.73, 0.34, -0.21, -0.14, 0.28, 0.2 , -0.33, 0.72, 0.22, -0.97, -1.08, -1.59, -1.26, 0.45, 0.35, 0.74, 1.27, 1.11, -0.04, -1.31, -1. , 0.23, 0.25, 0.73, 0.15, 0.15, 0.13, -0.51, -0.78, -0.11, -0.18, 0.17, +-44.49, -0.47, 1.83, 0.01, -1.43, -2.24, -2.64, -0.45, 0.95, 0.53, -0.76, -0.85, 0.08, 0.19, -0.75, -0.6 , -0.31, -1.21, -1.72, -1.67, -1.37, 0.55, 0.38, 1.28, 1.35, 0.17, -0.56, -0.84, -1.34, -0.13, 0.42, 1.16, -0.31, -0.02, 0.82, -0.12, 0.31, 0.43, -0.39, -0.05, +-44.51, 0.27, 1.46, -1.51, -1.65, -1.17, -1.26, -0.55, 0.53, 0.75, 1.09, 0.27, 0.24, 0.72, -0.84, -1.15, -0.93, -1.85, -1.79, -1.52, -1.79, -1.2 , -0.15, 1.32, 0.82, 0.49, -0.39, -0.19, -0.83, -0.39, 0.04, 0.53, -0.17, -0.06, 0.85, -0.21, -0.57, -0.39, -0.35, 0.09, +-44.21, -0.23, 0.99, -1.6 , -2.04, -1.24, -0.91, -0.59, -0.62, 0.01, 0.88, 0.58, 0.77, 0.55, 0.09, -0.27, -0.54, -0.74, -1. , -0.99, -0.94, -0.7 , 0.41, 1.46, 0.97, -0.46, -1.12, -0.44, -0.1 , 0.15, 0.56, 0.9 , -0.27, -0.46, 0.24, -0.15, -0.37, 0.05, 0.1 , 0.3 , +-43.8 , 0.31, 1.09, -1. , -1.23, -0.9 , -0.67, -1.09, -0.6 , 0.04, 1.09, 0.57, 0.97, 0.78, 0.46, -0.21, -1.13, -0.11, -0.62, -0.52, -1. 
, -1.51, 0.4 , 0.91, 0.47, -0.2 , -0.2 , -0.29, -0.55, 0.48, -0.09, 0.5 , -0.21, -0.16, 0.36, -0.15, -0.07, -0.31, -0.46, 0.26, +-42.98, 0.38, 1.25, -0.87, -0.74, -0.48, -1.4 , -0.95, -2.06, 0.06, 1.74, 0.45, 1.07, 0.93, 0.6 , 0.44, -0.81, -0.76, -0.14, -0.37, -0.33, -0.79, -0.08, 0.42, 0.64, -0.42, -0.49, -0.83, -0.97, 0.34, 0.18, 0.18, -0.25, 0.15, 0.36, -0.21, -0.31, -0.02, -0.66, 0.14, +-43.47, -0.22, 0.98, -0.73, -1.34, -0.63, -1.71, -2.36, -2.27, -0.22, 0.69, -0.45, -0.44, -0.93, 0.31, 0.84, -0.36, -1.56, -1.77, -1.12, -0.7 , -1.69, -0.96, 0.19, 0.22, -0.62, -0.77, -0.48, -0.49, 0.74, 0.34, 0.33, -0.13, 0.19, 0.54, -0.38, -0.09, -0.08, -0.7 , -0.26, +-40.58, 2.42, 3.86, 1.07, -0.08, 1.06, -0.27, -1.88, -1.96, -0.06, 0.87, -0.88, -1.11, -0.86, -0.72, 0.44, -0.7 , -1.09, -0.63, 0.26, 0.56, -0.48, -0.08, 0.18, 0.81, 0.84, -0.72, 0.08, 0.15, 0.3 , -0.41, -0.13, -0.47, -0.22, -0.66, -0.95, 0.36, 0.47, 0.35, -0.16, +-30.63, 5.5 , 7.67, 1.49, 1.11, 0.08, -2.62, -2.58, -1.61, 0.04, -0.34, -2.59, -1.08, -0.79, -1.11, 0.12, -2.28, -0.39, -0.02, 0.43, 0.3 , -0.08, -0.97, 0.45, -0.27, 0.46, -1.06, -0.06, -0.01, -0.16, -0.32, 0.18, -0.06, -0.49, 0.23, -0.46, -0.17, 0.27, -0.26, -0.21, +-23.7 , 5.92, 8.26, 1.79, -1.58, 0.32, -3.54, -2.29, -1.89, -1.11, -0.6 , -3.36, -0.73, -0.66, -1.49, -1.37, -2.87, -0.23, -1.15, 0.18, 0.13, -0.27, -0.42, 1.24, 0.26, -0.04, -0.9 , -0.26, -0.4 , -0.34, 0.17, -0.08, 0.14, -0.3 , 0.14, -0.1 , 0.5 , 0.09, -0.15, 0.21, +-20.41, 5.76, 8.87, 2.16, -3.84, 0.8 , -3.07, -0.3 , -1.21, -1.29, 0.39, -1.81, 0.52, -0.46, -1.18, -1.07, -2.39, -1.82, -1.47, -0.07, -1.03, -0.29, 0.27, 1.35, 0.68, 1.06, -0.28, 0.38, -0.71, -0.51, 0.33, 0.18, 0.62, 0.31, -0.09, -0.13, 0.3 , -0. , 0.35, 0.29, +-16.62, 5.57, 7.72, 3.08, -5.01, 1.33, -3.02, 0.9 , -1.33, -1.08, 1.04, -1.93, 1.08, -0.5 , -1.92, -1.18, -1.59, -1.83, -1.65, -0.77, -2.08, -0.32, -0.21, 1.61, 1.74, 1.41, 0.39, 0.69, -0.43, -0.58, -0.13, -0.56, 0.52, -0.24, -0.31, 0.19, 0.06, 0.01, 0.45, 0.13, +-14. 
, 4.68, 6.43, 4.07, -6.68, 1.08, -2.65, 0.83, -1.69, -0.75, 1.04, -2.29, 1.32, -0.69, -1.83, -2.7 , -1.66, -1.77, -1.68, -1.04, -2.47, -0.43, 0.68, 2.06, 1.64, 1.44, 0.36, 0.29, -0.74, -0.89, -0.57, -0.68, 0.97, -0.11, -0.31, 0.13, -0.17, -0.03, 0.39, 0.19, +-12.78, 4.39, 5.95, 4.88, -7.19, 0.37, -2.62, 0.29, -2.01, -0.74, -0.1 , -2.34, 1.3 , -0.49, -1.01, -3.36, -1.41, -1.35, -1.55, -1.82, -2.38, -0.87, -0.02, 1.38, 1. , 1.32, 0.36, 0.9 , -0.71, -1.36, -0.61, -0.69, 1.31, 0.41, -0.29, 0.23, 0.34, -0.08, 0.16, -0.08, +-10.62, 4.2 , 5.31, 5.65, -7.15, 0.33, -2.05, -0.71, -1.05, 0.26, -0.8 , -1.84, 1.35, -0.42, -0.63, -3.38, -1.35, -1.8 , -1.98, -1.5 , -1.59, -0.8 , -0.3 , 1.52, 0.94, 1.88, 1.04, 0.93, -0.86, -1.03, -0.47, -0.17, 0.56, 0.36, -0.26, 0.05, -0.04, 0.07, 0.35, 0.46, + -8.94, 4.54, 5.63, 7.13, -7.9 , -0.35, -2.27, -1.07, -0.78, 0.02, -0.47, -1.27, 1.6 , -0.48, -1.44, -2.38, -0.83, -1.54, -2.21, -1.45, -0.91, -0.68, -0.79, 1.89, 0.51, 1.84, 0.99, 0.96, -0.37, -1.47, -0.82, -0.12, 1.05, 0.83, 0.09, -0.24, -0.58, -0.02, 0.3 , 0.48, + -6.8 , 4.53, 3.75, 6.32, -7.55, -0.35, -1.42, -0.74, -1.47, -0.16, 0.54, -0.79, 1.39, -0.75, -1.5 , -2.04, -1.34, -2.2 , -2.79, -2.04, -0.45, -0.65, -0.67, 1.85, 0.89, 2.39, 0.59, 1.09, -0.39, -0.83, -0.86, -0.81, 0.7 , 0.73, 0.16, -0.25, -0.35, -0.19, 0.44, 0.88, + -4.24, 4.77, 2.41, 6.13, -6.42, -1.08, -1.41, -1.1 , -2.56, -0.02, 0.69, -0.61, 1.32, -1.21, -1.47, -1.54, -1.07, -2.19, -2.35, -2.05, -0.76, -0.77, -1.07, 1.56, 1.4 , 2.26, 0.6 , 0.68, -0.48, -0.82, -1.11, -0.66, 1.04, 1.01, 0.05, -0.23, -0.35, -0.5 , 0.76, 0.31, + -1.88, 5.5 , 1.56, 6.08, -6.31, -2.14, -1.35, -0.75, -2.83, -0.15, 1.07, -0.55, 0.9 , -1.4 , -1.82, -1.17, -0.95, -1.98, -1.78, -1.33, -0.67, -0.65, -0.92, 1.67, 0.78, 1.97, 0.54, 0.72, -0.34, -1.04, -1.15, -0.23, 1.02, 1.03, 0.03, -0.23, -0.74, -0.21, 1.14, 0.62, + -0.88, 5.98, -0.15, 5.6 , -6.56, -1.91, -0.83, 0.26, -2.67, -0.06, 0.96, -1.09, 0.58, -1.83, -1.23, -0.92, -1.18, -1.99, -2.38, -1.48, -0.99, 
-0.5 , -0.89, 2.1 , 0.87, 1.91, 0.32, 0.9 , -0.55, -1.26, -1.4 , -0.49, 1.11, 0.86, -0.06, -0.2 , -0.23, 0.26, 1.12, 0.59, + -0.15, 6.05, -0.3 , 5.39, -6.8 , -2.08, -1.67, 0.4 , -2.63, -0.37, 0.6 , -1.5 , 0.29, -1.28, -1.11, -1.3 , -1.38, -1.44, -1.92, -1.53, -1.41, -0.55, -1.36, 1.53, 0.76, 1.71, 0.36, 0.84, -0.51, -1.54, -0.65, -0.34, 0.98, 0.93, -0.18, -0.13, -0.61, 0.17, 0.87, 0.58, + 0.64, 5.24, -0.64, 5.1 , -6.04, -1.78, -1.56, 1.3 , -2.35, -0.45, 0.19, -0.76, -0.23, -1.09, -0.86, -1.3 , -1.3 , -1.55, -1.7 , -2.49, -1.38, -0.5 , -1.44, 1.99, 0.97, 2.6 , 0.68, 0.77, -0.15, -1.59, -0.94, -0.74, 1.06, 1.44, -0.19, -0.18, -0.69, -0.32, 0.98, 0.59, + 1.19, 5.41, -0.69, 4.08, -5.65, -1.63, -1.23, 1.41, -2.04, -0.45, -0.05, -1. , -0.11, -0.79, -1.08, -1.3 , -0.56, -2.12, -1.2 , -2.42, -1.41, -0.43, -1.54, 1.42, 1.14, 2.36, 0.68, 1.09, -0.27, -1.45, -1.05, -0.95, 0.92, 0.96, 0.57, 0.02, -0.77, -0.26, 1.12, 0.38, + 2.17, 4.88, -1.04, 3.79, -6.26, -2.31, -1.15, 1.32, -1.94, -0.29, 0.76, -0.93, -0.51, -1.1 , -1.28, -1.85, -1.32, -2.07, -1.36, -2.01, -1.47, -0.48, -1.57, 1.11, 0.87, 2.5 , 0.71, 0.83, 0.28, -1.13, -0.99, -0.49, 0.74, 0.84, 0.02, -0.2 , -1.13, 0.03, 1.01, 0.64, + 1.98, 4.9 , -1.71, 3.65, -5.56, -2.68, -1.5 , 1.12, -1.38, -1.03, 0.6 , -1.15, -0.61, -1.13, -1.25, -1.63, -1.6 , -1.8 , -1.47, -2.31, -2.04, -0.5 , -1.35, 0.81, 0.88, 2.1 , 1.12, 0.92, 0.15, -1.2 , -1.12, -0.66, 0.65, 1.01, 0.06, -0.2 , -1.25, -0.27, 0.92, 0.4 , + 2.19, 4.83, -1.07, 2.77, -4.97, -1.63, -1.74, 1.72, -1.67, -0.47, 0.63, -1.42, -0.1 , -1.33, -0.97, -0.66, -1.4 , -1.96, -1.17, -1.85, -1.9 , -1.15, -1.37, 0.94, 1.01, 2.09, 0.95, 1.45, 0.1 , -1.2 , -1.64, -0.77, 1.13, 0.99, 0.45, 0.09, -1.16, -0.32, 0.67, 0.66, + 2.58, 5.07, -1.55, 2.55, -4.88, -1.43, -2.32, 1.79, -1.93, -0.57, 0.32, -1.2 , -0.34, -1.87, -0.56, -1.15, -1.31, -1.64, -0.93, -2.37, -1.65, -0.76, -1.89, 0.91, 0.57, 2.22, 0.91, 1.65, -0.31, -1.22, -1.38, -0.58, 0.6 , 1.1 , 0.72, -0.04, -0.89, -0.26, 0.25, 0.79, + 3.32, 
4.62, -1.6 , 3. , -4.52, -1.64, -2.64, 3.01, -2.01, -0.43, 0.46, -1.22, 0.09, -1.76, -0.69, -1.15, -1.29, -1.39, -1.04, -2.32, -1.54, -0.72, -1.85, 1. , 0.57, 2.53, 1.08, 1.6 , -0.25, -0.95, -1.45, -0.42, 0.76, 1.14, 0.49, 0.14, -1.17, -0.46, 0.35, 0.85, + 2.17, 4.74, -2.02, 3.29, -4.59, -1.09, -2.04, 2.6 , -1.93, -1.05, -0.05, -1.57, 0.34, -1.73, -0.59, -0.88, -0.79, -1.49, -1.23, -2.2 , -1.9 , -1.02, -1.75, 1.32, 0.78, 2.53, 1.26, 1.44, -0.35, -1.26, -1.81, -0.49, 0.63, 1.27, 0.95, 0.63, -1.03, -0.83, 0.15, 0.55, + 2.5 , 5.01, -2.89, 2.97, -4.66, -1.86, -2.03, 2.68, -2.41, -0.83, -0.19, -1.84, 0.09, -1.78, -0.75, -0.84, -1.12, -1.59, -1.25, -2.08, -2.03, -1.25, -1.51, 0.94, 0.39, 2.29, 0.69, 1.33, -0.28, -1.23, -1.4 , -0.64, 0.65, 1.08, 0.5 , 0.54, -0.74, -0.55, 0.43, 0.41, + 1.96, 5.42, -2.5 , 3. , -4.28, -1.9 , -2.37, 3. , -2.23, -0.47, -0.05, -1.68, 0.48, -1.57, -0.35, -0.86, -1.21, -1.26, -1.36, -1.98, -1.9 , -1.3 , -1.44, 0.83, 0.27, 2.21, 0.83, 1.54, 0.18, -1.56, -1.28, -0.52, 0.31, 1.44, 0.63, 0.36, -1.07, -0.45, 0.22, 0.68, + 0.91, 5.38, -2.65, 3.5 , -4.2 , -1.86, -2.06, 2.66, -2. , -0.69, -0.14, -2. , 0.42, -1.94, -0.44, -0.56, -1.19, -1.42, -1.29, -2.33, -1.49, -1.46, -1.28, 0.72, 0.52, 2.33, 1.39, 1.19, -0.15, -1.89, -1.54, -0.86, 0.83, 1.25, 0.96, 0.16, -0.75, -0.92, 0.6 , 0.67, + 1.25, 5.63, -3.01, 3.37, -4.19, -1.91, -2.51, 3.2 , -2.1 , -0.5 , 0.24, -1.75, 0.7 , -2.27, -0.58, -0.76, -1.67, -1.58, -1.44, -2.11, -1.6 , -1.49, -0.86, 0.75, 0.82, 2.09, 1.5 , 1.16, 0.43, -1.53, -1.36, -0.76, 0.9 , 0.73, 0.54, -0.06, -0.67, -0.96, 0.39, 0.7 , + 1.07, 5.51, -2.91, 3.37, -4.23, -1.75, -2.61, 3.29, -1.91, -0.4 , -0.05, -1.49, 0.19, -1.5 , -0.79, -0.89, -1.47, -1.43, -1.15, -2.14, -1.65, -0.98, -0.99, 0.56, 1.1 , 2.07, 1.2 , 1.17, 0.21, -1.24, -1.72, -0.46, 1.15, 0.89, 0.77, -0.42, -0.71, -0.37, 0.38, 0.87, + 0.04, 5.62, -2.34, 3.14, -3.98, -1.39, -2.56, 2.89, -1.67, -0.12, -0.36, -1. 
, -0.02, -1.47, -0.59, -1.2 , -1.39, -1.31, -0.91, -2.12, -1.51, -0.73, -0.35, 0.15, 1.21, 2.52, 0.94, 1.14, -0.3 , -1.2 , -1.4 , -0.63, 1.07, 1.19, 0.77, -0.44, -0.57, -0.25, 0.32, 1.09, + -0.39, 5.5 , -2.03, 3.28, -4.07, -1.63, -2.96, 2.6 , -2.11, -0.14, -0.33, -0.94, 0.45, -1.84, -0.8 , -1.26, -1.25, -1.62, -0.85, -2. , -1.81, -0.82, -0.46, 0.43, 1.48, 2.59, 0.65, 0.89, -0.7 , -0.97, -1.33, -0.19, 1.27, 0.85, 0.6 , -1.06, -0.68, 0.04, 0.94, 0.7 , + -1.24, 5.17, -2.24, 2.95, -4.22, -2.11, -3.4 , 3.09, -2.63, -0.24, -0.24, -1.12, -0.14, -2.24, -1.01, -0.96, -1.37, -2.17, -1.25, -1.8 , -2.02, -0.91, -0.49, 0.25, 1.23, 2.12, 0.44, 1.25, -0.76, -1.08, -1.61, -0.18, 1.03, 0.57, 0.15, -0.96, -0.2 , -0.02, 0.77, 0.52, + -3.35, 5.05, -2.34, 2.27, -3.37, -2.33, -3.07, 3.44, -2.09, -0.32, -0.52, -1.26, 0.46, -2.71, -1.45, -1.35, -1.97, -2.73, -1.42, -2.33, -1.94, -0.6 , -0.28, 0.55, 1.75, 2.77, 0.53, 1.33, -1.12, -1.23, -1.59, -0.23, 0.68, 0.44, -0.12, -0.9 , -0.27, -0.27, 1.24, 0.53, + -3.5 , 6.71, -1.67, 2.93, -2.73, -2.89, -2.96, 3.07, -1.77, -0.12, -0.86, -0.33, 0.95, -2.41, -1.35, -1.14, -1.6 , -1.74, -0.98, -1.92, -1.45, -0.34, 0.25, 0.47, 1.15, 2.29, 0.37, 1.1 , -0.95, -0.76, -0.94, 0.13, 0.45, 1.02, -0.11, -0.99, -0.49, -0.45, 1.16, 0.74, + -5.66, 7.54, -1.41, 3.07, -1.83, -2.61, -2.84, 2.33, -1.98, 0.15, -1.03, -0.41, 0.95, -2.4 , -0.69, -0.76, -1.62, -1.87, -0.9 , -1.47, -1.33, -0.57, 0.7 , 0.67, 1.35, 2.02, 0.86, 0.38, -1.3 , -0.87, -0.88, 0.37, 0.59, 1.28, -0.11, -0.95, -0.18, 0.3 , 0.85, 0.53, + -7.53, 7.39, -0.67, 3.04, -1.46, -2.83, -3.09, 1.52, -1.42, 0.02, -1.26, -0.99, 0.93, -2.49, -0.16, -0.29, -1.47, -2.18, -1.73, -1.07, -1.36, -0.22, 0.77, 0.43, 1.44, 1.94, 1.16, 0.14, -1.47, -0.99, -0.59, 0.54, 1.07, 1.21, -0.4 , -0.64, -0.01, 0.32, 0.73, 0.15, +-10.08, 7.08, -0.71, 2.43, -1.03, -2.69, -2.64, 0.03, -1.3 , -0.27, -1.21, -1.01, 1.28, -2.91, -0.93, -0.78, -2.06, -2.51, -2.01, -1.02, -2.41, -0.37, 1.32, 1.43, 2.13, 1.45, 1.08, -0.36, -1.63, -0.95, -0.92, 
0.81, 1.11, 1.02, -0.85, -1.14, -0.32, 0.55, 0.41, 0.1 , +-11.46, 7.97, -0.12, 2.17, -0.8 , -2.74, -2.68, -0.41, -1.98, -0.41, -1.52, -0.68, 0.81, -2.63, -0.82, -0.73, -2.02, -2.58, -2.03, -0.69, -2.06, -0.19, 0.66, 1.55, 2.28, 0.61, 0.56, 0.07, -1.65, -1.21, -0.27, 0.84, 1.13, 0.58, -1. , -0.52, 0.06, 0.01, 0.64, 0.18, +-14.22, 8.09, 1.04, 2.73, -1.06, -2.99, -2.39, -1.12, -1.26, -0.38, -1.56, -0.65, 0.9 , -1.64, -1.29, -0.95, -1.68, -2.26, -2.29, -0.47, -1.26, 0.06, 0.66, 1.02, 2.14, 0.07, 0.71, 0.29, -1.62, -0.62, 0.12, 1.17, 0.21, 0.02, 0.29, -0.32, -0.22, -0.01, 0.55, -0.24, +-18.32, 8.04, 1.98, 2.64, -0.5 , -3.72, -3.02, -0.6 , 0.05, -0.55, -1.85, -0.28, 0.59, -1.54, -1.64, -0.8 , -1.73, -2.41, -2.67, -0.17, -1.2 , 0.58, 0.99, 0.2 , 2.5 , 0.4 , 0.23, 0.47, -1.37, -0.28, -0.37, 1. , 0.3 , -0.58, 0.2 , -0.45, 0.39, 0.18, -0.09, -0.23, +-18.67, 6.73, 2.71, 2.7 , 0.38, -3.07, -2.17, -1.72, -0.77, -1.27, -2.74, -0.34, 0.44, -1.43, -1.75, -0.69, -1.45, -1.71, -2.99, 0.29, -0.85, 1.5 , 1.15, -0.35, 1.58, 0.56, -0.05, -0.29, -0.2 , -0.35, -0.13, 0.1 , -0.1 , -0.71, 0.59, -0.21, 0.04, 0.42, -0.65, 0.38, +-18.72, 4.44, 3.88, 1.9 , 1.14, -3.24, -0.78, -1.4 , 0.31, -0.61, -2.81, -0.9 , 0.09, -1.28, -1.59, -0.75, -1.69, -2.11, -2.89, 0.23, 0.66, 1.43, 1.24, -0.74, 0.47, -0.31, -0.35, 0.14, 0.46, -0.71, 0.56, 0.6 , -0.38, -0.13, 0.03, -0.57, 0.33, 0.66, -0.37, -0.27, +-17.44, 0.95, 4.47, 1.02, 0.62, -3.11, -0.17, -0.38, 0.77, -0.7 , -1.87, -1.16, 0.32, -0.48, -0.65, -0.74, -1.64, -2. , -2.01, -0.53, 0.82, 1.41, 1.24, -0.05, 0.4 , -0.73, -0.52, 0.6 , 1.09, -1.05, 0.55, 0.72, -1.46, -0.09, 0.01, -0.98, 0.15, 0.72, -0. 
, -0.19, +-17.43, -2.28, 4.38, -0.31, -0.37, -3.6 , -0.38, -0.42, 0.61, -0.68, -0.74, -1.15, -0.21, -0.74, -0.42, -0.9 , -1.24, -1.88, -1.27, 0.37, 0.67, 1.26, 0.97, 1.21, 0.25, -1.04, -0.91, 0.12, 1.19, -1.18, 0.68, 0.57, -1.06, -0.21, 0.44, -0.21, 0.34, 0.17, -0.25, -0.26, +-19.1 , -3.72, 4.63, -0.79, -1.24, -4.14, -0.5 , -0.2 , 0.72, -0.82, -0.43, -1.13, -0.84, -1.15, 0.55, -0.77, -1.47, -0.72, -0.74, 0.79, 0.35, 0.78, 0.76, 0.26, -0.44, -0.96, -0.28, 0.14, 0.47, -0.84, -0.28, -0.38, -0.19, 0.08, 0.21, 0.21, 0.37, -0.29, -0.01, 0.07, +-20.31, -5.44, 4.49, -0.96, -1.03, -4.17, -1.61, -0.61, -0.1 , -1.47, -0.29, 0.02, -0.13, -0.46, 0.04, -1.26, -1.09, -0.6 , -0.61, 0.6 , -0.54, -0.26, -0.19, 0.28, 0.29, -0.85, -0.5 , -0.17, 1.03, -0.33, -0.33, 0.12, 0.42, 0.1 , -0.48, -0.17, -0.35, 0.57, 0.22, -0.03, +-19.91, -6.34, 4.5 , -2.06, -0.3 , -3.67, -1.05, -1.14, 0.06, -0.68, -0.36, 0.71, 0.12, -0.61, -0.71, -1.84, -0.42, -0.63, -0.68, 0.49, 0.84, 0.54, 0.04, 0.13, -0.04, -0.76, 0.2 , -0.89, 0.42, -0.31, -0.89, -0.03, -0.39, -0.09, -0.92, -0.41, -0.26, 0.71, 0.79, 0.02, +-17.95, -6.59, 4.74, -2.04, 0.27, -2.92, -0.8 , -1.45, -0.08, -0.98, -0.09, 0.88, -0.75, -0.64, -0.66, -1.12, -0.82, -1.15, -1.1 , -0.64, 0.82, 0.84, -0.17, -0.24, -0.38, 0.17, 0.64, -0.39, 0.43, -0.01, -0.79, -0.04, 0.11, -0.01, -0.87, -0.45, -0.53, 0.5 , 0.25, -0.74, +-18.89, -7.41, 4.77, -3.02, 0.42, -2.94, -0.14, -0.72, -0.7 , -2.04, -0.74, 0.59, -0.09, 0.08, -0.89, -1.64, -0.94, -1.08, -0.16, -0.44, 1.24, 0.59, 0.63, 0.21, -0.44, -0.16, 0.53, 0.61, 0.79, 0.32, -0.54, 0.36, -0.22, -0.22, -0.78, -0.16, 0.21, 0.42, 0.21, -0.34, +-20.96, -7.89, 4.76, -3.29, -0.23, -2.75, 0.29, -0.91, -0.25, -1.55, -0.07, 0.08, -0.36, -0. 
, -1.35, -1.89, -1.29, -1.01, -0.62, -0.76, 0.4 , -0.12, 0.4 , 0.34, -0.87, -1.51, -0.22, 0.5 , -0.14, 0.01, 0.06, 0.22, -0.68, -0.26, -0.79, -0.43, -0.14, -0.03, 0.49, -0.24, +-20.41, -6.75, 4.22, -4.34, -0.04, -2.71, -0.17, -0.43, 0.56, -1.04, 0.15, 0.27, 0.18, -1.06, -1.2 , -2.3 , -1.18, -1.16, -0.8 , -0.31, 0.34, 0.17, 0.69, -0.22, -1.05, -1.04, -0.81, 0.57, 0.26, 0.06, -0.46, 0.23, -0.41, -0.32, -0.53, 0.17, 0.09, -0.53, 0.71, -0.07, +-20.87, -6.75, 3.79, -4.44, 2.23, -1.63, -0.05, -0.24, 0.08, -2.45, -0.43, -0.62, -0.2 , -0.83, -1.16, -2.31, -0.49, -0.46, -1.13, 0.19, 0.45, 0.13, -0.03, -0.11, -0.54, -0.79, -0.64, 0.65, 0.3 , -0.22, -0.54, 0.05, -0.58, 0.18, -0.2 , 0.03, -0.24, -0.37, 0.19, 0.28, +-19.59, -6.88, 3.56, -4.03, 3.2 , -1.26, -0. , -0.54, 0.06, -1.68, -0.99, -0.25, -0.69, -0.92, -1.18, -2.7 , -0.74, 0.39, 0.23, 0.28, -0.11, -0.01, 0.08, 0.18, 0.16, -0.84, 0.14, 0.92, 0.77, 0.58, -0.55, -0.43, 0.07, 0.51, -0.66, -0.54, -0.64, -0.03, 0.27, 0.28, +-18.27, -6.64, 3.51, -3.95, 2.51, -1.73, -1.23, -2.08, 0.02, -1.59, -0.8 , 0.3 , -0.59, -1.79, -1.63, -2.8 , -2.1 , 0.39, 0.53, -0.37, 0.09, 0.34, -0.23, -0. , -0.15, -0.95, 0.55, 0.95, 0.95, 0.27, -0.87, -0.48, 0.21, -0.11, -0.78, -0.01, 0.37, -0.34, 0.15, -0.11, +-16.11, -6.92, 3.46, -2.78, 3.18, -1.97, -0.46, -2.28, -0.05, 0.09, 0.64, 0.92, -0.46, -1.25, -1.24, -2.5 , -2.06, -1.07, 0.92, -0.8 , 0.15, 0.03, 0.02, -0.12, 0.01, -0.73, 0.41, 0.51, 0.45, 0.23, -0.63, -0.35, 0.76, 0.13, -0.74, -0.41, -0.19, 0.02, 0.82, 0.28, +-15.23, -7.11, 3.65, -3.43, 2.29, -2.05, -0.62, -2.5 , -0.59, 0.36, 0.36, 0.46, -0.67, -2.22, -1.72, -2.1 , -2.71, -1.96, 0.79, -0.53, -0.07, -0.68, 0.27, -0.35, -0.1 , -0.26, 0.12, 0.61, -0.1 , 0.06, -0.19, -0.18, 0.46, 0.15, -0.84, -0.52, -0.06, 0.56, 0.51, -0.38, +-14.86, -5.97, 4.23, -4. 
, 1.81, -1.25, -0.08, -3.03, 0.09, 0.9 , 0.76, 0.09, -0.57, -1.51, -1.79, -2.4 , -2.68, -2.2 , 0.48, -0.14, 0.45, 0.19, 1.13, 0.09, -0.44, -0.92, 0.03, 0.6 , -0.28, -0.02, -0.25, -0.36, 0.73, 0.79, -0.1 , -0.64, -0.32, 0.11, -0.36, -0.39, +-14.71, -5.84, 4.19, -4.63, 1.29, -0.86, 0.55, -2.79, -0.13, 1.1 , 0.44, -0.56, -0.01, -1.06, -1.68, -2.74, -2.09, -2.33, 0.23, -0.19, 0.75, 1.04, 1.38, 0.42, 0.06, -0.78, -0.32, 0.52, -0.23, 0.16, -0.57, -0.09, 0.86, 0.54, -0.03, -0.51, -0.5 , 0.28, -0.06, 0.12, +-14.96, -5.79, 4.54, -3.81, 1.32, -0.61, 0.67, -2.61, -0.79, 0.71, 0.31, -0.75, -0.44, -0.73, -1.63, -2.48, -1.74, -1.48, 0.95, -0.69, 0.83, 0.47, 0.13, 0.58, 0.04, -0.08, -0.56, 0.39, 0.15, -0.27, -1.08, -0.16, 0.75, -0.1 , -0.85, -0.55, -0.62, -0.2 , 0.11, 0.82, +-15.02, -5.15, 4.73, -3.72, 1.77, -1.1 , 0.01, -1.68, -0.76, 0.58, 0.56, -0.33, -1.07, -1.15, -0.86, -1.73, -2.08, -1.56, 1.02, -0.4 , 0.24, 0.26, -0.08, 0.04, -1.12, -0.39, -0.94, 0.5 , 0.26, 0.06, -0.36, -0.23, 0.73, 0.45, -0.89, -0.47, -0.39, 0.02, -0.24, 0.54, +-15.79, -6. , 3.39, -4.75, 1.64, -1.14, 0.21, -1.07, -0.32, -0.67, 0.19, -0.12, -1.75, -1.41, -0.91, -0.93, -1.23, -1.53, 0.97, -0.69, 0.18, 0.78, 0.61, 0.64, -0.97, -0.15, 0.02, 0.13, 0.14, -0.29, -0.47, -0.61, -0.36, 0.7 , -0.99, -0.53, -0.01, 0.29, 0.25, 0.63, +-16.02, -5.3 , 4.41, -4.48, 1.44, -1.95, -0.11, -0.64, -0.01, -0.24, -0.04, -0.41, -1.5 , -1.55, -0.88, -0.85, -1.31, -1.79, 0.28, -1.1 , -0.41, 0.95, 0.54, 0.23, -0.44, -0.2 , 0.22, 1.45, 0.48, -0.5 , 0.03, 0.24, 0.14, 0.82, -0.77, -0.65, 0.11, -0.16, -0.43, -0.14, +-13.64, -4.42, 5.16, -3.28, 0.83, -2.48, 0.53, -1.11, -0.25, -0.16, -0.24, 0.04, -1.2 , -1.94, -1.9 , -1.09, -0.92, -2.27, 0.13, -0.68, -0.3 , 1.03, 0.03, -0.12, -1.46, -1.02, -0. 
, 1.1 , 0.15, -0.57, -0.14, -0.04, 0.16, 0.47, -0.05, -0.35, -0.37, -0.38, 0.03, -0.69, +-13.44, -3.74, 4.8 , -2.37, 0.64, -2.17, 0.79, -1.4 , -1.52, -1.36, -0.08, -0.13, -0.84, -1.36, -0.79, -0.74, 0.23, -0.53, 0.21, -0.52, -0.53, 0.62, -0.22, 0.38, -1.06, -1.08, 0.46, 0.35, -0.24, -0.68, -0.2 , 0.04, -0.13, -0.3 , -0.15, -0.45, -0.57, -0.8 , 0.17, -0.56, +-13.07, -3.63, 4.09, -1.67, 0.98, -2.32, 0.12, -0.82, -2.04, -2.17, -0.11, 0.35, -0.14, -1.52, -0.59, -0.78, 0.22, 0.36, 0.3 , -0.87, -0.18, -0.33, -0.28, -0.71, -1.04, -0.59, 0.77, 0.24, 0.3 , -0.78, -0.46, 0.61, -0.22, 0.14, 0.62, -0.4 , -0.86, -0.72, 0.31, 0.43, +-10.39, -2.84, 3.3 , -2.89, -0.34, -4. , -0.16, -1.32, -1.4 , -1.64, -0.29, -0.45, 0.24, -1.18, -0.26, 0.21, -0.04, 0.27, 0.38, -1.44, 0.09, -0.16, -1.15, -1.17, -1.51, -1.67, 0.92, 0.07, 0.4 , -0.21, -0.07, 0.43, -0.57, 0.15, 0.62, -0.72, -0.69, -0.64, 0.1 , 0.8 , +-10.11, -2.13, 2.83, -2.91, -0.56, -4.35, -0.99, -2.07, -1.65, -0.77, 0.66, -0. , 0.15, -0.24, -0.7 , 0.15, -0.41, -0.18, -0.63, -1.76, -0.2 , -0.17, -0.81, -1.14, -1.3 , -1.64, 0.22, 0.34, -0.08, 0.12, -0. 
, 0.22, -0.38, -0.51, -0.39, -0.63, -0.13, -0.3 , -0.28, -0.36, +-12.57, -2.4 , 2.26, -3.4 , -0.74, -3.7 , -1.19, -2.37, -0.45, -0.4 , 0.76, 1.08, -0.12, -0.76, -0.98, -0.5 , -0.66, -0.37, -0.9 , -2.11, -0.91, -1.54, -1.43, -0.88, -1.01, -1.25, 0.15, -0.02, -0.06, 0.22, -0.16, 0.23, -0.5 , -0.5 , -0.53, -0.53, -0.1 , -0.07, -0.21, -0.8 , +-13.4 , -1.89, 2.33, -1.94, 0.55, -2.88, 0.77, -1.89, -1.43, -1.5 , 0.94, 1.13, -0.23, 0.38, -0.41, 0.36, -0.35, -0.14, -0.23, -0.83, -0.87, -1.2 , -1.18, -1.06, -1.02, -0.37, 0.13, 0.55, 0.11, 0.71, -0.17, -0.76, -0.52, -0.35, -0.33, 0.06, 0.69, 0.33, -0.22, -0.14, +-16.04, -1.86, 1.81, -1.42, 0.03, -3.21, 0.67, -2.4 , -1.24, -1.65, 0.73, 1.22, -0.36, 0.58, -0.7 , 0.71, -0.47, -0.02, 0.68, 0.1 , -0.4 , -0.86, -0.96, -0.9 , -0.28, -0.08, -0.38, 0.34, 0.17, 0.51, 0.59, -0.66, -0.91, -0.76, -0.35, -0.22, 0.1 , 0.03, -0.44, 0.38, +-18.52, -1.94, 0.27, -2.45, -0.53, -3.62, -0.66, -3.1 , -1.92, -0.73, 0.61, 1.64, -0.02, 0.93, -0.89, 0.78, -1.01, -0.23, 1.55, 0.06, -0.29, -0.41, -0.39, -0.01, -0.46, -0.17, -0.14, 0.3 , 0.73, 0.57, 0.15, -0.6 , -0.54, -0.15, -0.7 , 0.04, 0.07, 0.17, -0.2 , -0.24, +-20.67, -1.22, -0.67, -2.04, -0.19, -2.09, -0.5 , -3.26, -1.77, 0.46, -0.07, 0.11, -0.63, 0.36, -1.66, 0.91, 0.16, 0.31, 1.89, -1.35, -1.09, -0.3 , -0.9 , -0.35, -0.81, -0.28, 0.1 , 0.22, 0.92, 0.07, 0.3 , -0.34, -0.59, -0.39, -0.35, 0.41, -0.09, 0.47, -0.25, -0.74, +-22.56, 0.22, -0.31, -0.99, -0.92, -1.9 , 0.24, -3.33, -1.97, 0.93, 0.63, 0.2 , 0.27, 1.4 , -1.4 , 1.02, -0.38, -0.69, 0.91, -1.02, -1.11, -0.13, -0.49, -0.58, -1.14, -0.3 , -0.01, -0.12, 0.64, -0.09, 0.23, -0.55, -0.44, 0.05, -0.49, 0.21, -0.3 , 0.25, -0.5 , -0.57, +-25.12, 0.38, -0.51, -0.74, -1.52, -1.66, -0.35, -3.21, -3.29, -0.43, -0.48, -0.52, 0.77, 0.66, -0.93, 1.47, -1.15, -0.98, 0.71, -0.68, -0.72, -0.78, -0.01, -0.27, -1. , -0.62, -0.34, -0.16, 0.72, 0.68, 0.88, -0. 
, 0.33, -0.45, -1.02, 0.38, -0.3 , 0.74, 0.18, 0.05, +-26.65, -0.22, 0.48, 0.36, -2.25, -0.59, 0.8 , -2.28, -3.23, -0.71, -0.15, -0.88, 0.76, -0.42, -1.41, 0.79, -1.39, -0.76, 0.22, -0.17, 0.16, 0.01, -0.97, 0.06, -0.39, -0.98, -1.15, -0.28, 0.68, 0.1 , 0.55, 0.16, -0.41, 0.02, -0.21, 0.41, 0.22, 0.72, 0.02, -0.08, +-28.18, 0.83, 0.31, 0.86, -1.84, -0.5 , 0.37, -1.71, -2.05, -0.67, 0.17, -0.55, 0.65, -0.93, -1.48, 0.49, -0.5 , -0.5 , -0.98, -0.63, -0.24, 0.37, -0.86, 0.2 , 0.13, -0.44, -0.05, 0.26, 0.12, 0.67, 0.43, 0.1 , -0.88, -0.12, -0.65, 0.28, -0.15, -0.24, 0.47, -0.12, +-30.09, 0.59, 0.48, 0.75, -1.7 , 0.55, 1.27, -1.7 , -1.54, 0.41, -0.28, -0.81, 0.4 , -1.04, -1. , 0.39, -0.36, -0.65, -0.53, -0.78, 0.09, 0.91, -0.98, -0.55, 0.7 , 0.43, 0.4 , 0.37, -0.61, -0.25, 0.18, 0.37, -0.2 , -0.04, -0.33, 0. , -0.46, 0.06, 0.23, -0.8 , +-30.94, 1.65, 0.49, 1.19, -1.27, 0.52, 1.53, -0.65, -0.76, -0.09, -0.99, -0.97, 0.01, -1.34, -1.32, -0.17, -0.38, -0.66, -0.29, -0.18, -0.42, 0.3 , -1.34, -0.86, 0.57, -0.09, 0.02, 1.11, -0.31, -0.07, 0.37, 0.39, -0.08, -0.52, -0.75, 0.01, -0.18, 0.45, -0.01, -0.79, +-33.47, 0.84, 0.87, 1.3 , -1.76, 0.52, 1.42, -1.01, -1.5 , -0.05, -0.88, -0.41, 0.08, -0.97, -1.3 , -0.45, -0.37, -0.67, -0.22, -0.09, 0.59, 1.16, -0.96, -0.51, 0.92, -0.13, -0.58, 0.17, -1.19, 0.01, 0.14, -0.22, -0.14, -0.53, 0.18, -0.22, -0.67, 0.05, -0.47, -0.48, +-35.46, 0.35, 0.35, 0.83, -1.86, -0.47, 1.34, -0.84, -2.33, -0.59, -0.88, -0.7 , -0.35, -1.32, -1.41, -0.83, -0.66, -0.33, -0.35, 0.05, -0.81, -0.2 , -1.28, -0.9 , 0.61, -0.39, -0.6 , -0.23, -1.59, -0.03, 0.09, 0.15, 0.06, -0.77, -0.75, 0.18, -0.53, 0.12, -0.06, -0.09, +-36.29, 1.18, 1.48, 1.56, -1.13, -0.4 , 1.08, -0.94, -1.65, -0.76, -0.27, -0.33, 0.16, -0.69, -1.14, -0.71, -0.6 , -0.01, 0.25, -0.39, -0.15, 1.1 , -0.77, -0.01, 0.39, -0.25, -0.28, -0.41, -1.22, -0.31, -0.42, 0.33, 0.57, -0.59, -1.09, 0.35, -0.57, 0.6 , 0.42, -0.22, +-36.83, 1.23, 1.13, 1.07, -1.34, -0.44, 0.27, -1.2 , -1.88, -1.4 , -0.37, 
-0.96, -0.22, -0.38, -0.61, -0.77, 0.06, 0.13, -0.47, -0.67, -0.36, 0.71, -0.18, 1.3 , 1.34, 0.43, -0.3 , -0.4 , -0.62, -0.4 , -0.16, 0.55, 0.5 , -0.01, -0.77, 0.08, -0.96, 0.6 , 0.53, -0.42, +-34.23, 1.7 , 1.22, -0.28, -1.18, 0.2 , -0.24, -1.68, -1.38, -1.42, -0.26, 0.1 , -1.29, 0.21, -1.14, -1.08, 0.33, 0.47, -0.98, -0.66, -1.31, 0.36, -0.03, 0.49, 0.99, 0.5 , -0.25, 0.28, -0.79, -0.05, 0.31, 0.24, 0.83, 0.28, -0.8 , 0.13, -0.33, 0.58, -0.09, -0.58, +-33.07, 2. , 2.13, -0.58, -2.29, -0.25, 0.09, -2.28, -1.65, -1.1 , 0.45, 0.89, -1.05, -0.06, -1.62, -0.6 , 0.56, 0.61, -0.87, -0.88, -1.48, 1.28, 0.9 , 0.52, 0.93, -0.19, -0.55, 0.97, -1.18, -0.65, -0.15, 0.07, 0.75, 0.04, -0.64, 0.61, -0.44, 0.19, -0.01, -0.25, +-33.92, 2.05, 1.2 , -0.36, -1.79, -1.27, -1.4 , -2.7 , -1.18, -0.96, 0.12, 0.06, -1.16, -0.74, -1.71, 0.51, 0.91, 0.74, -0.29, -0.97, -1.16, 1.1 , 1.73, 0.45, 0.49, -0.88, -0.93, 0.82, 0.07, -0.61, -0.66, -0.33, 0.98, 0.47, -0.26, -0.08, -0.85, 0.32, 0.67, 0.17, +-33.89, 1.78, 0.67, -1.01, -1. , -1.34, -1.22, -1.68, -0.82, -1.17, 0.86, 0.44, -2.04, -1.37, -1.3 , 0.62, 0.97, 0.15, -0.32, 0.03, -1.13, 0.1 , 0.7 , 0.99, 0.49, -0.34, -0.83, 0.05, 0.26, -0.5 , -0.14, -0.09, 0.97, 0.84, -0.25, -0.23, -0.71, -0.49, 0.02, -0.49, +-32.95, 1.04, -0.01, -1.73, -1.93, -1.48, -0.84, -1.4 , -1.31, -0.85, 1.2 , -0.73, -1.98, -2.16, -1.91, 0.03, 0.68, -1.36, -1.24, 0.11, -1.92, -0.7 , -0.38, 0.84, 0.77, -0.1 , -0.87, -0.1 , 0.24, -0.08, -0.12, -0.51, 0.07, -0.03, 0.65, -0.35, -1. , -0.12, 0.81, -0.35, +-31.72, 1.3 , -0.02, -0.05, -0.17, 0. , 0.83, -1.11, -0.98, -1.09, 0.99, -1.65, -1.6 , -1.46, -2.18, 0.15, 1.19, -0.99, -0.73, 0.66, -1.97, -0.61, -0.63, 0.71, 1.08, 0.75, -0.78, 1.35, 0.51, 0.45, -0.73, -1.02, 0.24, 0.06, 1.04, -0.55, -1.43, -0.34, 1.23, 0.42, +-31.71, 2.01, -0.12, 0.46, -0.43, -0.89, 0.54, -1.01, -0.23, -0.14, -0.48, -1.97, -1.79, -2.49, -2.97, -0.1 , 1.14, -1.34, -0.64, 0.4 , -2.33, -0.91, -0.35, 0.89, 1.58, 0.26, -1. 
, 0.34, 0.51, -0.04, -1.09, 0.04, 0.24, 0.54, 0.8 , -0.51, -1.01, -0.13, 0.17, 0.3 +}; +// clang-format on + +#endif // MICRO_EXAMPLES_DATA_KWS_H_ diff --git a/micro/examples/classifier/data/mnist.h b/micro/examples/classifier/data/mnist.h new file mode 100644 index 0000000000000000000000000000000000000000..1c0e47d2395af8401a6b3ffb71358c5a00e2c960 --- /dev/null +++ b/micro/examples/classifier/data/mnist.h @@ -0,0 +1,51 @@ +// Copyright 2020 The MACE Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef MICRO_EXAMPLES_DATA_MNIST_H_ +#define MICRO_EXAMPLES_DATA_MNIST_H_ + +// clang-format off +static float data_mnist_4[28*28] = { +0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0., +0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0., +0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0., +0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0., +0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0., +0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0.4,0.1,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0.8,0.4,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0., +0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0.2,1. ,0.2,0. ,0. ,0. ,0. ,0. ,0. ,0.2,1. ,0.5,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0., +0. ,0. ,0. ,0. ,0. 
,0. ,0. ,0. ,0.6,1. ,0.2,0. ,0. ,0. ,0. ,0. ,0. ,0.6,1. ,0.9,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0., +0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0.7,1. ,0.2,0. ,0. ,0. ,0. ,0. ,0.1,0.8,1. ,0.6,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0., +0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,1. ,1. ,0.2,0. ,0. ,0. ,0. ,0. ,0.1,1. ,1. ,0.5,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0., +0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,1. ,1. ,0.2,0. ,0. ,0. ,0. ,0. ,0.2,1. ,0.7,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0., +0. ,0. ,0. ,0. ,0. ,0. ,0. ,0.3,1. ,0.8,0.1,0. ,0. ,0. ,0. ,0. ,0.6,1. ,0.2,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0., +0. ,0. ,0. ,0. ,0. ,0. ,0. ,0.4,1. ,0.8,0. ,0. ,0. ,0. ,0. ,0. ,0.7,1. ,0.2,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0., +0. ,0. ,0. ,0. ,0. ,0. ,0. ,0.4,1. ,0.8,0.1,0. ,0. ,0. ,0. ,0.2,1. ,1. ,0.2,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0., +0. ,0. ,0. ,0. ,0. ,0. ,0. ,0.3,1. ,1. ,0.9,0.7,0.5,0.6,0.2,0.6,1. ,1. ,0.6,0.2,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0., +0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0.5,1. ,1. ,1. ,1. ,1. ,1. ,1. ,1. ,1. ,1. ,0.8,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0., +0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0.1,0.5,0.5,0.5,0.5,0.5,0.9,1. ,0.6,0.1,0.1,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0., +0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0.9,0.9,0.1,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0., +0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0.2,1. ,0.9,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0., +0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0.2,0.9,1. ,0.3,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0., +0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0.3,1. ,0.9,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0., +0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0.8,1. ,0.2,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0., +0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0.1,0.8,1. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0., +0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0.2,1. ,1. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0., +0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0.1,0.9,1. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. 
,0. ,0. ,0. ,0., +0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0., +0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0., +0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. +}; +// clang-format on + +#endif // MICRO_EXAMPLES_DATA_MNIST_H_ diff --git a/micro/examples/classifier/main.cc b/micro/examples/classifier/main.cc new file mode 100644 index 0000000000000000000000000000000000000000..b76cc0b038c11f1b947c9afc31f97a1d57df6514 --- /dev/null +++ b/micro/examples/classifier/main.cc @@ -0,0 +1,52 @@ +// Copyright 2020 The MACE Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include + +#include "data.h" +#include "micro.h" + +namespace micro { +namespace MICRO_MODEL_NAME { + +MaceStatus GetMicroEngineSingleton(MaceMicroEngine **engine); + +} +} // namespace micro + +int main() { + micro::MaceMicroEngine *micro_engine = NULL; + micro::MICRO_MODEL_NAME::GetMicroEngineSingleton(µ_engine); + + micro_engine->RegisterInputData(0, MICRO_DATA_NAME::input, + MICRO_DATA_NAME::input_dims); + micro_engine->Run(); + + float *output_buffer = NULL; + const int32_t *output_dims = NULL; + uint32_t dim_size = 0; + micro_engine->GetOutputData(0, reinterpret_cast(&output_buffer), + &output_dims, &dim_size); + + int32_t output_total_size = 1; + for (int32_t i = 0; i < dim_size; ++i) { + output_total_size *= output_dims[i]; + } + + for (int32_t i = 0; i < output_total_size; ++i) { + printf("%d: %f\n", i, output_buffer[i]); + } + + return 0; +} diff --git a/micro/examples/classifier/mbed-os.lib b/micro/examples/classifier/mbed-os.lib new file mode 100644 index 0000000000000000000000000000000000000000..e36da5dbac97a99819c15e14cb59b9c03860034d --- /dev/null +++ b/micro/examples/classifier/mbed-os.lib @@ -0,0 +1 @@ +https://github.com/ARMmbed/mbed-os/#0db72d0cf26539016efbe38f80d6f2cb7a3d4414 diff --git a/micro/framework/operator.cc b/micro/framework/operator.cc index 3065484ce0a3512d9cce6f7666f26a352e9f9961..9ecdad69fd536d7ebf786d3fd04e528768bf16fc 100644 --- a/micro/framework/operator.cc +++ b/micro/framework/operator.cc @@ -74,6 +74,8 @@ uint32_t Operator::GetInputSize() { } const void *Operator::DoGetInputData(uint32_t idx) { + MACE_ASSERT(idx < GetInputSize()); + const void *data = NULL; const OpIOInfo *input_info = op_context_->input_info(idx); const uint32_t op_def_idx = input_info->op_def_idx_; @@ -94,6 +96,8 @@ const void *Operator::DoGetInputData(uint32_t idx) { } uint32_t Operator::GetInputShapeDimSize(uint32_t idx) { + MACE_ASSERT(idx < GetInputSize()); + uint32_t dim_size = 0; const OpIOInfo *input_info = op_context_->input_info(idx); const 
uint32_t op_def_idx = input_info->op_def_idx_; @@ -115,6 +119,8 @@ uint32_t Operator::GetInputShapeDimSize(uint32_t idx) { } const int32_t *Operator::GetInputShapeDims(uint32_t idx) { + MACE_ASSERT(idx < GetInputSize()); + const int32_t *dims = NULL; const OpIOInfo *input_info = op_context_->input_info(idx); const uint32_t op_def_idx = input_info->op_def_idx_; @@ -138,14 +144,20 @@ uint32_t Operator::GetOutputSize() { } DataType Operator::GetOutputDataType(uint32_t idx) { + MACE_ASSERT(idx < GetOutputSize()); + return op_def_->output_type(idx); } void *Operator::DoGetOutputData(uint32_t idx) { + MACE_ASSERT(idx < GetOutputSize()); + return engine_config_->tensor_mem_ + op_def_->mem_offset(idx); } uint32_t Operator::GetOutputShapeDimSize(uint32_t idx) { + MACE_ASSERT(idx < GetOutputSize()); + uint32_t dim_size = 0; model::OutputShape *output_shape = const_cast(op_context_->output_resize_shape(idx)); @@ -156,6 +168,8 @@ uint32_t Operator::GetOutputShapeDimSize(uint32_t idx) { } const int32_t *Operator::GetOutputShapeDims(uint32_t idx) { + MACE_ASSERT(idx < GetOutputSize()); + const int32_t *dims = NULL; model::OutputShape *output_shape = const_cast(op_context_->output_resize_shape(idx)); @@ -167,6 +181,8 @@ const int32_t *Operator::GetOutputShapeDims(uint32_t idx) { MaceStatus Operator::ResizeOutputShape(uint32_t idx, uint32_t dim_size, const int32_t *dims) { + MACE_ASSERT(idx < GetOutputSize()); + model::OutputShape *output_shape = const_cast(op_context_->output_resize_shape(idx)); #ifndef MACE_MICRO_NDEBUG @@ -201,6 +217,44 @@ MaceStatus Operator::ResizeOutputShape(uint32_t idx, uint32_t dim_size, return MACE_SUCCESS; } +QuantizeInfo Operator::GetInputQuantizeInfo(uint32_t idx) { + MACE_ASSERT(idx < GetInputSize()); + + QuantizeInfo quantize_info = {0.0f, 0}; + const OpIOInfo *input_info = op_context_->input_info(idx); + const uint32_t op_def_idx = input_info->op_def_idx_; + if (kIdxConstTensor == op_def_idx) { + const model::ConstTensor *const_tensor = + 
engine_config_->net_def_->tensor(input_info->output_idx_); + quantize_info.scale = const_tensor->scale(); + quantize_info.zero = const_tensor->zero_point(); + return quantize_info; + } else if (kIdxModelInput == op_def_idx) { + MACE_ASSERT1(false, "Unexpected, the model input has no quantize info"); + } else { + const model::OperatorDef *pre_op_def = + engine_config_->net_def_->op(op_def_idx); + model::QuantizeActivationInfo quantize_activation_info = + pre_op_def->quantize_info(input_info->output_idx_); + quantize_info.scale = quantize_activation_info.scale(); + quantize_info.zero = quantize_activation_info.zero_point(); + return quantize_info; + } + + return quantize_info; +} + +QuantizeInfo Operator::GetOutputQuantizeInfo(uint32_t idx) { + MACE_ASSERT(idx < GetOutputSize()); + + QuantizeInfo quantize_info; + model::QuantizeActivationInfo quantize_activation_info = + op_def_->quantize_info(idx); + quantize_info.scale = quantize_activation_info.scale(); + quantize_info.zero = quantize_activation_info.zero_point(); + return quantize_info; +} + #ifndef MACE_DEFINE_GET_ARG_BY_NAME_FUNC #define MACE_DEFINE_GET_ARG_BY_NAME_FUNC(T, FUNC) \ template <> \ diff --git a/micro/framework/operator.h b/micro/framework/operator.h index 6269773e02a57637bc1abe3dec256bfa1056d842..a053f78f57d398a675d8364bca5a9a8cad35bba7 100644 --- a/micro/framework/operator.h +++ b/micro/framework/operator.h @@ -17,8 +17,8 @@ #include "micro/base/logging.h" #include "micro/base/types.h" -#include "micro/include/public/micro.h" #include "micro/framework/scratch_buffer.h" +#include "micro/include/public/micro.h" namespace micro { @@ -84,6 +84,9 @@ class Operator { const int32_t *input_dims); MaceStatus ReuseInputBufferForOutput(uint32_t output_idx, uint32_t input_idx); + QuantizeInfo GetInputQuantizeInfo(uint32_t idx); + QuantizeInfo GetOutputQuantizeInfo(uint32_t idx); + template const T *GetInputData(uint32_t idx) { return static_cast(DoGetInputData(idx)); @@ -101,7 +104,7 @@ class Operator { const 
model::OperatorDef *op_def_; MaceMicroEngineConfig *engine_config_; - private: + protected: OpContext *op_context_; }; diff --git a/micro/include/public/micro.h b/micro/include/public/micro.h index 6618b64b346669e8a28a79cde26d72d19e9e4e21..eee2635586550b270afee7c729ce3ea4411291cd 100644 --- a/micro/include/public/micro.h +++ b/micro/include/public/micro.h @@ -17,7 +17,9 @@ #include -#include "micro/include/port/define.h" +#ifndef NULL +#define NULL 0 +#endif namespace micro { @@ -61,7 +63,7 @@ class Graph; class Operator; } // namespace framework -struct MACE_API MaceMicroEngineConfig { +struct MaceMicroEngineConfig { model::NetDef *net_def_; const uint8_t *model_data_; framework::Graph *graph_; @@ -73,7 +75,7 @@ struct MACE_API MaceMicroEngineConfig { uint32_t scratch_buffer_size_; }; -class MACE_API MaceMicroEngine { +class MaceMicroEngine { public: MaceMicroEngine() {} ~MaceMicroEngine() {} diff --git a/micro/include/utils/bfloat16.h b/micro/include/utils/bfloat16.h index b293548d7870350091f6dccbd4bd1b5842fdfb7f..421626cbad729ed40aebbfeb02c0f4a8dafef4bf 100644 --- a/micro/include/utils/bfloat16.h +++ b/micro/include/utils/bfloat16.h @@ -32,7 +32,14 @@ union Sphinx { class BFloat16 { public: - BFloat16(); + BFloat16() {} + + explicit BFloat16(float value) { data_ = Sphinx(value).i >> 16; } + + explicit BFloat16(int value) { + data_ = Sphinx(static_cast(value)).i >> 16; + } + operator float() const { return Sphinx(static_cast(data_ << 16)).f; diff --git a/micro/model/operator_def.cc b/micro/model/operator_def.cc index 31ffa678dafad659fc724cbf586cf739caf444b8..b71d033b450efc13c000798721854abf6f27b86a 100644 --- a/micro/model/operator_def.cc +++ b/micro/model/operator_def.cc @@ -23,11 +23,22 @@ MACE_DEFINE_STRING_FUNC(OperatorDef, name, name_) MACE_DEFINE_STRING_FUNC(OperatorDef, type, type_) MACE_DEFINE_OBJECT_FUNC(OperatorDef, int32_t, device_type) MACE_DEFINE_PTR_ARRAY_FUNC(OperatorDef, Argument, arg, args_) -MACE_DEFINE_PTR_ARRAY_FUNC(OperatorDef, OutputShape, - 
output_shape, output_shapes_) +MACE_DEFINE_PTR_ARRAY_FUNC(OperatorDef, + OutputShape, + output_shape, + output_shapes_) MACE_DEFINE_ARRAY_FUNC(OperatorDef, DataType, output_type, output_types_) +MACE_DEFINE_ARRAY_FUNC(OperatorDef, + QuantizeActivationInfo, + quantize_info, + quantize_info_); // the mem_offset is the mem_id in proto file MACE_DEFINE_ARRAY_FUNC(OperatorDef, int32_t, mem_offset, mem_offsets_) +MACE_DEFINE_OBJECT_FUNC(QuantizeActivationInfo, float, scale); +MACE_DEFINE_OBJECT_FUNC(QuantizeActivationInfo, int32_t, zero_point); +MACE_DEFINE_OBJECT_FUNC(QuantizeActivationInfo, float, minval); +MACE_DEFINE_OBJECT_FUNC(QuantizeActivationInfo, float, maxval); + } // namespace model } // namespace micro diff --git a/micro/model/operator_def.h b/micro/model/operator_def.h index 92695ad90a14fd3482e9818662f53a9f4a35db0a..8ad01ebfbea3d25bc21d7a30b3e4af752041741b 100644 --- a/micro/model/operator_def.h +++ b/micro/model/operator_def.h @@ -23,6 +23,22 @@ namespace micro { namespace model { +class QuantizeActivationInfo { + public: + MACE_DEFINE_HARD_CODE_MAGIC(QuantizeActivationInfo) + + MACE_DECLARE_OBJECT_FUNC(float, scale); + MACE_DECLARE_OBJECT_FUNC(int32_t, zero_point); + MACE_DECLARE_OBJECT_FUNC(float, minval); + MACE_DECLARE_OBJECT_FUNC(float, maxval); + + private: + SerialFloat scale_; + SerialInt32 zero_point_; + SerialFloat minval_; + SerialFloat maxval_; +}; + class OperatorDef : public Serialize { public: MACE_DEFINE_HARD_CODE_MAGIC(OperatorDef) @@ -35,6 +51,7 @@ class OperatorDef : public Serialize { MACE_DECLARE_PTR_ARRAY_FUNC(Argument, arg); MACE_DECLARE_PTR_ARRAY_FUNC(OutputShape, output_shape); MACE_DECLARE_ARRAY_FUNC(DataType, output_type); + MACE_DECLARE_ARRAY_FUNC(QuantizeActivationInfo, quantize_info); // the mem_offset is the mem_id in proto file MACE_DECLARE_ARRAY_FUNC(int32_t, mem_offset); @@ -48,6 +65,7 @@ class OperatorDef : public Serialize { SerialArray args_; SerialArray output_shapes_; SerialArray output_types_; + SerialArray 
quantize_info_; SerialArray mem_offsets_; }; diff --git a/micro/ops/CMakeLists.txt b/micro/ops/CMakeLists.txt index 43ddce0a11c0fa03b6349f923fb8a588f0769fa8..0825e180074738bf60949484c9aab46af21a57f1 100644 --- a/micro/ops/CMakeLists.txt +++ b/micro/ops/CMakeLists.txt @@ -1,7 +1,6 @@ set(MICRO_OPS_SRCS shape.cc reduce.cc - reshape.cc matmul.cc nhwc/depthwise_conv_2d_ref.cc nhwc/conv_2d_c4_s4.cc @@ -31,12 +30,13 @@ set(MICRO_OPS_SRCS activation.cc ) +add_subdirectory(nhwc) + add_library(micro_ops ${MICRO_OPS_SRCS} ) target_link_libraries(micro_ops - micro_base - micro_framework + PRIVATE micro_base ) diff --git a/micro/ops/eltwise.cc b/micro/ops/eltwise.cc index 98f3897ea96f2b2eaf989e529fa26b6c851dfbbe..975a60cecfc7825dcae055256fe2d2fbda64de57 100644 --- a/micro/ops/eltwise.cc +++ b/micro/ops/eltwise.cc @@ -19,14 +19,6 @@ namespace micro { namespace ops { namespace eltwise { -bool ShapeIsEqual(const int32_t *dims0, - const int32_t *dims1, uint32_t dim_size) { - while (--dim_size > 0) { - if (dims0[dim_size] != dims1[dim_size]) - return false; - } - return true; -} int32_t GetIndex(const int32_t *shape, const int32_t *index, int32_t dim_size) { diff --git a/micro/ops/eltwise.h b/micro/ops/eltwise.h index 263082cca9225438dcaac456a983a4a47510d512..fd08114206b0f4acf1676912a89a3caa8e1fe708 100644 --- a/micro/ops/eltwise.h +++ b/micro/ops/eltwise.h @@ -19,31 +19,13 @@ #include "micro/base/utils.h" #include "micro/framework/operator.h" #include "micro/framework/scratch_buffer.h" +#include "micro/base/types.h" namespace micro { namespace ops { -namespace eltwise { // for redefine -enum Type { - SUM = 0, - SUB = 1, - PROD = 2, - DIV = 3, - MIN = 4, - MAX = 5, - NEG = 6, - ABS = 7, - SQR_DIFF = 8, - POW = 9, - EQUAL = 10, - FLOOR_DIV = 11, - CLIP = 12, - SIGN = 13, - NONE = 14, -}; +namespace eltwise { -bool ShapeIsEqual(const int32_t *dims0, - const int32_t *dims1, uint32_t dim_size); int32_t GetIndex(const int32_t *shape, const int32_t *index, int32_t dim_size); void 
IncreaseIndex(const int32_t *shape, int32_t **index, int32_t dim_size); template @@ -202,9 +184,8 @@ class EltwiseOp : public framework::Operator { if (input1_size == 1) { TensorScalarEltwise(type_, input0_, input1_[0], input0_size, swapped, output_ptr); - } else if (eltwise::ShapeIsEqual(input0_dims_, - input1_shape, - input0_dim_size_)) { + } else if (base::ShapeIsEqual(input0_dims_, input1_shape, + input0_dim_size_)) { TensorEltwise(type_, input0_, input1_, input0_size, swapped, output_ptr); } else if (need_general_broadcast) { diff --git a/micro/ops/matmul.h b/micro/ops/matmul.h index 94d9b03507a8c24064cc5db0e71aa03514c4acca..23cd0d0dd4884a3ec004137ad19b7dd507a0c2a8 100644 --- a/micro/ops/matmul.h +++ b/micro/ops/matmul.h @@ -40,10 +40,8 @@ class MatMulOp : public framework::Operator { uint32_t input_b_dim_size_; const mifloat *bias_; -#ifndef MACE_MICRO_NDEBUG const int32_t *bias_dims_; uint32_t bias_dim_size_; -#endif mifloat *output_; diff --git a/micro/ops/nhwc/CMakeLists.txt b/micro/ops/nhwc/CMakeLists.txt new file mode 100644 index 0000000000000000000000000000000000000000..ce890e12b0f665d3b0338f56b9747c4315ae5025 --- /dev/null +++ b/micro/ops/nhwc/CMakeLists.txt @@ -0,0 +1,3 @@ +if(MACE_MICRO_ENABLE_CMSIS) + add_subdirectory(cmsis_nn) +endif() diff --git a/micro/ops/nhwc/cmsis_nn/CMakeLists.txt b/micro/ops/nhwc/cmsis_nn/CMakeLists.txt new file mode 100644 index 0000000000000000000000000000000000000000..ca6db7303fdf0f4caf07f4717502be4e7f5fb1d7 --- /dev/null +++ b/micro/ops/nhwc/cmsis_nn/CMakeLists.txt @@ -0,0 +1,16 @@ +add_library(micro_ops_nhwc_cmsis_nn + arm_conv_2d_int8.cc + arm_pooling_int8.cc + arm_softmax_int8.cc + arm_mat_mul_int8.cc + arm_eltwise_int8.cc + arm_depthwise_conv_2d_int8.cc + dequantize.cc + quantize.cc + utilities.cc +) + +target_link_libraries(micro_ops_nhwc_cmsis_nn + PRIVATE micro_base + PRIVATE CMSISNN +) diff --git a/micro/ops/nhwc/cmsis_nn/arm_conv_2d_int8.cc b/micro/ops/nhwc/cmsis_nn/arm_conv_2d_int8.cc new file mode 100644 
index 0000000000000000000000000000000000000000..f886be8d7317b0f421f6db6d3e74991a07759c8b --- /dev/null +++ b/micro/ops/nhwc/cmsis_nn/arm_conv_2d_int8.cc @@ -0,0 +1,135 @@ +// Copyright 2020 The MACE Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "micro/ops/nhwc/cmsis_nn/arm_conv_2d_int8.h" + +#include + +#include "micro/base/logger.h" +#include "micro/framework/op_context.h" +#include "micro/framework/scratch_buffer.h" +#include "micro/model/const_tensor.h" +#include "micro/model/net_def.h" +#include "micro/ops/nhwc/cmsis_nn/utilities.h" + +namespace micro { +namespace ops { + +MaceStatus ArmConv2dInt8Op::Compute(int32_t (&output_dims)[4]) { + MACE_ASSERT(filter_dims_[0] == output_dims[3] && + input_dims_[3] == filter_dims_[3]); + + QuantizeInfo input_quantize_info = GetInputQuantizeInfo(INPUT); + QuantizeInfo filter_quantize_info = GetInputQuantizeInfo(FILTER); + QuantizeInfo output_quantize_info = GetOutputQuantizeInfo(OUTPUT); + + double double_multiplier = input_quantize_info.scale * + filter_quantize_info.scale / + output_quantize_info.scale; + int32_t multiplier; + int32_t shift; + QuantizeMultiplier(double_multiplier, &multiplier, &shift); + + cmsis_nn_conv_params conv_params; + /// input_offset is negative + conv_params.input_offset = -input_quantize_info.zero; + conv_params.output_offset = output_quantize_info.zero; + conv_params.activation.min = -128; + conv_params.activation.max = 127; + 
conv_params.stride.w = strides_[1]; + conv_params.stride.h = strides_[0]; + conv_params.padding.w = padding_sizes_[1] / 2; + conv_params.padding.h = padding_sizes_[0] / 2; + conv_params.dilation.w = dilations_[1]; + conv_params.dilation.h = dilations_[0]; + + ScratchBuffer scratch_buffer(engine_config_); + + cmsis_nn_per_channel_quant_params quant_params; + quant_params.multiplier = scratch_buffer.GetBuffer(output_dims[3]); + quant_params.shift = scratch_buffer.GetBuffer(output_dims[3]); + for (int32_t i = 0; i < output_dims[3]; ++i) { + quant_params.multiplier[i] = multiplier; + quant_params.shift[i] = shift; + } + + MACE_ASSERT(input_dims_[0] == 1); + MACE_ASSERT(dilations_[0] == 1 && dilations_[1] == 1); + + cmsis_nn_dims input_dims; + input_dims.n = input_dims_[0]; + input_dims.h = input_dims_[1]; + input_dims.w = input_dims_[2]; + input_dims.c = input_dims_[3]; + const int8_t *input_data = reinterpret_cast(input_); + + cmsis_nn_dims filter_dims; + filter_dims.n = filter_dims_[0]; + filter_dims.h = filter_dims_[1]; + filter_dims.w = filter_dims_[2]; + filter_dims.c = filter_dims_[3]; + const int8_t *filter_data = reinterpret_cast(filter_); + + cmsis_nn_dims bias_dims; + bias_dims.n = 1; + bias_dims.h = 1; + bias_dims.w = 1; + bias_dims.c = output_dims[3]; + int32_t *bias_data = + const_cast(reinterpret_cast(bias_)); + if (bias_data == NULL) { + bias_data = scratch_buffer.GetBuffer(output_dims[3]); + for (int32_t i = 0; i < bias_dims.c; ++i) { + bias_data[i] = 0; + } + } + + cmsis_nn_dims cmn_output_dims; + cmn_output_dims.n = output_dims[0]; + cmn_output_dims.h = output_dims[1]; + cmn_output_dims.w = output_dims[2]; + cmn_output_dims.c = output_dims[3]; + int8_t *output_data = reinterpret_cast(output_); + + cmsis_nn_context cmn_context; + cmn_context.size = arm_convolve_wrapper_s8_get_buffer_size( + &conv_params, &input_dims, &filter_dims, &cmn_output_dims); + if (cmn_context.size > 0) { + cmn_context.buf = scratch_buffer.GetBuffer(cmn_context.size); + } else { 
+ cmn_context.buf = NULL; + } + + arm_status status = arm_convolve_wrapper_s8( + &cmn_context, &conv_params, &quant_params, &input_dims, input_data, + &filter_dims, filter_data, &bias_dims, bias_data, &cmn_output_dims, + output_data); + MACE_ASSERT(status == ARM_MATH_SUCCESS) + << "failed in arm_convolve_wrapper_s8"; + + return MACE_SUCCESS; +} + +MaceStatus ArmConv2dInt8Op::Run() { + int32_t output_dims[4] = {0}; + InitPaddingAndOutputSize(input_dims_, filter_dims_, FLOOR, output_dims); + ResizeOutputShape(0, 4, output_dims); + + MACE_RETURN_IF_ERROR(Compute(output_dims)); + + return MACE_SUCCESS; +} + +} // namespace ops +} // namespace micro diff --git a/micro/ops/nhwc/cmsis_nn/arm_conv_2d_int8.h b/micro/ops/nhwc/cmsis_nn/arm_conv_2d_int8.h new file mode 100644 index 0000000000000000000000000000000000000000..d7448171884c854ec91a1361b3317c40cc6ea017 --- /dev/null +++ b/micro/ops/nhwc/cmsis_nn/arm_conv_2d_int8.h @@ -0,0 +1,35 @@ +// Copyright 2020 The MACE Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#ifndef MICRO_OPS_NHWC_CMSIS_NN_ARM_CONV_2D_INT8_H_ +#define MICRO_OPS_NHWC_CMSIS_NN_ARM_CONV_2D_INT8_H_ + +#include "micro/ops/nhwc/base/conv_2d_base.h" +#include "micro/ops/utils/activation.h" + +namespace micro { +namespace ops { + +class ArmConv2dInt8Op : public Conv2dBase { + public: + virtual MaceStatus Run(); + + private: + MaceStatus Compute(int32_t (&output_dims)[4]); +}; + +} // namespace ops +} // namespace micro + +#endif // MICRO_OPS_NHWC_CMSIS_NN_ARM_CONV_2D_INT8_H_ diff --git a/micro/ops/nhwc/cmsis_nn/arm_depthwise_conv_2d_int8.cc b/micro/ops/nhwc/cmsis_nn/arm_depthwise_conv_2d_int8.cc new file mode 100644 index 0000000000000000000000000000000000000000..e3746fe80d5cf8e9917eb41b262dc8ebceb83eb9 --- /dev/null +++ b/micro/ops/nhwc/cmsis_nn/arm_depthwise_conv_2d_int8.cc @@ -0,0 +1,136 @@ +// Copyright 2020 The MACE Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include "micro/ops/nhwc/cmsis_nn/arm_depthwise_conv_2d_int8.h" + +#include + +#include "micro/base/logger.h" +#include "micro/framework/op_context.h" +#include "micro/framework/scratch_buffer.h" +#include "micro/model/const_tensor.h" +#include "micro/model/net_def.h" +#include "micro/ops/nhwc/cmsis_nn/utilities.h" + +namespace micro { +namespace ops { + +MaceStatus ArmDepthwiseConv2dInt8Op::Compute(int32_t (&output_dims)[4]) { + QuantizeInfo input_quantize_info = GetInputQuantizeInfo(INPUT); + QuantizeInfo filter_quantize_info = GetInputQuantizeInfo(FILTER); + QuantizeInfo output_quantize_info = GetOutputQuantizeInfo(OUTPUT); + + double double_multiplier = input_quantize_info.scale * + filter_quantize_info.scale / + output_quantize_info.scale; + int32_t multiplier; + int32_t shift; + QuantizeMultiplier(double_multiplier, &multiplier, &shift); + + cmsis_nn_dw_conv_params dw_conv_params; + dw_conv_params.ch_mult = filter_dims_[0]; + /// input_offset is negative + dw_conv_params.input_offset = -input_quantize_info.zero; + dw_conv_params.output_offset = output_quantize_info.zero; + dw_conv_params.activation.min = -128; + dw_conv_params.activation.max = 127; + dw_conv_params.stride.w = strides_[1]; + dw_conv_params.stride.h = strides_[0]; + dw_conv_params.padding.w = padding_sizes_[1] / 2; + dw_conv_params.padding.h = padding_sizes_[0] / 2; + dw_conv_params.dilation.w = dilations_[1]; + dw_conv_params.dilation.h = dilations_[0]; + + ScratchBuffer scratch_buffer(engine_config_); + + cmsis_nn_per_channel_quant_params quant_params; + quant_params.multiplier = scratch_buffer.GetBuffer(output_dims[3]); + quant_params.shift = scratch_buffer.GetBuffer(output_dims[3]); + for (int32_t i = 0; i < output_dims[3]; ++i) { + quant_params.multiplier[i] = multiplier; + quant_params.shift[i] = shift; + } + + MACE_ASSERT(input_dims_[0] == 1); + MACE_ASSERT(filter_dims_[0] == 1); + MACE_ASSERT(dilations_[0] == 1 && dilations_[1] == 1); + + cmsis_nn_dims input_dims; + input_dims.n = 
input_dims_[0]; + input_dims.h = input_dims_[1]; + input_dims.w = input_dims_[2]; + input_dims.c = input_dims_[3]; + const int8_t *input_data = reinterpret_cast(input_); + + cmsis_nn_dims filter_dims; + filter_dims.n = filter_dims_[0]; + filter_dims.h = filter_dims_[1]; + filter_dims.w = filter_dims_[2]; + filter_dims.c = filter_dims_[3]; + const int8_t *filter_data = reinterpret_cast(filter_); + + cmsis_nn_dims bias_dims; + bias_dims.n = 1; + bias_dims.h = 1; + bias_dims.w = 1; + bias_dims.c = output_dims[3]; + int32_t *bias_data = + const_cast(reinterpret_cast(bias_)); + if (bias_data == NULL) { + bias_data = scratch_buffer.GetBuffer(output_dims[3]); + for (int32_t i = 0; i < bias_dims.c; ++i) { + bias_data[i] = 0; + } + } + + cmsis_nn_dims cmn_output_dims; + cmn_output_dims.n = output_dims[0]; + cmn_output_dims.h = output_dims[1]; + cmn_output_dims.w = output_dims[2]; + cmn_output_dims.c = filter_dims.c * filter_dims.n; + int8_t *output_data = reinterpret_cast(output_); + + cmsis_nn_context cmn_context; + cmn_context.size = arm_depthwise_conv_wrapper_s8_get_buffer_size( + &dw_conv_params, &input_dims, &filter_dims, &cmn_output_dims); + + if (cmn_context.size > 0) { + cmn_context.buf = scratch_buffer.GetBuffer(cmn_context.size); + } else { + cmn_context.buf = NULL; + } + + arm_status status = arm_depthwise_conv_wrapper_s8( + &cmn_context, &dw_conv_params, &quant_params, &input_dims, input_data, + &filter_dims, filter_data, &bias_dims, bias_data, &cmn_output_dims, + output_data); + MACE_ASSERT(status == ARM_MATH_SUCCESS) + << "failed in arm_depthwise_conv_wrapper_s8"; + + return MACE_SUCCESS; +} + +MaceStatus ArmDepthwiseConv2dInt8Op::Run() { + int32_t output_dims[4] = {0}; + InitPaddingAndOutputSize(input_dims_, filter_dims_, FLOOR, output_dims); + output_dims[3] *= input_dims_[3]; + ResizeOutputShape(0, 4, output_dims); + + MACE_RETURN_IF_ERROR(Compute(output_dims)); + + return MACE_SUCCESS; +} + +} // namespace ops +} // namespace micro diff --git 
a/micro/ops/nhwc/cmsis_nn/arm_depthwise_conv_2d_int8.h b/micro/ops/nhwc/cmsis_nn/arm_depthwise_conv_2d_int8.h new file mode 100644 index 0000000000000000000000000000000000000000..ce41a6ebc143442f002a8146f49bfcf25889ed37 --- /dev/null +++ b/micro/ops/nhwc/cmsis_nn/arm_depthwise_conv_2d_int8.h @@ -0,0 +1,35 @@ +// Copyright 2020 The MACE Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef MICRO_OPS_NHWC_CMSIS_NN_ARM_DEPTHWISE_CONV_2D_INT8_H_ +#define MICRO_OPS_NHWC_CMSIS_NN_ARM_DEPTHWISE_CONV_2D_INT8_H_ + +#include "micro/ops/nhwc/base/depthwise_conv_2d_base.h" +#include "micro/ops/utils/activation.h" + +namespace micro { +namespace ops { + +class ArmDepthwiseConv2dInt8Op : public DepthwiseConv2dBase { + public: + virtual MaceStatus Run(); + + private: + MaceStatus Compute(int32_t (&output_dims)[4]); +}; + +} // namespace ops +} // namespace micro + +#endif // MICRO_OPS_NHWC_CMSIS_NN_ARM_DEPTHWISE_CONV_2D_INT8_H_ diff --git a/micro/ops/nhwc/cmsis_nn/arm_eltwise_int8.cc b/micro/ops/nhwc/cmsis_nn/arm_eltwise_int8.cc new file mode 100644 index 0000000000000000000000000000000000000000..6cb44e35a20410385c6f8143d22b84ba4f696063 --- /dev/null +++ b/micro/ops/nhwc/cmsis_nn/arm_eltwise_int8.cc @@ -0,0 +1,109 @@ +// Copyright 2020 The MACE Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "micro/ops/nhwc/cmsis_nn/arm_eltwise_int8.h" + +#include + +#include "micro/base/logging.h" +#include "micro/base/types.h" +#include "micro/base/utils.h" +#include "micro/ops/nhwc/cmsis_nn/utilities.h" + +namespace micro { +namespace ops { + +MaceStatus ArmEltwiseInt8Op::OnInit() { + MACE_ASSERT(GetInputSize() == 2); + + input0_ = GetInputData(INPUT0); + input0_dims_ = GetInputShapeDims(INPUT0); + input0_dim_size_ = GetInputShapeDimSize(INPUT0); + + input1_ = GetInputData(INPUT1); + input1_dims_ = GetInputShapeDims(INPUT1); + input1_dim_size_ = GetInputShapeDimSize(INPUT1); + + output_ = GetOutputData(OUTPUT); + + type_ = static_cast( + GetArgByName("type", static_cast(NONE))); + coeff_ = GetRepeatArgByName("coeff", &coeff_size_); + + return MACE_SUCCESS; +} + +MaceStatus ArmEltwiseInt8Op::Run() { + MACE_ASSERT1(GetInputSize() == 2, + "ArmEltwiseInt8Op only supports 2 inputs"); + MACE_ASSERT(input0_dim_size_ == input1_dim_size_); + MACE_ASSERT(base::ShapeIsEqual(input0_dims_, input1_dims_, input1_dim_size_)); + + MACE_RETURN_IF_ERROR( + ResizeOutputShape(OUTPUT, input0_dim_size_, input0_dims_)); + + if (type_ == eltwise::SUM) { + QuantizeInfo input_quantize_info0 = GetInputQuantizeInfo(0); + QuantizeInfo input_quantize_info1 = GetInputQuantizeInfo(1); + QuantizeInfo output_quantize_info = GetOutputQuantizeInfo(OUTPUT); + + int32_t input0_offset = -input_quantize_info0.zero; + double input0_scale = input_quantize_info0.scale; + int32_t input1_offset = -input_quantize_info1.zero; + double input1_scale = input_quantize_info1.scale; + 
int32_t output_offset = output_quantize_info.zero; + double output_scale = output_quantize_info.scale; + + int32_t left_shift = 20; + + const double twice_max_input_scale = + 2 * static_cast(base::max(input0_scale, input1_scale)); + const double real_input0_multiplier = + static_cast(input0_scale) / twice_max_input_scale; + const double real_input1_multiplier = + static_cast(input1_scale) / twice_max_input_scale; + const double real_output_multiplier = + twice_max_input_scale / + ((1 << left_shift) * static_cast(output_scale)); + + int32_t input0_multiplier = 0; + int32_t input0_shift = 0; + QuantizeMultiplier(real_input0_multiplier, &input0_multiplier, + &input0_shift); + + int32_t input1_multiplier = 0; + int32_t input1_shift = 0; + QuantizeMultiplier(real_input1_multiplier, &input1_multiplier, + &input1_shift); + + int32_t output_multiplier = 0; + int32_t output_shift = 0; + QuantizeMultiplier(real_output_multiplier, &output_multiplier, + &output_shift); + + int32_t element_size = base::GetShapeSize(input0_dim_size_, input0_dims_); + arm_elementwise_add_s8(input0_, input1_, input0_offset, input0_multiplier, + input0_shift, input1_offset, input1_multiplier, + input1_shift, left_shift, output_, output_offset, + output_multiplier, output_shift, -128, 127, + element_size); + } else { + MACE_ASSERT1(false, "Unsupported ArmEltwiseInt8Op type"); + } + + return MACE_SUCCESS; +} + +} // namespace ops +} // namespace micro diff --git a/micro/ops/nhwc/cmsis_nn/arm_eltwise_int8.h b/micro/ops/nhwc/cmsis_nn/arm_eltwise_int8.h new file mode 100644 index 0000000000000000000000000000000000000000..6e8a0aea8ff6e4e36eab4d19ef968330a67df029 --- /dev/null +++ b/micro/ops/nhwc/cmsis_nn/arm_eltwise_int8.h @@ -0,0 +1,62 @@ +// Copyright 2020 The MACE Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef MICRO_OPS_NHWC_CMSIS_NN_ARM_ELTWISE_INT8_H_ +#define MICRO_OPS_NHWC_CMSIS_NN_ARM_ELTWISE_INT8_H_ + +#include "micro/base/logger.h" +#include "micro/base/logging.h" +#include "micro/base/types.h" +#include "micro/base/utils.h" +#include "micro/framework/op_context.h" +#include "micro/framework/operator.h" +#include "micro/framework/scratch_buffer.h" +#include "micro/model/const_tensor.h" +#include "micro/model/net_def.h" + +namespace micro { +namespace ops { + +class ArmEltwiseInt8Op : public framework::Operator { + public: + MaceStatus OnInit(); + + MaceStatus Run(); + + private: + const int8_t *input0_; + const int32_t *input0_dims_; + uint32_t input0_dim_size_; + + const int8_t *input1_; + const int32_t *input1_dims_; + uint32_t input1_dim_size_; + + int8_t *output_; + + eltwise::Type type_; + const float *coeff_; + uint32_t coeff_size_; + int32_t scalar_input_index_; + bool nchw_; + + MACE_OP_INPUT_TAGS(INPUT0, INPUT1); + MACE_OP_OUTPUT_TAGS(OUTPUT); +}; + +} // namespace ops +} // namespace micro + + +#endif // MICRO_OPS_NHWC_CMSIS_NN_ARM_ELTWISE_INT8_H_ diff --git a/micro/ops/nhwc/cmsis_nn/arm_mat_mul_int8.cc b/micro/ops/nhwc/cmsis_nn/arm_mat_mul_int8.cc new file mode 100644 index 0000000000000000000000000000000000000000..e2dd8fd2dacde6dea88bad682a14643c9521055f --- /dev/null +++ b/micro/ops/nhwc/cmsis_nn/arm_mat_mul_int8.cc @@ -0,0 +1,152 @@ +// Copyright 2020 The MACE Authors. All Rights Reserved. 
+// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "micro/ops/nhwc/cmsis_nn/arm_mat_mul_int8.h" + +#include + +#include "micro/base/logger.h" +#include "micro/base/logging.h" +#include "micro/base/utils.h" +#include "micro/framework/op_context.h" +#include "micro/framework/scratch_buffer.h" +#include "micro/model/argument.h" +#include "micro/model/const_tensor.h" +#include "micro/model/net_def.h" +#include "micro/ops/nhwc/cmsis_nn/utilities.h" + +namespace micro { +namespace ops { + +MaceStatus ArmMatMulInt8Op::OnInit() { + transpose_a_ = GetArgByName("transpose_a", false); + transpose_b_ = GetArgByName("transpose_b", false); + input_a_ = GetInputData(INPUT_A); + input_b_ = GetInputData(INPUT_B); + output_ = GetOutputData(OUTPUT); + + if (GetInputSize() >= 3) { + bias_ = GetInputData(BIAS); + bias_dim_size_ = GetInputShapeDimSize(BIAS); + bias_dims_ = GetInputShapeDims(BIAS); + } else { + bias_ = NULL; + bias_dim_size_ = 0; + bias_dims_ = NULL; + } + + input_a_dim_size_ = GetInputShapeDimSize(INPUT_A); + input_b_dim_size_ = GetInputShapeDimSize(INPUT_B); + + input_a_dims_ = GetInputShapeDims(INPUT_A); + input_b_dims_ = GetInputShapeDims(INPUT_B); + + return MACE_SUCCESS; +} + +MaceStatus ArmMatMulInt8Op::Run() { + MACE_ASSERT(Validate()); + + MACE_ASSERT(input_a_dim_size_ == 2); + MACE_ASSERT(input_b_dim_size_ == 2); + + MACE_ASSERT(input_a_dims_[0] == 1); + + MACE_ASSERT(transpose_b_); + MACE_ASSERT(!transpose_a_); + + const int32_t lhs_rows = 
input_a_dims_[0]; + const int32_t rhs_rows = input_b_dims_[0]; + const int32_t rhs_cols = input_b_dims_[1]; + + const int32_t rhs_t_cols = rhs_rows; + + const int32_t rows = lhs_rows; + const int32_t cols = rhs_t_cols; + + if (bias_ != NULL) { + MACE_ASSERT(bias_dim_size_ == 1); + MACE_ASSERT(bias_dims_[0] == cols); + } + + int32_t *output_dims0 = + ScratchBuffer(engine_config_).GetBuffer(input_a_dim_size_); + + output_dims0[0] = rows; + output_dims0[1] = cols; + + MACE_RETURN_IF_ERROR( + ResizeOutputShape(OUTPUT, input_a_dim_size_, output_dims0)); + + QuantizeInfo input_quantize_info_a = GetInputQuantizeInfo(INPUT_A); + QuantizeInfo input_quantize_info_b = GetInputQuantizeInfo(INPUT_B); + QuantizeInfo output_quantize_info = GetOutputQuantizeInfo(OUTPUT); + + double double_multiplier = input_quantize_info_a.scale * + input_quantize_info_b.scale / + output_quantize_info.scale; + int32_t multiplier; + int32_t shift; + QuantizeMultiplier(double_multiplier, &multiplier, &shift); + + ScratchBuffer scratch_buffer(engine_config_); + + int32_t *bias = NULL; + if (bias_ == NULL) { + bias = scratch_buffer.GetBuffer(cols); + for (int32_t i = 0; i < cols; ++i) { + bias[i] = 0; + } + } else { + bias = const_cast(bias_); + } + + arm_status status = arm_nn_vec_mat_mult_t_s8( + input_a_, input_b_, bias, output_, -input_quantize_info_a.zero, + input_quantize_info_b.zero, output_quantize_info.zero, multiplier, shift, + rhs_cols, rhs_rows, -128, 127); + + MACE_ASSERT(status == ARM_MATH_SUCCESS); + + return MACE_SUCCESS; +} + +bool ArmMatMulInt8Op::Validate() { + const int32_t lhs_rank = input_a_dim_size_; + const int32_t rhs_rank = input_b_dim_size_; + if (input_a_dim_size_ == input_b_dim_size_) { + for (uint32_t i = 0; i < input_a_dim_size_ - 2; ++i) { + MACE_ASSERT1(input_a_dims_[i] == input_b_dims_[i], + "batch dimensions are not equal"); + } + } else { + MACE_ASSERT1(input_a_dim_size_ == 2 || input_b_dim_size_ == 2, + "Either lhs or rhs matrix should has rank 2 " + "for 
non-batched matrix multiplication"); + } + + int32_t lhs_depth = + transpose_a_ ? input_a_dims_[lhs_rank - 2] : input_a_dims_[lhs_rank - 1]; + int32_t rhs_depth = + transpose_b_ ? input_b_dims_[rhs_rank - 1] : input_b_dims_[rhs_rank - 2]; + if (lhs_depth != rhs_depth) { + MACE_ASSERT1(false, "the number of A's column must be equal to B's row "); + return false; + } + + return true; +} + +} // namespace ops +} // namespace micro diff --git a/micro/ops/nhwc/cmsis_nn/arm_mat_mul_int8.h b/micro/ops/nhwc/cmsis_nn/arm_mat_mul_int8.h new file mode 100644 index 0000000000000000000000000000000000000000..5b8bcf5ddeb8d4cd5ed35c6a51b170e245312a7b --- /dev/null +++ b/micro/ops/nhwc/cmsis_nn/arm_mat_mul_int8.h @@ -0,0 +1,55 @@ +// Copyright 2020 The MACE Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#ifndef MICRO_OPS_NHWC_CMSIS_NN_ARM_MAT_MUL_INT8_H_ +#define MICRO_OPS_NHWC_CMSIS_NN_ARM_MAT_MUL_INT8_H_ + +#include "micro/framework/operator.h" + +namespace micro { +namespace ops { +class ArmMatMulInt8Op : public framework::Operator { + public: + MaceStatus OnInit(); + MaceStatus Run(); + + private: + bool Validate(); + + private: + const int8_t *input_a_; + const int32_t *input_a_dims_; + uint32_t input_a_dim_size_; + + const int8_t *input_b_; + const int32_t *input_b_dims_; + uint32_t input_b_dim_size_; + + const int32_t *bias_; + const int32_t *bias_dims_; + uint32_t bias_dim_size_; + + int8_t *output_; + + bool transpose_a_; + bool transpose_b_; + + MACE_OP_INPUT_TAGS(INPUT_A, INPUT_B, BIAS); + MACE_OP_OUTPUT_TAGS(OUTPUT); +}; + +} // namespace ops +} // namespace micro + +#endif // MICRO_OPS_NHWC_CMSIS_NN_ARM_MAT_MUL_INT8_H_ diff --git a/micro/ops/nhwc/cmsis_nn/arm_pooling_int8.cc b/micro/ops/nhwc/cmsis_nn/arm_pooling_int8.cc new file mode 100644 index 0000000000000000000000000000000000000000..7e5851622f2abeddd14813601f5d84a416cd1ed6 --- /dev/null +++ b/micro/ops/nhwc/cmsis_nn/arm_pooling_int8.cc @@ -0,0 +1,123 @@ +// Copyright 2020 The MACE Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include "micro/ops/nhwc/cmsis_nn/arm_pooling_int8.h" + +#include + +#include "micro/base/logging.h" +#include "micro/base/utils.h" +#include "micro/framework/scratch_buffer.h" +#include "micro/include/utils/macros.h" +#include "micro/ops/nhwc/cmsis_nn/utilities.h" + +namespace micro { +namespace ops { + +void ArmPoolingInt8Op::MaxPooling(const mifloat *input, + const int32_t *filter_hw, + const int32_t *stride_hw, + const int32_t *dilation_hw, + const int32_t *pad_hw) { + MACE_UNUSED(dilation_hw); + + cmsis_nn_context ctx; + ctx.buf = NULL; + ctx.size = 0; + + cmsis_nn_pool_params pool_params; + pool_params.activation.min = -128; + pool_params.activation.max = 127; + pool_params.stride.h = stride_hw[0]; + pool_params.stride.w = stride_hw[1]; + pool_params.padding.h = pad_hw[0]; + pool_params.padding.w = pad_hw[1]; + + MACE_ASSERT(input_dims_[0] == 1); + + cmsis_nn_dims input_dims; + input_dims.n = input_dims_[0]; + input_dims.h = input_dims_[1]; + input_dims.w = input_dims_[2]; + input_dims.c = input_dims_[3]; + const int8_t *input_data = reinterpret_cast(input); + + cmsis_nn_dims filter_dims; + filter_dims.h = filter_hw[0]; + filter_dims.w = filter_hw[1]; + + cmsis_nn_dims output_dims; + output_dims.n = output_dims_[0]; + output_dims.h = output_dims_[1]; + output_dims.w = output_dims_[2]; + output_dims.c = output_dims_[3]; + int8_t *output_data = reinterpret_cast(output_); + + arm_max_pool_s8(&ctx, &pool_params, &input_dims, input_data, &filter_dims, + &output_dims, output_data); +} + +void ArmPoolingInt8Op::AvgPooling(const mifloat *input, + const int32_t *filter_hw, + const int32_t *stride_hw, + const int32_t *dilation_hw, + const int32_t *pad_hw) { + MACE_UNUSED(dilation_hw); + + const int32_t out_width = output_dims_[2]; + const int32_t in_channels = input_dims_[3]; + + cmsis_nn_context ctx; + ctx.size = arm_avgpool_s8_get_buffer_size(out_width, in_channels); + ScratchBuffer scratch_buffer(engine_config_); + if (ctx.size > 0) { + ctx.buf = 
scratch_buffer.GetBuffer(ctx.size); + } else { + ctx.buf = NULL; + } + + cmsis_nn_pool_params pool_params; + pool_params.activation.min = -128; + pool_params.activation.max = 127; + pool_params.stride.h = stride_hw[0]; + pool_params.stride.w = stride_hw[1]; + pool_params.padding.h = pad_hw[0]; + pool_params.padding.w = pad_hw[1]; + + MACE_ASSERT(input_dims_[0] == 1); + + cmsis_nn_dims input_dims; + input_dims.n = input_dims_[0]; + input_dims.h = input_dims_[1]; + input_dims.w = input_dims_[2]; + input_dims.c = input_dims_[3]; + const int8_t *input_data = reinterpret_cast(input); + + cmsis_nn_dims filter_dims; + filter_dims.h = filter_hw[0]; + filter_dims.w = filter_hw[1]; + + cmsis_nn_dims output_dims; + output_dims.n = output_dims_[0]; + output_dims.h = output_dims_[1]; + output_dims.w = output_dims_[2]; + output_dims.c = output_dims_[3]; + int8_t *output_data = reinterpret_cast(output_); + + arm_avgpool_s8(&ctx, &pool_params, &input_dims, input_data, &filter_dims, + &output_dims, output_data); +} + +} // namespace ops +} // namespace micro diff --git a/micro/ops/nhwc/cmsis_nn/arm_pooling_int8.h b/micro/ops/nhwc/cmsis_nn/arm_pooling_int8.h new file mode 100644 index 0000000000000000000000000000000000000000..1b6bed3ac2b169c2c5b99c8e26e935915a9c939c --- /dev/null +++ b/micro/ops/nhwc/cmsis_nn/arm_pooling_int8.h @@ -0,0 +1,41 @@ +// Copyright 2020 The MACE Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#ifndef MICRO_OPS_NHWC_CMSIS_NN_ARM_POOLING_INT8_H_ +#define MICRO_OPS_NHWC_CMSIS_NN_ARM_POOLING_INT8_H_ + +#include "micro/model/output_shape.h" +#include "micro/ops/nhwc/base/pooling_base.h" + +namespace micro { +namespace ops { + +class ArmPoolingInt8Op : public PoolingBase { + private: + void MaxPooling(const mifloat *input, + const int32_t *filter_hw, + const int32_t *stride_hw, + const int32_t *dilation_hw, + const int32_t *pad_hw); + void AvgPooling(const mifloat *input, + const int32_t *filter_hw, + const int32_t *stride_hw, + const int32_t *dilation_hw, + const int32_t *pad_hw); +}; + +} // namespace ops +} // namespace micro + +#endif // MICRO_OPS_NHWC_CMSIS_NN_ARM_POOLING_INT8_H_ diff --git a/micro/ops/nhwc/cmsis_nn/arm_softmax_int8.cc b/micro/ops/nhwc/cmsis_nn/arm_softmax_int8.cc new file mode 100644 index 0000000000000000000000000000000000000000..e1d44bf1cdeb0aba53d0e14bf8bf9f1707034179 --- /dev/null +++ b/micro/ops/nhwc/cmsis_nn/arm_softmax_int8.cc @@ -0,0 +1,82 @@ +// Copyright 2020 The MACE Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include "micro/ops/nhwc/cmsis_nn/arm_softmax_int8.h" + +#include + +#include "micro/base/logging.h" +#include "micro/base/utils.h" +#include "micro/framework/op_context.h" +#include "micro/model/net_def.h" +#include "micro/ops/nhwc/cmsis_nn/utilities.h" + +namespace micro { +namespace ops { + +MaceStatus ArmSoftmaxInt8Op::OnInit() { + data_format_ = static_cast( + GetArgByName("data_format", static_cast(NHWC))); + input_ = GetInputData(INPUT); + input_dims_ = GetInputShapeDims(INPUT); + input_dim_size_ = GetInputShapeDimSize(INPUT); + MACE_ASSERT(input_dim_size_ == 2); + + output_ = GetOutputData(OUTPUT); + + bool use_log = GetArgByName("use_log", false); + MACE_ASSERT1(!use_log, "The argument \"use_log\" is unsupported"); + + return MACE_SUCCESS; +} + +MaceStatus ArmSoftmaxInt8Op::Run() { + MACE_RETURN_IF_ERROR(ResizeOutputShape(OUTPUT, input_dim_size_, input_dims_)); + // TODO(ZhangZhimin): Workarounds for AUTO data format + if (NHWC == data_format_ || AUTO == data_format_) { // NHWC + return RunForNHWC(); + } else { + MACE_NOT_IMPLEMENTED; + return MACE_UNSUPPORTED; + } +} + +MaceStatus ArmSoftmaxInt8Op::RunForNHWC() { + int32_t class_size = input_dims_[input_dim_size_ - 1]; + + const int8_t *input_data = reinterpret_cast(input_); + int8_t *output_data = reinterpret_cast(output_); + + int32_t num_rows = input_dims_[0]; + + QuantizeInfo input_quantize_info = GetInputQuantizeInfo(INPUT); + + int kInputDeltaIntBits = 5; + int32_t scale_q = static_cast( + base::min(static_cast(input_quantize_info.scale) * + (1 << (31 - kInputDeltaIntBits)), + (1ll << 31) - 1.0)); + int32_t mult; + int32_t shift; + QuantizeMultiplier(scale_q, &mult, &shift); + int32_t diff_min = -128; + + arm_softmax_s8(input_data, num_rows, class_size, mult, shift, diff_min, + output_data); + + return MACE_SUCCESS; +} + +} // namespace ops +} // namespace micro diff --git a/micro/ops/nhwc/cmsis_nn/arm_softmax_int8.h b/micro/ops/nhwc/cmsis_nn/arm_softmax_int8.h new file mode 100644 index 
0000000000000000000000000000000000000000..00f33863e6ca79e0c2570c11050322671337e318 --- /dev/null +++ b/micro/ops/nhwc/cmsis_nn/arm_softmax_int8.h @@ -0,0 +1,47 @@ +// Copyright 2020 The MACE Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef MICRO_OPS_NHWC_CMSIS_NN_ARM_SOFTMAX_INT8_H_ +#define MICRO_OPS_NHWC_CMSIS_NN_ARM_SOFTMAX_INT8_H_ + +#include "micro/framework/operator.h" + +namespace micro { +namespace ops { + +class ArmSoftmaxInt8Op : public framework::Operator { + public: + MaceStatus OnInit(); + MaceStatus Run(); + + private: + MaceStatus RunForNHWC(); + + private: + const mifloat *input_; + const int32_t *input_dims_; + uint32_t input_dim_size_; + + mifloat *output_; + + DataFormat data_format_; + + MACE_OP_INPUT_TAGS(INPUT); + MACE_OP_OUTPUT_TAGS(OUTPUT); +}; + +} // namespace ops +} // namespace micro + +#endif // MICRO_OPS_NHWC_CMSIS_NN_ARM_SOFTMAX_INT8_H_ diff --git a/micro/ops/nhwc/cmsis_nn/dequantize.cc b/micro/ops/nhwc/cmsis_nn/dequantize.cc new file mode 100644 index 0000000000000000000000000000000000000000..9e4be8bcd872b51f5611feb9c0f5a18e13969971 --- /dev/null +++ b/micro/ops/nhwc/cmsis_nn/dequantize.cc @@ -0,0 +1,56 @@ +// Copyright 2020 The MACE Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "micro/ops/nhwc/cmsis_nn/dequantize.h" + +#include "micro/base/logging.h" +#include "micro/base/utils.h" +#include "micro/framework/op_context.h" +#include "micro/framework/operator.h" +#include "micro/model/net_def.h" + +namespace micro { +namespace ops { + +MaceStatus DequantizeOp::OnInit() { + input_ = GetInputData(INPUT); + input_dims_ = GetInputShapeDims(INPUT); + input_dim_size_ = GetInputShapeDimSize(INPUT); + + output_ = GetOutputData(OUTPUT); + + return MACE_SUCCESS; +} + +MaceStatus DequantizeOp::Run() { + MACE_RETURN_IF_ERROR(ResizeOutputShape(OUTPUT, input_dim_size_, input_dims_)); + + QuantizeInfo input_quantize_info = GetInputQuantizeInfo(INPUT); + + float scale = input_quantize_info.scale; + int32_t zero_point = input_quantize_info.zero; + + int32_t element_size = 1; + for (uint32_t i = 0; i < input_dim_size_; ++i) { + element_size *= input_dims_[i]; + } + for (int32_t i = 0; i < element_size; ++i) { + output_[i] = scale * (input_[i] - zero_point); + } + + return MACE_SUCCESS; +} + +} // namespace ops +} // namespace micro diff --git a/micro/ops/nhwc/cmsis_nn/dequantize.h b/micro/ops/nhwc/cmsis_nn/dequantize.h new file mode 100644 index 0000000000000000000000000000000000000000..9459e2d11779d3157590e5db992ec71faf62fe0a --- /dev/null +++ b/micro/ops/nhwc/cmsis_nn/dequantize.h @@ -0,0 +1,43 @@ +// Copyright 2020 The MACE Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef MICRO_OPS_NHWC_CMSIS_NN_DEQUANTIZE_H_ +#define MICRO_OPS_NHWC_CMSIS_NN_DEQUANTIZE_H_ + +#include "micro/framework/operator.h" + +namespace micro { +namespace ops { + +class DequantizeOp : public framework::Operator { + public: + MaceStatus OnInit(); + MaceStatus Run(); + + private: + const int8_t *input_; + const int32_t *input_dims_; + uint32_t input_dim_size_; + + mifloat *output_; + + MACE_OP_INPUT_TAGS(INPUT); + MACE_OP_OUTPUT_TAGS(OUTPUT); +}; + +} // namespace ops +} // namespace micro + +#endif // MICRO_OPS_NHWC_CMSIS_NN_DEQUANTIZE_H_ + diff --git a/micro/ops/nhwc/cmsis_nn/quantize.cc b/micro/ops/nhwc/cmsis_nn/quantize.cc new file mode 100644 index 0000000000000000000000000000000000000000..406672efd55d71c28360462887cc4376a938071a --- /dev/null +++ b/micro/ops/nhwc/cmsis_nn/quantize.cc @@ -0,0 +1,64 @@ +// Copyright 2020 The MACE Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include "micro/ops/nhwc/cmsis_nn/quantize.h" + +#include <math.h> +#include "micro/base/logging.h" +#include "micro/base/utils.h" + +namespace micro { +namespace ops { + +inline int8_t SaturateInt8(float value) { + int rounded_value = static_cast<int>(value); + if (rounded_value <= -128) { + return -128; + } else if (rounded_value >= 127) { + return 127; + } else { + return static_cast<int8_t>(rounded_value); + } +} + +MaceStatus QuantizeOp::OnInit() { + input_ = GetInputData<mifloat>(INPUT); + input_dims_ = GetInputShapeDims(INPUT); + input_dim_size_ = GetInputShapeDimSize(INPUT); + + output_ = GetOutputData<int8_t>(OUTPUT); + + return MACE_SUCCESS; +} + +MaceStatus QuantizeOp::Run() { + MACE_RETURN_IF_ERROR(ResizeOutputShape(OUTPUT, input_dim_size_, input_dims_)); + QuantizeInfo output_quantize_info = GetOutputQuantizeInfo(OUTPUT); + float recip_scale = 1.0f / output_quantize_info.scale; + int32_t zero_point = output_quantize_info.zero; + + int32_t element_size = 1; + for (uint32_t i = 0; i < input_dim_size_; ++i) { + element_size *= input_dims_[i]; + } + + for (int32_t i = 0; i < element_size; ++i) { + output_[i] = SaturateInt8(roundf(recip_scale * input_[i] + zero_point)); + } + + return MACE_SUCCESS; +} + +} // namespace ops +} // namespace micro diff --git a/micro/ops/nhwc/cmsis_nn/quantize.h b/micro/ops/nhwc/cmsis_nn/quantize.h new file mode 100644 index 0000000000000000000000000000000000000000..c9f7dda2f2bb17fd2dca7d719d081a624dc8cf1f --- /dev/null +++ b/micro/ops/nhwc/cmsis_nn/quantize.h @@ -0,0 +1,42 @@ +// Copyright 2020 The MACE Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef MICRO_OPS_NHWC_CMSIS_NN_QUANTIZE_H_ +#define MICRO_OPS_NHWC_CMSIS_NN_QUANTIZE_H_ + +#include "micro/framework/operator.h" + +namespace micro { +namespace ops { + +class QuantizeOp : public framework::Operator { + public: + MaceStatus OnInit(); + MaceStatus Run(); + + private: + const mifloat *input_; + const int32_t *input_dims_; + uint32_t input_dim_size_; + + int8_t *output_; + + MACE_OP_INPUT_TAGS(INPUT); + MACE_OP_OUTPUT_TAGS(OUTPUT); +}; + +} // namespace ops +} // namespace micro + +#endif // MICRO_OPS_NHWC_CMSIS_NN_QUANTIZE_H_ diff --git a/micro/ops/nhwc/cmsis_nn/utilities.cc b/micro/ops/nhwc/cmsis_nn/utilities.cc new file mode 100644 index 0000000000000000000000000000000000000000..8845ff3c7e33d5a3a1abb81531b79114244eacf7 --- /dev/null +++ b/micro/ops/nhwc/cmsis_nn/utilities.cc @@ -0,0 +1,40 @@ +// Copyright 2020 The MACE Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include "micro/ops/nhwc/cmsis_nn/utilities.h" + +#include <math.h> + +void QuantizeMultiplier(double double_multiplier, + int32_t *quantized_multiplier, + int32_t *shift) { + if (double_multiplier == 0.) { + *quantized_multiplier = 0; + *shift = 0; + return; + } + const double q = frexp(double_multiplier, reinterpret_cast<int *>(shift)); + int64_t q_fixed = static_cast<int64_t>(round(q * (1ll << 31))); + + if (q_fixed == (1ll << 31)) { + q_fixed /= 2; + ++*shift; + } + + if (*shift < -31) { + *shift = 0; + q_fixed = 0; + } + *quantized_multiplier = static_cast<int32_t>(q_fixed); +} diff --git a/micro/ops/nhwc/cmsis_nn/utilities.h b/micro/ops/nhwc/cmsis_nn/utilities.h new file mode 100644 index 0000000000000000000000000000000000000000..4eb7beaf16374e4e35cf290200e94ec9ee14e04d --- /dev/null +++ b/micro/ops/nhwc/cmsis_nn/utilities.h @@ -0,0 +1,24 @@ +// Copyright 2020 The MACE Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+ +#ifndef MICRO_OPS_NHWC_CMSIS_NN_UTILITIES_H_ +#define MICRO_OPS_NHWC_CMSIS_NN_UTILITIES_H_ + +#include "micro/base/types.h" + +void QuantizeMultiplier(double double_multiplier, + int32_t *quantized_multiplier, + int32_t *shift); + +#endif // MICRO_OPS_NHWC_CMSIS_NN_UTILITIES_H_ diff --git a/micro/ops/nhwc/pooling_ref.cc b/micro/ops/nhwc/pooling_ref.cc index 270a7c0f782e9faebbaa5347ca0221e50f266dca..c3f97694e3575f7b35907fc797d5b31a94866cf3 100644 --- a/micro/ops/nhwc/pooling_ref.cc +++ b/micro/ops/nhwc/pooling_ref.cc @@ -49,7 +49,7 @@ void PoolingRefOp::MaxPooling(const mifloat *input, } for (int32_t fh = 0; fh < filter_hw[0]; ++fh) { int32_t inh = inh_addr + dilation_hw[0] * fh; - if (inh < 0 && inh >= in_height) { + if (inh < 0 || inh >= in_height) { continue; } int32_t in_h_base = (in_b_base + inh) * in_width; diff --git a/micro/ops/reshape.cc b/micro/ops/reshape.cc deleted file mode 100644 index 26e80d794197d4ab6be47fa33499b4ca22536baf..0000000000000000000000000000000000000000 --- a/micro/ops/reshape.cc +++ /dev/null @@ -1,108 +0,0 @@ -// Copyright 2020 The MACE Authors. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -#include "micro/ops/reshape.h" - -#include "micro/base/logging.h" -#include "micro/base/utils.h" -#include "micro/framework/scratch_buffer.h" - -namespace micro { -namespace ops { - -namespace { - -MaceStatus ValidShapeData(const int32_t *input_dims, - const uint32_t input_dim_size, - int32_t *shape_data, - const uint32_t shape_data_size) { - MACE_ASSERT( - input_dims != NULL && shape_data != NULL); - int32_t unknown_idx = -1; - int32_t product = 1; - const int32_t input_size = base::GetShapeSize(input_dim_size, input_dims); - - for (uint32_t i = 0; i < shape_data_size; ++i) { - if (shape_data[i] == -1) { - MACE_ASSERT1(unknown_idx == -1, "Only one input size may be -1"); - unknown_idx = i; - shape_data[i] = 1; - } else { - MACE_ASSERT2(shape_data[i] >= 0, "Shape must be non-negative: ", - shape_data[i]); - if (shape_data[i] == 0) { - MACE_ASSERT1(i < input_dim_size, "dims:0 out of input dims' range."); - shape_data[i] = input_dims[i]; - } - product *= shape_data[i]; - } - } - - if (unknown_idx != -1) { - MACE_ASSERT1(product != 0, - "Cannot infer shape if there is zero shape size."); - const int32_t missing = input_size / product; - MACE_ASSERT1(missing * product == input_size, - "Input size not match reshaped tensor size"); - shape_data[unknown_idx] = missing; - } - - return MACE_SUCCESS; -} - -} // namespace - -MaceStatus ReshapeOp::OnInit() { - input_ = GetInputData(INPUT); - input_dims_ = GetInputShapeDims(INPUT); - input_dim_size_ = GetInputShapeDimSize(INPUT); - - shape_ = GetInputData(SHAPE); - shape_dims_ = GetInputShapeDims(SHAPE); - shape_dim_size_ = GetInputShapeDimSize(SHAPE); - - output_ = GetOutputData(OUTPUT); - return MACE_SUCCESS; -} - -MaceStatus ReshapeOp::Run() { - const int32_t input_data_size = - base::GetShapeSize(input_dim_size_, input_dims_); - const int32_t shape_data_size = - base::GetShapeSize(shape_dim_size_, shape_dims_); - - int32_t *shape_data = - ScratchBuffer(engine_config_).GetBuffer(shape_data_size); - 
base::memcpy(shape_data, shape_, shape_data_size * sizeof(int32_t)); - - MACE_RETURN_IF_ERROR(ValidShapeData(input_dims_, input_dim_size_, - shape_data, shape_data_size)); - -#ifndef MACE_MICRO_NDEBUG - const int32_t output_data_size = base::accumulate_multi( - shape_data, 0, static_cast(shape_data_size)); - if (input_data_size != output_data_size) { - LOG(FATAL) << "input_data_size(" << input_data_size - << ") != output_data_size(" << output_data_size - << "), please check the model."; - } -#endif - - // TODO(luxuhui): optimize this method by reusing buffer - base::memcpy(output_, input_, input_data_size * sizeof(mifloat)); - return ResizeOutputShape(OUTPUT, shape_data_size, shape_data); -} - -} // namespace ops -} // namespace micro diff --git a/micro/ops/reshape.h b/micro/ops/reshape.h index 0e907b2f6dad0563ff035a727ee44cec57503766..dfaf73cdad883fadc7c6526b199dc89261d885d2 100644 --- a/micro/ops/reshape.h +++ b/micro/ops/reshape.h @@ -15,17 +15,104 @@ #ifndef MICRO_OPS_RESHAPE_H_ #define MICRO_OPS_RESHAPE_H_ +#include "micro/base/utils.h" #include "micro/framework/operator.h" +#include "micro/framework/scratch_buffer.h" namespace micro { namespace ops { + +namespace internal { + +inline MaceStatus ValidShapeData(const int32_t *input_dims, + const uint32_t input_dim_size, + int32_t *shape_data, + const uint32_t shape_data_size) { + MACE_ASSERT(input_dims != NULL && shape_data != NULL); + int32_t unknown_idx = -1; + int32_t product = 1; + const int32_t input_size = base::GetShapeSize(input_dim_size, input_dims); + + for (uint32_t i = 0; i < shape_data_size; ++i) { + if (shape_data[i] == -1) { + MACE_ASSERT1(unknown_idx == -1, "Only one input size may be -1"); + unknown_idx = i; + shape_data[i] = 1; + } else { + MACE_ASSERT2(shape_data[i] >= 0, + "Shape must be non-negative: ", shape_data[i]); + if (shape_data[i] == 0) { + MACE_ASSERT1(i < input_dim_size, "dims:0 out of input dims' range."); + shape_data[i] = input_dims[i]; + } + product *= shape_data[i]; + } + } + 
+ if (unknown_idx != -1) { + MACE_ASSERT1(product != 0, + "Cannot infer shape if there is zero shape size."); + const int32_t missing = input_size / product; + MACE_ASSERT1(missing * product == input_size, + "Input size not match reshaped tensor size"); + shape_data[unknown_idx] = missing; + } + + return MACE_SUCCESS; +} + +} // namespace internal + + +template <typename T> +class ReshapeOp : public framework::Operator { + public: + typedef T value_type; + + MaceStatus OnInit() { + input_ = GetInputData<T>(INPUT); + input_dims_ = GetInputShapeDims(INPUT); + input_dim_size_ = GetInputShapeDimSize(INPUT); + + shape_ = GetInputData<int32_t>(SHAPE); + shape_dims_ = GetInputShapeDims(SHAPE); + shape_dim_size_ = GetInputShapeDimSize(SHAPE); + + output_ = GetOutputData<T>(OUTPUT); + return MACE_SUCCESS; + } + + MaceStatus Run() { + const int32_t input_data_size = + base::GetShapeSize(input_dim_size_, input_dims_); + const int32_t shape_data_size = + base::GetShapeSize(shape_dim_size_, shape_dims_); + + int32_t *shape_data = + ScratchBuffer(engine_config_).GetBuffer<int32_t>(shape_data_size); + base::memcpy(shape_data, shape_, shape_data_size * sizeof(int32_t)); + + MACE_RETURN_IF_ERROR(internal::ValidShapeData(input_dims_, input_dim_size_, + shape_data, shape_data_size)); + +#ifndef MACE_MICRO_NDEBUG + const int32_t output_data_size = base::accumulate_multi( + shape_data, 0, static_cast<uint32_t>(shape_data_size)); + if (input_data_size != output_data_size) { + LOG(FATAL) << "input_data_size(" << input_data_size + << ") != output_data_size(" << output_data_size + << "), please check the model."; + } +#endif + + // TODO(luxuhui): optimize this method by reusing buffer + base::memcpy(output_, input_, + input_data_size * sizeof(ReshapeOp::value_type)); + return ResizeOutputShape(OUTPUT, shape_data_size, shape_data); + } private: - const mifloat *input_; + const value_type *input_; const int32_t *input_dims_; uint32_t input_dim_size_; @@ -33,7 +120,7 @@ class ReshapeOp : public 
framework::Operator { const int32_t *shape_dims_; uint32_t shape_dim_size_; - mifloat *output_; + value_type *output_; MACE_OP_INPUT_TAGS(INPUT, SHAPE); MACE_OP_OUTPUT_TAGS(OUTPUT); diff --git a/micro/ops/softmax.cc b/micro/ops/softmax.cc index 26a91f9019c15cef32bcdf28f3bcc78fcf90e825..925ed8f9ee7c5d9d9c5bc2aa88e3dce6819eb2c9 100644 --- a/micro/ops/softmax.cc +++ b/micro/ops/softmax.cc @@ -36,7 +36,8 @@ MaceStatus SoftmaxOp::OnInit() { MaceStatus SoftmaxOp::Run() { MACE_RETURN_IF_ERROR(ResizeOutputShape(OUTPUT, input_dim_size_, input_dims_)); - if (NHWC == data_format_) { // NHWC + // TODO(ZhangZhimin): Walkarounds for AUTO data format + if (NHWC == data_format_ || AUTO == data_format_) { // NHWC return RunForNHWC(); } else { MACE_NOT_IMPLEMENTED; diff --git a/micro/pretrained_models/keras/README.md b/micro/pretrained_models/keras/README.md new file mode 100644 index 0000000000000000000000000000000000000000..145ff4fe5eec32b9283199348acde9c0d0596e25 --- /dev/null +++ b/micro/pretrained_models/keras/README.md @@ -0,0 +1,11 @@ +# Tensorflow Keras Models + +MACE Micro supports Keras models of Tensorflow 2.x + +## HAR + +The model is from . + +## MNIST + +The mnist_keras.py depends on tensorflow 2.x and tensorflow_model_optimization. 
You can run this script to generate "mnist.h5" and "mnist-int8.h5" models diff --git a/micro/pretrained_models/keras/har/har-int8.yml b/micro/pretrained_models/keras/har/har-int8.yml new file mode 100644 index 0000000000000000000000000000000000000000..4fe0806c0faf3e35ad65b06cc5c548f4b7bf64da --- /dev/null +++ b/micro/pretrained_models/keras/har/har-int8.yml @@ -0,0 +1,28 @@ +library_name: har +target_abis: [host] +model_graph_format: file +model_data_format: file +models: + har_int8: + platform: keras + model_file_path: https://cdn.cnbj1.fds.api.mi-img.com/mace/miai-models/micro/keras/har/har.h5 + model_sha256_checksum: ec0477b8e489541bb34377c9cabc42ee6cefa8bdf0a9f726e06be1b967ea1dcd + subgraphs: + - input_tensors: + - conv2d_1_input:0 + input_shapes: + - 1,90,3,1 + input_ranges: + - -5,15 + output_tensors: + - dense_3/Softmax:0 + output_shapes: + - 1,6 + runtime: cpu + limit_opencl_kernel_time: 0 + nnlib_graph_mode: 0 + obfuscate: 0 + winograd: 0 + quantize: 1 + quantize_schema: int8 + quantize_range_file: https://cdn.cnbj1.fds.api.mi-img.com/mace/miai-models/micro/keras/har/har.range diff --git a/micro/pretrained_models/keras/har/har.yml b/micro/pretrained_models/keras/har/har.yml new file mode 100644 index 0000000000000000000000000000000000000000..c817b1a8231a457771c03406cffb44a45af705a9 --- /dev/null +++ b/micro/pretrained_models/keras/har/har.yml @@ -0,0 +1,24 @@ +library_name: har +target_abis: [host] +model_graph_format: file +model_data_format: file +models: + har: + platform: keras + model_file_path: https://cdn.cnbj1.fds.api.mi-img.com/mace/miai-models/micro/keras/har/har.h5 + model_sha256_checksum: ec0477b8e489541bb34377c9cabc42ee6cefa8bdf0a9f726e06be1b967ea1dcd + subgraphs: + - input_tensors: + - conv2d_1_input:0 + input_shapes: + - 1,90,3,1 + output_tensors: + - dense_3/Softmax:0 + output_shapes: + - 1,6 + runtime: cpu + data_type: fp32_fp32 + limit_opencl_kernel_time: 0 + nnlib_graph_mode: 0 + obfuscate: 0 + winograd: 0 diff --git 
a/micro/pretrained_models/keras/mnist/mnist-int8.yml b/micro/pretrained_models/keras/mnist/mnist-int8.yml new file mode 100644 index 0000000000000000000000000000000000000000..5693182c8d5fc876c450d113d67dc8a4449d170a --- /dev/null +++ b/micro/pretrained_models/keras/mnist/mnist-int8.yml @@ -0,0 +1,27 @@ +library_name: mnist +target_abis: [host] +model_graph_format: file +model_data_format: file +models: + mnist_int8: + platform: keras + model_file_path: https://cdn.cnbj1.fds.api.mi-img.com/mace/miai-models/micro/keras/mnist/mnist_int8.h5 + model_sha256_checksum: f56ae3b94c114719683c3bc55351f871d371e874d3a4d3224cc5299717e8b7fc + subgraphs: + - input_tensors: + - conv2d_input:0 + input_shapes: + - 1,28,28,1 + input_ranges: + - 0,1 + output_tensors: + - quant_dense_1/Softmax:0 + output_shapes: + - 1,10 + runtime: cpu + limit_opencl_kernel_time: 0 + nnlib_graph_mode: 0 + obfuscate: 0 + winograd: 0 + quantize: 1 + quantize_schema: int8 diff --git a/micro/pretrained_models/keras/mnist/mnist.yml b/micro/pretrained_models/keras/mnist/mnist.yml new file mode 100644 index 0000000000000000000000000000000000000000..8331a240617886953dfbaf185eb9e82b3ccd614a --- /dev/null +++ b/micro/pretrained_models/keras/mnist/mnist.yml @@ -0,0 +1,25 @@ +library_name: mnist +target_abis: [host] +model_graph_format: file +model_data_format: file +models: + mnist: + platform: keras + model_file_path: https://cdn.cnbj1.fds.api.mi-img.com/mace/miai-models/micro/keras/mnist/mnist.h5 + model_sha256_checksum: 85f2ffe02e1b9dd2d6ad3826b91ac134fed15b838bb92a1010f67c19d55b1f65 + subgraphs: + - input_tensors: + - conv2d_input:0 + input_shapes: + - 1,28,28,1 + output_tensors: + - dense_1/Softmax:0 + output_shapes: + - 1,10 + runtime: cpu + data_type: fp32_fp32 + limit_opencl_kernel_time: 0 + nnlib_graph_mode: 0 + obfuscate: 0 + winograd: 0 + quantize: 0 diff --git a/micro/pretrained_models/keras/mnist/mnist_keras.py b/micro/pretrained_models/keras/mnist/mnist_keras.py new file mode 100644 index 
0000000000000000000000000000000000000000..531bb2ecf1670d15fda09fc8691223d340cb0232 --- /dev/null +++ b/micro/pretrained_models/keras/mnist/mnist_keras.py @@ -0,0 +1,83 @@ +# Refer to https://www.tensorflow.org/model_optimization/guide + +import tensorflow.compat.v2 as tf +import tensorflow_datasets as tfds +import tensorflow_model_optimization as tfmot + + +def normalize_img(image, label): + """Normalizes images: `uint8` -> `float32`.""" + return tf.cast(image, tf.float32) / 255.0, label + + +tfds.disable_progress_bar() +tf.enable_v2_behavior() + +(ds_train, ds_test), ds_info = tfds.load( + "mnist", + split=["train", "test"], + shuffle_files=True, + as_supervised=True, + with_info=True, +) + +ds_train = ds_train.map( + normalize_img, num_parallel_calls=tf.data.experimental.AUTOTUNE +) +ds_train = ds_train.cache() +ds_train = ds_train.shuffle(ds_info.splits["train"].num_examples) +ds_train = ds_train.batch(128) +ds_train = ds_train.prefetch(tf.data.experimental.AUTOTUNE) + +ds_test = ds_test.map( + normalize_img, num_parallel_calls=tf.data.experimental.AUTOTUNE +) +ds_test = ds_test.batch(128) +ds_test = ds_test.cache() +ds_test = ds_test.prefetch(tf.data.experimental.AUTOTUNE) + +model = tf.keras.models.Sequential( + [ + tf.keras.layers.Conv2D( + filters=32, kernel_size=3, activation="relu", padding="same" + ), + tf.keras.layers.DepthwiseConv2D( + kernel_size=3, activation="relu", padding="same" + ), + tf.keras.layers.MaxPool2D(pool_size=2), + tf.keras.layers.Flatten(), + tf.keras.layers.Dense(128, activation="relu"), + tf.keras.layers.Dense(10, activation="softmax"), + ] +) +model.compile( + loss="sparse_categorical_crossentropy", + optimizer=tf.keras.optimizers.Adam(0.001), + metrics=["accuracy"], +) + +model.fit( + ds_train, + epochs=6, + validation_data=ds_test, +) + +model.save("mnist.h5") + +quantize_model = tfmot.quantization.keras.quantize_model + +quantization_aware_model = quantize_model(model) + +quantization_aware_model.compile( + optimizer="adam", + 
loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True), + metrics=["accuracy"], +) + +quantization_aware_model.fit( + ds_train, + epochs=6, + validation_data=ds_test, +) + +quantization_aware_model.save("mnist-int8.h5") diff --git a/micro/pretrained_models/tensorflow/README.md b/micro/pretrained_models/tensorflow/README.md new file mode 100644 index 0000000000000000000000000000000000000000..e476f09e433af538e9923b9635572fcf36db8fee --- /dev/null +++ b/micro/pretrained_models/tensorflow/README.md @@ -0,0 +1,5 @@ +# Tensorflow frozen models + +## KWS + +The model is from . diff --git a/micro/pretrained_models/tensorflow/kws/kws-tc_resnet8-bf16.yml b/micro/pretrained_models/tensorflow/kws/kws-tc_resnet8-bf16.yml new file mode 100644 index 0000000000000000000000000000000000000000..9475c63d47af017dccf36a2e28f069a913f88474 --- /dev/null +++ b/micro/pretrained_models/tensorflow/kws/kws-tc_resnet8-bf16.yml @@ -0,0 +1,25 @@ +library_name: kws-tc_resnet8 +target_abis: [host] +model_graph_format: file +model_data_format: file +models: + kws_tc_resnet8_bf16: + platform: tensorflow + model_file_path: https://cdn.cnbj1.fds.api.mi-img.com/mace/miai-models/micro/tensorflow/kws/kws-tc_resnet8.pb + model_sha256_checksum: c552cf79cb64d3c755ae7d867c1c78b13f55f7589d46def1f70ce657c0db0d79 + subgraphs: + - input_tensors: + - input + input_shapes: + - 1,98,40,1 + output_tensors: + - output/softmax + output_shapes: + - 1,12 + runtime: cpu + data_type: bf16_fp32 + limit_opencl_kernel_time: 0 + nnlib_graph_mode: 0 + obfuscate: 0 + winograd: 0 + quantize: 0 diff --git a/micro/pretrained_models/tensorflow/kws/kws-tc_resnet8.yml b/micro/pretrained_models/tensorflow/kws/kws-tc_resnet8.yml new file mode 100644 index 0000000000000000000000000000000000000000..bdf38ffb5a4764425742ed6973d495d87e53efb6 --- /dev/null +++ b/micro/pretrained_models/tensorflow/kws/kws-tc_resnet8.yml @@ -0,0 +1,25 @@ +library_name: kws-tc_resnet8 +target_abis: [host] +model_graph_format: file 
+model_data_format: file +models: + kws_tc_resnet8: + platform: tensorflow + model_file_path: https://cdn.cnbj1.fds.api.mi-img.com/mace/miai-models/micro/tensorflow/kws/kws-tc_resnet8.pb + model_sha256_checksum: c552cf79cb64d3c755ae7d867c1c78b13f55f7589d46def1f70ce657c0db0d79 + subgraphs: + - input_tensors: + - input + input_shapes: + - 1,98,40,1 + output_tensors: + - output/softmax + output_shapes: + - 1,12 + runtime: cpu + data_type: fp32_fp32 + limit_opencl_kernel_time: 0 + nnlib_graph_mode: 0 + obfuscate: 0 + winograd: 0 + quantize: 0 diff --git a/micro/test/CMakeLists.txt b/micro/test/CMakeLists.txt index 27d7f069bd132ef7c7ffe0bd53d704d1cbe67e55..3552a56650dd86636da9a137e37476cc1b70892d 100644 --- a/micro/test/CMakeLists.txt +++ b/micro/test/CMakeLists.txt @@ -1,7 +1,6 @@ add_subdirectory(ccutils) if(NOT HEXAGON) - include(${PROJECT_SOURCE_DIR}/third_party/googletest/googletest.cmake) add_subdirectory(ccunit) endif() diff --git a/micro/test/ccunit/CMakeLists.txt b/micro/test/ccunit/CMakeLists.txt index f760593893b629e69aea968e1a292da32aab74cb..46a5eac21dfbf7b19cfca2dd7ad96abc5ca1d138 100644 --- a/micro/test/ccunit/CMakeLists.txt +++ b/micro/test/ccunit/CMakeLists.txt @@ -1,5 +1,3 @@ - - add_executable(micro_ops_test micro/ops/stack_test.cc micro/ops/reshape_test.cc @@ -20,25 +18,23 @@ add_executable(micro_ops_test micro/ops/softmax_test.cc micro/ops/bias_add_test.cc micro/ops/expand_dims_test.cc + micro/ops/concat_test.cc ) + +if(MACE_MICRO_ENABLE_CMSIS) + target_link_libraries(micro_ops_test + PRIVATE micro_ops_nhwc_cmsis_nn + ) + target_compile_options(micro_ops_test + PRIVATE "-DMACE_MICRO_ENABLE_CMSIS=ON" + ) +endif() + target_link_libraries(micro_ops_test PRIVATE micro_base - PRIVATE micro_ops_for_test + PRIVATE micro_ops + PRIVATE micro_framework_for_optest PRIVATE micro_ccutils PRIVATE gtest PRIVATE gtest_main ) - -if(MICRO_MODEL_NAME) - add_executable(micro_cc_test - micro/model/net_def_test.cc - micro/framework/graph_test.cc - 
micro/codegen/engine_test.cc - ) - target_link_libraries(micro_cc_test - micro_engine - gtest - gtest_main - ) - target_compile_definitions(micro_cc_test PRIVATE "-DMICRO_MODEL_NAME=${MICRO_MODEL_NAME}") -endif() diff --git a/micro/test/ccunit/micro/codegen/engine_test.cc b/micro/test/ccunit/micro/codegen/engine_test.cc index 60f2841f7426773f79027cd9d0f9307d80e57168..f21c3eb6812369a0888604ab02709aa85b24e15e 100644 --- a/micro/test/ccunit/micro/codegen/engine_test.cc +++ b/micro/test/ccunit/micro/codegen/engine_test.cc @@ -33,8 +33,9 @@ class EngineTest : public ::testing::Test { void OutputAllInfo() { MaceMicroEngine *micro_engine = NULL; - MACE_ASSERT(MICRO_MODEL_NAME::GetMicroEngineSingleton(µ_engine) - == MACE_SUCCESS && micro_engine != NULL); + MACE_ASSERT(MICRO_MODEL_NAME::GetMicroEngineSingleton(µ_engine) == + MACE_SUCCESS && + micro_engine != NULL); float input_buffer[1 * 1 * 128 * 9] = {0}; int32_t input_shape[] = {1, 1, 128, 9}; diff --git a/micro/test/ccunit/micro/ops/eltwise_test.cc b/micro/test/ccunit/micro/ops/eltwise_test.cc index 4d0fe7914f3edb2a796bbeabbe549e8be2f5e5a0..49cf75236447c19cf55d4ff8895d06efca3ef29d 100644 --- a/micro/test/ccunit/micro/ops/eltwise_test.cc +++ b/micro/test/ccunit/micro/ops/eltwise_test.cc @@ -14,8 +14,10 @@ #include "gtest/gtest.h" #include "micro/ops/eltwise.h" +#include "micro/ops/nhwc/cmsis_nn/arm_eltwise_int8.h" #include "micro/ops/gtest_utils.h" #include "micro/ops/substitute_op.h" +#include "micro/ops/test_quantize_utils.h" #include "micro/ops/test_utils.h" namespace micro { @@ -494,6 +496,91 @@ TEST_F(EltwiseOpTest, TensorGeneralBroadcastCPU) { dims1121, output_9, expect_9, dims1123); } +#ifdef MACE_MICRO_ENABLE_CMSIS + +namespace { + +void TestEltwiseQuantInt8(const int32_t *input_dims, + const uint32_t input_dim_size, + eltwise::Type type) { + int32_t shape_size = base::GetShapeSize(input_dim_size, input_dims); + float *input0 = new float[shape_size]; + float *input1 = new float[shape_size]; + 
FillNormalRandomInput(input0, shape_size); + FillNormalRandomInput(input1, shape_size); + float *expect_output = new float[shape_size]; + const uint32_t MAX_OUTPUT_NUM = 10; + int32_t *expect_output_dims = new int32_t[MAX_OUTPUT_NUM]; + + EltwiseOp eltwsie_op; + framework::SubstituteOp substitude_op; + substitude_op.AddInput(input0, input_dims, input_dim_size) + .AddInput(input1, input_dims, input_dim_size) + .AddArg("type", static_cast(type)) + .AddOutput(expect_output, expect_output_dims, MAX_OUTPUT_NUM); + eltwsie_op.Init( + NULL, reinterpret_cast(&substitude_op), NULL); + eltwsie_op.Run(); + uint32_t expect_output_dim_size = substitude_op.GetOutputShapeDimSize(0); + + int8_t *input0_int8 = new int8_t[shape_size]; + int8_t *input1_int8 = new int8_t[shape_size]; + int8_t *output_int8 = new int8_t[shape_size]; + float *output = new float[shape_size]; + int32_t *output_dims = new int32_t[MAX_OUTPUT_NUM]; + QuantizeInfo input_quant_info0; + QuantizeInfo input_quant_info1; + AutoQuantizeInt8(input0, shape_size, input0_int8, &input_quant_info0.scale, + &input_quant_info0.zero); + AutoQuantizeInt8(input1, shape_size, input1_int8, &input_quant_info1.scale, + &input_quant_info1.zero); + QuantizeInfo output_quant_info = {0.0f, 0}; + AdjustRangeInt8(expect_output, shape_size, &output_quant_info.scale, + &output_quant_info.zero); + + ArmEltwiseInt8Op eltwsie_op_int8; + framework::SubstituteOp substitude_op_int8; + substitude_op_int8 + .AddInput(input0_int8, input_dims, input_dim_size, input_quant_info0) + .AddInput(input1_int8, input_dims, input_dim_size, input_quant_info1) + .AddArg("type", static_cast(type)) + .AddOutput(output_int8, output_dims, MAX_OUTPUT_NUM, output_quant_info); + eltwsie_op_int8.Init( + NULL, reinterpret_cast(&substitude_op_int8), + NULL); + eltwsie_op_int8.Run(); + uint32_t output_dim_size = substitude_op_int8.GetOutputShapeDimSize(0); + + Dequantize(output_int8, shape_size, output_quant_info.scale, + output_quant_info.zero, output); + + 
ExpectTensorSimilar(expect_output, expect_output_dims, expect_output_dim_size, + output, output_dims, output_dim_size, 0.1); + + delete[] input0; + delete[] input1; + delete[] expect_output; + delete[] expect_output_dims; + delete[] input0_int8; + delete[] input1_int8; + delete[] output_int8; + delete[] output; + delete[] output_dims; +} + +} // namespace + +TEST_F(EltwiseOpTest, QuantInt8) { + const int32_t input_dims0[4] = {1, 32, 32, 16}; + TestEltwiseQuantInt8(input_dims0, 4, eltwise::SUM); + const int32_t input_dims1[4] = {2, 31, 31, 17}; + TestEltwiseQuantInt8(input_dims1, 4, eltwise::SUM); + const int32_t input_dims2[2] = {1, 31}; + TestEltwiseQuantInt8(input_dims2, 2, eltwise::SUM); +} + +#endif + } // namespace test } // namespace ops } // namespace micro diff --git a/micro/test/ccunit/micro/ops/matmul_test.cc b/micro/test/ccunit/micro/ops/matmul_test.cc index 4661352a97fa0c96b9403cc9d56bdc34e17a6282..86a0a0592d7803c5f20bead5fe279332aca55c56 100644 --- a/micro/test/ccunit/micro/ops/matmul_test.cc +++ b/micro/test/ccunit/micro/ops/matmul_test.cc @@ -15,8 +15,10 @@ #include "gtest/gtest.h" #include "micro/ops/gtest_utils.h" #include "micro/ops/matmul.h" +#include "micro/ops/nhwc/cmsis_nn/arm_mat_mul_int8.h" #include "micro/ops/substitute_op.h" #include "micro/ops/test_utils.h" +#include "micro/ops/test_quantize_utils.h" namespace micro { namespace ops { @@ -94,6 +96,94 @@ TEST_F(MatMulOpTest, SimpleCPU) { Simple2(); } +#ifdef MACE_MICRO_ENABLE_CMSIS + +namespace { + +void TestMatMulQuantInt8(int32_t lhs_rows, int32_t lhs_cols, int32_t rhs_cols) { + uint32_t input0_size = lhs_rows * lhs_cols; + uint32_t input1_size = lhs_cols * rhs_cols; + uint32_t output_size = lhs_rows * rhs_cols; + float *input0 = new float[input0_size]; + float *input1 = new float[input1_size]; + FillNormalRandomInput(input0, input0_size); + FillNormalRandomInput(input1, input1_size); + float *expect_output = new float[output_size]; + const uint32_t MAX_OUTPUT_NUM = 10; + int32_t 
*expect_output_dims = new int32_t[MAX_OUTPUT_NUM]; + + const int32_t input0_dims[2] = {lhs_rows, lhs_cols}; + // mat0 * tranpose(mat1) + const int32_t input1_dims[2] = {rhs_cols, lhs_cols}; + + MatMulOp matmul_op; + framework::SubstituteOp substitude_op; + substitude_op.AddInput(input0, input0_dims, 2) + .AddInput(input1, input1_dims, 2) + .AddArg("transpose_a", false) + .AddArg("transpose_b", true) + .AddOutput(expect_output, expect_output_dims, MAX_OUTPUT_NUM); + matmul_op.Init(NULL, reinterpret_cast(&substitude_op), + NULL); + matmul_op.Run(); + uint32_t expect_output_dim_size = substitude_op.GetOutputShapeDimSize(0); + + int8_t *input0_int8 = new int8_t[input0_size]; + int8_t *input1_int8 = new int8_t[input1_size]; + int8_t *output_int8 = new int8_t[output_size]; + float *output = new float[output_size]; + int32_t *output_dims = new int32_t[MAX_OUTPUT_NUM]; + QuantizeInfo input_quant_info0; + QuantizeInfo input_quant_info1; + AutoQuantizeInt8(input0, input0_size, input0_int8, &input_quant_info0.scale, + &input_quant_info0.zero); + AutoQuantizeInt8Symmetric(input1, input1_size, input1_int8, + &input_quant_info1.scale); + QuantizeInfo output_quant_info = {0.0f, 0}; + AdjustRangeInt8(expect_output, output_size, &output_quant_info.scale, + &output_quant_info.zero); + + ArmMatMulInt8Op matmul_op_int8; + framework::SubstituteOp substitude_op_int8; + substitude_op_int8.AddInput(input0_int8, input0_dims, 2, input_quant_info0) + .AddInput(input1_int8, input1_dims, 2, input_quant_info1) + .AddArg("transpose_a", false) + .AddArg("transpose_b", true) + .AddOutput(output_int8, output_dims, MAX_OUTPUT_NUM, output_quant_info); + matmul_op_int8.Init( + NULL, reinterpret_cast(&substitude_op_int8), + NULL); + matmul_op_int8.Run(); + uint32_t output_dim_size = substitude_op_int8.GetOutputShapeDimSize(0); + + Dequantize(output_int8, output_size, output_quant_info.scale, + output_quant_info.zero, output); + + ExpectTensorSimilar(expect_output, expect_output_dims, 
expect_output_dim_size, + output, output_dims, output_dim_size, 0.1); + + delete[] input0; + delete[] input1; + delete[] expect_output; + delete[] expect_output_dims; + delete[] input0_int8; + delete[] input1_int8; + delete[] output_int8; + delete[] output; + delete[] output_dims; +} + +} // namespace + +TEST_F(MatMulOpTest, QuantInt8) { + TestMatMulQuantInt8(1, 8, 4); + TestMatMulQuantInt8(1, 1001, 63); + // WARNING(ZhangZhimin): Batch inputs is unsupported + // TestMatMulQuantInt8(3, 100, 100); +} + +#endif + } // namespace test } // namespace ops } // namespace micro diff --git a/micro/test/ccunit/micro/ops/nhwc/conv_2d_test.cc b/micro/test/ccunit/micro/ops/nhwc/conv_2d_test.cc index 067420dc0b81cb9649175597600a231bc3a39066..e26b8cae28bf30248f3462e71c78e052ba5c875f 100644 --- a/micro/test/ccunit/micro/ops/nhwc/conv_2d_test.cc +++ b/micro/test/ccunit/micro/ops/nhwc/conv_2d_test.cc @@ -15,8 +15,10 @@ #include "gtest/gtest.h" #include "micro/ops/gtest_utils.h" #include "micro/ops/nhwc/conv_2d_ref.h" +#include "micro/ops/nhwc/cmsis_nn/arm_conv_2d_int8.h" #include "micro/ops/substitute_op.h" #include "micro/ops/test_utils.h" +#include "micro/ops/test_quantize_utils.h" namespace micro { namespace ops { @@ -315,6 +317,141 @@ TEST_F(Conv2dOpTest, CPUConv1x1) { TestConv1x1(); } +#ifdef MACE_MICRO_ENABLE_CMSIS + +namespace { + +void TestConv2dQuantInt8(const int32_t batch, + const int32_t out_channels, + const int32_t in_channels, + const int32_t in_height, + const int32_t in_width, + const int32_t kernel_height, + const int32_t kernel_width, + enum Padding padding_type, + const int32_t stride_height, + const int32_t stride_width, + const int32_t dilation_height, + const int32_t dilation_width) { + uint32_t input0_size = batch * in_height * in_width * in_channels; + uint32_t input1_size = + out_channels * kernel_height * kernel_width * in_channels; + uint32_t max_output_size = batch * out_channels * + (in_height + kernel_height * dilation_height) * + (in_width + 
kernel_width * dilation_width); + int32_t bias_size = out_channels; + float *input0 = new float[input0_size]; + float *input1 = new float[input1_size]; + float *bias = new float[bias_size]; + FillNormalRandomInput(input0, input0_size); + FillNormalRandomInput(input1, input1_size); + FillNormalRandomInput(bias, bias_size); + float *expect_output = new float[max_output_size]; + const uint32_t MAX_OUTPUT_NUM = 10; + int32_t *expect_output_dims = new int32_t[MAX_OUTPUT_NUM]; + + const int32_t input0_dims[4] = {batch, in_height, in_width, in_channels}; + const int32_t input1_dims[4] = {out_channels, kernel_height, kernel_width, + in_channels}; + const int32_t bias_dims[1] = {bias_size}; + + const int32_t strides[2] = {stride_height, stride_width}; + const int32_t dilations[2] = {dilation_height, dilation_width}; + + Conv2dRefOp conv2d_op; + framework::SubstituteOp substitude_op; + substitude_op.AddInput(input0, input0_dims, 4) + .AddInput(input1, input1_dims, 4) + .AddInput(bias, bias_dims, 1) + .AddArg("padding", padding_type) + .AddRepeatArg("strides", strides, 2) + .AddRepeatArg("dilations", dilations, 2) + .AddOutput(expect_output, expect_output_dims, MAX_OUTPUT_NUM); + conv2d_op.Init(NULL, reinterpret_cast(&substitude_op), + NULL); + conv2d_op.Run(); + uint32_t expect_output_dim_size = substitude_op.GetOutputShapeDimSize(0); + uint32_t exepct_output_size = + base::GetShapeSize(expect_output_dim_size, expect_output_dims); + + int8_t *input0_int8 = new int8_t[input0_size]; + int8_t *input1_int8 = new int8_t[input1_size]; + int32_t *bias_int32 = new int32_t[bias_size]; + int8_t *output_int8 = new int8_t[max_output_size]; + float *output = new float[max_output_size]; + int32_t *output_dims = new int32_t[MAX_OUTPUT_NUM]; + QuantizeInfo input_quant_info0; + QuantizeInfo input_quant_info1; + AutoQuantizeInt8(input0, input0_size, input0_int8, &input_quant_info0.scale, + &input_quant_info0.zero); + AutoQuantizeInt8Symmetric(input1, input1_size, input1_int8, + 
&input_quant_info1.scale); + QuantizeInfo output_quant_info = {0.0f, 0}; + AdjustRangeInt8(expect_output, exepct_output_size, &output_quant_info.scale, + &output_quant_info.zero); + float bias_scale = input_quant_info0.scale * input_quant_info1.scale; + QuantizeWithScaleAndZeropoint(bias, bias_size, bias_scale, 0, bias_int32); + + ArmConv2dInt8Op conv2d_op_int8; + framework::SubstituteOp substitude_op_int8; + substitude_op_int8.AddInput(input0_int8, input0_dims, 4, input_quant_info0) + .AddInput(input1_int8, input1_dims, 4, input_quant_info1) + .AddInput(bias_int32, bias_dims, 1) + .AddArg("padding", padding_type) + .AddRepeatArg("strides", strides, 2) + .AddRepeatArg("dilations", dilations, 2) + .AddOutput(output_int8, output_dims, MAX_OUTPUT_NUM, output_quant_info); + conv2d_op_int8.Init( + NULL, reinterpret_cast(&substitude_op_int8), + NULL); + conv2d_op_int8.Run(); + uint32_t output_dim_size = substitude_op_int8.GetOutputShapeDimSize(0); + + uint32_t output_size = base::GetShapeSize(output_dim_size, output_dims); + Dequantize(output_int8, output_size, output_quant_info.scale, + output_quant_info.zero, output); + + ExpectTensorSimilar(expect_output, expect_output_dims, expect_output_dim_size, + output, output_dims, output_dim_size, 0.1); + + delete[] input0; + delete[] input1; + delete[] bias; + delete[] expect_output; + delete[] expect_output_dims; + delete[] input0_int8; + delete[] input1_int8; + delete[] bias_int32; + delete[] output_int8; + delete[] output; + delete[] output_dims; +} + +} // namespace + +TEST_F(Conv2dOpTest, QuantInt8) { + TestConv2dQuantInt8(1, 128, 64, 32, 32, 3, 3, VALID, 1, 1, 1, 1); + TestConv2dQuantInt8(1, 128, 64, 32, 32, 3, 3, SAME, 1, 1, 1, 1); + TestConv2dQuantInt8(1, 128, 64, 32, 32, 3, 3, FULL, 1, 1, 1, 1); + TestConv2dQuantInt8(1, 128, 64, 32, 54, 3, 3, FULL, 1, 1, 1, 1); + TestConv2dQuantInt8(1, 128, 512, 14, 13, 3, 3, SAME, 1, 1, 1, 1); + TestConv2dQuantInt8(1, 128, 64, 14, 13, 5, 5, SAME, 2, 2, 1, 1); + TestConv2dQuantInt8(1, 
128, 257, 28, 28, 3, 3, SAME, 1, 1, 1, 1); + TestConv2dQuantInt8(1, 1, 128, 56, 56, 3, 3, SAME, 2, 2, 1, 1); + TestConv2dQuantInt8(1, 2, 1, 1000, 1000, 4, 3, FULL, 2, 1, 1, 1); + TestConv2dQuantInt8(1, 128, 1, 1000, 1000, 4, 3, FULL, 2, 3, 1, 1); + + // dilations is unsupported + // TestConv2dQuantInt8(1, 128, 64, 32, 32, 3, 3, SAME, 1, 1, 2, 2); + // TestConv2dQuantInt8(1, 128, 64, 32, 32, 3, 3, SAME, 1, 1, 2, 1); + + // batch must be 1 + // TestConv2dQuantInt8(2, 128, 64, 32, 32, 3, 3, SAME, 1, 1, 1, 1); + // TestConv2dQuantInt8(4, 128, 64, 32, 32, 3, 3, SAME, 1, 1, 1, 1); +} + +#endif + } // namespace test } // namespace ops } // namespace micro diff --git a/micro/test/ccunit/micro/ops/nhwc/depthwise_conv_2d_test.cc b/micro/test/ccunit/micro/ops/nhwc/depthwise_conv_2d_test.cc index 3583f4c4f128a7aee1f5db6f91aacb6f5b4a361c..7f62ffef6b3bc879da741d89a5b780551464540c 100644 --- a/micro/test/ccunit/micro/ops/nhwc/depthwise_conv_2d_test.cc +++ b/micro/test/ccunit/micro/ops/nhwc/depthwise_conv_2d_test.cc @@ -15,8 +15,10 @@ #include "gtest/gtest.h" #include "micro/ops/gtest_utils.h" #include "micro/ops/nhwc/depthwise_conv_2d_ref.h" +#include "micro/ops/nhwc/cmsis_nn/arm_depthwise_conv_2d_int8.h" #include "micro/ops/substitute_op.h" #include "micro/ops/test_utils.h" +#include "micro/ops/test_quantize_utils.h" namespace micro { namespace ops { @@ -107,6 +109,146 @@ TEST_F(DepthwiseConv2dOpTest, MuiltiC2CPU) { MultiC2ValidTest(); } +#ifdef MACE_MICRO_ENABLE_CMSIS + +namespace { + +void TestDepthwiseConv2dQuantInt8(const int32_t batch, + const int32_t multiplier, + const int32_t in_channels, + const int32_t in_height, + const int32_t in_width, + const int32_t kernel_height, + const int32_t kernel_width, + enum Padding padding_type, + const int32_t stride_height, + const int32_t stride_width, + const int32_t dilation_height, + const int32_t dilation_width) { + uint32_t input0_size = batch * in_height * in_width * in_channels; + uint32_t input1_size = + multiplier * 
kernel_height * kernel_width * in_channels; + uint32_t max_output_size = batch * multiplier * in_channels * + (in_height + kernel_height * dilation_height) * + (in_width + kernel_width * dilation_width); + int32_t bias_size = multiplier * in_channels; + float *input0 = new float[input0_size]; + float *input1 = new float[input1_size]; + float *bias = new float[bias_size]; + FillNormalRandomInput(input0, input0_size); + FillNormalRandomInput(input1, input1_size); + FillNormalRandomInput(bias, bias_size); + float *expect_output = new float[max_output_size]; + const uint32_t MAX_OUTPUT_NUM = 10; + int32_t *expect_output_dims = new int32_t[MAX_OUTPUT_NUM]; + + const int32_t input0_dims[4] = {batch, in_height, in_width, in_channels}; + const int32_t input1_dims[4] = {multiplier, kernel_height, kernel_width, + in_channels}; + const int32_t bias_dims[1] = {bias_size}; + + const int32_t strides[2] = {stride_height, stride_width}; + const int32_t dilations[2] = {dilation_height, dilation_width}; + + DepthwiseConv2dRefOp depthwise_conv2d_op; + framework::SubstituteOp substitude_op; + substitude_op.AddInput(input0, input0_dims, 4) + .AddInput(input1, input1_dims, 4) + .AddInput(bias, bias_dims, 1) + .AddArg("padding", padding_type) + .AddRepeatArg("strides", strides, 2) + .AddRepeatArg("dilations", dilations, 2) + .AddOutput(expect_output, expect_output_dims, MAX_OUTPUT_NUM); + depthwise_conv2d_op.Init( + NULL, reinterpret_cast(&substitude_op), NULL); + depthwise_conv2d_op.Run(); + uint32_t expect_output_dim_size = substitude_op.GetOutputShapeDimSize(0); + uint32_t exepct_output_size = + base::GetShapeSize(expect_output_dim_size, expect_output_dims); + + int8_t *input0_int8 = new int8_t[input0_size]; + int8_t *input1_int8 = new int8_t[input1_size]; + int32_t *bias_int32 = new int32_t[bias_size]; + int8_t *output_int8 = new int8_t[max_output_size]; + float *output = new float[max_output_size]; + int32_t *output_dims = new int32_t[MAX_OUTPUT_NUM]; + QuantizeInfo 
input_quant_info0; + QuantizeInfo input_quant_info1; + AutoQuantizeInt8(input0, input0_size, input0_int8, &input_quant_info0.scale, + &input_quant_info0.zero); + AutoQuantizeInt8Symmetric(input1, input1_size, input1_int8, + &input_quant_info1.scale); + QuantizeInfo output_quant_info = {0.0f, 0}; + AdjustRangeInt8(expect_output, exepct_output_size, &output_quant_info.scale, + &output_quant_info.zero); + float bias_scale = input_quant_info0.scale * input_quant_info1.scale; + QuantizeWithScaleAndZeropoint(bias, bias_size, bias_scale, 0, bias_int32); + + ArmDepthwiseConv2dInt8Op depthwise_conv2d_op_int8; + framework::SubstituteOp substitude_op_int8; + substitude_op_int8.AddInput(input0_int8, input0_dims, 4, input_quant_info0) + .AddInput(input1_int8, input1_dims, 4, input_quant_info1) + .AddInput(bias_int32, bias_dims, 1) + .AddArg("padding", padding_type) + .AddRepeatArg("strides", strides, 2) + .AddRepeatArg("dilations", dilations, 2) + .AddOutput(output_int8, output_dims, MAX_OUTPUT_NUM, output_quant_info); + depthwise_conv2d_op_int8.Init( + NULL, reinterpret_cast(&substitude_op_int8), + NULL); + depthwise_conv2d_op_int8.Run(); + uint32_t output_dim_size = substitude_op_int8.GetOutputShapeDimSize(0); + + uint32_t output_size = base::GetShapeSize(output_dim_size, output_dims); + Dequantize(output_int8, output_size, output_quant_info.scale, + output_quant_info.zero, output); + + ExpectTensorSimilar(expect_output, expect_output_dims, expect_output_dim_size, + output, output_dims, output_dim_size, 0.1); + + delete[] input0; + delete[] input1; + delete[] bias; + delete[] expect_output; + delete[] expect_output_dims; + delete[] input0_int8; + delete[] input1_int8; + delete[] bias_int32; + delete[] output_int8; + delete[] output; + delete[] output_dims; +} + +} // namespace + +TEST_F(DepthwiseConv2dOpTest, QuantInt8) { + TestDepthwiseConv2dQuantInt8(1, 1, 1024, 7, 7, 3, 3, VALID, 1, 1, 1, 1); + TestDepthwiseConv2dQuantInt8(1, 1, 1024, 7, 7, 3, 3, SAME, 1, 1, 1, 1); + 
TestDepthwiseConv2dQuantInt8(1, 1, 1024, 7, 7, 3, 3, FULL, 1, 1, 1, 1); + + TestDepthwiseConv2dQuantInt8(1, 1, 512, 14, 13, 3, 3, SAME, 1, 1, 1, 1); + TestDepthwiseConv2dQuantInt8(1, 1, 512, 14, 13, 5, 5, SAME, 2, 2, 1, 1); + TestDepthwiseConv2dQuantInt8(1, 1, 256, 28, 28, 3, 3, SAME, 1, 1, 1, 1); + TestDepthwiseConv2dQuantInt8(1, 1, 128, 56, 56, 3, 3, SAME, 2, 2, 1, 1); + + TestDepthwiseConv2dQuantInt8(1, 1, 3, 1000, 1000, 4, 3, FULL, 2, 1, 1, 1); + TestDepthwiseConv2dQuantInt8(1, 1, 3, 1000, 1000, 4, 3, FULL, 2, 3, 1, 1); + + // dilations is unsupported + // TestDepthwiseConv2dQuantInt8(1, 1, 3, 1000, 1000, 3, 3, VALID, 1, 1, 2, 2); + // TestDepthwiseConv2dQuantInt8(1, 1, 3, 1000, 1000, 4, 3, FULL, 1, 1, 3, 5); + // TestDepthwiseConv2dQuantInt8(1, 1, 3, 1000, 1000, 4, 3, FULL, 1, 3, 3, 1); + + // batch must be 1 + // TestDepthwiseConv2dQuantInt8(3, 1, 128, 56, 56, 3, 3, SAME, 2, 2, 1, 1); + + // multiplier must be 1 + // TestDepthwiseConv2dQuantInt8(1, 2, 1024, 7, 7, 3, 3, SAME, 1, 1, 1, 1); + // TestDepthwiseConv2dQuantInt8(1, 2, 1024, 7, 7, 3, 3, SAME, 2, 2, 1, 1); +} + +#endif + } // namespace test } // namespace ops } // namespace micro diff --git a/micro/test/ccunit/micro/ops/nhwc/pooling_test.cc b/micro/test/ccunit/micro/ops/nhwc/pooling_test.cc index d7f7db329a8d98eafe7d65f383f0c72c2f6a3044..74e3f15e8e3fe461b86c6b0228ff1156e3402a80 100644 --- a/micro/test/ccunit/micro/ops/nhwc/pooling_test.cc +++ b/micro/test/ccunit/micro/ops/nhwc/pooling_test.cc @@ -16,7 +16,9 @@ #include "micro/ops/gtest_utils.h" #include "micro/ops/nhwc/pooling_ref.h" #include "micro/ops/nhwc/pooling_s4.h" +#include "micro/ops/nhwc/cmsis_nn/arm_pooling_int8.h" #include "micro/ops/substitute_op.h" +#include "micro/ops/test_quantize_utils.h" #include "micro/ops/test_utils.h" namespace micro { @@ -203,6 +205,134 @@ TEST_F(PoolingOpTest, TestPoolingOpSameAvg) { TestPoolingOpSameAvg(); } +#ifdef MACE_MICRO_ENABLE_CMSIS + +namespace { + +void TestPoolingQuantInt8(const int32_t *input_dims, + const uint32_t 
input_dim_size, + const int32_t *kernels, + const int32_t *strides, + Padding padding, + PoolingType pooling_type) { + int32_t input_size = base::GetShapeSize(input_dim_size, input_dims); + int32_t max_output_size = input_dims[0] * input_dims[3] * + (input_dims[1] + kernels[0]) * + (input_dims[2] + kernels[1]); + + float *input = new float[input_size]; + FillNormalRandomInput(input, input_size); + float *expect_output = new float[max_output_size]; + const uint32_t MAX_OUTPUT_DIM_SIZE = 100; + int32_t *expect_output_dims = new int32_t[MAX_OUTPUT_DIM_SIZE]; + + const int32_t dilations[2] = {1, 1}; + + PoolingRefOp pooling_op; + framework::SubstituteOp substitude_op; + substitude_op.AddInput(input, input_dims, input_dim_size) + .AddRepeatArg("strides", strides, 2) + .AddRepeatArg("kernels", kernels, 2) + .AddRepeatArg("dilations", dilations, 2) + .AddArg("padding", padding) + .AddArg("pooling_type", pooling_type) + .AddOutput(expect_output, expect_output_dims, MAX_OUTPUT_DIM_SIZE); + pooling_op.Init( + NULL, reinterpret_cast(&substitude_op), NULL); + pooling_op.Run(); + uint32_t expect_output_dim_size = substitude_op.GetOutputShapeDimSize(0); + + int8_t *input_int8 = new int8_t[input_size]; + int8_t *output_int8 = new int8_t[max_output_size]; + float *output = new float[max_output_size]; + int32_t *output_dims = new int32_t[MAX_OUTPUT_DIM_SIZE]; + QuantizeInfo input_quant_info; + AutoQuantizeInt8(input, input_size, input_int8, &input_quant_info.scale, + &input_quant_info.zero); + QuantizeInfo output_quant_info = input_quant_info; + + ArmPoolingInt8Op pooling_op_int8; + framework::SubstituteOp substitude_op_int8; + substitude_op_int8 + .AddInput(input_int8, input_dims, input_dim_size, input_quant_info) + .AddRepeatArg("strides", strides, 2) + .AddRepeatArg("kernels", kernels, 2) + .AddRepeatArg("dilations", dilations, 2) + .AddArg("padding", padding) + .AddArg("pooling_type", pooling_type) + .AddOutput(output_int8, output_dims, MAX_OUTPUT_DIM_SIZE, + 
output_quant_info); + pooling_op_int8.Init( + NULL, reinterpret_cast(&substitude_op_int8), + NULL); + pooling_op_int8.Run(); + uint32_t output_dim_size = substitude_op_int8.GetOutputShapeDimSize(0); + + uint32_t output_size = base::GetShapeSize(output_dim_size, output_dims); + Dequantize(output_int8, output_size, output_quant_info.scale, + output_quant_info.zero, output); + + ExpectTensorSimilar(expect_output, expect_output_dims, expect_output_dim_size, + output, output_dims, output_dim_size, 0.1); + + delete[] input; + delete[] expect_output; + delete[] expect_output_dims; + delete[] input_int8; + delete[] output_int8; + delete[] output; + delete[] output_dims; +} + +} // namespace +TEST_F(PoolingOpTest, Quant) { + const int32_t input_dims0[4] = {1, 7, 7, 1024}; + const int32_t kernels0[2] = {7, 7}; + const int32_t strides0[2] = {1, 1}; + TestPoolingQuantInt8(input_dims0, 4, kernels0, strides0, Padding::VALID, + PoolingType::AVG); + TestPoolingQuantInt8(input_dims0, 4, kernels0, strides0, Padding::VALID, + PoolingType::MAX); + TestPoolingQuantInt8(input_dims0, 4, kernels0, strides0, Padding::FULL, + PoolingType::AVG); + TestPoolingQuantInt8(input_dims0, 4, kernels0, strides0, Padding::SAME, + PoolingType::MAX); + const int32_t input_dims1[4] = {1, 3, 3, 2}; + const int32_t kernels1[2] = {3, 3}; + const int32_t strides1[2] = {1, 1}; + TestPoolingQuantInt8(input_dims1, 4, kernels1, strides1, Padding::SAME, + PoolingType::AVG); + const int32_t input_dims2[4] = {1, 3, 3, 2}; + const int32_t kernels2[2] = {2, 3}; + const int32_t strides2[2] = {1, 2}; + TestPoolingQuantInt8(input_dims2, 4, kernels2, strides2, Padding::SAME, + PoolingType::MAX); + // WARNING(ZhangZhimin): Batch inputs is unsupported + // const int32_t input_dims3[4] = {3,15,15,128}; + // const int32_t kernels3[2] = {4, 4}; + // const int32_t strides3[2] = {4, 4}; + // TestPoolingQuantInt8(input_dims3, 4, kernels3, strides3, Padding::SAME, + // PoolingType::AVG); + // const int32_t input_dims4[4] = 
{3,15,15,128}; + // const int32_t kernels4[2] = {4, 4}; + // const int32_t strides4[2] = {4, 4}; + // TestPoolingQuantInt8(input_dims4, 4, kernels4, strides4, Padding::SAME, + // PoolingType::MAX); + const int32_t input_dims5[4] = {1, 31, 31, 127}; + const int32_t kernels5[2] = {2, 2}; + const int32_t strides5[2] = {3, 3}; + TestPoolingQuantInt8(input_dims5, 4, kernels5, strides5, Padding::SAME, + PoolingType::AVG); + const int32_t input_dims6[4] = {1, 31, 31, 127}; + const int32_t kernels6[2] = {2, 2}; + const int32_t strides6[2] = {3, 3}; + TestPoolingQuantInt8(input_dims6, 4, kernels6, strides6, Padding::SAME, + PoolingType::MAX); +} + +#endif + + } // namespace test } // namespace ops } // namespace micro diff --git a/micro/test/ccunit/micro/ops/reshape_test.cc b/micro/test/ccunit/micro/ops/reshape_test.cc index aa05281d2d383b4d0bd909d2f3788515ec329693..cbe77e6e6a596176d7003da943d4bebee94b0b4d 100644 --- a/micro/test/ccunit/micro/ops/reshape_test.cc +++ b/micro/test/ccunit/micro/ops/reshape_test.cc @@ -33,7 +33,7 @@ void TestReshapeOp( T *y, int32_t *y_dims, const uint32_t y_dim_size, const T *e, const int32_t *e_dims, const uint32_t e_dim_size) { - ReshapeOp reshape_op; + ReshapeOp reshape_op; framework::SubstituteOp substitude_op; substitude_op.AddInput(input, input_dims, input_dim_size) .AddInput(shape, shape_dims, 1) diff --git a/micro/test/ccunit/micro/ops/softmax_test.cc b/micro/test/ccunit/micro/ops/softmax_test.cc index 0590256fddded792a04adf72b8c2f63ac4deb198..32facb83535e323cbcebdb56a0808e0a7085acae 100644 --- a/micro/test/ccunit/micro/ops/softmax_test.cc +++ b/micro/test/ccunit/micro/ops/softmax_test.cc @@ -13,9 +13,11 @@ // limitations under the License. 
#include "gtest/gtest.h" -#include "micro/ops/gtest_utils.h" #include "micro/ops/softmax.h" +#include "micro/ops/gtest_utils.h" +#include "micro/ops/nhwc/cmsis_nn/arm_softmax_int8.h" #include "micro/ops/substitute_op.h" +#include "micro/ops/test_quantize_utils.h" #include "micro/ops/test_utils.h" namespace micro { @@ -49,15 +51,89 @@ void Simple(bool use_log = false) { &substitude_op), NULL); softmax_op.Run(); - ExpectTensorNear(output, output_dims, output_dim_size, - expect, expect_dims, output_dim_size, 1e-5); + ExpectTensorNear(output, output_dims, output_dim_size, expect, + expect_dims, output_dim_size, 1e-5); } } // namespace + TEST_F(SoftmaxOpTest, CPUSimple) { Simple(); } TEST_F(SoftmaxOpTest, CPUSimpleUseLog) { Simple(true); } +#ifdef MACE_MICRO_ENABLE_CMSIS + +namespace { + +void TestSoftmaxQuantInt8(const int32_t *input_dims, + const uint32_t input_dim_size, + bool use_log = false) { + int32_t shape_size = base::GetShapeSize(input_dim_size, input_dims); + float *input = new float[shape_size]; + FillNormalRandomInput(input, shape_size); + float *expect_output = new float[shape_size]; + const uint32_t MAX_OUTPUT_NUM = 10; + int32_t *expect_output_dims = new int32_t[MAX_OUTPUT_NUM]; + + SoftmaxOp softmax_op; + framework::SubstituteOp substitude_op; + substitude_op.AddInput(input, input_dims, input_dim_size) + .AddArg("use_log", static_cast(use_log)) + .AddOutput(expect_output, expect_output_dims, MAX_OUTPUT_NUM); + softmax_op.Init( + NULL, reinterpret_cast(&substitude_op), NULL); + softmax_op.Run(); + uint32_t expect_output_dim_size = substitude_op.GetOutputShapeDimSize(0); + + int8_t *input_int8 = new int8_t[shape_size]; + int8_t *output_int8 = new int8_t[shape_size]; + float *output = new float[shape_size]; + int32_t *output_dims = new int32_t[MAX_OUTPUT_NUM]; + QuantizeInfo input_quant_info; + AutoQuantizeInt8(input, shape_size, input_int8, &input_quant_info.scale, + &input_quant_info.zero); + QuantizeInfo output_quant_info = {1.0f / 255.0f, -128}; + + 
ArmSoftmaxInt8Op softmax_op_int8; + framework::SubstituteOp substitude_op_int8; + substitude_op_int8 + .AddInput(input_int8, input_dims, input_dim_size, input_quant_info) + .AddArg("use_log", static_cast(use_log)) + .AddOutput(output_int8, output_dims, MAX_OUTPUT_NUM, output_quant_info); + softmax_op_int8.Init( + NULL, reinterpret_cast(&substitude_op_int8), + NULL); + softmax_op_int8.Run(); + uint32_t output_dim_size = substitude_op_int8.GetOutputShapeDimSize(0); + + Dequantize(output_int8, shape_size, output_quant_info.scale, + output_quant_info.zero, output); + + ExpectTensorSimilar(expect_output, expect_output_dims, expect_output_dim_size, + output, output_dims, output_dim_size, 0.1); + + delete[] input; + delete[] expect_output; + delete[] expect_output_dims; + delete[] input_int8; + delete[] output_int8; + delete[] output; + delete[] output_dims; +} + +} // namespace + +TEST_F(SoftmaxOpTest, QuantInt8) { + const int32_t input_dims0[2] = {5, 10}; + TestSoftmaxQuantInt8(input_dims0, 2); + const int32_t input_dims1[2] = {50, 100}; + TestSoftmaxQuantInt8(input_dims1, 2); + const int32_t input_dims2[2] = {1, 31}; + TestSoftmaxQuantInt8(input_dims2, 2); +} + +#endif + } // namespace test } // namespace ops } // namespace micro diff --git a/micro/test/ccutils/CMakeLists.txt b/micro/test/ccutils/CMakeLists.txt index 8b60050d4a11c16fe6ef8c2e543150524a7c2408..aa9246cfc7688d1ca21753d829e0e1b1e73bc74f 100644 --- a/micro/test/ccutils/CMakeLists.txt +++ b/micro/test/ccutils/CMakeLists.txt @@ -7,6 +7,7 @@ add_library(micro_ccutils target_include_directories(micro_ccutils PUBLIC .) 
target_link_libraries(micro_ccutils micro_base micro_framework_for_optest) +target_compile_options(micro_ccutils PUBLIC "-std=c++11") if(HEXAGON_STUB) add_library(micro_rpc_stub diff --git a/micro/test/ccutils/micro/ops/operator.test.cc b/micro/test/ccutils/micro/ops/operator.test.cc index 578402b3973ae8b3fc95147dce4be67896af994d..267314940c44910fe9a88ccc70c8175491b8d774 100644 --- a/micro/test/ccutils/micro/ops/operator.test.cc +++ b/micro/test/ccutils/micro/ops/operator.test.cc @@ -105,6 +105,16 @@ MaceStatus Operator::ResizeOutputShape(uint32_t idx, uint32_t dim_size, return fake_op_->ResizeOutputShape(idx, dim_size, dims); } +QuantizeInfo Operator::GetInputQuantizeInfo(uint32_t idx) { + return fake_op_->GetInputQuantizeInfo(idx); +} + +QuantizeInfo Operator::GetOutputQuantizeInfo(uint32_t idx) { + return fake_op_->GetOutputQuantizeInfo(idx); +} + + + #ifndef MACE_DEFINE_GET_ARG_BY_NAME_FUNC #define MACE_DEFINE_GET_ARG_BY_NAME_FUNC(T, FUNC) \ template <> \ diff --git a/micro/test/ccutils/micro/ops/substitute_op.cc b/micro/test/ccutils/micro/ops/substitute_op.cc index f65c01ec9f160934b73c01c23de9790d8851d42c..4c8735d1a85d43d4bb214f63b1feedb2488b428d 100644 --- a/micro/test/ccutils/micro/ops/substitute_op.cc +++ b/micro/test/ccutils/micro/ops/substitute_op.cc @@ -24,26 +24,32 @@ namespace framework { SubstituteOp::SubstituteOp() : input_idx_(0), output_idx_(0), arg_idx_(0), repeat_arg_idx_(0) {} -SubstituteOp &SubstituteOp::AddInput( - const void *input, const int32_t *dims, const uint32_t dims_size) { +SubstituteOp &SubstituteOp::AddInput(const void *input, + const int32_t *dims, + const uint32_t dims_size, + QuantizeInfo quant_info) { MACE_ASSERT1(input != NULL || dims != NULL || dims_size == 0, "Invalid param"); MACE_ASSERT1(input_idx_ < kMaxInputNum, "Not enough mem."); inputs_[input_idx_] = input; input_dims_[input_idx_] = dims; input_dim_sizes_[input_idx_] = dims_size; + input_quant_info_[input_idx_] = quant_info; ++input_idx_; return *this; } -SubstituteOp 
&SubstituteOp::AddOutput( - void *output, int32_t *dims, const uint32_t dims_size) { +SubstituteOp &SubstituteOp::AddOutput(void *output, + int32_t *dims, + const uint32_t dims_size, + QuantizeInfo quant_info) { MACE_ASSERT1(output != NULL || dims != NULL || dims_size == 0, "Invalid param"); MACE_ASSERT1(output_idx_ < kMaxOutputNum, "Not enough mem."); outputs_[output_idx_] = output; output_dims_[output_idx_] = dims; output_dim_sizes_[output_idx_] = dims_size; + output_quant_info_[output_idx_] = quant_info; ++output_idx_; return *this; } @@ -86,6 +92,14 @@ const int32_t *SubstituteOp::GetOutputShapeDims(uint32_t idx) { return output_dims_[idx]; } +QuantizeInfo SubstituteOp::GetInputQuantizeInfo(uint32_t idx) { + return input_quant_info_[idx]; +} + +QuantizeInfo SubstituteOp::GetOutputQuantizeInfo(uint32_t idx) { + return output_quant_info_[idx]; +} + MaceStatus SubstituteOp::ResizeOutputShape(uint32_t idx, uint32_t input_dim_size, const int32_t *input_dims) { diff --git a/micro/test/ccutils/micro/ops/substitute_op.h b/micro/test/ccutils/micro/ops/substitute_op.h index 0f5e60d471fb7a6c07bdb31d33d5d03b71ccba56..4b822d7a6b03f0ca90782bbaecc38170b3d42445 100644 --- a/micro/test/ccutils/micro/ops/substitute_op.h +++ b/micro/test/ccutils/micro/ops/substitute_op.h @@ -16,6 +16,7 @@ #define MICRO_TEST_CCUTILS_MICRO_OPS_SUBSTITUTE_OP_H_ #include "micro/base/logging.h" +#include "micro/base/types.h" #include "micro/base/utils.h" #include "micro/include/public/micro.h" @@ -43,9 +44,13 @@ class SubstituteOp { ~SubstituteOp() {} SubstituteOp &AddInput(const void *input, - const int32_t *dims, const uint32_t dims_size); + const int32_t *dims, + const uint32_t dims_size, + QuantizeInfo quant_info = QuantizeInfo{0.0f, 0}); SubstituteOp &AddOutput(void *output, - int32_t *dims, const uint32_t dims_size); + int32_t *dims, + const uint32_t dims_size, + QuantizeInfo quant_info = QuantizeInfo{0.0f, 0}); template SubstituteOp &AddArg(const char *name, T value) { @@ -106,6 +111,9 @@ class 
SubstituteOp { const int32_t *input_dims); MaceStatus ReuseInputBufferForOutput(uint32_t output_idx, uint32_t input_idx); + QuantizeInfo GetInputQuantizeInfo(uint32_t idx); + QuantizeInfo GetOutputQuantizeInfo(uint32_t idx); + template const T *GetInputData(uint32_t idx) { return static_cast(DoGetInputData(idx)); @@ -120,11 +128,13 @@ class SubstituteOp { const void *inputs_[kMaxInputNum]; const int32_t *input_dims_[kMaxInputNum]; uint32_t input_dim_sizes_[kMaxInputNum]; + QuantizeInfo input_quant_info_[kMaxInputNum]; uint32_t input_idx_; void *outputs_[kMaxOutputNum]; int32_t *output_dims_[kMaxOutputNum]; uint32_t output_dim_sizes_[kMaxOutputNum]; + QuantizeInfo output_quant_info_[kMaxOutputNum]; uint32_t output_idx_; // for arg diff --git a/micro/test/ccutils/micro/ops/test_quantize_utils.h b/micro/test/ccutils/micro/ops/test_quantize_utils.h new file mode 100644 index 0000000000000000000000000000000000000000..d15792cf45416826168b3764cc3a9720a1e55c9f --- /dev/null +++ b/micro/test/ccutils/micro/ops/test_quantize_utils.h @@ -0,0 +1,129 @@ +// Copyright 2020 The MACE Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#ifndef MICRO_TEST_CCUTILS_MICRO_OPS_TEST_QUANTIZE_UTILS_H_ +#define MICRO_TEST_CCUTILS_MICRO_OPS_TEST_QUANTIZE_UTILS_H_ + +#include +#include + +#include + +#include "micro/base/logging.h" +#include "micro/common/global_buffer.h" +#include "micro/include/public/micro.h" +#include "micro/port/api.h" + +namespace micro { +namespace ops { +namespace test { + +template +inline Q Saturate(float value) { + int rounded_value = static_cast(value); + if (rounded_value <= std::numeric_limits::lowest()) { + return std::numeric_limits::lowest(); + } else if (rounded_value >= std::numeric_limits::max()) { + return std::numeric_limits::max(); + } else { + return static_cast(rounded_value); + } +} + +inline void FindMinMax(const float *input, + const uint32_t size, + float *min_val, + float *max_val) { + float max_v = base::lowest(); + float min_v = base::highest(); + for (uint32_t i = 0; i < size; ++i) { + max_v = base::max(max_v, input[i]); + min_v = base::min(min_v, input[i]); + } + *min_val = min_v; + *max_val = max_v; +} + +template +inline void QuantizeWithScaleAndZeropoint(const float *input, + const uint32_t size, + float scale, + int32_t zero_point, + Q *output) { + float recip_scale = 1 / scale; + for (uint32_t i = 0; i < size; ++i) { + output[i] = Saturate(roundf(zero_point + recip_scale * input[i])); + } +} + +inline void AdjustRangeInt8(const float *input, + const uint32_t size, + float *scale, + int32_t *zero_point) { + float in_min_data; + float in_max_data; + FindMinMax(input, size, &in_min_data, &in_max_data); + in_max_data = base::max(0.f, in_max_data); + in_min_data = base::min(0.f, in_min_data); + + *scale = (in_max_data - in_min_data) / 255; + *zero_point = int8_t(-in_min_data / *scale - 128); +} + +inline void AdjustRangeInt8Symmetric(const float *input, + const uint32_t size, + float *scale) { + float in_min_data; + float in_max_data; + FindMinMax(input, size, &in_min_data, &in_max_data); + in_max_data = base::max(0.f, in_max_data); + in_min_data = 
base::min(0.f, in_min_data); + + float max_abs = base::max(base::abs(in_max_data), base::abs(in_min_data)); + + *scale = max_abs / 127.0f; +} + +inline void AutoQuantizeInt8(const float *input, + const uint32_t size, + int8_t *output, + float *scale, + int32_t *zero_point) { + AdjustRangeInt8(input, size, scale, zero_point); + QuantizeWithScaleAndZeropoint(input, size, *scale, *zero_point, output); +} + +inline void AutoQuantizeInt8Symmetric(const float *input, + const uint32_t size, + int8_t *output, + float *scale) { + AdjustRangeInt8Symmetric(input, size, scale); + QuantizeWithScaleAndZeropoint(input, size, *scale, 0, output); +} + +inline void Dequantize(const int8_t *input, + const uint32_t size, + const float scale, + const int32_t zero_point, + float *output) { + for (uint32_t i = 0; i < size; ++i) { + output[i] = static_cast(scale * (input[i] - zero_point)); + } +} + +} // namespace test +} // namespace ops +} // namespace micro + +#endif // MICRO_TEST_CCUTILS_MICRO_OPS_TEST_QUANTIZE_UTILS_H_ diff --git a/micro/test/ccutils/micro/ops/test_utils.cc b/micro/test/ccutils/micro/ops/test_utils.cc index 7cbe5163e5383e1bcb0da3be9784991c66846d3d..bb6cd0f3edd287f2540ca3197427a727a9007f4c 100644 --- a/micro/test/ccutils/micro/ops/test_utils.cc +++ b/micro/test/ccutils/micro/ops/test_utils.cc @@ -15,6 +15,8 @@ #include "micro/ops/test_utils.h" +#include + namespace micro { namespace ops { namespace test { @@ -67,6 +69,30 @@ void FillRandomInput(void *input, const int32_t shape_size) { } } +void FillUniformRandomInput(float *input, + const int32_t shape_size, + float low, + float up) { + std::random_device rd; + std::mt19937 gen(rd()); + std::uniform_real_distribution dis(low, up); + for (int n = 0; n < shape_size; ++n) { + input[n] = dis(gen); + } +} + +void FillNormalRandomInput(float *input, + const int32_t shape_size, + float mean, + float std) { + std::random_device rd; + std::mt19937 gen(rd()); + std::normal_distribution dis(mean, std); + for (int n = 0; n < 
shape_size; ++n) { + input[n] = dis(gen); + } +} + } // namespace test } // namespace ops } // namespace micro diff --git a/micro/test/ccutils/micro/ops/test_utils.h b/micro/test/ccutils/micro/ops/test_utils.h index fc64e0b7c33dbe10d1b52afd9c6eb7c737d9326b..91c29025997bfe6107154d18e52a02736a256fe0 100644 --- a/micro/test/ccutils/micro/ops/test_utils.h +++ b/micro/test/ccutils/micro/ops/test_utils.h @@ -38,6 +38,16 @@ T *input = common::test::GetGlobalBuffer()->GetBuffer(shape_size); \ micro::ops::test::FillRandomInput(input, shape_size * sizeof(T)) #endif +void FillUniformRandomInput(float *input, + const int32_t shape_size, + float low = -50.0f, + float up = 50.0f); + +void FillNormalRandomInput(float *input, + const int32_t shape_size, + float mean = 0.0f, + float std = 1.0f); + } // namespace test } // namespace ops } // namespace micro diff --git a/micro/third_party/CMSIS_5 b/micro/third_party/CMSIS_5 new file mode 160000 index 0000000000000000000000000000000000000000..378acfb6490a82ba90e1ffb4bfd4e602668b180a --- /dev/null +++ b/micro/third_party/CMSIS_5 @@ -0,0 +1 @@ +Subproject commit 378acfb6490a82ba90e1ffb4bfd4e602668b180a diff --git a/micro/third_party/gflags b/micro/third_party/gflags new file mode 160000 index 0000000000000000000000000000000000000000..a386bd0f204cf99db253b3e84c56795dea8c397f --- /dev/null +++ b/micro/third_party/gflags @@ -0,0 +1 @@ +Subproject commit a386bd0f204cf99db253b3e84c56795dea8c397f diff --git a/micro/third_party/gflags/COPYING.txt b/micro/third_party/gflags/COPYING.txt deleted file mode 100644 index d15b0c24134de8ce0185ac22cb2dd96e23911fab..0000000000000000000000000000000000000000 --- a/micro/third_party/gflags/COPYING.txt +++ /dev/null @@ -1,28 +0,0 @@ -Copyright (c) 2006, Google Inc. -All rights reserved. 
- -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions are -met: - - * Redistributions of source code must retain the above copyright -notice, this list of conditions and the following disclaimer. - * Redistributions in binary form must reproduce the above -copyright notice, this list of conditions and the following disclaimer -in the documentation and/or other materials provided with the -distribution. - * Neither the name of Google Inc. nor the names of its -contributors may be used to endorse or promote products derived from -this software without specific prior written permission. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT -OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. diff --git a/micro/third_party/gflags/gflags.cmake b/micro/third_party/gflags/gflags.cmake deleted file mode 100644 index 2a4f0343d901f70406c97dfae2c63917381890b3..0000000000000000000000000000000000000000 --- a/micro/third_party/gflags/gflags.cmake +++ /dev/null @@ -1,50 +0,0 @@ -INCLUDE(ExternalProject) - -set(GFLAGS_SRCS_DIR "${MACE_THIRD_PARTY_DIR}/gflags") -set(GFLAGS_INSTALL_DIR "${MACE_THIRD_PARTY_DIR}/install/gflags") -set(GFLAGS_INCLUDE_DIR "${GFLAGS_INSTALL_DIR}/include" CACHE PATH "gflags include directory." 
FORCE) - -if(MSVC) - set(GFLAGS_LIBRARIES "${GFLAGS_INSTALL_DIR}/lib/gflags_static.lib" CACHE FILEPATH "GFLAGS_LIBRARIES" FORCE) -else(MSVC) - set(GFLAGS_LIBRARIES "${GFLAGS_INSTALL_DIR}/lib/libgflags.a" CACHE FILEPATH "GFLAGS_LIBRARIES" FORCE) -endif(MSVC) - -include_directories(SYSTEM ${GFLAGS_INCLUDE_DIR}) - -# Mirror of https://github.com/gflags/gflags/archive/v2.2.2.zip -set(GFLAGS_URL "https://cnbj1.fds.api.xiaomi.com/mace/third-party/gflags/v2.2.2.zip") -set(GFLAGS_HASH "SHA256=19713a36c9f32b33df59d1c79b4958434cb005b5b47dc5400a7a4b078111d9b5") - -ExternalProject_Add( - gflags_gflags - URL_HASH "${GFLAGS_HASH}" - URL "${GFLAGS_URL}" - PREFIX ${GFLAGS_SRCS_DIR} - UPDATE_COMMAND "" - BUILD_BYPRODUCTS ${GFLAGS_LIBRARIES} - CMAKE_ARGS -DCMAKE_INSTALL_PREFIX=${GFLAGS_INSTALL_DIR} - -DCMAKE_CXX_COMPILER_LAUNCHER=${CMAKE_CXX_COMPILER_LAUNCHER} - -DCMAKE_C_COMPILER_LAUNCHER=${CMAKE_C_COMPILER_LAUNCHER} - -DBUILD_STATIC_LIBS=ON - -DBUILD_TESTING=OFF - -DCMAKE_BUILD_TYPE=Release - -DCMAKE_GENERATOR=${CMAKE_GENERATOR} - ${THIRD_PARTY_EXTRA_CMAKE_ARGS} -) - -if(MSVC) - add_custom_command(TARGET gflags_gflags POST_BUILD - COMMAND if $==1 (${CMAKE_COMMAND} -E copy ${GFLAGS_INSTALL_DIR}/lib/gflags_static_debug.lib ${GFLAGS_INSTALL_DIR}/lib/gflags_static.lib) - ) -endif(MSVC) - -add_library(gflags STATIC IMPORTED GLOBAL) -set_property(TARGET gflags PROPERTY IMPORTED_LOCATION ${GFLAGS_LIBRARIES}) -add_dependencies(gflags gflags_gflags) - -if(MSVC) - set_target_properties(gflags - PROPERTIES IMPORTED_LINK_INTERFACE_LIBRARIES - Shlwapi.lib) -endif(MSVC) diff --git a/micro/third_party/googletest b/micro/third_party/googletest new file mode 160000 index 0000000000000000000000000000000000000000..e6e2d3b7614ff4e6017d8968bd4c3f579133666e --- /dev/null +++ b/micro/third_party/googletest @@ -0,0 +1 @@ +Subproject commit e6e2d3b7614ff4e6017d8968bd4c3f579133666e diff --git a/micro/third_party/googletest/LICENSE b/micro/third_party/googletest/LICENSE deleted file mode 100644 index 
1941a11f8ce94389160b458927a29ba217542818..0000000000000000000000000000000000000000 --- a/micro/third_party/googletest/LICENSE +++ /dev/null @@ -1,28 +0,0 @@ -Copyright 2008, Google Inc. -All rights reserved. - -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions are -met: - - * Redistributions of source code must retain the above copyright -notice, this list of conditions and the following disclaimer. - * Redistributions in binary form must reproduce the above -copyright notice, this list of conditions and the following disclaimer -in the documentation and/or other materials provided with the -distribution. - * Neither the name of Google Inc. nor the names of its -contributors may be used to endorse or promote products derived from -this software without specific prior written permission. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT -OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
diff --git a/micro/third_party/googletest/googletest.BUILD b/micro/third_party/googletest/googletest.BUILD deleted file mode 100644 index 4612f3ba6a33621d9810ae63926f7ca7b59489dc..0000000000000000000000000000000000000000 --- a/micro/third_party/googletest/googletest.BUILD +++ /dev/null @@ -1,32 +0,0 @@ -licenses(["notice"]) - -exports_files(["LICENSE"]) - -cc_library( - name = "gtest", - srcs = [ - "googletest/src/gtest-all.cc", - "googlemock/src/gmock-all.cc", - ], - hdrs = glob([ - "**/*.h", - "googletest/src/*.cc", - "googlemock/src/*.cc", - ]), - includes = [ - "googlemock", - "googletest", - "googletest/include", - "googlemock/include", - ], - linkopts = ["-pthread"], - visibility = ["//visibility:public"], -) - -cc_library( - name = "gtest_main", - srcs = ["googlemock/src/gmock_main.cc"], - linkopts = ["-pthread"], - visibility = ["//visibility:public"], - deps = [":gtest"], -) diff --git a/micro/third_party/googletest/googletest.cmake b/micro/third_party/googletest/googletest.cmake deleted file mode 100644 index bb5e02e55cfbf2896e3347d4848710e12ef1bf62..0000000000000000000000000000000000000000 --- a/micro/third_party/googletest/googletest.cmake +++ /dev/null @@ -1,52 +0,0 @@ - enable_testing() - - include(ExternalProject) - - set(GTEST_SOURCES_DIR ${MACE_THIRD_PARTY_DIR}/gtest) - set(GTEST_INSTALL_DIR ${MACE_THIRD_PARTY_DIR}/install/gtest) - set(GTEST_INCLUDE_DIR "${GTEST_INSTALL_DIR}/include" CACHE PATH "gtest include directory." FORCE) - - include_directories(SYSTEM ${GTEST_INCLUDE_DIR}) - - if(MSVC) - set(GTEST_LIBRARIES - "${GTEST_INSTALL_DIR}/lib/gtest.lib" CACHE FILEPATH "gtest libraries." FORCE) - set(GTEST_MAIN_LIBRARIES - "${GTEST_INSTALL_DIR}/lib/gtest_main.lib" CACHE FILEPATH "gtest main libraries." FORCE) - else(MSVC) - set(GTEST_LIBRARIES - "${GTEST_INSTALL_DIR}/lib/libgtest.a" CACHE FILEPATH "gtest libraries." FORCE) - set(GTEST_MAIN_LIBRARIES - "${GTEST_INSTALL_DIR}/lib/libgtest_main.a" CACHE FILEPATH "gtest main libraries." 
FORCE) - endif(MSVC) - - # Mirror of "https://github.com/google/googletest/archive/release-1.8.0.zip" - set(GTEST_URL "https://cnbj1.fds.api.xiaomi.com/mace/third-party/googletest/googletest-release-1.8.0.zip") - set(GTEST_HASH "SHA256=f3ed3b58511efd272eb074a3a6d6fb79d7c2e6a0e374323d1e6bcbcc1ef141bf") - - ExternalProject_Add( - extern_gtest - URL_HASH "${GTEST_HASH}" - URL "${GTEST_URL}" - PREFIX ${GTEST_SOURCES_DIR} - UPDATE_COMMAND "" - BUILD_BYPRODUCTS ${GTEST_LIBRARIES} ${GTEST_MAIN_LIBRARIES} - CMAKE_ARGS -DCMAKE_BUILD_TYPE=${CMAKE_BUILD_TYPE} - -DCMAKE_CXX_COMPILER_LAUNCHER=${CMAKE_CXX_COMPILER_LAUNCHER} - -DCMAKE_C_COMPILER_LAUNCHER=${CMAKE_C_COMPILER_LAUNCHER} - -DCMAKE_INSTALL_PREFIX=${GTEST_INSTALL_DIR} - -DBUILD_GMOCK=ON - -Dgtest_disable_pthreads=ON - -Dgtest_force_shared_crt=ON - -DCMAKE_BUILD_TYPE=${CMAKE_BUILD_TYPE} - -DCMAKE_GENERATOR=${CMAKE_GENERATOR} - ${THIRD_PARTY_EXTRA_CMAKE_ARGS} - ) - - add_library(gtest STATIC IMPORTED GLOBAL) - set_property(TARGET gtest PROPERTY IMPORTED_LOCATION ${GTEST_LIBRARIES}) - add_dependencies(gtest extern_gtest) - - add_library(gtest_main STATIC IMPORTED GLOBAL) - set_property(TARGET gtest_main PROPERTY IMPORTED_LOCATION ${GTEST_MAIN_LIBRARIES}) - add_dependencies(gtest_main extern_gtest) diff --git a/micro/third_party/third_party.cmake b/micro/third_party/third_party.cmake deleted file mode 100644 index a5b0fcdb80c35a94051342d153fefc44c44d2e80..0000000000000000000000000000000000000000 --- a/micro/third_party/third_party.cmake +++ /dev/null @@ -1,42 +0,0 @@ -set(MACE_THIRD_PARTY_DIR "${PROJECT_BINARY_DIR}/third_party" CACHE STRING "Third party libraries download & build directories.") - -# Forwarding the cross compile flags -set(THIRD_PARTY_EXTRA_CMAKE_ARGS - -DCMAKE_C_FLAGS=${MACE_CC_FLAGS} - -DCMAKE_CXX_FLAGS=${MACE_CC_FLAGS} -) - -if(CMAKE_TOOLCHAIN_FILE) - set(THIRD_PARTY_EXTRA_CMAKE_ARGS - ${THIRD_PARTY_EXTRA_CMAKE_ARGS} - -DCMAKE_TOOLCHAIN_FILE=${CMAKE_TOOLCHAIN_FILE} - ) -endif(CMAKE_TOOLCHAIN_FILE) - 
-if(CROSSTOOL_ROOT) - set(THIRD_PARTY_EXTRA_CMAKE_ARGS - ${THIRD_PARTY_EXTRA_CMAKE_ARGS} - -DCROSSTOOL_ROOT=${CROSSTOOL_ROOT} - ) -endif(CROSSTOOL_ROOT) - -if(ANDROID_ABI) - set(THIRD_PARTY_EXTRA_CMAKE_ARGS - ${THIRD_PARTY_EXTRA_CMAKE_ARGS} - -DANDROID_ABI=${ANDROID_ABI} - ) -endif(ANDROID_ABI) - -if(ANDROID_NATIVE_API_LEVEL) - set(THIRD_PARTY_EXTRA_CMAKE_ARGS - ${THIRD_PARTY_EXTRA_CMAKE_ARGS} - -DANDROID_NATIVE_API_LEVEL=${ANDROID_NATIVE_API_LEVEL} - ) -endif(ANDROID_NATIVE_API_LEVEL) - -if(PLATFORM) - set(THIRD_PARTY_EXTRA_CMAKE_ARGS - ${THIRD_PARTY_EXTRA_CMAKE_ARGS} - -DPLATFORM=${PLATFORM} - ) -endif(PLATFORM) diff --git a/micro/tools/CMakeLists.txt b/micro/tools/CMakeLists.txt index 8e52b4b42d053743d3aafe26535ac0d46f833dfb..fd28f7a821feab461f859f38a68c2973a7f6599f 100644 --- a/micro/tools/CMakeLists.txt +++ b/micro/tools/CMakeLists.txt @@ -1,7 +1,7 @@ if(MICRO_MODEL_NAME) - include (${PROJECT_SOURCE_DIR}/third_party/gflags/gflags.cmake) add_executable(micro_run_static micro_run.cc) - target_link_libraries(micro_run_static micro_engine gflags) + target_link_libraries(micro_run_static micro models gflags) + target_compile_options(micro_run_static PRIVATE "-std=c++11") target_compile_definitions(micro_run_static PRIVATE "-DMICRO_MODEL_NAME=${MICRO_MODEL_NAME}") if(NOT ANDROID) target_link_libraries(micro_run_static pthread) diff --git a/micro/tools/build_docker.sh b/micro/tools/build_docker.sh new file mode 100755 index 0000000000000000000000000000000000000000..815e354bbf25c74428b5de65bf3f95f21ab47d7e --- /dev/null +++ b/micro/tools/build_docker.sh @@ -0,0 +1,7 @@ +#! /bin/bash + +cd docker/mace-micro-dev + +docker build . -f mace-micro-dev.dockerfile --tag mace-micro-dev + +cd ../.. 
\ No newline at end of file diff --git a/micro/tools/ci/build_mbed_example.sh b/micro/tools/ci/build_mbed_example.sh new file mode 100755 index 0000000000000000000000000000000000000000..39f10b02714eeb8d348739e6614f2cb4828d3a06 --- /dev/null +++ b/micro/tools/ci/build_mbed_example.sh @@ -0,0 +1,17 @@ +#! /bin/bash + +python tools/python/convert.py --config micro/pretrained_models/tensorflow/kws/kws-tc_resnet8.yml --enable_micro || exit -1 + +./micro/tools/cmake/cmake-build-gcc-arm-none-eabi.sh \ +-DARM_CPU=cortex-m7 \ +-DMACE_MICRO_ENABLE_CMSIS=ON \ +-DMACE_MICRO_ENABLE_HARDFP=OFF || exit -1 + +cp build/micro/gcc-arm-none-eabi/install micro/examples/classifier -r + +cd micro/examples/classifier + +mbed deploy || exit -1 +mbed compile -t GCC_ARM -m NUCLEO_F767ZI -D MICRO_MODEL_NAME=kws_tc_resnet8 -D MICRO_DATA_NAME=kws || exit -1 + +cd ../../.. \ No newline at end of file diff --git a/micro/tools/ci/cross_build.sh b/micro/tools/ci/cross_build.sh new file mode 100755 index 0000000000000000000000000000000000000000..6216e047edcb9254d093b080dd79e6655347954c --- /dev/null +++ b/micro/tools/ci/cross_build.sh @@ -0,0 +1,25 @@ +#! /bin/bash + +git submodule update --init . + +echo "Builds host float32" +rm -rf build/micro +./micro/tools/cmake/cmake-build-host.sh \ +-DMACE_MICRO_ENABLE_TESTS=ON \ +-DMACE_MICRO_ENABLE_CMSIS=ON || exit -1 + +echo "Builds host bfloat16" +rm -rf build/micro +./micro/tools/cmake/cmake-build-host.sh \ +-DMACE_MICRO_ENABLE_BFLOAT16=ON \ +-DMACE_MICRO_ENABLE_TESTS=ON \ +-DMACE_MICRO_ENABLE_CMSIS=ON || exit -1 + +echo "Builds gcc arm cortex-m7" +rm -rf build/micro +./micro/tools/cmake/cmake-build-gcc-arm-none-eabi.sh \ +-DARM_CPU=cortex-m7 \ +-DMACE_MICRO_ENABLE_TESTS=OFF \ +-DMACE_MICRO_ENABLE_CMSIS=ON || exit -1 + +cd .. 
\ No newline at end of file diff --git a/micro/tools/ci/host_build_and_run_examples.sh b/micro/tools/ci/host_build_and_run_examples.sh new file mode 100755 index 0000000000000000000000000000000000000000..2a8a4dce0001bd74410b33ffab20a43779bd9b30 --- /dev/null +++ b/micro/tools/ci/host_build_and_run_examples.sh @@ -0,0 +1,24 @@ +#! /bin/bash + +python tools/python/convert.py --config micro/pretrained_models/tensorflow/kws/kws-tc_resnet8.yml --enable_micro || exit -1 + +rm -rf build/micro +./micro/tools/cmake/cmake-build-host.sh \ +-DMACE_MICRO_ENABLE_EXAMPLES=ON -DMICRO_MODEL_NAME=kws_tc_resnet8 -DMICRO_DATA_NAME=kws \ +-DMACE_MICRO_ENABLE_TESTS=OFF \ +-DMACE_MICRO_ENABLE_CMSIS=OFF || exit -1 + +./build/micro/host/examples/classifier/kws_tc_resnet8 + +python3 tools/python/convert.py --config micro/pretrained_models/keras/mnist/mnist-int8.yml --enable_micro || exit -1 + +rm -rf build/micro +./micro/tools/cmake/cmake-build-host.sh \ +-DMACE_MICRO_ENABLE_CMSIS=ON \ +-DMACE_MICRO_ENABLE_EXAMPLES=ON \ +-DMICRO_MODEL_NAME=mnist_int8 -DMICRO_DATA_NAME=mnist \ +-DMACE_MICRO_ENABLE_TESTS=OFF || exit -1 + +./build/micro/host/examples/classifier/mnist_int8 + +cd .. \ No newline at end of file diff --git a/micro/tools/ci/host_build_and_run_tests.sh b/micro/tools/ci/host_build_and_run_tests.sh new file mode 100755 index 0000000000000000000000000000000000000000..3f152f6d38c853ec15401e3337066b2b6751bb25 --- /dev/null +++ b/micro/tools/ci/host_build_and_run_tests.sh @@ -0,0 +1,16 @@ +#! /bin/bash + +git submodule update --init . + +rm -rf build/micro +./micro/tools/cmake/cmake-build-host.sh \ +-DMACE_MICRO_ENABLE_TESTS=ON \ +-DMACE_MICRO_ENABLE_CMSIS=ON || exit -1 + +echo "MACE Micro ut" +build/micro/host/test/ccunit/micro_ops_test || exit -1 + +echo "MACE Micro benchmark" +build/micro/host/test/ccbenchmark/micro_cc_benchmark || exit -1 + +cd .. 
\ No newline at end of file diff --git a/micro/tools/ci/model_convert.sh b/micro/tools/ci/model_convert.sh new file mode 100755 index 0000000000000000000000000000000000000000..c18acf4e7306007701e6421bf8b177a30b6ba7ba --- /dev/null +++ b/micro/tools/ci/model_convert.sh @@ -0,0 +1,43 @@ +#! /bin/bash + +rm -rf mace-models +rm -rf build/micro + +GIT_SSH_COMMAND="ssh -o UserKnownHostsFile=/dev/null -o StrictHostKeyChecking=no" git clone git@git.n.xiaomi.com:applied-machine-learning/sysml/mace-models.git + +git submodule update --init . || exit -1 + +CONF_FILE=mace-models/micro-models/har-cnn/har-cnn.yml +python tools/python/convert.py --config=${CONF_FILE} --enable_micro || exit -1 +python tools/python/run_micro.py --config $CONF_FILE --build --validate --model_name har_cnn || exit -1 +python tools/python/run_micro.py --config $CONF_FILE --model_name har_cnn --build --benchmark || exit -1 + +CONF_FILE=mace-models/micro-models/har-cnn/har-cnn-bf16.yml +python tools/python/convert.py --config=${CONF_FILE} --enable_micro || exit -1 +python tools/python/run_micro.py --config $CONF_FILE --build --validate --model_name har_cnn || exit -1 + +CONF_FILE=mace-models/micro-models/keras/mnist/mnist.yml +python3 tools/python/convert.py --config=${CONF_FILE} --enable_micro || exit -1 +python3 tools/python/run_micro.py --config $CONF_FILE --build --validate --model_name mnist || exit -1 + +CONF_FILE=mace-models/micro-models/keras/mnist/mnist-int8.yml +python3 tools/python/convert.py --config=${CONF_FILE} --enable_micro || exit -1 +python3 tools/python/run_micro.py --config $CONF_FILE --build --validate --model_name mnist_int8 || exit -1 + +CONF_FILE=mace-models/micro-models/keras/har/har.yml +python3 tools/python/convert.py --config=${CONF_FILE} --enable_micro || exit -1 +python3 tools/python/run_micro.py --config $CONF_FILE --build --validate --model_name har || exit -1 + +CONF_FILE=mace-models/micro-models/keras/har/har-int8.yml +python3 tools/python/convert.py 
--config=${CONF_FILE} --enable_micro || exit -1 +python3 tools/python/run_micro.py --config $CONF_FILE --build --validate --model_name har_int8 || exit -1 + +CONF_FILE=mace-models/micro-models/tensorflow/kws/kws-tc_resnet8.yml +python tools/python/convert.py --config=${CONF_FILE} --enable_micro || exit -1 +python tools/python/run_micro.py --config $CONF_FILE --build --validate --model_name kws_tc_resnet8 || exit -1 + +CONF_FILE=mace-models/micro-models/tensorflow/kws/kws-tc_resnet8-bf16.yml +python tools/python/convert.py --config=${CONF_FILE} --enable_micro || exit -1 +python tools/python/run_micro.py --config $CONF_FILE --build --validate --model_name kws_tc_resnet8_bf16 || exit -1 + +rm -rf mace-models diff --git a/micro/tools/cmake/cmake-build-arm64-v8a-hexagon-stub.sh b/micro/tools/cmake/cmake-build-arm64-v8a-hexagon-stub.sh index d8b5379eb083228ca36e01412277b365ea06cbc8..44011c8d77ed09c17099fe091201b0557b02adbf 100755 --- a/micro/tools/cmake/cmake-build-arm64-v8a-hexagon-stub.sh +++ b/micro/tools/cmake/cmake-build-arm64-v8a-hexagon-stub.sh @@ -1,17 +1,20 @@ #!/bin/bash + if [ -z "$ANDROID_NDK_HOME" ]; then echo "ANDROID_NDK_HOME is undefined"; + exit -1; fi if [ -z "$HEXAGON_SDK_ROOT" ]; then echo "HEXAGON_SDK_ROOT is undefined"; + exit -1; fi -BUILD_DIR=build/cmake-build/arm64-v8a +BUILD_DIR=build/micro/arm64-v8a mkdir -p ${BUILD_DIR} cd ${BUILD_DIR} -cmake ../../.. \ +cmake ../../../micro \ -DANDROID_ABI="arm64-v8a" \ -DCMAKE_TOOLCHAIN_FILE=${ANDROID_NDK_HOME}/build/cmake/android.toolchain.cmake \ -DHEXAGON_SDK_ROOT=${HEXAGON_SDK_ROOT} \ @@ -20,9 +23,10 @@ cmake ../../.. \ -DANDROID_STL=c++_shared \ -DMACE_ENABLE_RPCMEM=ON \ -DCMAKE_INSTALL_PREFIX=install \ + -DMACE_MICRO_ENABLE_EXAMPLES=OFF \ -DHEXAGON_STUB=ON \ $@ || exit 1 -cmake --build . -- -j || exit 1 +cmake --build . --target install --target install -- -j || exit 1 cd ../../.. 
diff --git a/micro/tools/cmake/cmake-build-gcc-arm-none-eabi.sh b/micro/tools/cmake/cmake-build-gcc-arm-none-eabi.sh index 45d098c19d69c8f70f1ae8caf6931d4a86f122c0..1b241cb0a8dc04871ecf2e05a098b2d717cbb97a 100755 --- a/micro/tools/cmake/cmake-build-gcc-arm-none-eabi.sh +++ b/micro/tools/cmake/cmake-build-gcc-arm-none-eabi.sh @@ -1,18 +1,17 @@ #!/bin/bash -if [ -z "$GCC_ARM_ROOT" ]; then - echo "GCC_ARM_ROOT is undefined"; -fi -BUILD_DIR=build/cmake-build/gcc-arm-none-eabi +BUILD_DIR=build/micro/gcc-arm-none-eabi + mkdir -p ${BUILD_DIR} cd ${BUILD_DIR} -cmake ../../.. \ - -DGCC_ARM_ROOT=${GCC_ARM_ROOT} \ +cmake ../../../micro \ -DCMAKE_TOOLCHAIN_FILE=./cmake/toolchain/gcc-arm-none-eabi.cmake \ + -DMACE_MICRO_ENABLE_CMSIS=ON \ -DCMAKE_INSTALL_PREFIX=install \ + -DMACE_MICRO_ENABLE_TESTS=OFF \ $@ || exit 1 -cmake --build . -- -j || exit 1 +cmake --build . --target install -- -j || exit 1 cd ../../.. diff --git a/micro/tools/cmake/cmake-build-hexagon6.sh b/micro/tools/cmake/cmake-build-hexagon6.sh index 1c78408954f84d20a17135720755870fdd7bd044..620f7f6bdada62822390bbc165b70ffe8d0bd00f 100755 --- a/micro/tools/cmake/cmake-build-hexagon6.sh +++ b/micro/tools/cmake/cmake-build-hexagon6.sh @@ -10,17 +10,18 @@ if [ -z "$HEXAGON_SDK_ROOT" ]; then echo "HEXAGON_SDK_ROOT is undefined"; fi -BUILD_DIR=build/cmake-build/hexagon6 +BUILD_DIR=build/micro/hexagon6 mkdir -p ${BUILD_DIR} cd ${BUILD_DIR} -cmake ../../.. \ +cmake ../../../micro \ -DHEXAGON_SDK_ROOT=${HEXAGON_SDK_ROOT} \ -DHEXAGON_TOOLS=${HEXAGON_TOOLS} \ + -DMACE_MICRO_ENABLE_EXAMPLES=OFF \ -DCMAKE_TOOLCHAIN_FILE=./cmake/toolchain/hexagon6.toolchain.cmake \ -DCMAKE_INSTALL_PREFIX=install \ $@ || exit 1 -cmake --build . -- -j || exit 1 +cmake --build . --target install -- -j || exit 1 cd ../../.. 
diff --git a/micro/tools/cmake/cmake-build-hexagon8.sh b/micro/tools/cmake/cmake-build-hexagon8.sh index 7baa821f040d3247f04ea8caf8d5e9992ae10b07..08f32dcfc0c4938547ad988633e42a8bd4fa6aed 100755 --- a/micro/tools/cmake/cmake-build-hexagon8.sh +++ b/micro/tools/cmake/cmake-build-hexagon8.sh @@ -10,16 +10,18 @@ if [ -z "$HEXAGON_SDK_ROOT" ]; then echo "HEXAGON_SDK_ROOT is undefined"; fi -BUILD_DIR=build/cmake-build/hexagon8 -mkdir -p ${BUILD_DIR} && cd ${BUILD_DIR} +BUILD_DIR=build/micro/hexagon8 +mkdir -p ${BUILD_DIR} +cd ${BUILD_DIR} -cmake ../../.. \ +cmake ../../../micro \ -DHEXAGON_SDK_ROOT=${HEXAGON_SDK_ROOT} \ -DHEXAGON_TOOLS=${HEXAGON_TOOLS} \ + -DMACE_MICRO_ENABLE_EXAMPLES=OFF \ -DCMAKE_TOOLCHAIN_FILE=./cmake/toolchain/hexagon8.toolchain.cmake \ -DCMAKE_INSTALL_PREFIX=install \ $@ || exit 1 -cmake --build . -- -j || exit 1 +cmake --build . --target install -- -j || exit 1 cd ../../.. diff --git a/micro/tools/cmake/cmake-build-host.sh b/micro/tools/cmake/cmake-build-host.sh index d9ec5b7bbd8a1886c479473c1cada1446775a151..9f5503255a1ecce8c46862477395c9a96dc3dec8 100755 --- a/micro/tools/cmake/cmake-build-host.sh +++ b/micro/tools/cmake/cmake-build-host.sh @@ -1,12 +1,12 @@ #!/bin/bash -BUILD_DIR=build/cmake-build/host + +BUILD_DIR=build/micro/host mkdir -p ${BUILD_DIR} && cd ${BUILD_DIR} -cmake ../../.. \ - -DMACE_MICRO_ENABLE_TESTS=ON \ +cmake ../../../micro \ -DCMAKE_INSTALL_PREFIX=install \ $@ || exit 1 -cmake --build . -- -j || exit 1 +cmake --build . --target install -- -j || exit 1 cd ../../.. diff --git a/tools/cmake/cmake-generate-proto-py-host.sh b/tools/cmake/cmake-generate-proto-py-host.sh new file mode 100755 index 0000000000000000000000000000000000000000..5573a1f9f6c911e2d24139a59905a795764f933a --- /dev/null +++ b/tools/cmake/cmake-generate-proto-py-host.sh @@ -0,0 +1,10 @@ +#!/bin/bash + +if [[ -z "$BUILD_DIR" ]]; then + BUILD_DIR=build/cmake-build/host +fi + +mkdir -p ${BUILD_DIR} && cd ${BUILD_DIR} +cmake ../../.. 
+make mace_proto_py micro_mem_proto_py -j +cd ../../.. diff --git a/tools/converter.py b/tools/converter.py index c8e38cdffdd5f6198074a3cd6885460c73b853e1..e7a5f05d11ef15d041ba5e6b4d8aab6bc71c7667 100644 --- a/tools/converter.py +++ b/tools/converter.py @@ -61,6 +61,7 @@ PlatformTypeStrs = [ "caffe", "onnx", "megengine", + "keras", "pytorch", ] PlatformType = Enum('PlatformType', [(ele, ele) for ele in PlatformTypeStrs], diff --git a/tools/cpplint.sh b/tools/cpplint.sh index 93e275a0d7df0830b50f89ae81f11ff938b08ddd..07555867d0f96225cbf6c81fe9d80b60136428d4 100755 --- a/tools/cpplint.sh +++ b/tools/cpplint.sh @@ -8,4 +8,12 @@ cpplint --linelength=80 --counting=detailed --root=test/ccutils $(find test/ccut cpplint --linelength=80 --counting=detailed --root=test/ccunit $(find test/ccunit -name "*.h" -or -name "*.cc") cpplint --linelength=80 --counting=detailed --root=test/ccbenchmark $(find test/ccbenchmark -name "*.h" -or -name "*.cc") -cpplint --linelength=80 --counting=detailed $(find ./micro -path ./micro/codegen -prune -o -name "*.h" -or -name "*.cc") +cpplint --linelength=80 --counting=detailed --filter=-build/include_what_you_use $(find micro/base -name "*.h" -or -name "*.cc") +cpplint --linelength=80 --counting=detailed $(find micro/framework -name "*.h" -or -name "*.cc") +cpplint --linelength=80 --counting=detailed $(find micro/include -name "*.h" -or -name "*.cc") +cpplint --linelength=80 --counting=detailed $(find micro/model -name "*.h" -or -name "*.cc") +cpplint --linelength=80 --counting=detailed --filter=-build/include_what_you_use $(find micro/ops -name "*.h" -or -name "*.cc") +cpplint --linelength=80 --counting=detailed $(find micro/port -name "*.h" -or -name "*.cc") +cpplint --linelength=80 --counting=detailed --filter=-build/include_what_you_use $(find micro/test \( -path micro/test/ccbenchmark/codegen -or -path micro/test/ccbaseline/codegen \) -prune -o -name "*.h" -or -name "*.cc") +cpplint --linelength=80 --counting=detailed $(find micro/tools 
-name "*.h" -or -name "*.cc") +cpplint --linelength=80 --counting=detailed --filter=-build/include_subdir $(find micro/examples \( -path micro/examples/classifier/mbed-os -or -path micro/examples/classifier/data -or -path micro/examples/classifier/install -or -path micro/examples/classifier/BUILD \) -prune -name "*.cc" -or -name "*.h") diff --git a/tools/python/convert.py b/tools/python/convert.py index b0ba9a2c54bcfcef334417acdea53a98688f18e4..2d9a4f8b79b16ef9016a70af0dcbc07b408f4d42 100644 --- a/tools/python/convert.py +++ b/tools/python/convert.py @@ -123,6 +123,8 @@ def convert_model(conf, quantize_stat): option.change_concat_ranges = conf[ModelKeys.change_concat_ranges] if ModelKeys.cl_mem_type in conf: option.cl_mem_type = conf[ModelKeys.cl_mem_type] + if ModelKeys.platform in conf: + option.platform = conf[ModelKeys.platform] if ModelKeys.runtime in conf: option.device = conf[ModelKeys.runtime] if option.device == DeviceType.CPU_GPU: @@ -190,6 +192,10 @@ def convert_model(conf, quantize_stat): from transform import megengine_converter converter = megengine_converter.MegengineConverter( option, conf["model_file_path"]) + elif platform == Platform.KERAS: + from transform import keras_converter + converter = keras_converter.KerasConverter( + option, conf["model_file_path"]) elif platform == Platform.PYTORCH: from transform import pytorch_converter converter = pytorch_converter.PytorchConverter( diff --git a/tools/python/micro/jinja2_files/micro_graph_data.h.jinja2 b/tools/python/micro/jinja2_files/micro_graph_data.h.jinja2 index 2b0e6c8c17952060ec34deb72b9cd8320922d5d8..8fb470df5b1904cf06ff3829283609500b435a29 100644 --- a/tools/python/micro/jinja2_files/micro_graph_data.h.jinja2 +++ b/tools/python/micro/jinja2_files/micro_graph_data.h.jinja2 @@ -20,7 +20,7 @@ namespace micro { namespace {{model_tag}} { uint8_t kGraphData[{{ data_size }}] = { - {% for d in embed_data %}{{"0x%02X, " % d }}{%endfor%} + {{ hex_bytes_string }} }; } // namespace {{model_tag}} diff 
--git a/tools/python/micro/jinja2_files/micro_model_data.h.jinja2 b/tools/python/micro/jinja2_files/micro_model_data.h.jinja2 index 4b664b3952141fd7473dfeade39a2a89739dfc3e..f702429ee4db70c88fe65307c1d8f52b3ead0165 100644 --- a/tools/python/micro/jinja2_files/micro_model_data.h.jinja2 +++ b/tools/python/micro/jinja2_files/micro_model_data.h.jinja2 @@ -20,7 +20,7 @@ namespace micro { namespace {{model_tag}} { const uint8_t kModelData[{{ data_size }}] = { - {% for d in embed_data %}{{"0x%02X, " % d }}{%endfor%} + {{ hex_bytes_string }} }; } // namespace {{model_tag}} diff --git a/tools/python/micro/jinja2_files/micro_net_def.h.jinja2 b/tools/python/micro/jinja2_files/micro_net_def.h.jinja2 index 5380ca11ca72b1ec5033a14bfd979ee8ba912562..b9b7380a8aaf6b435215a41f1994daa94d7f4c00 100644 --- a/tools/python/micro/jinja2_files/micro_net_def.h.jinja2 +++ b/tools/python/micro/jinja2_files/micro_net_def.h.jinja2 @@ -20,7 +20,7 @@ namespace micro { namespace {{model_tag}} { uint8_t kNetDef[{{ data_size }}] = { - {% for d in embed_data %}{{"0x%02X, " % d }}{%endfor%} + {{ hex_bytes_string }} }; } // namespace {{model_tag}} diff --git a/tools/python/micro/micro_codegen.py b/tools/python/micro/micro_codegen.py index 4646b62d1e358741f02e6408204b3dfed8aebf5b..bd3e5c15fc6e5c4696b28b328418e776e5aae158 100644 --- a/tools/python/micro/micro_codegen.py +++ b/tools/python/micro/micro_codegen.py @@ -57,14 +57,31 @@ class MicroCodeGen: with open(output_path, "w") as f: f.write(source) + def gen_micro_source_from_array(self, model_tag, embed_data, + jinja_file_name, output_path): + cwd = os.path.dirname(__file__) + j2_env = Environment(loader=FileSystemLoader(cwd), trim_blocks=True) + + template_name = JINJA2_DIR + jinja_file_name + + hex_bytes_string = ", ".join(map(hex, embed_data)) + + source = j2_env.get_template(template_name).render( + model_tag=model_tag, + hex_bytes_string=hex_bytes_string, + data_size=len(embed_data), + ) + with open(output_path, "w") as f: + f.write(source) + def 
gen_net_def_data(self, model_tag, model_def_data, output_path): embed_data = np.frombuffer(model_def_data, dtype=np.uint8) - self.gen_micro_source_from_bytes( - model_tag, embed_data, 'micro_net_def.h.jinja2', output_path) + self.gen_micro_source_from_array(model_tag, embed_data, + 'micro_net_def.h.jinja2', output_path) def gen_graph_data(self, model_tag, graph_data, output_path): embed_data = np.frombuffer(graph_data, dtype=np.uint8) - self.gen_micro_source_from_bytes(model_tag, embed_data, + self.gen_micro_source_from_array(model_tag, embed_data, 'micro_graph_data.h.jinja2', output_path) @@ -82,7 +99,7 @@ class MicroCodeGen: def gen_model_data(self, model_tag, model_param_data, output_path): embed_data = np.frombuffer(model_param_data, dtype=np.uint8) - self.gen_micro_source_from_bytes(model_tag, embed_data, + self.gen_micro_source_from_array(model_tag, embed_data, 'micro_model_data.h.jinja2', output_path) diff --git a/tools/python/micro/micro_op_converter.py b/tools/python/micro/micro_op_converter.py index a2c691eef649d0fe2a8c86b93f86efbbd5820d9f..865b261f6594be6e089a2fd7160c3c5a9d847207 100644 --- a/tools/python/micro/micro_op_converter.py +++ b/tools/python/micro/micro_op_converter.py @@ -33,6 +33,9 @@ class MicroOpConverter: def convert_filters_format(self): arg_format = ConverterUtil.get_arg(self.net_def, MaceKeyword.mace_filter_format_str) + if (arg_format.i == DataFormat.OHWI.value): + return + mace_check(arg_format.i == DataFormat.OIHW.value, "Invalid model") arg_format.i = DataFormat.OHWI.value @@ -40,7 +43,8 @@ class MicroOpConverter: for op in self.net_def.op: # OIHW => OHWI if (op.type == MaceOp.Conv2D.name or - op.type == MaceOp.DepthwiseConv2d.name) and \ + op.type == MaceOp.DepthwiseConv2d.name or + op.type == MaceOp.FullyConnected.name) and \ op.input[1] not in transposed_filter: print("transform filter: %s" % op.type) filter = self._consts[op.input[1]] diff --git a/tools/python/micro/micro_support_ops.py b/tools/python/micro/micro_support_ops.py 
index 5f9bb5f7cfd3f048fd4140d893bd95844d0c10b3..080b7d39792e3a0e7890b0e54622401610e5f527 100644 --- a/tools/python/micro/micro_support_ops.py +++ b/tools/python/micro/micro_support_ops.py @@ -67,6 +67,9 @@ McSupportedOps = [ MaceOp.Eltwise.name, mace_pb2.DT_FLOAT, None), OpDescriptor('micro/ops/eltwise.h', 'EltwiseOp', MaceOp.Eltwise.name, mace_pb2.DT_INT32, None), + OpDescriptor('micro/ops/nhwc/cmsis_nn/arm_eltwise_int8.h', + 'ArmEltwiseInt8Op', + MaceOp.Eltwise.name, mace_pb2.DT_INT8, None), OpDescriptor('micro/ops/activation.h', 'ActivationOp', MaceOp.Activation.name, mace_pb2.DT_FLOAT, DataFormat.NHWC), OpDescriptor('micro/ops/strided_slice.h', 'StridedSliceOp', @@ -92,8 +95,12 @@ McSupportedOps = [ DataFormat.NHWC), OpDescriptor('micro/ops/shape.h', 'ShapeOp', MaceOp.Shape.name, mace_pb2.DT_FLOAT, DataFormat.NHWC), - OpDescriptor('micro/ops/reshape.h', 'ReshapeOp', MaceOp.Reshape.name, + OpDescriptor('micro/ops/reshape.h', 'ReshapeOp', + MaceOp.Reshape.name, mace_pb2.DT_FLOAT, DataFormat.NHWC), + OpDescriptor('micro/ops/reshape.h', 'ReshapeOp', + MaceOp.Reshape.name, + mace_pb2.DT_INT8, DataFormat.NHWC), OpDescriptor('micro/ops/expand_dims.h', 'ExpandDimsOp', MaceOp.ExpandDims.name, mace_pb2.DT_FLOAT, DataFormat.NHWC), OpDescriptor('micro/ops/concat.h', 'ConcatOp', MaceOp.Concat.name, @@ -118,6 +125,36 @@ McSupportedOps = [ 'DepthwiseConv2dKB1S4Op', MaceOp.DepthwiseConv2d.name, mace_pb2.DT_FLOAT, DataFormat.NHWC, 'kb1s4'), + OpDescriptor('micro/ops/nhwc/cmsis_nn/quantize.h', + 'QuantizeOp', + MaceOp.Quantize.name, mace_pb2.DT_INT8, + DataFormat.NHWC), + OpDescriptor('micro/ops/nhwc/cmsis_nn/dequantize.h', + 'DequantizeOp', + MaceOp.Dequantize.name, mace_pb2.DT_INT8, + DataFormat.NHWC), + OpDescriptor('micro/ops/nhwc/cmsis_nn/arm_conv_2d_int8.h', + 'ArmConv2dInt8Op', + MaceOp.Conv2D.name, mace_pb2.DT_INT8, + DataFormat.NHWC), + OpDescriptor('micro/ops/nhwc/cmsis_nn/arm_depthwise_conv_2d_int8.h', + 'ArmDepthwiseConv2dInt8Op', + MaceOp.DepthwiseConv2d.name, 
mace_pb2.DT_INT8, + DataFormat.NHWC), + OpDescriptor('micro/ops/nhwc/cmsis_nn/arm_pooling_int8.h', + 'ArmPoolingInt8Op', + MaceOp.Pooling.name, mace_pb2.DT_INT8, + DataFormat.NHWC), + OpDescriptor('micro/ops/squeeze.h', 'SqueezeOp', MaceOp.Squeeze.name, + mace_pb2.DT_INT8, None), + OpDescriptor('micro/ops/nhwc/cmsis_nn/arm_softmax_int8.h', + 'ArmSoftmaxInt8Op', + MaceOp.Softmax.name, mace_pb2.DT_INT8, + DataFormat.NHWC), + OpDescriptor('micro/ops/nhwc/cmsis_nn/arm_mat_mul_int8.h', + 'ArmMatMulInt8Op', + MaceOp.MatMul.name, mace_pb2.DT_INT8, + DataFormat.NHWC) ] @@ -126,7 +163,9 @@ class OpResolver: self.net_def = pb_model self.op_desc_map = {} self.op_desc_list = [] - if model_conf[ModelKeys.platform] == Platform.TENSORFLOW: + platform = model_conf[ModelKeys.platform] + if platform == Platform.TENSORFLOW or \ + platform == Platform.KERAS: self.default_data_format = DataFormat.NHWC else: self.default_data_format = DataFormat.NCHW @@ -134,7 +173,7 @@ class OpResolver: self.default_data_format) if ModelKeys.quantize in model_conf and \ model_conf[ModelKeys.quantize] == 1: - self.default_data_type = mace_pb2.DT_UINT8 + self.default_data_type = mace_pb2.DT_INT8 else: self.default_data_type = \ model_conf.get(ModelKeys.data_type, mace_pb2.DT_FLOAT) @@ -218,7 +257,7 @@ class OpResolver: if not data_type_match: return False op_tag = self.get_op_tag(op_def) - if op_tag != op_desc.tag: + if (op_desc.tag) and (op_tag != op_desc.tag): return False return True @@ -261,6 +300,7 @@ class OpResolver: "not support op type %s, data type is %s, format is %s" % # noqa (op_def.type, self.get_op_data_type(op_def), self.get_op_data_format(op_def))) + if op_def.type not in self.op_desc_map: self.op_desc_map[op_def.type] = [] else: diff --git a/tools/python/micro/scratch_computer.py b/tools/python/micro/scratch_computer.py index 3edaab3f3415be66643e46f6f5195db9bf8b5f99..7599c32c997d41123df49373e0bdc6f04df4770a 100644 --- a/tools/python/micro/scratch_computer.py +++ 
b/tools/python/micro/scratch_computer.py @@ -22,13 +22,15 @@ from transform.base_converter import MaceOp class ScratchComputer: def __init__(self, net_def, model_conf): self.net_def = net_def + self.model_conf = model_conf if ModelKeys.quantize in model_conf and \ model_conf[ModelKeys.quantize] == 1: self.default_data_type = mace_pb2.DT_UINT8 else: self.default_data_type = mace_pb2.DT_FLOAT self._scratch_map = { - MaceOp.Conv2D: self.scratch_size_no_need, + MaceOp.Conv2D: self.scratch_size_conv, + MaceOp.FullyConnected: self.scratch_size_no_need, MaceOp.Squeeze: self.scratch_size_of_squeeze, MaceOp.Softmax: self.scratch_size_no_need, MaceOp.Eltwise: self.scratch_size_eltwise, @@ -39,7 +41,7 @@ class ScratchComputer: MaceOp.BiasAdd: self.scratch_size_no_need, MaceOp.BatchNorm: self.scratch_size_no_need, MaceOp.Shape: self.scratch_size_no_need, - MaceOp.Reshape: self.scratch_size_no_need, + MaceOp.Reshape: self.scratch_size_of_reshape, MaceOp.ExpandDims: self.scratch_size_of_expand_dims, MaceOp.Concat: self.scratch_size_of_concat, MaceOp.MatMul: self.scratch_size_of_matmul, @@ -47,6 +49,8 @@ class ScratchComputer: MaceOp.DepthwiseConv2d: self.scratch_size_of_depthwise_conv, MaceOp.ArgMax: self.scratch_size_no_need, MaceOp.Cast: self.scratch_size_no_need, + MaceOp.Quantize: self.scratch_size_no_need, + MaceOp.Dequantize: self.scratch_size_no_need, } def compute_size(self): @@ -80,18 +84,55 @@ class ScratchComputer: return 2 elif data_type == mace_pb2.DT_UINT8: return 1 + elif data_type == mace_pb2.DT_INT16: + return 2 + elif data_type == mace_pb2.DT_INT8: + return 1 else: mace_check(False, "Invalid data type: %s" % data_type) + def scratch_size_conv(self, op_def): + if (ModelKeys.quantize in self.model_conf + and self.model_conf[ModelKeys.quantize] == 1): + output_channels = op_def.output_shape[0].dims[3] + cmsis_bias_bytes = \ + self.get_data_bytes(mace_pb2.DT_INT32) * output_channels + + input_dims = self.get_op_input_dims(op_def, 0) + filter_dims = 
self.get_op_input_dims(op_def, 1) + cmsis_nn_buffer_bytes = \ + 2 \ + * input_dims[3] \ + * filter_dims[2] \ + * filter_dims[1] \ + * self.get_data_bytes(mace_pb2.DT_INT16) + + return cmsis_nn_buffer_bytes + cmsis_bias_bytes + else: + return 0 + def scratch_size_of_expand_dims(self, op_def): output_dim_size = len(op_def.output_shape[0].dims) data_type_bytes = self.get_data_bytes(mace_pb2.DT_INT32) return output_dim_size * data_type_bytes def scratch_size_of_matmul(self, op_def): - output_dim_size = len(op_def.output_shape[0].dims) - data_type_bytes = self.get_data_bytes(mace_pb2.DT_INT32) - return output_dim_size * data_type_bytes + if (ModelKeys.quantize in self.model_conf + and self.model_conf[ModelKeys.quantize] == 1): + output_dim_bytes = \ + len(op_def.output_shape[0].dims) \ + * self.get_data_bytes(mace_pb2.DT_INT32) + + cols = op_def.output_shape[0].dims[1] + cmsis_bias_bytes = cols * self.get_data_bytes(mace_pb2.DT_INT32) + + return output_dim_bytes + cmsis_bias_bytes + else: + output_dim_bytes = \ + len(op_def.output_shape[0].dims) \ + * self.get_data_bytes(mace_pb2.DT_INT32) + + return output_dim_bytes def get_op_input_dims(self, op_def, idx): input_name = op_def.input[idx] @@ -107,8 +148,7 @@ class ScratchComputer: def scratch_size_of_pooling(self, op_def): input0_dims = self.get_op_input_dims(op_def, 0) channels = input0_dims[3] - mace_check(channels > 0, - "can not inference pooling's input shape.") + mace_check(channels > 0, "can not inference pooling's input shape.") int_bytes = self.get_data_bytes(mace_pb2.DT_INT32) float_bytes = self.get_data_bytes(mace_pb2.DT_FLOAT) @@ -116,14 +156,30 @@ class ScratchComputer: return channels * (int_bytes + float_bytes) def scratch_size_of_depthwise_conv(self, op_def): - filter_dims = self.get_op_input_dims(op_def, 1) - k_batch = filter_dims[0] - block_size = k_batch - if block_size > 4: - block_size = 4 - k_channels = filter_dims[3] - float_bytes = self.get_data_bytes(mace_pb2.DT_FLOAT) - return block_size * 4 * 
k_channels * float_bytes + if (ModelKeys.quantize in self.model_conf + and self.model_conf[ModelKeys.quantize] == 1): + output_channels = op_def.output_shape[0].dims[3] + cmsis_bias_and_quant_bytes = \ + self.get_data_bytes(mace_pb2.DT_INT32) * output_channels * 3 + + input_dims = self.get_op_input_dims(op_def, 0) + filter_dims = self.get_op_input_dims(op_def, 1) + cmsis_nn_buffer_bytes = \ + input_dims[3] \ + * filter_dims[2] \ + * filter_dims[1] \ + * self.get_data_bytes(mace_pb2.DT_INT16) + + return cmsis_nn_buffer_bytes + cmsis_bias_and_quant_bytes + else: + filter_dims = self.get_op_input_dims(op_def, 1) + k_batch = filter_dims[0] + block_size = k_batch + if block_size > 4: + block_size = 4 + k_channels = filter_dims[3] + float_bytes = self.get_data_bytes(mace_pb2.DT_FLOAT) + return block_size * 4 * k_channels * float_bytes def scratch_size_of_squeeze(self, op_def): input0_dims = self.get_op_input_dims(op_def, 0) @@ -136,3 +192,11 @@ class ScratchComputer: def scratch_size_of_concat(self, op_def): # On a 64bit operating system, one pointer data need 8 bytes return len(op_def.input) * self.get_data_bytes(mace_pb2.DT_INT32) * 3 + + def scratch_size_of_reshape(self, op_def): + shape_dims = self.get_op_input_dims(op_def, 1) + shape_size = 1 + for i in range(len(shape_dims)): + shape_size *= shape_dims[i] + + return shape_size * self.get_data_bytes(mace_pb2.DT_INT32) diff --git a/tools/python/micro_converter.py b/tools/python/micro_converter.py index a6a3dc28fc00e60d03da3e27d7ad93377b7c2578..7e2f16828fea59c65ab214b98610bcfb7079d62b 100644 --- a/tools/python/micro_converter.py +++ b/tools/python/micro_converter.py @@ -33,7 +33,6 @@ from utils.util import mace_check NetDefExcludeFields = { 'OperatorDef': [ - 'quantize_info', 'node_id', 'op_id', 'padding', @@ -48,13 +47,15 @@ class MicroConverter: model_name, offset16=False, write_magic=False): self.model_conf = model_conf data_type = model_conf.get(ModelKeys.data_type, mace_pb2.DT_FLOAT) + # self.net_def.arg + if 
model_conf.get(ModelKeys.quantize_schema) == "int8": + data_type = mace_pb2.DT_INT8 self.net_def = MicroIoConverter.convert(net_def, data_type) self.model_weights = model_weights self.model_name = model_name self.offset16 = offset16 self.write_magic = write_magic self.code_gen = MicroCodeGen() - data_type = model_conf.get(ModelKeys.data_type, mace_pb2.DT_FLOAT) self.np_data_type = data_type_to_np_dt(data_type, np.float32) self.gen_folder = 'micro/codegen/' util.mkdir_p(self.gen_folder) @@ -146,7 +147,15 @@ class MicroConverter: tmp_workspace_file = "WORKSPACE" os.system("mkdir -p %s && touch %s/%s" % (tmp_dir, tmp_dir, tmp_workspace_file)) - tar_command = "tar --exclude=micro/tools --exclude=micro/test " + tar_command = "tar --exclude=micro/tools" + tar_command += " --exclude=micro/test" + tar_command += " --exclude=micro/build" + tar_command += " --exclude=micro/cmake" + tar_command += " --exclude=micro/codegen" + tar_command += " --exclude=micro/dockerfiles" + tar_command += " --exclude=micro/examples" + tar_command += " --exclude=micro/third_party" + tar_command += " --exclude=micro/pretrained_models" tar_command += " ".join(exclude_list) tar_command += " -zcf " + tar_package_path tar_command += " micro -C %s %s" % (tmp_dir, tmp_workspace_file) diff --git a/tools/python/py_proto/__init__.py b/tools/python/py_proto/__init__.py index c2dfd046da287ce117527f29906242ddff101e0f..c4e6822cf5e54f6fd1148c608537912a43d2c80f 100644 --- a/tools/python/py_proto/__init__.py +++ b/tools/python/py_proto/__init__.py @@ -27,6 +27,7 @@ try: device.execute("bazel version") except: # noqa MaceLogger.warning("No bazel, use cmake.") + device.execute("bash tools/cmake/cmake-generate-proto-py-host.sh") else: try: device.execute("bazel build //mace/proto:mace_py") diff --git a/tools/python/quantize/quantize_util.py b/tools/python/quantize/quantize_util.py index 410c049300605718b35eccb5b9ff25a78a4efb6d..83f8b2e01efcaf003421b7f43f41e5af1face9e0 100644 --- 
a/tools/python/quantize/quantize_util.py +++ b/tools/python/quantize/quantize_util.py @@ -71,6 +71,14 @@ class QuantizedData(object): self._maxval = maxval +def adjust_range_int8(in_min, in_max): + in_min = min(0.0, in_min) + in_max = max(0.0, in_max) + scale = (in_max - in_min) / 255 + zero = int(-in_min / scale - 128) + return scale, zero, in_min, in_max + + def adjust_range(in_min, in_max, device, non_zero): if device in [DeviceType.HEXAGON.value, DeviceType.HTA.value]: return adjust_range_for_hexagon(in_min, in_max) @@ -153,6 +161,29 @@ def quantize_with_scale_and_zero(data, scale, zero): return quantized_data +def quantize_int8(data): + np_data = np.array(data).astype(float) + in_min = np_data.min() + in_max = np_data.max() + + in_min = min(0.0, in_min) + in_max = max(0.0, in_max) + max_abs = max(abs(in_min), abs(in_max)) + zero = 0 + scale = max_abs / 127 + + output = np.clip((np.round(zero + np_data / scale).astype(np.int32)), + -127, 127) + + quantized_data = QuantizedData() + quantized_data.data = output + quantized_data.scale = scale + quantized_data.zero = zero + quantized_data.minval = -127 * scale + quantized_data.maxval = 127 * scale + return quantized_data + + def quantize(data, device, non_zero): np_data = np.array(data).astype(float) in_min = np_data.min() diff --git a/tools/python/run_micro.py b/tools/python/run_micro.py index e100db2c45e32385d1950a354b390d65f187fa1e..11b9afa07a996e1d38998f949c47525a5040ff84 100644 --- a/tools/python/run_micro.py +++ b/tools/python/run_micro.py @@ -43,11 +43,15 @@ def join_2d_array(xs): def build_engine(model_name, data_type): mace_check(flags.model_name is not None and len(model_name) > 0, "you should specify model name for build.") - command = "cd micro && tools/cmake/cmake-build-host.sh" \ - " -DMICRO_MODEL_NAME=%s -DCMAKE_BUILD_TYPE=Release" % model_name + command = ("micro/tools/cmake/cmake-build-host.sh" + " -DMICRO_MODEL_NAME=%s -DMACE_MICRO_ENABLE_CMSIS=ON" + " -DCMAKE_BUILD_TYPE=Release" % model_name) if 
data_type == mace_pb2.DT_BFLOAT16: - command += " -DMACE_ENABLE_BFLOAT16=ON" + command += " -DMACE_MICRO_ENABLE_BFLOAT16=ON" print("The current engine's data type is bfloat16.") + else: + command += " -DMACE_MICRO_ENABLE_BFLOAT16=OFF" + device.execute(command) @@ -168,8 +172,9 @@ def run_model_with_conf(flags, args, model_name, model_conf): if flags.vlog_level > 0: envs += ["MACE_CPP_MIN_VLOG_LEVEL=%s" % flags.vlog_level] - target = Target("micro/build/cmake-build/host/tools/micro_run_static", [], - opts=opts, envs=envs) + target = Target("build/micro/host/tools/micro_run_static", [], + opts=opts, + envs=envs) run_target.run_target(target_abi, install_dir, target, device_ids="host") diff --git a/tools/python/run_micro_bazel.py b/tools/python/run_micro_bazel.py deleted file mode 100644 index 9008ec02be7faf0245462595ec153d1f0b8bf87e..0000000000000000000000000000000000000000 --- a/tools/python/run_micro_bazel.py +++ /dev/null @@ -1,292 +0,0 @@ -# Copyright 2020 The MACE Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import argparse -import copy -import numpy as np -import shutil -import tempfile - -from micro_converter import MicroConverter -from py_proto import mace_pb2 -import run_target -from utils import util -from utils import device -from utils import config_parser -from utils.target import Target -from utils.config_parser import ModelKeys -from utils.util import MaceLogger -from utils.util import mace_check -import validate -import layers_validate - - -def join_2d_array(xs): - return ":".join([",".join([str(y) for y in x]) for x in xs]) - - -def build_engine(model_name, data_type): - mace_check(flags.model_name is not None and len(model_name) > 0, - "you should specify model name for build.") - command = "bazel build //micro/tools:micro_run_static" \ - " --config optimization " \ - " --copt \"-DMICRO_MODEL_NAME=%s\"" % model_name - if data_type == mace_pb2.DT_BFLOAT16: - command += " --copt \"-DMACE_ENABLE_BFLOAT16\"" - print("The current engine's data type is bfloat16.") - device.execute(command) - - -def get_model_conf_by_name(flags, conf): - for name, model_conf in conf["models"].items(): - if not flags.model_name or name == flags.model_name: - return model_conf - return None - - -def run_model(flags, args, conf): - model_conf = get_model_conf_by_name(flags, conf) - mace_check(model_conf is not None, "Get model conf failed.") - model_conf = config_parser.normalize_model_config(model_conf) - run_model_with_conf(flags, args, flags.model_name, model_conf) - - -def gen_sub_model_conf(output_config, flags, conf): - model_conf = copy.deepcopy(get_model_conf_by_name(flags, conf)) - model_conf['subgraphs'][0]['output_tensors'] = \ - output_config['output_tensors'] - model_conf['subgraphs'][0]['output_shapes'] = \ - output_config['output_shapes'] - return model_conf - - -def run_layers_validate(flags, args, original_conf): - model_name = flags.model_name - 
original_model_dir = flags.output + "/" + \ - original_conf['library_name'] + "/model" - model_dir = "/tmp/micro_run/model" - device.execute("mkdir -p %s" % model_dir) - device.execute("cp -p %s/%s.pb %s" % - (original_model_dir, model_name, model_dir)) - params_file_path = "%s/%s.data" % (original_model_dir, model_name) - output_configs = layers_validate.get_layers( - model_dir, model_name, flags.layers) - - for i in range(len(output_configs)): - sub_model_conf = gen_sub_model_conf( - output_configs[i], flags, original_conf) - with open(output_configs[i]['model_file_path'], "rb") as model_file: - net_def = mace_pb2.NetDef() - net_def.ParseFromString(model_file.read()) - with open(params_file_path, "rb") as params_file: - weights = bytearray(params_file.read()) - micro_conf = \ - config_parser.normalize_model_config(sub_model_conf) - MicroConverter(micro_conf, net_def, - weights, model_name).gen_code() - build_engine(model_name, micro_conf[ModelKeys.data_type]) - run_model_with_conf(flags, args, model_name, micro_conf) - - -def run_model_with_conf(flags, args, model_name, model_conf): - target_abi = "host" - dev = device.HostDevice("host", target_abi) - install_dir = "/tmp/micro_run/" + model_name - - if ModelKeys.check_tensors in model_conf: - model_conf[ModelKeys.output_tensors] = model_conf[ - ModelKeys.check_tensors] - model_conf[ModelKeys.output_shapes] = model_conf[ - ModelKeys.check_shapes] - - model_args = {"model_name": model_name, - "input_node": ",".join( - model_conf[ModelKeys.input_tensors]), - "input_shape": join_2d_array( - model_conf[ModelKeys.input_shapes]), - "output_node": ",".join( - model_conf[ModelKeys.output_tensors]), - "output_shape": join_2d_array( - model_conf[ModelKeys.output_shapes]), - "input_data_format": ",".join( - [df.name for df in - model_conf[ModelKeys.input_data_formats]]), - "output_data_format": ",".join( - [df.name for df in - model_conf[ModelKeys.output_data_formats]]) - } - - opts = ["--%s=%s" % (arg_key, arg_val) for 
arg_key, arg_val in - model_args.items()] + args - - # generate data start - tmp_dir_name = tempfile.mkdtemp() - input_file_prefix = tmp_dir_name + "/" + model_name - if ModelKeys.validation_inputs_data in model_conf: - input_tensor = model_conf[ModelKeys.input_tensors] - input_data = model_conf[ModelKeys.validation_inputs_data] - mace_check(len(input_tensor) == len(input_data), - "len(input_tensor) != len(validate_data") - - for i in range(len(input_tensor)): - util.download_or_get_file( - model_conf[ModelKeys.validation_inputs_data][i], "", - util.formatted_file_name(input_file_prefix, - input_tensor[i])) - else: - generate_input_data(input_file_prefix, - model_conf[ModelKeys.input_tensors], - model_conf[ModelKeys.input_shapes], - model_conf[ModelKeys.input_ranges], - model_conf[ModelKeys.input_data_types]) - - dev.install(Target(tmp_dir_name), install_dir + "/validate_in") - target_input_file = "%s/validate_in/%s" % ( - install_dir, model_name) - target_output_dir = "%s/validate_out" % install_dir - dev.mkdir(target_output_dir) - target_output_file = target_output_dir + "/" + model_name - opts += ["--input_file=%s" % target_input_file, - "--output_file=%s" % target_output_file] - # generate data end - - envs = [] - if flags.vlog_level > 0: - envs += ["MACE_CPP_MIN_VLOG_LEVEL=%s" % flags.vlog_level] - - target = Target("bazel-bin/micro/tools/micro_run_static", [], - opts=opts, envs=envs) - run_target.run_target(target_abi, install_dir, target, - device_ids="host") - - if flags.validate: - validate_model_file = util.download_or_get_model( - model_conf[ModelKeys.model_file_path], - model_conf[ModelKeys.model_sha256_checksum], - tmp_dir_name) - - validate_weight_file = "" - if ModelKeys.weight_file_path in model_conf: - validate_weight_file = util.download_or_get_model( - model_conf[ModelKeys.weight_file_path], - model_conf[ModelKeys.weight_sha256_checksum], - tmp_dir_name) - - dev.pull(Target(target_output_dir), tmp_dir_name + "/validate_out") - output_file_prefix 
= tmp_dir_name + "/validate_out/" + model_name - validate.validate(model_conf[ModelKeys.platform], - validate_model_file, - validate_weight_file, - input_file_prefix, - output_file_prefix, - model_conf[ModelKeys.input_shapes], - model_conf[ModelKeys.output_shapes], - model_conf[ModelKeys.input_data_formats], - model_conf[ModelKeys.output_data_formats], - model_conf[ModelKeys.input_tensors], - model_conf[ModelKeys.output_tensors], - flags.validate_threshold, - model_conf[ModelKeys.input_data_types], - flags.backend, - "", - "") - shutil.rmtree(tmp_dir_name) - - -def generate_input_data(input_file, input_node, input_shape, input_ranges, - input_data_type): - np.random.seed() - for i in range(len(input_node)): - data = np.random.random(input_shape[i]) * ( - input_ranges[i][1] - input_ranges[i][0]) + input_ranges[i][0] - input_file_name = util.formatted_file_name(input_file, input_node[i]) - MaceLogger.info('Generate input file: %s' % input_file_name) - if input_data_type[i] == mace_pb2.DT_FLOAT: - np_data_type = np.float32 - elif input_data_type[i] == mace_pb2.DT_INT32: - np_data_type = np.int32 - - data.astype(np_data_type).tofile(input_file_name) - - -def parse_args(): - parser = argparse.ArgumentParser() - parser.add_argument( - "--config", - type=str, - default="", - help="yaml conf path" - ) - parser.add_argument( - "--model_name", - type=str, - default="", - help="model name in yaml conf" - ) - parser.add_argument( - "--validate", - action="store_true", - help="enable validate" - ) - parser.add_argument( - "--validate_threshold", - type=float, - default="0.99", - help="validate threshold" - ) - parser.add_argument( - "--layers", - type=str, - default="-1", - help="'start_layer:end_layer' or 'layer', similar to python slice." 
- " Use with --validate flag.") - parser.add_argument( - "--backend", - type=str, - default="tensorflow", - help="onnx backend framework") - parser.add_argument( - "--build", - action="store_true", - help="if build before run" - ) - parser.add_argument( - '--output', - type=str, - default="build", - help="output dir") - parser.add_argument( - '--vlog_level', - type=int, - default="0", - help="vlog level") - - return parser.parse_known_args() - - -if __name__ == "__main__": - flags, args = parse_args() - conf = config_parser.parse(flags.config) - if flags.build or flags.validate: - micro_conf = config_parser.normalize_model_config( - conf[ModelKeys.models][flags.model_name]) - build_engine(flags.model_name, micro_conf[ModelKeys.data_type]) - if flags.validate and flags.layers != "-1": - run_layers_validate(flags, args, conf) - else: - run_model(flags, args, conf) diff --git a/tools/python/transform/base_converter.py b/tools/python/transform/base_converter.py index 3b6279ff411a9bd7fc2832a40706b1dbe4f6ad7f..73a428c7e65316904aff6b6f5b7e0d0d5b078c17 100644 --- a/tools/python/transform/base_converter.py +++ b/tools/python/transform/base_converter.py @@ -19,6 +19,7 @@ from py_proto import mace_pb2 from utils.config_parser import DataFormat from utils.config_parser import DeviceType +from utils.config_parser import Platform # SAME_LOWER: if the amount of paddings to be added is odd, @@ -88,7 +89,8 @@ class FrameworkType(Enum): CAFFE = 1 ONNX = 2 MEGENGINE = 3 - PYTORCH = 4 + KERAS = 4 + PYTORCH = 5 MaceSupportedOps = [ @@ -294,6 +296,7 @@ class MaceKeyword(object): mace_across_ch_str = 'across_channels' mace_apu_16bit_per_tensor = 'mace_apu_16bit_per_tensor' mace_apu_data_type_arg_str = 'apu_data_type' + mace_int8 = 'int8' class TransformerRule(Enum): @@ -344,6 +347,8 @@ class TransformerRule(Enum): TRANSFORM_SINGLE_BN_TO_DEPTHWISE_CONV = 45 TRANSFORM_MUL_MAX_TO_PRELU = 46 TRANSFORM_EXPAND_DIMS_TO_RESHAPE = 47 + QUANTIZE_FOLD_RELU = 48 + TRANSFORM_KERAS_QUANTIZE_INFO = 49 
class ConverterInterface(object): @@ -425,6 +430,7 @@ class ConverterOption(object): self._transformer_option = None self._cl_mem_type = "image" self._quantize_stat = False + self._platform = None @property def input_nodes(self): @@ -482,6 +488,10 @@ class ConverterOption(object): def quantize_stat(self): return self._quantize_stat + @property + def platform(self): + return self._platform + @input_nodes.setter def input_nodes(self, input_nodes): for node in input_nodes.values(): @@ -550,6 +560,10 @@ class ConverterOption(object): def quantize_stat(self, quantize_stat): self._quantize_stat = quantize_stat + @platform.setter + def platform(self, platform): + self._platform = platform + def disable_transpose_filters(self): if TransformerRule.TRANSPOSE_FILTERS in self._transformer_option: self._transformer_option.remove(TransformerRule.TRANSPOSE_FILTERS) @@ -609,7 +623,7 @@ class ConverterOption(object): TransformerRule.UPDATE_DATA_FORMAT, TransformerRule.TRANSPOSE_DATA_FORMAT, # Need to be put after SORT_BY_EXECUTION - TransformerRule.ADD_QUANTIZE_TENSOR_RANGE, + TransformerRule.ADD_QUANTIZE_TENSOR_RANGE ] if self._device == DeviceType.APU.value: self._transformer_option = self._transformer_option + [ @@ -624,12 +638,18 @@ class ConverterOption(object): if self._quantize: self._transformer_option = self._transformer_option + [ # need to be put after ADD_QUANTIZE_TENSOR_RANGE + TransformerRule.QUANTIZE_FOLD_RELU, TransformerRule.QUANTIZE_NODES, TransformerRule.QUANTIZE_WEIGHTS, TransformerRule.SORT_BY_EXECUTION, TransformerRule.CHECK_QUANTIZE_INFO, ] + if self._platform == Platform.KERAS: + self._transformer_option = [ + TransformerRule.TRANSFORM_KERAS_QUANTIZE_INFO + ] + self._transformer_option + class ConverterUtil(object): @staticmethod diff --git a/tools/python/transform/keras_converter.py b/tools/python/transform/keras_converter.py new file mode 100644 index 0000000000000000000000000000000000000000..67c305321d19383f4a344d9c62dd5704df0fa1ed --- /dev/null +++ 
b/tools/python/transform/keras_converter.py @@ -0,0 +1,402 @@ +import sys +import copy + +from enum import Enum +import six + +from py_proto import mace_pb2 +from transform import base_converter +from transform.base_converter import ActivationType +from transform.base_converter import ConverterUtil +from transform.base_converter import DataFormat +from transform.base_converter import EltwiseType +from transform.base_converter import FrameworkType +from transform.base_converter import MaceOp +from transform.base_converter import MaceKeyword +from transform.base_converter import PoolingType +from transform.base_converter import PaddingMode +from transform.base_converter import PadType +from transform.base_converter import ReduceType +from transform.base_converter import RoundMode +from tensorflow import keras +from tensorflow.python.keras.layers import convolutional +from quantize import quantize_util +from utils.util import mace_check + +import tensorflow as tf +import tensorflow_model_optimization as tfmot +from tensorflow_model_optimization.python.core.\ + quantization.keras.quantize_layer import QuantizeLayer +from tensorflow_model_optimization.python.core.\ + quantization.keras.quantize_wrapper import QuantizeWrapper +from tensorflow_model_optimization.python.core.\ + quantization.keras.quantize_annotate import QuantizeAnnotate + +padding_mode = { + "valid": PaddingMode.VALID, + "same": PaddingMode.SAME + # 'full': PaddingMode.FULL +} + + +def dtype2mtype(dtype): + if dtype == "float32": + return mace_pb2.DT_FLOAT + if dtype == "int32": + return mace_pb2.DT_INT32 + if dtype == "int8": + return mace_pb2.INT8 + + mace_check(False, "data type %s not supported" % dtype) + return None + + +def keras_shape2list(shape): + dims = shape.as_list() + for i in range(len(dims)): + if dims[i] is None: + dims[i] = 1 + + return dims + + +def get_input(keras_op): + if hasattr(keras_op, "input_proxy"): + return keras_op.input_proxy + else: + return keras_op.input + + +def 
get_output(keras_op): + if hasattr(keras_op, "output_proxy"): + return keras_op.output_proxy + else: + return keras_op.output + + +activation_type = { + "relu": ActivationType.RELU, + # 'relu6': ActivationType.RELUX, + # 'PReLU': ActivationType.PRELU, + # 'TanH': ActivationType.TANH, + "sigmoid": ActivationType.SIGMOID + # 'Clip': ActivationType.RELUX, +} + + +class KerasConverter(base_converter.ConverterInterface): + """A class for convert tensorflow 2.0 keras h5 model to mace model.""" + + def __init__(self, option, src_model_file): + self._op_converters = { + keras.layers.Flatten: self.convert_flatten, + keras.layers.Dense: self.convert_dense, + keras.layers.Conv2D: self.convert_conv2d, + keras.layers.MaxPooling2D: self.convert_maxpooling2d, + keras.layers.Dropout: self.convert_dropout, + keras.layers.DepthwiseConv2D: self.convert_depthwise_conv2d, + keras.layers.Softmax: self.convert_softmax, + QuantizeLayer: self.convert_quantize_layer, + QuantizeWrapper: self.convert_quantize_wrapper, + } + + self._option = option + self._mace_net_def = mace_pb2.NetDef() + ConverterUtil.set_filter_format(self._mace_net_def, DataFormat.HWIO) + ConverterUtil.add_data_format_arg(self._mace_net_def, DataFormat.NHWC) + + with tfmot.quantization.keras.quantize_scope(): + self._keras_model = keras.models.load_model(src_model_file) + + def run(self): + for op in self._keras_model.layers: + mace_check( + type(op) in self._op_converters, + "Mace does not support keras op type %s yet" % type(op)) + self._op_converters[type(op)](op) + + return self._mace_net_def + + def convert_general_op(self, keras_op): + op = self._mace_net_def.op.add() + op.name = keras_op.name + data_type_arg = op.arg.add() + data_type_arg.name = "T" + data_type_arg.i = dtype2mtype(keras_op.dtype) + framework_type_arg = op.arg.add() + framework_type_arg.name = MaceKeyword.mace_framework_type_str + framework_type_arg.i = FrameworkType.KERAS.value + ConverterUtil.add_data_format_arg(op, DataFormat.NHWC) + + return op 
+ + def convert_general_op_with_input_output(self, keras_op): + op = self._mace_net_def.op.add() + op.name = keras_op.name + data_type_arg = op.arg.add() + data_type_arg.name = "T" + data_type_arg.i = dtype2mtype(keras_op.dtype) + framework_type_arg = op.arg.add() + framework_type_arg.name = MaceKeyword.mace_framework_type_str + framework_type_arg.i = FrameworkType.KERAS.value + ConverterUtil.add_data_format_arg(op, DataFormat.NHWC) + + op.input.append(get_input(keras_op).name) + op.output.append(get_output(keras_op).name) + output_shape = op.output_shape.add() + output_shape.dims.extend(keras_shape2list(get_output(keras_op).shape)) + + return op + + def convert_flatten(self, keras_op): + op = self.convert_general_op_with_input_output(keras_op) + op.type = MaceOp.Reshape.name + + dim_arg = op.arg.add() + dim_arg.name = MaceKeyword.mace_dim_str + dim_arg.ints.extend([0, -1]) + + return op + + def convert_dense(self, keras_op): + op = self.convert_general_op(keras_op) + op.type = MaceOp.MatMul.name + + op.input.append(get_input(keras_op).name) + + # Adds kernel tensor + op.input.append(keras_op.kernel.name) + kernel = self.add_keras_tensor(keras_op.kernel) + + # Adds bias tensor + if keras_op.use_bias: + op.input.append(keras_op.bias.name) + self.add_keras_tensor(keras_op.bias) + + act_op = self.split_activation_op(keras_op, op) + return [op, act_op] + + def convert_conv2d(self, keras_op): + op = self.convert_general_op(keras_op) + op.type = MaceOp.Conv2D.name + op.input.append(get_input(keras_op).name) + + # Adds kernel tensor + op.input.append(keras_op.kernel.name) + kernel = self.add_keras_tensor(keras_op.kernel) + + # Adds bias tensor + if keras_op.use_bias: + op.input.append(keras_op.bias.name) + self.add_keras_tensor(keras_op.bias) + + padding_arg = op.arg.add() + padding_arg.name = MaceKeyword.mace_padding_str + padding_arg.i = padding_mode[keras_op.padding].value + + strides_arg = op.arg.add() + strides_arg.name = MaceKeyword.mace_strides_str + 
strides_arg.ints.extend(keras_op.strides) + + dilation_arg = op.arg.add() + dilation_arg.name = MaceKeyword.mace_dilations_str + dilation_arg.ints.extend(keras_op.dilation_rate) + + act_op = self.split_activation_op(keras_op, op) + return [op, act_op] + + def convert_depthwise_conv2d(self, keras_op): + op = self.convert_general_op(keras_op) + op.type = MaceOp.DepthwiseConv2d.name + op.input.append(get_input(keras_op).name) + + # Adds kernel tensor + op.input.append(keras_op.depthwise_kernel.name) + kernel = self.add_keras_tensor(keras_op.depthwise_kernel) + + # Adds bias tensor + if keras_op.use_bias: + op.input.append(keras_op.bias.name) + self.add_keras_tensor(keras_op.bias) + + padding_arg = op.arg.add() + padding_arg.name = MaceKeyword.mace_padding_str + padding_arg.i = padding_mode[keras_op.padding].value + + strides_arg = op.arg.add() + strides_arg.name = MaceKeyword.mace_strides_str + strides_arg.ints.extend(keras_op.strides) + + dilation_arg = op.arg.add() + dilation_arg.name = MaceKeyword.mace_dilations_str + dilation_arg.ints.extend(keras_op.dilation_rate) + + act_op = self.split_activation_op(keras_op, op) + return [op, act_op] + + def convert_maxpooling2d(self, keras_op): + op = self.convert_general_op_with_input_output(keras_op) + op.type = MaceOp.Pooling.name + + pooling_type_arg = op.arg.add() + pooling_type_arg.name = MaceKeyword.mace_pooling_type_str + pooling_type_arg.i = PoolingType.MAX.value + + padding_arg = op.arg.add() + padding_arg.name = MaceKeyword.mace_padding_str + padding_arg.i = padding_mode[keras_op.padding].value + + strides_arg = op.arg.add() + strides_arg.name = MaceKeyword.mace_strides_str + strides_arg.ints.extend(keras_op.strides) + + kernels_arg = op.arg.add() + kernels_arg.name = MaceKeyword.mace_kernel_str + kernels_arg.ints.extend(keras_op.pool_size) + + return op + + def convert_softmax(self, keras_op): + op = self.convert_general_op_with_input_output(keras_op) + op.type = MaceOp.Softmax.name + + return op + + def 
convert_dropout(self, keras_op): + op = self.convert_general_op_with_input_output(keras_op) + op.type = MaceOp.Identity.name + + return op + + def convert_quantize_layer(self, keras_op): + op = self._mace_net_def.op.add() + op.name = keras_op.name + op.type = MaceOp.Identity.name + op.input.append(get_input(keras_op).name) + op.output.append(get_output(keras_op).name) + output_shape = op.output_shape.add() + output_shape.dims.extend(keras_shape2list(get_output(keras_op).shape)) + + ConverterUtil.add_data_type_arg(op, mace_pb2.DT_FLOAT) + ConverterUtil.add_data_format_arg(op, DataFormat.NHWC) + + output_min = keras_op.weights[0].numpy() + output_max = keras_op.weights[1].numpy() + + self.add_quantize_info(op, output_min, output_max) + + return op + + def convert_quantize_wrapper(self, keras_op_wrapper): + inside_layer = keras_op_wrapper.layer + if isinstance(inside_layer, convolutional.DepthwiseConv2D): + inside_layer.depthwise_kernel = keras_op_wrapper.weights[1] + inside_layer.bias = keras_op_wrapper.weights[0] + elif isinstance(inside_layer, convolutional.Conv): + inside_layer.kernel = keras_op_wrapper.weights[1] + inside_layer.bias = keras_op_wrapper.weights[0] + elif isinstance(inside_layer, keras.layers.Dense): + inside_layer.kernel = keras_op_wrapper.weights[1] + inside_layer.bias = keras_op_wrapper.weights[0] + + # Adds input name for inside layers + inside_layer.input_proxy = keras_op_wrapper.input + inside_layer.output_proxy = keras_op_wrapper.output + + op = self._op_converters[type(inside_layer)](inside_layer) + + if isinstance(inside_layer, (convolutional.Conv, keras.layers.Dense)): + output_min = keras_op_wrapper.weights[6].numpy() + output_max = keras_op_wrapper.weights[7].numpy() + + if not isinstance(op, list): + self.add_quantize_info(op, output_min, output_max) + else: + assert len(op) == 2 + if op[1].type == MaceOp.Softmax.name: + self.add_quantize_info(op[0], output_min, output_max) + else: + self.add_quantize_info(op[1], output_min, output_max) 
+ + return op + + def add_keras_tensor(self, keras_tensor): + tensor = self._mace_net_def.tensors.add() + tensor.name = keras_tensor.name + tensor.dims.extend(keras_tensor.shape) + tensor.data_type = dtype2mtype(keras_tensor.dtype) + tensor.float_data.extend(keras_tensor.numpy().flat) + return tensor + + def split_activation_op(self, keras_op, op): + activation = keras_op.get_config()["activation"] + if "class_name" in activation: + assert activation["class_name"] == "QuantizeAwareActivation" + activation = activation["config"]["activation"] + + if activation == "linear": + op.output.append(get_output(keras_op).name) + output_shape = op.output_shape.add() + output_shape.dims.extend( + keras_shape2list(get_output(keras_op).shape) + ) + + return None + else: + activation_tmp_name = get_output(keras_op).name + "_act" + op.output.append(activation_tmp_name) + output_shape = op.output_shape.add() + output_shape.dims.extend( + keras_shape2list(get_output(keras_op).shape) + ) + + activation_op = self._mace_net_def.op.add() + activation_op.name = keras_op.name + "_act" + if activation == "softmax": + activation_op.type = MaceOp.Softmax.name + else: + activation_op.type = MaceOp.Activation.name + type_arg = activation_op.arg.add() + type_arg.name = MaceKeyword.mace_activation_type_str + type_arg.s = six.b(activation_type[activation].name) + + activation_op.input.append(activation_tmp_name) + activation_op.output.append(get_output(keras_op).name) + output_shape = activation_op.output_shape.add() + output_shape.dims.extend( + keras_shape2list(get_output(keras_op).shape) + ) + + data_type_arg = activation_op.arg.add() + data_type_arg.name = "T" + data_type_arg.i = dtype2mtype(keras_op.dtype) + framework_type_arg = activation_op.arg.add() + framework_type_arg.name = MaceKeyword.mace_framework_type_str + framework_type_arg.i = FrameworkType.KERAS.value + ConverterUtil.add_data_format_arg(activation_op, DataFormat.NHWC) + + return activation_op + + def add_quantize_info(self, op, 
minval, maxval): + quantize_schema = self._option.quantize_schema + if quantize_schema == MaceKeyword.mace_apu_16bit_per_tensor: + maxval = max(abs(minval), abs(maxval)) + minval = -maxval + scale = maxval / 2 ** 15 + zero = 0 + elif quantize_schema == MaceKeyword.mace_int8: + scale, zero, minval, maxval = quantize_util.adjust_range_int8( + minval, maxval + ) + else: + scale, zero, minval, maxval = quantize_util.adjust_range( + minval, maxval, self._option.device, non_zero=False + ) + + quantize_info = op.quantize_info.add() + quantize_info.minval = minval + quantize_info.maxval = maxval + quantize_info.scale = scale + quantize_info.zero_point = zero + + return quantize_info diff --git a/tools/python/transform/transformer.py b/tools/python/transform/transformer.py index c26f275350718dab74ad65ac68106f65c3859f11..c541e885892890e88e0cf691af31b2ec77b63d8c 100644 --- a/tools/python/transform/transformer.py +++ b/tools/python/transform/transformer.py @@ -33,6 +33,7 @@ from transform.base_converter import MaceTransposableDataFormatOps # noqa from transform.base_converter import PaddingMode from transform.base_converter import ReduceType from transform.base_converter import TransformerRule +from utils.config_parser import Platform from quantize import quantize_util from utils.util import mace_check @@ -121,6 +122,10 @@ class Transformer(base_converter.ConverterInterface): self.transform_mul_max_to_prelu, TransformerRule.TRANSFORM_EXPAND_DIMS_TO_RESHAPE: self.transform_expand_dims_to_reshape, + TransformerRule.QUANTIZE_FOLD_RELU: + self.quantize_fold_relu, + TransformerRule.TRANSFORM_KERAS_QUANTIZE_INFO: + self.transform_keras_quantize_info } self._option = option @@ -1010,7 +1015,7 @@ class Transformer(base_converter.ConverterInterface): """Transform global conv to fc should be placed after transposing input/output and filter""" - if self._option.quantize: + if self._option.quantize or self._option.platform == Platform.KERAS: return net = self._model @@ -1119,9 +1124,10 @@ 
class Transformer(base_converter.ConverterInterface): transposed_filter = set() transposed_deconv_filter = set() - if ((self._option.quantize and + if (((self._option.quantize and self._option.device == DeviceType.CPU.value) or - self._option.device == DeviceType.APU.value): + self._option.device == DeviceType.APU.value) and + (not self._option.quantize_schema == MaceKeyword.mace_int8)): print("Transpose filters to OHWI") if filter_format == DataFormat.HWIO: transpose_order = [3, 0, 1, 2] @@ -1310,6 +1316,9 @@ class Transformer(base_converter.ConverterInterface): return False def transform_matmul_to_fc(self): + if self._option.platform == Platform.KERAS: + return + net = self._model filter_format = self.filter_format() for op in net.op: @@ -1701,6 +1710,8 @@ class Transformer(base_converter.ConverterInterface): if self._option.quantize_schema == \ MaceKeyword.mace_apu_16bit_per_tensor: data_type_arg.i = mace_pb2.DT_INT16 + elif self._option.quantize_schema == MaceKeyword.mace_int8: + data_type_arg.i = mace_pb2.DT_INT8 else: data_type_arg.i = mace_pb2.DT_UINT8 elif data_type_arg.i == mace_pb2.DT_UINT8: @@ -1715,6 +1726,13 @@ class Transformer(base_converter.ConverterInterface): or op.type == MaceOp.Dequantize.name, "Only Quantization ops support int16, " "but got %s(%s)" % (op.name, op.type)) + elif data_type_arg.i == mace_pb2.DT_INT8 \ + and self._option.quantize_schema == \ + MaceKeyword.mace_int8: + mace_check(op.type == MaceOp.Quantize.name + or op.type == MaceOp.Dequantize.name, + "Only Quantization ops support int8, " + "but got %s(%s)" % (op.name, op.type)) else: mace_check(op.type == MaceOp.Quantize.name, "Quantization only support float ops, " @@ -1739,6 +1757,8 @@ class Transformer(base_converter.ConverterInterface): if self._option.quantize_schema == \ MaceKeyword.mace_apu_16bit_per_tensor: ConverterUtil.add_data_type_arg(op_def, mace_pb2.DT_INT16) + elif self._option.quantize_schema == MaceKeyword.mace_int8: + ConverterUtil.add_data_type_arg(op_def, 
mace_pb2.DT_INT8) else: ConverterUtil.add_data_type_arg(op_def, mace_pb2.DT_UINT8) ConverterUtil.add_data_format_arg(op_def, input_node.data_format) @@ -1766,6 +1786,8 @@ class Transformer(base_converter.ConverterInterface): if self._option.quantize_schema == \ MaceKeyword.mace_apu_16bit_per_tensor: ConverterUtil.add_data_type_arg(op_def, mace_pb2.DT_INT16) + elif self._option.quantize_schema == MaceKeyword.mace_int8: + ConverterUtil.add_data_type_arg(op_def, mace_pb2.DT_INT8) else: ConverterUtil.add_data_type_arg(op_def, mace_pb2.DT_UINT8) ConverterUtil.add_data_format_arg(op_def, output_node.data_format) @@ -1828,6 +1850,10 @@ class Transformer(base_converter.ConverterInterface): quantized_tensor = \ quantize_util.quantize_int16(tensor.float_data) tensor.data_type = mace_pb2.DT_INT16 + elif self._option.quantize_schema == MaceKeyword.mace_int8: + quantized_tensor = quantize_util.quantize_int8( + tensor.float_data) + tensor.data_type = mace_pb2.DT_INT8 else: non_zero = self._option.device == DeviceType.CPU.value quantized_tensor = quantize_util.quantize(tensor.float_data, @@ -1890,6 +1916,9 @@ class Transformer(base_converter.ConverterInterface): minval = -maxval scale = maxval / 2**15 zero = 0 + elif quantize_schema == MaceKeyword.mace_int8: + scale, zero, minval, maxval = quantize_util.adjust_range_int8( + minval, maxval) else: scale, zero, minval, maxval = \ quantize_util.adjust_range(minval, maxval, self._option.device, @@ -2001,6 +2030,9 @@ class Transformer(base_converter.ConverterInterface): min_val = -max_val scale = max_val / 2**15 zero = 0 + elif quantize_schema == MaceKeyword.mace_int8: + scale, zero, min_val, max_val = \ + quantize_util.adjust_range_int8(min_val, max_val) else: scale, zero, min_val, max_val = \ quantize_util.adjust_range(min_val, max_val, @@ -2042,6 +2074,10 @@ class Transformer(base_converter.ConverterInterface): minval = -maxval scale = maxval / 2**15 zero = 0 + elif quantize_schema == MaceKeyword.mace_int8: + scale, zero, minval, 
maxval = \ + quantize_util.adjust_range_int8( + input_node.range[0], input_node.range[1]) else: scale, zero, minval, maxval = \ quantize_util.adjust_range(input_node.range[0], @@ -2619,3 +2655,38 @@ class Transformer(base_converter.ConverterInterface): del op.arg[:] return True return False + + def quantize_fold_relu(self): + if self._option.quantize_schema != MaceKeyword.mace_int8: + return + + net = self._model + + for op in net.op: + if op.type == MaceOp.Activation.name: + act_type_arg = ConverterUtil.get_arg( + op, MaceKeyword.mace_activation_type_str) + act_type = act_type_arg.s.decode() + + if act_type in ["RELU", "RELUX"]: + producer = self._producer[op.input[0]] + # The type of "producer" is not limited to MatMul, + # you can try other types + if producer.type == MaceOp.MatMul.name: + self.replace_quantize_info(producer, op) + self.safe_remove_node(op, producer) + return True + + return False + + def transform_keras_quantize_info(self): + mace_check(self._option.platform == Platform.KERAS, "For KERAS models") + changed = False + for op in self._model.op: + for i in range(len(op.quantize_info)): + if not op.output[i] in self._quantize_activation_info: + self._quantize_activation_info[op.output[i]] = \ + op.quantize_info[i] + changed = True + + return changed diff --git a/tools/python/utils/config_parser.py b/tools/python/utils/config_parser.py index 36c502c1c8cb323fced11e0ab1b3d22804d16658..5521d4e8e81dc42c8dfffd2f213a6031320fac91 100644 --- a/tools/python/utils/config_parser.py +++ b/tools/python/utils/config_parser.py @@ -151,7 +151,8 @@ class Platform(Enum): CAFFE = 1 ONNX = 2 MEGENGINE = 3 - PYTORCH = 4 + KERAS = 4 + PYTORCH = 5 def parse_platform(str): diff --git a/tools/python/utils/convert_util.py b/tools/python/utils/convert_util.py index ba6a5cce637e1d865dd664ab824e41cd44079012..7b597e3ac346acca039d8e7805965a06aa573977 100644 --- a/tools/python/utils/convert_util.py +++ b/tools/python/utils/convert_util.py @@ -48,6 +48,10 @@ def 
merge_params(net_def, data_type): data = bytearray( np.array(tensor.int32_data).astype(np.uint8).tolist()) tensor.data_size = len(tensor.int32_data) + elif tensor.data_type == mace_pb2.DT_INT8: + data = bytearray( + np.array(tensor.int32_data).astype(np.uint8).tolist()) + tensor.data_size = len(tensor.int32_data) elif tensor.data_type == mace_pb2.DT_FLOAT16: data = bytearray( np.array(tensor.float_data).astype(np.float16).tobytes()) @@ -85,6 +89,8 @@ def merge_params(net_def, data_type): del tensor.int32_data[:] elif tensor.data_type == mace_pb2.DT_UINT8: del tensor.int32_data[:] + elif tensor.data_type == mace_pb2.DT_INT8: + del tensor.int32_data[:] return net_def, model_data @@ -100,5 +106,7 @@ def data_type_to_np_dt(data_type, default_np_dt): return np.uint8 elif data_type == mace_pb2.DT_BFLOAT16: return np.uint16 + elif data_type == mace_pb2.DT_INT8: + return np.int8 else: return np.float32