diff --git a/.gitignore b/.gitignore index 91e5e303e1011a0185e2f0b04b4183ddd16285ae..075175a124d0a084b1e4f987353a6cc297dd576b 100644 --- a/.gitignore +++ b/.gitignore @@ -22,12 +22,14 @@ mace/codegen/version/ mace/codegen/engine/ mace/codegen/lib/ -micro/codegen/models/ -micro/codegen/engines/ - examples/android/macelibrary/src/main/cpp/mace/ examples/android/macelibrary/src/main/cpp/include/ examples/android/macelibrary/src/main/cpp/lib/arm64-v8a/ examples/android/macelibrary/src/main/jniLibs/arm64-v8a/ tools/python/py_proto/*_pb2.py + +micro/codegen/models/ +micro/codegen/engines/ +micro/examples/micro +micro/build \ No newline at end of file diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index 0b0e2436368096a3d72e571f8106e0666b22943b..659b7c9a0785ed3ba53e9474ba3981a9678a847d 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -19,7 +19,7 @@ cpplint: pylint: stage: linting script: - - pycodestyle $(find -name "*.py") + - pycodestyle . --filename=*.py --exclude=examples,third_party build_docs: stage: build @@ -111,14 +111,12 @@ mace_cc_test: DEVICE_CONF_FILE=generic-mobile-devices/devices.yml fi - python tools/bazel_adb_run.py --target="//test/ccunit:mace_cc_test" --device_yml=${DEVICE_CONF_FILE} --run_target=True --stdout_processor=unittest_stdout_processor --target_abis=armeabi-v7a,arm64-v8a --target_socs=$TARGET_SOCS - - python tools/bazel_adb_run.py --target="//micro/test/ccunit:micro_ops_test" --run_target=True --stdout_processor=ops_benchmark_stdout_processor --target_abis=arm64-v8a mace_cc_benchmark: stage: test script: - if [ -z "$TARGET_SOCS" ]; then TARGET_SOCS=random; fi - python tools/bazel_adb_run.py --target="//test/ccbenchmark:mace_cc_benchmark" --run_target=True --stdout_processor=ops_benchmark_stdout_processor --target_abis=armeabi-v7a,arm64-v8a --target_socs=$TARGET_SOCS --args="--filter=.*SIGMOID.*" - - python tools/bazel_adb_run.py --target="//micro/test/ccbenchmark:micro_cc_benchmark" --run_target=True 
--stdout_processor=ops_benchmark_stdout_processor --target_abis=arm64-v8a only: - triggers @@ -145,14 +143,6 @@ model_tests: - python tools/converter.py convert --config=${CONF_FILE} --target_socs=$TARGET_SOCS --model_graph_format=code --model_data_format=file - python tools/converter.py run --config=${CONF_FILE} --target_socs=$TARGET_SOCS --round=1 --validate --model_graph_format=code --model_data_format=file - python tools/converter.py run --config=${CONF_FILE} --target_socs=$TARGET_SOCS --round=5 --model_graph_format=code --model_data_format=file --benchmark - - CONF_FILE=mace-models/micro-models/har-cnn/har-cnn.yml - - python tools/converter.py convert --config=${CONF_FILE} --enable_micro - - python tools/python/run_micro.py --config $CONF_FILE --build --validate --model_name har_cnn - - python tools/python/run_micro.py --config $CONF_FILE --model_name har_cnn --build --benchmark - - CONF_FILE=mace-models/micro-models/har-cnn/har-cnn-bf16.yml - - python tools/converter.py convert --config=${CONF_FILE} --enable_micro - - python tools/python/run_micro.py --config $CONF_FILE --build --validate --model_name har_cnn - - rm -rf mace-models quantization_tests: stage: test @@ -206,3 +196,4 @@ micro-child: trigger: include: - 'micro/.gitlab-ci.yml' + strategy: depend diff --git a/.gitmodules b/.gitmodules index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..410fef8a41eb862c1d10be82b195eb35cde1ac18 100644 --- a/.gitmodules +++ b/.gitmodules @@ -0,0 +1,13 @@ +[submodule "micro/third_party/CMSIS_5"] + path = micro/third_party/CMSIS_5 + url = https://github.com/ARM-software/CMSIS_5.git + shallow = true +[submodule "micro/third_party/googletest"] + path = micro/third_party/googletest + url = https://github.com/google/googletest.git + shallow = true +[submodule "micro/third_party/gflags"] + path = micro/third_party/gflags + url = https://github.com/gflags/gflags.git + shallow = true + diff --git a/docker/mace-micro-dev/mace-micro-dev.dockerfile 
b/docker/mace-micro-dev/mace-micro-dev.dockerfile new file mode 100644 index 0000000000000000000000000000000000000000..abc1f132fbce20ccf42f2ee9e8eaf5ec38dfb4e3 --- /dev/null +++ b/docker/mace-micro-dev/mace-micro-dev.dockerfile @@ -0,0 +1,15 @@ +FROM ubuntu:18.04 + +RUN apt-get update +RUN apt-get install -y wget +RUN apt-get install -y g++ gcc +RUN apt-get install -y gcc-arm-none-eabi +RUN apt-get install -y python3 python3-pip git mercurial + +RUN wget https://cdn.cnbj1.fds.api.mi-img.com/mace/third-party/cmake-3.18.3-Linux-x86_64.sh +RUN chmod +x cmake-3.18.3-Linux-x86_64.sh && ./cmake-3.18.3-Linux-x86_64.sh --skip-license --prefix=/usr + +RUN python3 -m pip install -U pip +RUN python3 -m pip install jinja2 pyyaml sh numpy six filelock +RUN python3 -m pip install tensorflow==2.3.0 tensorflow_model_optimization +RUN python3 -m pip install mbed-cli diff --git a/docs/micro-controllers/basic_usage.rst b/docs/micro-controllers/basic_usage.rst index a1228ecced3696772b39f4d97d51077b8f203a86..853a3fc6e9396eea39ac42761126c2cdc8580de2 100644 --- a/docs/micro-controllers/basic_usage.rst +++ b/docs/micro-controllers/basic_usage.rst @@ -1,128 +1,217 @@ Basic usage for Micro Controllers ================================== +MACE Micro is a lightweight neural network inference engine for MCUs and low-power DSPs. +At now we support Cortex-M MCUs and Qualcomm Hexagon DSPs. You can get our projects from GitHub. -Build and run an example model -------------------------------- +Get MACE Micro Projects +----------------------- -At first, make sure the environment has been set up correctly already (refer to :doc:`../installation/env_requirement`). +MACE Micro is a sub project of MACE, so you can get it from MACE. -The followings are instructions about how to quickly build and run a provided model in -`MACE Model Zoo `__. +.. code-block:: sh -Here we use the har-cnn model as an example. 
+ git clone https://github.com/XiaoMi/mace.git + # Inits submodules by yourself + cd mace && git submodule update --init micro && cd .. -**Commands** +Environment Requirements +------------------------ - 1. Pull `MACE `__ project. +On a ubuntu18.04/20.04 PC, do the following steps. - .. code-block:: sh +.. code-block:: sh - git clone https://github.com/XiaoMi/mace.git - cd mace/ - git fetch --all --tags --prune + apt-get update + apt-get install -y wget - # Checkout the latest tag (i.e. release version) - tag_name=`git describe --abbrev=0 --tags` - git checkout tags/${tag_name} + apt-get install -y g++ + # Required for Cortex-M MCUs + apt-get install -y gcc-arm-none-eabi + apt-get install -y python3 python3-pip - .. note:: + python3 -m pip install jinja2 pyyaml sh numpy six filelock + # Installs cmake above 3.13.0 + wget https://cdn.cnbj1.fds.api.mi-img.com/mace/third-party/cmake-3.18.3-Linux-x86_64.sh + chmod +x cmake-3.18.3-Linux-x86_64.sh && ./cmake-3.18.3-Linux-x86_64.sh --skip-license --prefix=/usr - It's highly recommended to use a release version instead of master branch. + python3 -m pip install -U pip + # The Tensorflow version depends on your model + # The Tensroflow 1.x frozen model and Tensorflow 2.x Keras model are both supported + python3 -m pip install tensorflow==2.3.0 + python3 -m pip install tensorflow_model_optimization +You also can use a docker as the environment. - 2. Pull `MACE Model Zoo `__ project. +.. code-block:: sh - .. code-block:: sh + cd mace/docker/mace-micro-dev + docker build . -f mace-micro-dev.dockerfile --tag mace-micro-dev + cd ../../.. + # Maps your workspace to docker container + docker run -ti -v $(pwd):/workspace/ -w /workspace mace-micro-dev - git clone https://github.com/XiaoMi/mace-models.git +Convert a model to c++ code +---------------------------- - 3. Convert the pre-trained har-cnn model to c++ code. +Here we use a pre-trained model of the MNIST database, - .. code-block:: sh +.. 
code-block:: sh - cd path/to/mace - # output lib path: build/har-cnn/model/har_cnn_micro.tar.gz - CONF_FILE=/path/to/mace-models/micro-models/har-cnn/har-cnn.yml - python tools/python/convert.py --config=$CONF_FILE --enable_micro + cd mace + # Converts a tensorflow 2.x keras model, you need install python3 and tensorflow==2.x additional + python3 tools/python/convert.py --config=micro/pretrained_models/keras/mnist/mnist.yml --enable_micro - 4. Build Micro-Controllers engine and models to library on host. +Model config file +----------------- - .. code-block:: sh +The following is a completed model config file, - cd micro - ./tools/cmake/cmake-build-host.sh +.. code-block:: sh - .. note:: + library_name: har + target_abis: [host] + model_graph_format: file + model_data_format: file + models: + har_int8: + platform: keras + model_file_path: https://cdn.cnbj1.fds.api.mi-img.com/mace/miai-models/micro/keras/har/har.h5 + model_sha256_checksum: ec0477b8e489541bb34377c9cabc42ee6cefa8bdf0a9f726e06be1b967ea1dcd + subgraphs: + - input_tensors: + - "conv2d_1_input:0" + input_shapes: + - 1, 90, 3, 1 + input_ranges: + - -5, 15 + output_tensors: + - "dense_3/Softmax:0" + output_shapes: + - "1, 6" + runtime: cpu + data_type: fp32_fp32 + limit_opencl_kernel_time: 0 + nnlib_graph_mode: 0 + obfuscate: 0 + winograd: 0 + quantize: 1 + quantize_schema: int8 + quantize_range_file: /workspace/mace/micro/pretrained_models/keras/har/har.range - - The build result ``build/cmake-build/host/libmicro.a``'s abi is host, if you want to run the model on micro controllers, you should build the code with the right toolchain, for example +For the bfloat16 model, - .. code-block:: sh - cd micro - export HEXAGON_SDK_ROOT=/home/user/Qualcomm/Hexagon_SDK/3.4.1 - export HEXAGON_TOOLS=/home/user/Qualcomm/HEXAGON_Tools/6.4.06 - ./tools/cmake/cmake-build-hexagon6.sh +.. code-block:: yaml - 5. Run the model on host. + data_type: bf16_fp32 - .. 
code-block:: sh +For the int8 model, - CONF_FILE=/path/to/mace-models/micro-models/har-cnn/har-cnn.yml - # Run - python tools/python/run_micro.py --config $CONF_FILE --model_name har_cnn --build +.. code-block:: yaml - # Test model run time - python tools/python/run_micro.py --config $CONF_FILE --model_name har_cnn --build --round=100 + quantize: 1 + quantize_schema: int8 + # Required when your model has not quantize info + quantize_range_file: range_file_path - # Validate the correctness by comparing the results against the - # original model and framework, measured with cosine distance for similarity. - python tools/python/run_micro.py --config $CONF_FILE --model_name har_cnn --build --validate - # Validate the layers' correctness. - python tools/python/run_micro.py --config $CONF_FILE --model_name har_cnn --build --validate --layers 0:-1 +Build MACE Micro and models libraries +-------------------------------------- -Deploy your model into applications +Here, we build the MACE Micro engine and models to libraries on a linux host machine. The CMake build parameters depends on your model config file. + +For float32 model, + +.. code-block:: sh + + ./micro/tools/cmake/cmake-build-host.sh + +For bfloat16 model, + +.. code-block:: sh + + ./micro/tools/cmake/cmake-build-host.sh -DMACE_MICRO_ENABLE_BFLOAT16=ON + +.. note:: + + You can only use either float32 or bfloat16 + +For int8 model, + +.. code-block:: sh + + ./micro/tools/cmake/cmake-build-host.sh -DMACE_MICRO_ENABLE_CMSIS=ON + +Use libraries directly +----------------------- + +With these steps, we can find necessary libraries and headers in the "build/micro/host/install" directory, you can use the libraries directly. + +.. 
code-block:: sh + + # Builds example + g++ micro/examples/classifier/main.cc -DMICRO_MODEL_NAME=mnist -DMICRO_DATA_NAME=mnist -I build/micro/host/install/include/ -L build/micro/host/install/lib/ -lmicro -lmodels -lmicro -o mnist + # Runs the mnist example + ./mnist + + +Code example ------------------------------------ -Please refer to \ ``/mace/micro/tools/micro_run.cc`` for full usage. The following list the key steps. +The following code is the mnist example source files, which the main steps is annotated .. code-block:: cpp - // Include the headers - #include "micro/include/public/micro.h" - - // 1. Create MaceMicroEngine instance - MaceMicroEngine *micro_engine = nullptr; - MaceStatus status = har_cnn::GetMicroEngineSingleton(µ_engine); - - // 1. Create and register Input buffers - std::vector> inputs; - std::vector input_sizes; - for (size_t i = 0; i < input_shapes.size(); ++i) { - input_sizes.push_back(std::accumulate(input_shapes[i].begin(), - input_shapes[i].end(), sizeof(float), - std::multiplies())); - inputs.push_back(std::shared_ptr(new char[input_sizes[i]], - std::default_delete())); - } - // TODO: fill data into input buffers - for (size_t i = 0; i < input_names.size(); ++i) { - micro_engine->RegisterInputData(i, inputs[i].get(), - input_shapes[i].data()); + #include "data/mnist.h" + + #include + + // Include MACE Micro header + #include "micro.h" + + namespace micro { + namespace minst { + + // We use forward declaration to avoid include the special engine header + MaceStatus GetMicroEngineSingleton(MaceMicroEngine **engine); + } + } // namespace micro - // 3. Run the model - MaceStatus status = micro_engine->Run(); + int main() { + // Step 1, get the mnist micro engine + micro::MaceMicroEngine *micro_engine = NULL; + micro::MaceStatus status = + micro::mnist::GetMicroEngineSingleton(µ_engine); - // 4. 
Get the results - for (size_t i = 0; i < output_names.size(); ++i) { - void *output_buffer = nullptr; - const int32_t *output_dims = nullptr; + // Step 2, set input data + static float *input_data = data_mnist_4; + int32_t input_dims[4] = {1, 28, 28, 1}; + micro_engine->RegisterInputData(0, input_data, input_dims); + + // Step3, run the inference + micro_engine->Run(); + + // Step 4, get output data + float *output_buffer = NULL; + const int32_t *output_dims = NULL; uint32_t dim_size = 0; - MaceStatus status = - micro_engine->GetOutputData(i, &output_buffer, &output_dims, &dim_size); - // TODO: the result data is in output_buffer, you can not delete output_buffer. + micro_engine->GetOutputData( + 0, reinterpret_cast(&output_buffer), &output_dims, &dim_size); + + for (int32_t i = 0; i < output_dims[1]; ++i) { + printf("%d: %f\n", i, output_buffer[i]); + } + + return 0; } + +For more examples, goto the directory "micro/examples" + +Performance +----------- + +We deploy a `HAR-CNN `__ int8 model on the NUCLEO-F767ZI(Cortex-M7) board. Each inference of HAR CNN model takes 12 ms. \ No newline at end of file diff --git a/docs/micro-controllers/deploy.rst b/docs/micro-controllers/deploy.rst new file mode 100644 index 0000000000000000000000000000000000000000..fa480beb472fc0872cd80bb047ec7c4b5b7556dd --- /dev/null +++ b/docs/micro-controllers/deploy.rst @@ -0,0 +1,48 @@ +Deploy +====== + +MACE Micro module is written in c++98 and only depends on . +We can write a CMake toolchain file to build the program for the special platform. + +For Cortex-M MCU +---------------- + +Now we deploy the MNIST classifier example on a NUCLEO-F767ZI development with the Mbed OS. +Install a GCC Arm Embedded compiler by the terminal. + +.. code-block:: sh + + # Installs gcc arm + sudo apt-get install gcc-arm-none-eabi + +Refer to to install Mbed OS tools. + +Now we can convert the model and build the program, + +.. 
code-block:: sh + + python3 tools/python/convert.py --config=micro/pretrained_models/keras/mnist/mnist-int8.yml --enable_micro + ./micro/tools/cmake/cmake-build-gcc-arm-none-eabi.sh -DARM_CPU=cortex-m7 -DMACE_MICRO_ENABLE_CMSIS=ON -DMACE_MICRO_ENABLE_HARDFP=OFF + +The "-DARM_CPU=cortex-{m7|m4|..}" is a necessary CMake variable for different series of Arm MCUs. +You can use the Mace Micro install package("build/micro/gcc-arm-none-eabi/install") in yourself project. Here we use "mbed-cli" to compile it + +.. code-block:: sh + + # cp the MACE Micro libraries to the workspace directory + cp build/micro/gcc-arm-none-eabi/install micro/examples/classifier -r + cd micro/examples/classifier + # Compile the program + mbed compile -t GCC_ARM -m NUCLEO_F767ZI -D MICRO_MODEL_NAME=mnist_int8 -D MICRO_DATA_NAME=mnist + # Flash the program to the development board + cp BUILD/NUCLEO_F767ZI/GCC_ARM/classifier.bin /media/$USER/NODE_F767ZI + # Connet to the default COM port + sudo chown $USER:$USER /dev/ttyACM0 + mbed sterm + +Press the reset(black) button to run the example again. 
+ +For Hexagon DSP +--------------- + +In the micro/cmake/toolchain folder, there are two hexagon CMake toolchain files for reference, For more details, please goto \ No newline at end of file diff --git a/docs/micro-controllers/op_lists.rst b/docs/micro-controllers/op_lists.rst new file mode 100644 index 0000000000000000000000000000000000000000..e95be8aee87902bc85edbd731e131d4e5c8bba2c --- /dev/null +++ b/docs/micro-controllers/op_lists.rst @@ -0,0 +1,34 @@ +Operator lists +=============== + +Float32 and bfloat16 operators + +* batch_norm +* conv_2d +* depthwise_conv_2d +* pooling +* activation +* argmax +* bias_add +* cast +* concat +* eltwise +* expand_dims +* matmul +* reduce +* reshape +* softmax +* squeeze +* stack +* stride_slice + +Int8 operators + +* conv_2d +* depthwsie_conv_2d +* eltwise +* mat_mul +* pooling +* softmax +* quantize +* dequantize diff --git a/mace/proto/CMakeLists.txt b/mace/proto/CMakeLists.txt index 1fc025ee07ef9f7944e43e82800b3776c3a7870a..a2e70afc6323329564c18e1e787801ead67953d0 100644 --- a/mace/proto/CMakeLists.txt +++ b/mace/proto/CMakeLists.txt @@ -1,39 +1,42 @@ -set(MACE_PROTO_PROTOS mace.proto) -set(MACE_PROTO_SRCS) -set(MACE_PROTO_HDRS) -set(MACE_PROTO_PYTHON_DIR ${PROJECT_SOURCE_DIR}/tools/python/py_proto) - -foreach(proto_file ${MACE_PROTO_PROTOS}) +macro(generate_proto proto_file) get_filename_component(proto_file_abs ${proto_file} ABSOLUTE) get_filename_component(basename ${proto_file} NAME_WE) - set(PROTO_GENERATED_FILES ${basename}.pb.cc ${basename}.pb.h) - list(APPEND MACE_PROTO_SRCS ${basename}.pb.cc) - list(APPEND MACE_PROTO_HDRS ${basename}.pb.h) + set(${basename}_proto_files ${basename}.pb.cc ${basename}.pb.h) + set(${basename}_proto_srcs ${basename}.pb.cc) add_custom_command( - OUTPUT ${PROTO_GENERATED_FILES} + OUTPUT ${basename}_proto_files COMMAND ${PROTOC_BIN} --cpp_out ${CMAKE_CURRENT_BINARY_DIR} -I ${CMAKE_CURRENT_SOURCE_DIR} ${proto_file_abs} - COMMENT "Generating ${PROTO_GENERATED_FILES} from ${proto_file}" + 
COMMENT "Generating ${basename}_proto_files from ${proto_file}" DEPENDS protoc_bin VERBATIM ) - set(PROTO_GENERATED_PY_FILES ${MACE_PROTO_PYTHON_DIR}/${basename}_pb2.py) + set(PROTO_PYTHON_DIR ${PROJECT_SOURCE_DIR}/tools/python/py_proto) + set(PROTO_GENERATED_PY_FILES ${PROTO_PYTHON_DIR}/${basename}_pb2.py) add_custom_command( OUTPUT ${PROTO_GENERATED_PY_FILES} - COMMAND ${PROTOC_BIN} --python_out ${MACE_PROTO_PYTHON_DIR} -I ${CMAKE_CURRENT_SOURCE_DIR} ${proto_file_abs} + COMMAND ${PROTOC_BIN} --python_out ${PROTO_PYTHON_DIR} -I ${CMAKE_CURRENT_SOURCE_DIR} ${proto_file_abs} COMMENT "Generating ${PROTO_GENERATED_PY_FILES} from ${proto_file}" DEPENDS protoc_bin VERBATIM ) -endforeach() -add_custom_target(mace_proto_src DEPENDS ${PROTO_GENERATED_FILES} + add_custom_target(${basename}_proto_cpp DEPENDS ${basename}_proto_files COMMENT "Checking if re-generation is required") -add_custom_target(mace_proto_py ALL DEPENDS ${PROTO_GENERATED_PY_FILES}) + add_custom_target(${basename}_proto_py ALL DEPENDS ${PROTO_GENERATED_PY_FILES}) +endmacro() + +generate_proto(mace.proto) +generate_proto(micro_mem.proto) -add_library(proto ${MACE_PROTO_SRCS}) +add_library(proto ${mace_proto_srcs}) +add_dependencies(proto mace_proto_cpp) +set_source_files_properties( + ${mace_proto_srcs} + PROPERTIES GENERATED TRUE +) target_link_libraries(proto libprotobuf_lite) install(TARGETS proto ARCHIVE DESTINATION lib) diff --git a/mace/proto/mace.proto b/mace/proto/mace.proto index 0997046095be6325f70a70f4626f97ba32e81857..ca031951884529e9cc1b2cde22a841591ae8cb54 100644 --- a/mace/proto/mace.proto +++ b/mace/proto/mace.proto @@ -16,6 +16,7 @@ enum DataType { DT_FLOAT16 = 5; DT_BFLOAT16 = 6; DT_INT16 = 7; + DT_INT8 = 8; } enum MemoryType { diff --git a/micro/.gitignore b/micro/.gitignore deleted file mode 100644 index 7f1bd1122eeab9d7ebc6363eb1473b28b1088823..0000000000000000000000000000000000000000 --- a/micro/.gitignore +++ /dev/null @@ -1,2 +0,0 @@ -build -test/**/codegen diff --git 
a/micro/.gitlab-ci.yml b/micro/.gitlab-ci.yml index 6f244d5db207a4b47126b5d75834b7ed3406db68..2bd826c14c82e92cab84509bf59fa6b2da92f8b2 100644 --- a/micro/.gitlab-ci.yml +++ b/micro/.gitlab-ci.yml @@ -1,3 +1,8 @@ +before_script: + - git submodule deinit -f . + - git submodule sync + - git submodule update --init . + stages: - convert - build @@ -6,23 +11,18 @@ stages: model-convert: stage: convert script: - - rm -rf mace-models - - GIT_SSH_COMMAND="ssh -o UserKnownHostsFile=/dev/null -o StrictHostKeyChecking=no" git clone git@github.com:XiaoMi/mace-models.git - - > - - CONF_FILE=mace-models/micro-models/har-cnn/har-cnn.yml - - python tools/python/convert.py --config=${CONF_FILE} --enable_micro + - bash micro/tools/ci/model_convert.sh artifacts: paths: - mace-models untracked: true -host-build: +cross-build: stage: build script: - - cd micro && ./tools/cmake/cmake-build-host.sh -DMICRO_MODEL_NAME=har_cnn + - bash micro/tools/ci/cross_build.sh + - bash micro/tools/ci/host_build_and_run_examples.sh + - bash micro/tools/ci/host_build_and_run_tests.sh + # The mbed-cli protobuf version conflicts with others + # - bash micro/tools/ci/build_mbed_example.sh -host-test: - stage: test - script: - - CONF_FILE=mace-models/micro-models/har-cnn/har-cnn.yml - - python tools/python/run_micro.py --config $CONF_FILE --build --validate --model_name har_cnn diff --git a/micro/CMakeLists.txt b/micro/CMakeLists.txt index 72f2b509930a0f22fa0a78f30a6a5b361af23171..1914b98f12b726fab8a0010e58e028e7d54eb601 100644 --- a/micro/CMakeLists.txt +++ b/micro/CMakeLists.txt @@ -1,8 +1,48 @@ -cmake_minimum_required(VERSION 3.7 FATAL_ERROR) -message("CMAKE_VERSION: ${CMAKE_VERSION}") -project(micro C CXX) +cmake_minimum_required(VERSION 3.13 FATAL_ERROR) -set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++11") +project(micro C CXX ASM) + +# CMSIS_5 requires C99 +set(CMAKE_C_STANDARD 99) + +add_compile_options("-Wall;-Wextra") + +option(MACE_MICRO_ENABLE_CMSIS "Whether to enable cmsis driver" OFF) 
+option(MACE_MICRO_ENABLE_BFLOAT16 "Whether to enable bfloat16 support" OFF) +option(MACE_MICRO_ENABLE_TESTS "Whether to enable Mace Micro tests" OFF) +option(MACE_MICRO_ENABLE_EXAMPLES "Whether to enable Mace Micro examples" OFF) + +if(MACE_MICRO_GCC_ARM) + include(cmake/config_gcc_arm.cmake) +endif() + +#set CMAKE_BUILD_TYPE default value as Release +if(NOT CMAKE_BUILD_TYPE) + set(CMAKE_BUILD_TYPE "Release" + CACHE STRING "Choose the type of build, options are: Debug Release RelWithDebInfo MinSizeRel." + FORCE) +endif() + +if(MACE_MICRO_ENABLE_CMSIS) + function(compilerSpecificCompileOptions PROJECTNAME ROOT) + target_compile_options(${PROJECTNAME} + PRIVATE "-Wno-unused-parameter" + PRIVATE "-Wno-sign-compare" + PRIVATE "-Wno-strict-aliasing" + PRIVATE "-Wno-unused-variable" + ) + endfunction() + set(ROOT ${CMAKE_CURRENT_SOURCE_DIR}/third_party/CMSIS_5) + + include_directories(${ROOT}/CMSIS/Core/Include) + + add_subdirectory(${ROOT}/CMSIS/DSP/Source EXCLUDE_FROM_ALL) + add_subdirectory(${ROOT}/CMSIS/NN/Source EXCLUDE_FROM_ALL) + + target_include_directories(CMSISDSP INTERFACE ${ROOT}/CMSIS/Core/Include) + target_include_directories(CMSISNN INTERFACE ${ROOT}/CMSIS/Core/Include) + include_directories(third_party/CMSIS_5/CMSIS/Core/Include) +endif() if(HEXAGON6) # Does not work with "-O3" @@ -13,10 +53,7 @@ if(MACE_MICRO_ARM_NONE) add_definitions(-DMACE_MICRO_ARM_NONE) endif() -option(MACE_ENABLE_BFLOAT16 "Whether to enable bfloat16 support" OFF) -option(MACE_MICRO_ENABLE_TESTS "Whether to enable Mace Micro tests" ON) - -if(MACE_ENABLE_BFLOAT16) +if(MACE_MICRO_ENABLE_BFLOAT16) add_definitions(-DMACE_ENABLE_BFLOAT16) endif() @@ -24,15 +61,12 @@ if(MACE_MICRO_NDEBUG) add_definitions(-DMACE_MICRO_NDEBUG) endif() -include(third_party/third_party.cmake) - add_subdirectory(include) add_subdirectory(port) add_subdirectory(base) add_subdirectory(model) add_subdirectory(framework) add_subdirectory(ops) -add_subdirectory(codegen) file(GLOB micro_base_srcs base/*.cc) 
file(GLOB micro_codegen_models_srcs codegen/models/**/*.cc) @@ -41,15 +75,13 @@ file(GLOB micro_framework_srcs framework/*.cc) file(GLOB micro_models_srcs model/*.cc) file(GLOB micro_ops_nhwc_base_srcs ops/nhwc/base/*.cc) file(GLOB micro_ops_nhwc_srcs ops/nhwc/*.cc) +file(GLOB micro_ops_nhwc_cmsis_nn_srcs ops/nhwc/cmsis_nn/*.cc) file(GLOB micro_ops_srcs ops/*.cc) file(GLOB micro_ops_utils_srcs ops/utils/*.cc) file(GLOB micro_port_srcs port/*.cc) -# To build a single library -add_library(micro +list(APPEND micro_src ${micro_base_srcs} - ${micro_codegen_models_srcs} - ${micro_codegen_engines_srcs} ${micro_framework_srcs} ${micro_models_srcs} ${micro_ops_srcs} @@ -58,22 +90,56 @@ add_library(micro ${micro_ops_utils_srcs} ${micro_port_srcs} ) -target_include_directories(micro PUBLIC ..) + +if(MACE_MICRO_ENABLE_CMSIS) + list(APPEND micro_src ${micro_ops_nhwc_cmsis_nn_srcs}) +endif() + +add_library(micro ${micro_src}) +target_include_directories(micro PUBLIC .. PUBLIC include/public) install(TARGETS micro ARCHIVE DESTINATION lib LIBRARY DESTINATION lib RUNTIME DESTINATION bin ) +install(FILES include/public/micro.h DESTINATION include) + +if(MACE_MICRO_ENABLE_CMSIS) + target_link_libraries(micro PRIVATE CMSISNN) + + install(TARGETS + CMSISNNReshape + CMSISNNBasicMaths + CMSISNNConcatenation + CMSISNNFullyConnected + CMSISNNConvolutions + CMSISNNActivation + CMSISNNPooling + CMSISNNSoftmax + CMSISNNSupport + ARCHIVE DESTINATION lib + LIBRARY DESTINATION lib + RUNTIME DESTINATION bin + ) +endif() + +add_subdirectory(codegen) if(HEXAGON OR HEXAGON_STUB) include(cmake/find_hexagon_sdk.cmake) endif() -if(NOT HEXAGON) +if(NOT HEXAGON AND MICRO_MODEL_NAME) + add_subdirectory(third_party/gflags EXCLUDE_FROM_ALL) add_subdirectory(tools) endif() if(MACE_MICRO_ENABLE_TESTS) + add_subdirectory(third_party/googletest EXCLUDE_FROM_ALL) add_subdirectory(test) -endif(MACE_MICRO_ENABLE_TESTS) +endif() + +if(MACE_MICRO_ENABLE_EXAMPLES) + add_subdirectory(examples) +endif() diff --git 
a/micro/base/logger.cc b/micro/base/logger.cc index 4663e5741ef4458051e4e4260784aaf20cd8c319..005fe7c592694ef18ebe0f54b7b9f9ea79090b5e 100644 --- a/micro/base/logger.cc +++ b/micro/base/logger.cc @@ -30,7 +30,7 @@ const int32_t kInt8ValueBufferLength = 4; const int32_t kFloatValueBufferLength = 21; inline bool IsValidLogLevel(const LogLevel level) { - return level >= CLEAN && level < INVALID_MAX; + return level < INVALID_MAX; } char LogLevelToShortStr(LogLevel level) { diff --git a/micro/base/serialize_type.h b/micro/base/serialize_type.h index 3d26742856d2e54e0b4c22345984f9aeda47ca2d..258b27047895553f0b776d3f1fa687a8293ae479 100644 --- a/micro/base/serialize_type.h +++ b/micro/base/serialize_type.h @@ -18,6 +18,7 @@ #include #include "micro/include/public/micro.h" +#include "micro/include/port/define.h" namespace micro { diff --git a/micro/base/types.h b/micro/base/types.h index 6de264b4119e0ac68080a11df129d17f7b04a364..0f018d6a7348129e02a11bce31423a44540dcfa8 100644 --- a/micro/base/types.h +++ b/micro/base/types.h @@ -52,6 +52,35 @@ MACE_MAPPING_DATA_TYPE_AND_ENUM(int32_t, DT_INT32); MACE_MAPPING_DATA_TYPE_AND_ENUM(BFloat16, DT_BFLOAT16); #endif +struct QuantizeInfo { + float scale; + int32_t zero; +}; + +namespace ops { +namespace eltwise { // for redefine + +enum Type { + SUM = 0, + SUB = 1, + PROD = 2, + DIV = 3, + MIN = 4, + MAX = 5, + NEG = 6, + ABS = 7, + SQR_DIFF = 8, + POW = 9, + EQUAL = 10, + FLOOR_DIV = 11, + CLIP = 12, + SIGN = 13, + NONE = 14, +}; + +} // namespace eltwise +} // namespace ops + } // namespace micro #endif // MICRO_BASE_TYPES_H_ diff --git a/micro/base/utils.cc b/micro/base/utils.cc index 305e461f2411555063d0924fb185e1f3f2c6fcd4..5f8637da11fd1dbdb12593ba2767993c9e386b65 100644 --- a/micro/base/utils.cc +++ b/micro/base/utils.cc @@ -105,5 +105,25 @@ float log(float x) { return ::log(x); } + +template +const T &max(const T &a, const T &b) { + return (a < b) ? 
b : a; +} + +template +const T &min(const T &a, const T &b) { + return (a < b) ? a : b; +} + +bool ShapeIsEqual(const int32_t *dims0, + const int32_t *dims1, uint32_t dim_size) { + while (dim_size-- > 0) { + if (dims0[dim_size] != dims1[dim_size]) + return false; + } + return true; +} + } // namespace base } // namespace micro diff --git a/micro/base/utils.h b/micro/base/utils.h index 56eb955ebd7670e888527325e0bd5a142a0ade8f..d47394c2de901171f89bacaaa283855043861c64 100644 --- a/micro/base/utils.h +++ b/micro/base/utils.h @@ -26,6 +26,8 @@ uint32_t strlen(const char *str); int32_t strcmp(const char *str1, const char *str2); void memcpy(void *dst, const void *src, uint32_t bytes); int32_t GetShapeSize(uint32_t dim_size, const int32_t *dims); +bool ShapeIsEqual(const int32_t *dims0, + const int32_t *dims1, uint32_t dim_size); float sqrt(float x); int32_t ceil(float f); int32_t floor(float f); diff --git a/micro/cmake/config_gcc_arm.cmake b/micro/cmake/config_gcc_arm.cmake new file mode 100644 index 0000000000000000000000000000000000000000..ef626103eb7626e29039b494835b00d50137f2c8 --- /dev/null +++ b/micro/cmake/config_gcc_arm.cmake @@ -0,0 +1,36 @@ +if(NOT ARM_CPU) + message(FATAL_ERROR "please set ARM_CPU, such as: -DARM_CPU=cortex-m4. 
We set -mcpu=${ARM_CPU}") +endif() + +add_compile_options("-mcpu=${ARM_CPU};-mthumb") +add_compile_options("-ffunction-sections;-fdata-sections") + +# floating-point ABI +option(MACE_MICRO_ENABLE_HARDFP "Whether to use hard float-point ABI" ON) + +if(MACE_MICRO_ENABLE_HARDFP) + add_compile_options("-mfloat-abi=hard") +else() + add_compile_options("-mfloat-abi=softfp") +endif() + +# FPU +if (ARM_CPU STREQUAL "cortex-m55" ) + add_compile_options("-mfpu=fpv5-d16") + add_link_options("-mfpu=fpv5-d16") +endif() + +if (ARM_CPU STREQUAL "cortex-m33" ) + add_compile_options("-mfpu=fpv5-sp-d16") + add_link_options("-mfpu=fpv5-sp-d16") +endif() + +if (ARM_CPU STREQUAL "cortex-m7" ) + add_compile_options("-mfpu=fpv5-d16") + add_link_options("-mfpu=fpv5-d16") +endif() + +if (ARM_CPU STREQUAL "cortex-m4" ) + add_compile_options("-mfpu=fpv4-sp-d16") + add_link_options("-mfpu=fpv4-sp-d16") +endif() diff --git a/micro/cmake/toolchain/gcc-arm-none-eabi.cmake b/micro/cmake/toolchain/gcc-arm-none-eabi.cmake index a7610ae4f0ffa84340a43e2fffcceb254463bef6..6411be4b89e70be06ed712deafe9ad7f41de0a35 100644 --- a/micro/cmake/toolchain/gcc-arm-none-eabi.cmake +++ b/micro/cmake/toolchain/gcc-arm-none-eabi.cmake @@ -1,15 +1,25 @@ -set(CMAKE_SYSTEM_PROCESSOR arm) - -set(CMAKE_C_COMPILER "${GCC_ARM_ROOT}/arm-none-eabi-gcc") -set(CMAKE_CXX_COMPILER "${GCC_ARM_ROOT}/arm-none-eabi-g++") -set(CMAKE_AR "${GCC_ARM_ROOT}/arm-none-eabi-ar" CACHE FILEPATH "Archiver") -set(CMAKE_LINKER "${GCC_ARM_ROOT}/arm-none-eabi-ld") -set(CMAKE_EXE_LINKER_FLAGS "--specs=nosys.specs" CACHE INTERNAL "") -set(MACE_MICRO_ARM_NONE ON) +set(CMAKE_SYSTEM_NAME Generic) +set(CMAKE_SYSTEM_PROCESSOR arm) set(CMAKE_FIND_ROOT_PATH "${GCC_ARM_ROOT}") set(CMAKE_FIND_ROOT_PATH_MODE_PROGRAM NEVER) set(CMAKE_FIND_ROOT_PATH_MODE_LIBRARY ONLY) set(CMAKE_FIND_ROOT_PATH_MODE_INCLUDE ONLY) set(CMAKE_FIND_ROOT_PATH_MODE_PACKAGE ONLY) + +find_program(CMAKE_C_COMPILER NAMES arm-none-eabi-gcc arm-none-eabi-gcc.exe) 
+find_program(CMAKE_CXX_COMPILER NAMES arm-none-eabi-g++ arm-none-eabi-g++.exe) +find_program(CMAKE_ASM_COMPILER NAMES arm-none-eabi-gcc arm-none-eabi-gcc.exe) +find_program(CMAKE_AR NAMES arm-none-eabi-gcc-ar arm-none-eabi-gcc-ar.exe) +find_program(CMAKE_CXX_COMPILER_AR NAMES arm-none-eabi-gcc-ar arm-none-eabi-gcc-ar.exe) +find_program(CMAKE_C_COMPILER_AR NAMES arm-none-eabi-gcc-ar arm-none-eabi-gcc-ar.exe) +find_program(CMAKE_LINKER NAMES arm-none-eabi-g++ arm-none-eabi-g++.exe) + +find_program(ELF2BIN NAMES arm-none-eabi-objcopy arm-none-eabi-objcopy.exe) + +# Force compiler settings +SET(CMAKE_C_COMPILER_WORKS TRUE) +SET(CMAKE_CXX_COMPILER_WORKS TRUE) + +set(MACE_MICRO_GCC_ARM ON) diff --git a/micro/codegen/CMakeLists.txt b/micro/codegen/CMakeLists.txt index ee75d1a2d566a28e40fc4a86bf7600f8efdd8bdd..392d222f695664cbd8753ed05410c740c509062c 100644 --- a/micro/codegen/CMakeLists.txt +++ b/micro/codegen/CMakeLists.txt @@ -1,29 +1,17 @@ file(GLOB_RECURSE generated_models_srcs models *.cc) -add_library(generated_models - ${generated_models_srcs} -) -target_link_libraries(generated_models - micro_framework - micro_include - micro_model - micro_ops -) - file(GLOB_RECURSE micro_engine_srcs engines micro_engine_factory.cc) -add_library(micro_engine - ${micro_engine_srcs} -) -target_link_libraries(micro_engine - micro_framework - micro_model - micro_ops - generated_models -) - file(GLOB_RECURSE micro_engine_c_srcs engines micro_engine_c_interface.cc) -add_library(micro_engine_c + +# Use ".keep.cc" as a source file when there are no model source files in "models" directory +add_library(models + ${generated_models_srcs} + ${micro_engine_srcs} ${micro_engine_c_srcs} ) -target_link_libraries(micro_engine_c - micro_engine +target_link_libraries(models + micro ) + +install(TARGETS models + ARCHIVE DESTINATION lib +) \ No newline at end of file diff --git a/micro/codegen/engines/.keep.cc b/micro/codegen/engines/.keep.cc new file mode 100644 index 
0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/micro/codegen/models/.keep.cc b/micro/codegen/models/.keep.cc new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/micro/examples/CMakeLists.txt b/micro/examples/CMakeLists.txt new file mode 100644 index 0000000000000000000000000000000000000000..c5aff86ea6de7fc2a4e13ab371cc7d6604a46009 --- /dev/null +++ b/micro/examples/CMakeLists.txt @@ -0,0 +1 @@ +add_subdirectory(classifier) \ No newline at end of file diff --git a/micro/examples/classifier/.gitignore b/micro/examples/classifier/.gitignore new file mode 100644 index 0000000000000000000000000000000000000000..c6a0d80403faea040637a5e661839d4c5c08368e --- /dev/null +++ b/micro/examples/classifier/.gitignore @@ -0,0 +1,6 @@ +mbed-os +BUILD +install +mbed_app.json +__pycache__ +mbed_settings.py \ No newline at end of file diff --git a/micro/examples/classifier/.mbed b/micro/examples/classifier/.mbed new file mode 100644 index 0000000000000000000000000000000000000000..9c0edc955256fc417c5f1d340253bbb08ec16e2e --- /dev/null +++ b/micro/examples/classifier/.mbed @@ -0,0 +1,2 @@ +TARGET=NUCLEO_F767ZI +ROOT=. 
diff --git a/micro/examples/classifier/CMakeLists.txt b/micro/examples/classifier/CMakeLists.txt new file mode 100644 index 0000000000000000000000000000000000000000..c900585d7ad304e18455946b061602aa10b5f694 --- /dev/null +++ b/micro/examples/classifier/CMakeLists.txt @@ -0,0 +1,10 @@ + +if(NOT MICRO_MODEL_NAME OR NOT MICRO_DATA_NAME) + message(FATAL_ERROR "MICRO_MODEL_NAME or MICRO_DATA_NAME is undefined") +endif() + +add_executable(${MICRO_MODEL_NAME} main.cc) +target_compile_options(${MICRO_MODEL_NAME} PRIVATE "-Wno-error") +target_link_libraries(${MICRO_MODEL_NAME} micro models) +target_compile_definitions(${MICRO_MODEL_NAME} PRIVATE "-DMICRO_MODEL_NAME=${MICRO_MODEL_NAME}") +target_compile_definitions(${MICRO_MODEL_NAME} PRIVATE "-DMICRO_DATA_NAME=${MICRO_DATA_NAME}") diff --git a/micro/examples/classifier/data.h b/micro/examples/classifier/data.h new file mode 100644 index 0000000000000000000000000000000000000000..1f4eebcc5902fe13749320e112985dff9257fbb3 --- /dev/null +++ b/micro/examples/classifier/data.h @@ -0,0 +1,38 @@ +// Copyright 2020 The MACE Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#ifndef MICRO_EXAMPLES_CLASSIFIER_DATA_H_ +#define MICRO_EXAMPLES_CLASSIFIER_DATA_H_ + +#include "data/har.h" +#include "data/kws.h" +#include "data/mnist.h" +#include "stdint.h" + +namespace mnist { +const float *input = data_mnist_4; +const int32_t input_dims[4] = {1, 28, 28, 1}; +} // namespace mnist + +namespace har { +const float *input = data_har_standing; +const int32_t input_dims[4] = {1, 90, 3, 1}; +} // namespace har + +namespace kws { +const float *input = data_kws_yes; +const int32_t input_dims[4] = {1, 98, 40, 1}; +} // namespace kws + +#endif // MICRO_EXAMPLES_CLASSIFIER_DATA_H_ diff --git a/micro/examples/classifier/data/har.h b/micro/examples/classifier/data/har.h new file mode 100644 index 0000000000000000000000000000000000000000..515436cb7df3d612dbd2a1ffbb1f820a97ae4957 --- /dev/null +++ b/micro/examples/classifier/data/har.h @@ -0,0 +1,159 @@ +// Copyright 2020 The MACE Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#ifndef MICRO_EXAMPLES_DATA_HAR_H_ +#define MICRO_EXAMPLES_DATA_HAR_H_ + +static float data_har_jogging[270] = { + 5.012288, 11.264028, 0.95342433, -0.6946377, 12.680544, 0.50395286, + 4.903325, 10.882658, -0.08172209, -0.61291564, 18.496431, 3.0237172, + -1.1849703, 12.108489, 7.205164, 1.3756552, -2.4925237, -6.510526, + -0.61291564, 10.56939, 5.706926, -0.50395286, 13.947236, 7.0553403, + -8.430995, 11.413852, 5.134871, 0.95342433, 1.3756552, 1.6480621, + -8.19945, 19.57244, 2.7240696, 1.4165162, 5.7886477, 2.982856, + -1.879608, -2.982856, -0.29964766, -6.1291566, 6.851035, -8.158588, + 5.829509, 18.0061, 8.539958, 6.2789803, 2.982856, 2.9147544, + -1.56634, 8.308413, -1.4573772, 3.5276701, 13.593107, 9.425281, + -2.0294318, -5.706926, -10.18802, 2.7649305, 10.337844, -9.724928, + 3.568531, 13.6748295, 1.5390993, -0.50395286, 3.8681788, 3.718355, + -2.3018389, 1.6889231, 0.08172209, -3.568531, 19.57244, 6.510526, + -0.8036005, -3.2961242, -4.630918, 0.50395286, 10.841797, 13.525005, + 5.706926, 15.595298, 6.1700177, -8.662541, 7.273266, 4.0180025, + -1.334794, 1.2258313, 2.3699405, -4.5900574, 19.57244, 4.7126403, + 3.8681788, 3.759216, 0.84446156, -1.7978859, 1.5390993, 8.730643, + 7.668256, 11.264028, -1.3075534, -2.3699405, 14.2877445, 8.281172, + 2.7240696, 1.4573772, 0.88532263, -3.5957718, 18.659876, -0.6537767, + 3.9499009, 4.140586, 3.990762, 0.46309182, -2.4108016, 2.4108016, + 3.7864566, 14.137921, -3.1463003, 3.336985, 19.231932, 6.5513873, + 5.6660647, 3.7864566, 0.53119355, 0.23154591, 0.7627395, 0.7627395, + -4.8216033, 19.57244, 8.158588, 1.8387469, -1.1168685, -2.7921712, + -3.2961242, 10.079058, 13.824653, 11.604536, 17.079916, 1.334794, + -3.173541, 14.015338, 5.706926, 0.61291564, 1.1168685, 2.5606253, + -7.8861814, 19.57244, 1.9885708, 3.1463003, 5.243834, 4.671779, + -3.0237172, -4.3312707, -3.336985, -0.08172209, 11.917805, -7.8861814, + -1.0351465, 14.818938, 4.6036777, -2.4516625, 2.5333846, 3.486809, + -1.3756552, 2.070293, 
-0.19068487, -2.4925237, 19.57244, 6.469665, + 1.4573772, -5.243834, -4.372132, -1.4165162, 9.80665, 5.7477865, + -1.2666923, 14.709975, 6.2108784, -3.6774938, 3.173541, 3.7864566, + 1.8387469, 2.7649305, -1.7570249, -1.2666923, 19.313654, 6.3198414, + 2.4108016, -7.6546354, -6.1291566, -0.61291564, 16.358038, 4.944186, + 0.040861044, 17.502148, 2.5333846, -7.6546354, 7.8180795, 4.372132, + -1.2666923, 0.7218784, 0.8036005, -5.012288, 19.57244, 5.5162406, + 1.9477097, 2.7921712, 2.070293, -5.053149, 1.6480621, 7.6273947, + 9.384419, 13.443283, 1.0351465, -5.434519, 13.211738, 6.4424243, + -0.61291564, 1.879608, 1.4165162, 4.7126403, -6.5513873, -6.0201936, + -1.7570249, 9.302697, -6.428804, -0.9125633, 10.501288, -0.27240697, + 2.6014864, 19.381754, 4.440233, 5.7886477, 3.214402, 1.1441092, + -1.9885708, 12.4489975, -2.7240696, 1.4165162, 16.780268, 8.471856, + 0.42223078, -8.267551, -7.3549876, -3.568531, 10.95076, -0.8036005, + -4.671779, 11.727119, 0.38136974, -2.1383946, 1.6889231, 3.5276701, + -1.334794, 2.4925237, -0.3405087, -2.9147544, 19.57244, 7.5865335, + 3.5276701, -3.9499009, -1.920469, -4.0588636, 10.038197, 14.2877445}; + + +static float data_har_walking[270] = { + -0.99, 11.45, -3.0645783, 1.18, 14.94, -3.718355, + 1.27, 13.82, -1.2258313, -0.15, 11.14, -2.1111538, + -1.38, 8.05, -0.84446156, -1.99, 5.94, 0.14982383, + -0.08, 4.94, 0.88532263, -0.27, 4.14, 2.2609777, + -3.26, 6.44, 4.1814466, -5.75, 13.02, 7.273266, + -2.37, 10.65, 8.008764, -0.46, 15.94, 0.7218784, + 1.8, 6.13, -1.1168685, -4.75, 10.84, -3.0645783, + -1.46, 8.39, 0.88532263, 1.33, 7.78, -0.46309182, + -3.72, 8.47, -0.7218784, -3.72, 8.47, -0.7218784, + -1.88, 7.63, -0.08172209, -1.12, 9.3, -0.10896278, + -2.37, 10.95, -0.8036005, -4.06, 12.3, -0.7627395, + -3.41, 14.52, -0.7218784, 0.34, 12.22, -3.7864566, + 0.76, 15.32, -2.6014864, -0.04, 13.53, -1.1849703, + -0.53, 9.72, -2.1792557, 0.11, 5.52, -1.6480621, + 0.38, 4.06, 0.46309182, 0.04, 3.26, 0.14982383, + -3.34, 5.83, 4.862464, 
-6.05, 13.14, 7.668256, + -0.91, 11.14, 11.073342, -0.5, 16.13, -0.9125633, + -0.27, 7.7, -1.1849703, -3.45, 9.28, -2.1383946, + -2.03, 9.04, -0.53119355, 2.03, 6.89, -0.5720546, + -2.18, 7.5, -1.3756552, -1.8, 7.21, -0.0, + -1.57, 9.96, 0.08172209, -3.21, 12.07, -0.14982383, + -5.09, 12.22, -0.7627395, -2.68, 14.98, -3.173541, + 1.99, 12.79, -3.2961242, 0.84, 14.82, -2.2609777, + 0.69, 13.21, -2.2609777, -1.08, 9.15, -1.2258313, + -0.95, 4.9, -0.7627395, -0.11, 4.67, 0.19068487, + 0.61, 3.49, 0.08172209, -1.84, 5.48, 5.134871, + -5.6, 14.06, 7.3958488, -1.08, 12.03, 8.308413, + 1.73, 14.56, 2.9147544, -0.76, 5.94, -5.325556, + -5.6, 12.83, -0.0, 0.04, 6.66, -0.9942854, + 1.65, 7.89, -0.6537767, -2.3, 7.93, -2.3426998, + -1.92, 8.24, -0.040861044, -1.42, 9.96, -0.14982383, + -3.72, 11.5, 0.14982383, -4.59, 12.18, -0.5720546, + -2.79, 14.25, -3.2961242, 3.15, 13.02, -3.1054392, + 1.46, 14.94, -2.2201166, -2.22, 12.49, -2.1111538, + -1.42, 9.53, -1.607201, -0.11, 6.17, -0.8036005, + 0.34, 4.71, 0.10896278, 1.04, 3.49, 0.53119355, + -1.99, 5.05, 3.255263, -6.66, 14.29, 7.082581, + -3.87, 10.04, 9.765789, -1.5, 18.39, -0.6946377, + 2.37, 5.01, -0.5720546, -5.24, 10.76, -3.173541, + -1.46, 8.2, 0.53119355, 2.6, 6.97, -0.040861044, + -3.53, 8.85, -1.879608, -1.23, 7.06, -0.23154591, + -1.53, 11.3, 0.23154591, -2.53, 11.65, -0.6946377, + -3.83, 12.34, -0.50395286, -2.96, 13.25, -3.173541, + 2.83, 13.25, -3.173541, 0.65, 14.41, -1.1441092, + -0.89, 11.8, -2.6014864, -1.18, 7.21, -1.334794}; + +static float data_har_standing[270] = { + 3.17, 9.28, 1.1441092, 3.3, 9.23, 1.1168685, + 3.21, 9.3, 1.1849703, 3.17, 9.28, 1.0760075, + 3.17, 9.34, 1.1168685, 3.26, 9.28, 1.1168685, + 3.21, 9.3, 1.1168685, 3.21, 9.23, 1.1168685, + 3.17, 9.28, 1.1168685, 3.15, 9.28, 1.1849703, + 3.17, 9.34, 1.1168685, 3.21, 9.28, 1.1849703, + 3.21, 9.3, 1.0760075, 3.15, 9.34, 1.1168685, + 3.21, 9.28, 1.0760075, 3.21, 9.34, 1.1441092, + 3.26, 9.3, 1.1441092, 3.17, 9.34, 1.1168685, + 3.21, 9.3, 
1.1168685, 3.21, 9.28, 1.1168685, + 3.26, 9.28, 1.1849703, 3.17, 9.3, 1.1168685, + 3.21, 9.28, 1.1168685, -1.88, 9.85, -0.23154591, + -0.19, 9.92, -0.5720546, -0.61, 10.27, -0.88532263, + 0.76, 10.57, -1.7570249, 0.42, 9.47, -1.1168685, + 0.38, 9.47, -1.9477097, -1.04, 10.65, -1.525479, + -1.92, 9.51, -0.5720546, -1.31, 9.85, -0.53119355, + -0.08, 9.92, -1.7570249, 1.73, 9.77, -0.8036005, + 1.5, 9.92, -1.4573772, 1.27, 10.5, -1.879608, + 0.61, 10.12, -1.9885708, -0.53, 9.77, -1.879608, + -0.42, 9.62, -1.6480621, 0.65, 10.42, -2.2201166, + 0.65, 10.42, -2.2201166, 1.61, 9.38, -1.8387469, + 1.61, 9.43, -1.525479, 1.61, 9.43, -1.525479, + 0.95, 10.27, -1.3075534, 0.19, 10.38, -1.1849703, + 0.31, 9.81, -1.4165162, 1.12, 9.62, -1.6889231, + 1.23, 9.85, -1.6480621, 1.04, 9.7, -1.8387469, + 0.57, 9.89, -2.0294318, 0.65, 9.96, -1.9885708, + 0.95, 9.96, -1.7570249, 1.42, 10, -1.7297841, + 1.69, 9.89, -1.525479, 1.46, 10, -1.4165162, + 0.69, 9.77, -1.6889231, 0.08, 9.96, -1.9477097, + -0.08, 10.19, -2.1111538, 0.38, 9.72, -1.9885708, + 0.93, 10.12, -2.1111538, 1.33, 9.62, -1.9885708, + 1.08, 9.85, -1.9477097, 0.8, 9.77, -1.7570249, + 0.69, 10.34, -1.6889231, 0.72, 9.66, -1.3075534, + 0.69, 10, -1.3756552, 0.93, 9.62, -1.4573772, + 0.76, 10.12, -1.607201, 0.93, 9.72, -1.7978859, + 0.76, 10.23, -1.9885708, 0.76, 9.23, -1.920469, + 0.57, 10.34, -2.1383946, 0.99, 9.58, -1.879608, + 1.33, 10.04, -1.7978859, 1.61, 9.85, -1.4165162, + 0.61, 10.15, -0.88532263, 0.53, 9.58, -1.4573772, + 0.15, 10.19, -1.920469, 0.34, 9.85, -1.334794, + 0.8, 10.31, -1.7978859, 0.69, 9.53, -1.9477097, + 0.8, 9.92, -1.879608, 0.5, 10.04, -1.1849703, + 1.12, 9.43, -1.7978859, 1.31, 10.27, -1.2666923, + 1.5, 9.77, -1.607201, 0.46, 10.04, -0.9125633, + 0.31, 9.85, -1.0760075, 0.61, 10.19, -1.1849703}; + +#endif // MICRO_EXAMPLES_DATA_HAR_H_ diff --git a/micro/examples/classifier/data/kws.h b/micro/examples/classifier/data/kws.h new file mode 100644 index 
0000000000000000000000000000000000000000..c64a3a8b8c657829a41b80a4e94fd9767551f586 --- /dev/null +++ b/micro/examples/classifier/data/kws.h @@ -0,0 +1,122 @@ +// Copyright 2020 The MACE Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef MICRO_EXAMPLES_DATA_KWS_H_ +#define MICRO_EXAMPLES_DATA_KWS_H_ + +// speech_commands_dataset/google_speech_commands/splitted_data/test/yes/1b4c9b89_nohash_1.wav +// clang-format off +static float data_kws_yes[98 * 40] = { +-44.89, -1.35, 0.99, 0.13, -0.37, -0.9 , -1.43, -0.4 , -0.35, -0.64, 0.33, 0.17, -0.19, 0.23, -0.3 , -0.33, -0.17, 0.34, 0.6 , -0.03, -0.74, 0.39, -0.07, -0.05, 0.03, 0.3 , -0.39, -0.43, -0.33, -0.49, -0.1 , -0.31, -0.73, -0.28, -0.34, -0.4 , -0.54, -0.06, 0.19, 0.24, +-45.04, -1.31, 0.75, 0.35, 0.07, -0.86, -1.23, 0.39, -0.71, -0.42, 0.64, 0.4 , 0.45, 0.44, 0.5 , 0.36, -0.47, -0.63, -0.02, 0.18, -0.44, 0.78, 0.28, -0.13, 0.14, 0.11, -0.14, -0.09, -0.25, -0.08, 0.02, 0.15, -0.22, 0.29, -0.19, -0.39, -0.26, -0.06, 0.09, -0.14, +-44.73, -0.97, 1.66, -0.16, -0.5 , -1.7 , -1.91, 0.2 , -0.92, -0.64, 0.09, 0.66, -0.36, -0.12, 0.65, 0.01, -0.55, -0.78, -0.7 , -0.45, -0.41, -0.04, 0.83, 0.05, 0.32, 0.16, -0.61, -0.28, -0.87, -0.55, -0.23, 0.26, -0.14, 0.11, -0.49, -0.7 , -0.75, -0.47, 0.22, 0.13, +-44.85, -0.92, 0.63, -0.89, -1.39, -1.89, -1.43, 0.38, 0.06, -0.01, 0.06, 0.67, -0.31, -0.12, 0.86, 0.32, 0.12, -0.48, -1.72, -1.9 , -1.21, -0.01, 0.57, 0.31, 0.41, 0.34, -0.38, 
-0.26, -1.1 , -0.02, 0.15, 0.29, 0.43, 0.3 , 0.05, -0.07, -0.63, -0.01, -0.32, -0.12, +-44.73, -0.27, 1.15, -1.17, -1.25, -2.07, -1.58, 0.33, 0.73, 0.34, -0.21, -0.14, 0.28, 0.2 , -0.33, 0.72, 0.22, -0.97, -1.08, -1.59, -1.26, 0.45, 0.35, 0.74, 1.27, 1.11, -0.04, -1.31, -1. , 0.23, 0.25, 0.73, 0.15, 0.15, 0.13, -0.51, -0.78, -0.11, -0.18, 0.17, +-44.49, -0.47, 1.83, 0.01, -1.43, -2.24, -2.64, -0.45, 0.95, 0.53, -0.76, -0.85, 0.08, 0.19, -0.75, -0.6 , -0.31, -1.21, -1.72, -1.67, -1.37, 0.55, 0.38, 1.28, 1.35, 0.17, -0.56, -0.84, -1.34, -0.13, 0.42, 1.16, -0.31, -0.02, 0.82, -0.12, 0.31, 0.43, -0.39, -0.05, +-44.51, 0.27, 1.46, -1.51, -1.65, -1.17, -1.26, -0.55, 0.53, 0.75, 1.09, 0.27, 0.24, 0.72, -0.84, -1.15, -0.93, -1.85, -1.79, -1.52, -1.79, -1.2 , -0.15, 1.32, 0.82, 0.49, -0.39, -0.19, -0.83, -0.39, 0.04, 0.53, -0.17, -0.06, 0.85, -0.21, -0.57, -0.39, -0.35, 0.09, +-44.21, -0.23, 0.99, -1.6 , -2.04, -1.24, -0.91, -0.59, -0.62, 0.01, 0.88, 0.58, 0.77, 0.55, 0.09, -0.27, -0.54, -0.74, -1. , -0.99, -0.94, -0.7 , 0.41, 1.46, 0.97, -0.46, -1.12, -0.44, -0.1 , 0.15, 0.56, 0.9 , -0.27, -0.46, 0.24, -0.15, -0.37, 0.05, 0.1 , 0.3 , +-43.8 , 0.31, 1.09, -1. , -1.23, -0.9 , -0.67, -1.09, -0.6 , 0.04, 1.09, 0.57, 0.97, 0.78, 0.46, -0.21, -1.13, -0.11, -0.62, -0.52, -1. 
, -1.51, 0.4 , 0.91, 0.47, -0.2 , -0.2 , -0.29, -0.55, 0.48, -0.09, 0.5 , -0.21, -0.16, 0.36, -0.15, -0.07, -0.31, -0.46, 0.26, +-42.98, 0.38, 1.25, -0.87, -0.74, -0.48, -1.4 , -0.95, -2.06, 0.06, 1.74, 0.45, 1.07, 0.93, 0.6 , 0.44, -0.81, -0.76, -0.14, -0.37, -0.33, -0.79, -0.08, 0.42, 0.64, -0.42, -0.49, -0.83, -0.97, 0.34, 0.18, 0.18, -0.25, 0.15, 0.36, -0.21, -0.31, -0.02, -0.66, 0.14, +-43.47, -0.22, 0.98, -0.73, -1.34, -0.63, -1.71, -2.36, -2.27, -0.22, 0.69, -0.45, -0.44, -0.93, 0.31, 0.84, -0.36, -1.56, -1.77, -1.12, -0.7 , -1.69, -0.96, 0.19, 0.22, -0.62, -0.77, -0.48, -0.49, 0.74, 0.34, 0.33, -0.13, 0.19, 0.54, -0.38, -0.09, -0.08, -0.7 , -0.26, +-40.58, 2.42, 3.86, 1.07, -0.08, 1.06, -0.27, -1.88, -1.96, -0.06, 0.87, -0.88, -1.11, -0.86, -0.72, 0.44, -0.7 , -1.09, -0.63, 0.26, 0.56, -0.48, -0.08, 0.18, 0.81, 0.84, -0.72, 0.08, 0.15, 0.3 , -0.41, -0.13, -0.47, -0.22, -0.66, -0.95, 0.36, 0.47, 0.35, -0.16, +-30.63, 5.5 , 7.67, 1.49, 1.11, 0.08, -2.62, -2.58, -1.61, 0.04, -0.34, -2.59, -1.08, -0.79, -1.11, 0.12, -2.28, -0.39, -0.02, 0.43, 0.3 , -0.08, -0.97, 0.45, -0.27, 0.46, -1.06, -0.06, -0.01, -0.16, -0.32, 0.18, -0.06, -0.49, 0.23, -0.46, -0.17, 0.27, -0.26, -0.21, +-23.7 , 5.92, 8.26, 1.79, -1.58, 0.32, -3.54, -2.29, -1.89, -1.11, -0.6 , -3.36, -0.73, -0.66, -1.49, -1.37, -2.87, -0.23, -1.15, 0.18, 0.13, -0.27, -0.42, 1.24, 0.26, -0.04, -0.9 , -0.26, -0.4 , -0.34, 0.17, -0.08, 0.14, -0.3 , 0.14, -0.1 , 0.5 , 0.09, -0.15, 0.21, +-20.41, 5.76, 8.87, 2.16, -3.84, 0.8 , -3.07, -0.3 , -1.21, -1.29, 0.39, -1.81, 0.52, -0.46, -1.18, -1.07, -2.39, -1.82, -1.47, -0.07, -1.03, -0.29, 0.27, 1.35, 0.68, 1.06, -0.28, 0.38, -0.71, -0.51, 0.33, 0.18, 0.62, 0.31, -0.09, -0.13, 0.3 , -0. , 0.35, 0.29, +-16.62, 5.57, 7.72, 3.08, -5.01, 1.33, -3.02, 0.9 , -1.33, -1.08, 1.04, -1.93, 1.08, -0.5 , -1.92, -1.18, -1.59, -1.83, -1.65, -0.77, -2.08, -0.32, -0.21, 1.61, 1.74, 1.41, 0.39, 0.69, -0.43, -0.58, -0.13, -0.56, 0.52, -0.24, -0.31, 0.19, 0.06, 0.01, 0.45, 0.13, +-14. 
, 4.68, 6.43, 4.07, -6.68, 1.08, -2.65, 0.83, -1.69, -0.75, 1.04, -2.29, 1.32, -0.69, -1.83, -2.7 , -1.66, -1.77, -1.68, -1.04, -2.47, -0.43, 0.68, 2.06, 1.64, 1.44, 0.36, 0.29, -0.74, -0.89, -0.57, -0.68, 0.97, -0.11, -0.31, 0.13, -0.17, -0.03, 0.39, 0.19, +-12.78, 4.39, 5.95, 4.88, -7.19, 0.37, -2.62, 0.29, -2.01, -0.74, -0.1 , -2.34, 1.3 , -0.49, -1.01, -3.36, -1.41, -1.35, -1.55, -1.82, -2.38, -0.87, -0.02, 1.38, 1. , 1.32, 0.36, 0.9 , -0.71, -1.36, -0.61, -0.69, 1.31, 0.41, -0.29, 0.23, 0.34, -0.08, 0.16, -0.08, +-10.62, 4.2 , 5.31, 5.65, -7.15, 0.33, -2.05, -0.71, -1.05, 0.26, -0.8 , -1.84, 1.35, -0.42, -0.63, -3.38, -1.35, -1.8 , -1.98, -1.5 , -1.59, -0.8 , -0.3 , 1.52, 0.94, 1.88, 1.04, 0.93, -0.86, -1.03, -0.47, -0.17, 0.56, 0.36, -0.26, 0.05, -0.04, 0.07, 0.35, 0.46, + -8.94, 4.54, 5.63, 7.13, -7.9 , -0.35, -2.27, -1.07, -0.78, 0.02, -0.47, -1.27, 1.6 , -0.48, -1.44, -2.38, -0.83, -1.54, -2.21, -1.45, -0.91, -0.68, -0.79, 1.89, 0.51, 1.84, 0.99, 0.96, -0.37, -1.47, -0.82, -0.12, 1.05, 0.83, 0.09, -0.24, -0.58, -0.02, 0.3 , 0.48, + -6.8 , 4.53, 3.75, 6.32, -7.55, -0.35, -1.42, -0.74, -1.47, -0.16, 0.54, -0.79, 1.39, -0.75, -1.5 , -2.04, -1.34, -2.2 , -2.79, -2.04, -0.45, -0.65, -0.67, 1.85, 0.89, 2.39, 0.59, 1.09, -0.39, -0.83, -0.86, -0.81, 0.7 , 0.73, 0.16, -0.25, -0.35, -0.19, 0.44, 0.88, + -4.24, 4.77, 2.41, 6.13, -6.42, -1.08, -1.41, -1.1 , -2.56, -0.02, 0.69, -0.61, 1.32, -1.21, -1.47, -1.54, -1.07, -2.19, -2.35, -2.05, -0.76, -0.77, -1.07, 1.56, 1.4 , 2.26, 0.6 , 0.68, -0.48, -0.82, -1.11, -0.66, 1.04, 1.01, 0.05, -0.23, -0.35, -0.5 , 0.76, 0.31, + -1.88, 5.5 , 1.56, 6.08, -6.31, -2.14, -1.35, -0.75, -2.83, -0.15, 1.07, -0.55, 0.9 , -1.4 , -1.82, -1.17, -0.95, -1.98, -1.78, -1.33, -0.67, -0.65, -0.92, 1.67, 0.78, 1.97, 0.54, 0.72, -0.34, -1.04, -1.15, -0.23, 1.02, 1.03, 0.03, -0.23, -0.74, -0.21, 1.14, 0.62, + -0.88, 5.98, -0.15, 5.6 , -6.56, -1.91, -0.83, 0.26, -2.67, -0.06, 0.96, -1.09, 0.58, -1.83, -1.23, -0.92, -1.18, -1.99, -2.38, -1.48, -0.99, 
-0.5 , -0.89, 2.1 , 0.87, 1.91, 0.32, 0.9 , -0.55, -1.26, -1.4 , -0.49, 1.11, 0.86, -0.06, -0.2 , -0.23, 0.26, 1.12, 0.59, + -0.15, 6.05, -0.3 , 5.39, -6.8 , -2.08, -1.67, 0.4 , -2.63, -0.37, 0.6 , -1.5 , 0.29, -1.28, -1.11, -1.3 , -1.38, -1.44, -1.92, -1.53, -1.41, -0.55, -1.36, 1.53, 0.76, 1.71, 0.36, 0.84, -0.51, -1.54, -0.65, -0.34, 0.98, 0.93, -0.18, -0.13, -0.61, 0.17, 0.87, 0.58, + 0.64, 5.24, -0.64, 5.1 , -6.04, -1.78, -1.56, 1.3 , -2.35, -0.45, 0.19, -0.76, -0.23, -1.09, -0.86, -1.3 , -1.3 , -1.55, -1.7 , -2.49, -1.38, -0.5 , -1.44, 1.99, 0.97, 2.6 , 0.68, 0.77, -0.15, -1.59, -0.94, -0.74, 1.06, 1.44, -0.19, -0.18, -0.69, -0.32, 0.98, 0.59, + 1.19, 5.41, -0.69, 4.08, -5.65, -1.63, -1.23, 1.41, -2.04, -0.45, -0.05, -1. , -0.11, -0.79, -1.08, -1.3 , -0.56, -2.12, -1.2 , -2.42, -1.41, -0.43, -1.54, 1.42, 1.14, 2.36, 0.68, 1.09, -0.27, -1.45, -1.05, -0.95, 0.92, 0.96, 0.57, 0.02, -0.77, -0.26, 1.12, 0.38, + 2.17, 4.88, -1.04, 3.79, -6.26, -2.31, -1.15, 1.32, -1.94, -0.29, 0.76, -0.93, -0.51, -1.1 , -1.28, -1.85, -1.32, -2.07, -1.36, -2.01, -1.47, -0.48, -1.57, 1.11, 0.87, 2.5 , 0.71, 0.83, 0.28, -1.13, -0.99, -0.49, 0.74, 0.84, 0.02, -0.2 , -1.13, 0.03, 1.01, 0.64, + 1.98, 4.9 , -1.71, 3.65, -5.56, -2.68, -1.5 , 1.12, -1.38, -1.03, 0.6 , -1.15, -0.61, -1.13, -1.25, -1.63, -1.6 , -1.8 , -1.47, -2.31, -2.04, -0.5 , -1.35, 0.81, 0.88, 2.1 , 1.12, 0.92, 0.15, -1.2 , -1.12, -0.66, 0.65, 1.01, 0.06, -0.2 , -1.25, -0.27, 0.92, 0.4 , + 2.19, 4.83, -1.07, 2.77, -4.97, -1.63, -1.74, 1.72, -1.67, -0.47, 0.63, -1.42, -0.1 , -1.33, -0.97, -0.66, -1.4 , -1.96, -1.17, -1.85, -1.9 , -1.15, -1.37, 0.94, 1.01, 2.09, 0.95, 1.45, 0.1 , -1.2 , -1.64, -0.77, 1.13, 0.99, 0.45, 0.09, -1.16, -0.32, 0.67, 0.66, + 2.58, 5.07, -1.55, 2.55, -4.88, -1.43, -2.32, 1.79, -1.93, -0.57, 0.32, -1.2 , -0.34, -1.87, -0.56, -1.15, -1.31, -1.64, -0.93, -2.37, -1.65, -0.76, -1.89, 0.91, 0.57, 2.22, 0.91, 1.65, -0.31, -1.22, -1.38, -0.58, 0.6 , 1.1 , 0.72, -0.04, -0.89, -0.26, 0.25, 0.79, + 3.32, 
4.62, -1.6 , 3. , -4.52, -1.64, -2.64, 3.01, -2.01, -0.43, 0.46, -1.22, 0.09, -1.76, -0.69, -1.15, -1.29, -1.39, -1.04, -2.32, -1.54, -0.72, -1.85, 1. , 0.57, 2.53, 1.08, 1.6 , -0.25, -0.95, -1.45, -0.42, 0.76, 1.14, 0.49, 0.14, -1.17, -0.46, 0.35, 0.85, + 2.17, 4.74, -2.02, 3.29, -4.59, -1.09, -2.04, 2.6 , -1.93, -1.05, -0.05, -1.57, 0.34, -1.73, -0.59, -0.88, -0.79, -1.49, -1.23, -2.2 , -1.9 , -1.02, -1.75, 1.32, 0.78, 2.53, 1.26, 1.44, -0.35, -1.26, -1.81, -0.49, 0.63, 1.27, 0.95, 0.63, -1.03, -0.83, 0.15, 0.55, + 2.5 , 5.01, -2.89, 2.97, -4.66, -1.86, -2.03, 2.68, -2.41, -0.83, -0.19, -1.84, 0.09, -1.78, -0.75, -0.84, -1.12, -1.59, -1.25, -2.08, -2.03, -1.25, -1.51, 0.94, 0.39, 2.29, 0.69, 1.33, -0.28, -1.23, -1.4 , -0.64, 0.65, 1.08, 0.5 , 0.54, -0.74, -0.55, 0.43, 0.41, + 1.96, 5.42, -2.5 , 3. , -4.28, -1.9 , -2.37, 3. , -2.23, -0.47, -0.05, -1.68, 0.48, -1.57, -0.35, -0.86, -1.21, -1.26, -1.36, -1.98, -1.9 , -1.3 , -1.44, 0.83, 0.27, 2.21, 0.83, 1.54, 0.18, -1.56, -1.28, -0.52, 0.31, 1.44, 0.63, 0.36, -1.07, -0.45, 0.22, 0.68, + 0.91, 5.38, -2.65, 3.5 , -4.2 , -1.86, -2.06, 2.66, -2. , -0.69, -0.14, -2. , 0.42, -1.94, -0.44, -0.56, -1.19, -1.42, -1.29, -2.33, -1.49, -1.46, -1.28, 0.72, 0.52, 2.33, 1.39, 1.19, -0.15, -1.89, -1.54, -0.86, 0.83, 1.25, 0.96, 0.16, -0.75, -0.92, 0.6 , 0.67, + 1.25, 5.63, -3.01, 3.37, -4.19, -1.91, -2.51, 3.2 , -2.1 , -0.5 , 0.24, -1.75, 0.7 , -2.27, -0.58, -0.76, -1.67, -1.58, -1.44, -2.11, -1.6 , -1.49, -0.86, 0.75, 0.82, 2.09, 1.5 , 1.16, 0.43, -1.53, -1.36, -0.76, 0.9 , 0.73, 0.54, -0.06, -0.67, -0.96, 0.39, 0.7 , + 1.07, 5.51, -2.91, 3.37, -4.23, -1.75, -2.61, 3.29, -1.91, -0.4 , -0.05, -1.49, 0.19, -1.5 , -0.79, -0.89, -1.47, -1.43, -1.15, -2.14, -1.65, -0.98, -0.99, 0.56, 1.1 , 2.07, 1.2 , 1.17, 0.21, -1.24, -1.72, -0.46, 1.15, 0.89, 0.77, -0.42, -0.71, -0.37, 0.38, 0.87, + 0.04, 5.62, -2.34, 3.14, -3.98, -1.39, -2.56, 2.89, -1.67, -0.12, -0.36, -1. 
, -0.02, -1.47, -0.59, -1.2 , -1.39, -1.31, -0.91, -2.12, -1.51, -0.73, -0.35, 0.15, 1.21, 2.52, 0.94, 1.14, -0.3 , -1.2 , -1.4 , -0.63, 1.07, 1.19, 0.77, -0.44, -0.57, -0.25, 0.32, 1.09, + -0.39, 5.5 , -2.03, 3.28, -4.07, -1.63, -2.96, 2.6 , -2.11, -0.14, -0.33, -0.94, 0.45, -1.84, -0.8 , -1.26, -1.25, -1.62, -0.85, -2. , -1.81, -0.82, -0.46, 0.43, 1.48, 2.59, 0.65, 0.89, -0.7 , -0.97, -1.33, -0.19, 1.27, 0.85, 0.6 , -1.06, -0.68, 0.04, 0.94, 0.7 , + -1.24, 5.17, -2.24, 2.95, -4.22, -2.11, -3.4 , 3.09, -2.63, -0.24, -0.24, -1.12, -0.14, -2.24, -1.01, -0.96, -1.37, -2.17, -1.25, -1.8 , -2.02, -0.91, -0.49, 0.25, 1.23, 2.12, 0.44, 1.25, -0.76, -1.08, -1.61, -0.18, 1.03, 0.57, 0.15, -0.96, -0.2 , -0.02, 0.77, 0.52, + -3.35, 5.05, -2.34, 2.27, -3.37, -2.33, -3.07, 3.44, -2.09, -0.32, -0.52, -1.26, 0.46, -2.71, -1.45, -1.35, -1.97, -2.73, -1.42, -2.33, -1.94, -0.6 , -0.28, 0.55, 1.75, 2.77, 0.53, 1.33, -1.12, -1.23, -1.59, -0.23, 0.68, 0.44, -0.12, -0.9 , -0.27, -0.27, 1.24, 0.53, + -3.5 , 6.71, -1.67, 2.93, -2.73, -2.89, -2.96, 3.07, -1.77, -0.12, -0.86, -0.33, 0.95, -2.41, -1.35, -1.14, -1.6 , -1.74, -0.98, -1.92, -1.45, -0.34, 0.25, 0.47, 1.15, 2.29, 0.37, 1.1 , -0.95, -0.76, -0.94, 0.13, 0.45, 1.02, -0.11, -0.99, -0.49, -0.45, 1.16, 0.74, + -5.66, 7.54, -1.41, 3.07, -1.83, -2.61, -2.84, 2.33, -1.98, 0.15, -1.03, -0.41, 0.95, -2.4 , -0.69, -0.76, -1.62, -1.87, -0.9 , -1.47, -1.33, -0.57, 0.7 , 0.67, 1.35, 2.02, 0.86, 0.38, -1.3 , -0.87, -0.88, 0.37, 0.59, 1.28, -0.11, -0.95, -0.18, 0.3 , 0.85, 0.53, + -7.53, 7.39, -0.67, 3.04, -1.46, -2.83, -3.09, 1.52, -1.42, 0.02, -1.26, -0.99, 0.93, -2.49, -0.16, -0.29, -1.47, -2.18, -1.73, -1.07, -1.36, -0.22, 0.77, 0.43, 1.44, 1.94, 1.16, 0.14, -1.47, -0.99, -0.59, 0.54, 1.07, 1.21, -0.4 , -0.64, -0.01, 0.32, 0.73, 0.15, +-10.08, 7.08, -0.71, 2.43, -1.03, -2.69, -2.64, 0.03, -1.3 , -0.27, -1.21, -1.01, 1.28, -2.91, -0.93, -0.78, -2.06, -2.51, -2.01, -1.02, -2.41, -0.37, 1.32, 1.43, 2.13, 1.45, 1.08, -0.36, -1.63, -0.95, -0.92, 
0.81, 1.11, 1.02, -0.85, -1.14, -0.32, 0.55, 0.41, 0.1 , +-11.46, 7.97, -0.12, 2.17, -0.8 , -2.74, -2.68, -0.41, -1.98, -0.41, -1.52, -0.68, 0.81, -2.63, -0.82, -0.73, -2.02, -2.58, -2.03, -0.69, -2.06, -0.19, 0.66, 1.55, 2.28, 0.61, 0.56, 0.07, -1.65, -1.21, -0.27, 0.84, 1.13, 0.58, -1. , -0.52, 0.06, 0.01, 0.64, 0.18, +-14.22, 8.09, 1.04, 2.73, -1.06, -2.99, -2.39, -1.12, -1.26, -0.38, -1.56, -0.65, 0.9 , -1.64, -1.29, -0.95, -1.68, -2.26, -2.29, -0.47, -1.26, 0.06, 0.66, 1.02, 2.14, 0.07, 0.71, 0.29, -1.62, -0.62, 0.12, 1.17, 0.21, 0.02, 0.29, -0.32, -0.22, -0.01, 0.55, -0.24, +-18.32, 8.04, 1.98, 2.64, -0.5 , -3.72, -3.02, -0.6 , 0.05, -0.55, -1.85, -0.28, 0.59, -1.54, -1.64, -0.8 , -1.73, -2.41, -2.67, -0.17, -1.2 , 0.58, 0.99, 0.2 , 2.5 , 0.4 , 0.23, 0.47, -1.37, -0.28, -0.37, 1. , 0.3 , -0.58, 0.2 , -0.45, 0.39, 0.18, -0.09, -0.23, +-18.67, 6.73, 2.71, 2.7 , 0.38, -3.07, -2.17, -1.72, -0.77, -1.27, -2.74, -0.34, 0.44, -1.43, -1.75, -0.69, -1.45, -1.71, -2.99, 0.29, -0.85, 1.5 , 1.15, -0.35, 1.58, 0.56, -0.05, -0.29, -0.2 , -0.35, -0.13, 0.1 , -0.1 , -0.71, 0.59, -0.21, 0.04, 0.42, -0.65, 0.38, +-18.72, 4.44, 3.88, 1.9 , 1.14, -3.24, -0.78, -1.4 , 0.31, -0.61, -2.81, -0.9 , 0.09, -1.28, -1.59, -0.75, -1.69, -2.11, -2.89, 0.23, 0.66, 1.43, 1.24, -0.74, 0.47, -0.31, -0.35, 0.14, 0.46, -0.71, 0.56, 0.6 , -0.38, -0.13, 0.03, -0.57, 0.33, 0.66, -0.37, -0.27, +-17.44, 0.95, 4.47, 1.02, 0.62, -3.11, -0.17, -0.38, 0.77, -0.7 , -1.87, -1.16, 0.32, -0.48, -0.65, -0.74, -1.64, -2. , -2.01, -0.53, 0.82, 1.41, 1.24, -0.05, 0.4 , -0.73, -0.52, 0.6 , 1.09, -1.05, 0.55, 0.72, -1.46, -0.09, 0.01, -0.98, 0.15, 0.72, -0. 
, -0.19, +-17.43, -2.28, 4.38, -0.31, -0.37, -3.6 , -0.38, -0.42, 0.61, -0.68, -0.74, -1.15, -0.21, -0.74, -0.42, -0.9 , -1.24, -1.88, -1.27, 0.37, 0.67, 1.26, 0.97, 1.21, 0.25, -1.04, -0.91, 0.12, 1.19, -1.18, 0.68, 0.57, -1.06, -0.21, 0.44, -0.21, 0.34, 0.17, -0.25, -0.26, +-19.1 , -3.72, 4.63, -0.79, -1.24, -4.14, -0.5 , -0.2 , 0.72, -0.82, -0.43, -1.13, -0.84, -1.15, 0.55, -0.77, -1.47, -0.72, -0.74, 0.79, 0.35, 0.78, 0.76, 0.26, -0.44, -0.96, -0.28, 0.14, 0.47, -0.84, -0.28, -0.38, -0.19, 0.08, 0.21, 0.21, 0.37, -0.29, -0.01, 0.07, +-20.31, -5.44, 4.49, -0.96, -1.03, -4.17, -1.61, -0.61, -0.1 , -1.47, -0.29, 0.02, -0.13, -0.46, 0.04, -1.26, -1.09, -0.6 , -0.61, 0.6 , -0.54, -0.26, -0.19, 0.28, 0.29, -0.85, -0.5 , -0.17, 1.03, -0.33, -0.33, 0.12, 0.42, 0.1 , -0.48, -0.17, -0.35, 0.57, 0.22, -0.03, +-19.91, -6.34, 4.5 , -2.06, -0.3 , -3.67, -1.05, -1.14, 0.06, -0.68, -0.36, 0.71, 0.12, -0.61, -0.71, -1.84, -0.42, -0.63, -0.68, 0.49, 0.84, 0.54, 0.04, 0.13, -0.04, -0.76, 0.2 , -0.89, 0.42, -0.31, -0.89, -0.03, -0.39, -0.09, -0.92, -0.41, -0.26, 0.71, 0.79, 0.02, +-17.95, -6.59, 4.74, -2.04, 0.27, -2.92, -0.8 , -1.45, -0.08, -0.98, -0.09, 0.88, -0.75, -0.64, -0.66, -1.12, -0.82, -1.15, -1.1 , -0.64, 0.82, 0.84, -0.17, -0.24, -0.38, 0.17, 0.64, -0.39, 0.43, -0.01, -0.79, -0.04, 0.11, -0.01, -0.87, -0.45, -0.53, 0.5 , 0.25, -0.74, +-18.89, -7.41, 4.77, -3.02, 0.42, -2.94, -0.14, -0.72, -0.7 , -2.04, -0.74, 0.59, -0.09, 0.08, -0.89, -1.64, -0.94, -1.08, -0.16, -0.44, 1.24, 0.59, 0.63, 0.21, -0.44, -0.16, 0.53, 0.61, 0.79, 0.32, -0.54, 0.36, -0.22, -0.22, -0.78, -0.16, 0.21, 0.42, 0.21, -0.34, +-20.96, -7.89, 4.76, -3.29, -0.23, -2.75, 0.29, -0.91, -0.25, -1.55, -0.07, 0.08, -0.36, -0. 
, -1.35, -1.89, -1.29, -1.01, -0.62, -0.76, 0.4 , -0.12, 0.4 , 0.34, -0.87, -1.51, -0.22, 0.5 , -0.14, 0.01, 0.06, 0.22, -0.68, -0.26, -0.79, -0.43, -0.14, -0.03, 0.49, -0.24, +-20.41, -6.75, 4.22, -4.34, -0.04, -2.71, -0.17, -0.43, 0.56, -1.04, 0.15, 0.27, 0.18, -1.06, -1.2 , -2.3 , -1.18, -1.16, -0.8 , -0.31, 0.34, 0.17, 0.69, -0.22, -1.05, -1.04, -0.81, 0.57, 0.26, 0.06, -0.46, 0.23, -0.41, -0.32, -0.53, 0.17, 0.09, -0.53, 0.71, -0.07, +-20.87, -6.75, 3.79, -4.44, 2.23, -1.63, -0.05, -0.24, 0.08, -2.45, -0.43, -0.62, -0.2 , -0.83, -1.16, -2.31, -0.49, -0.46, -1.13, 0.19, 0.45, 0.13, -0.03, -0.11, -0.54, -0.79, -0.64, 0.65, 0.3 , -0.22, -0.54, 0.05, -0.58, 0.18, -0.2 , 0.03, -0.24, -0.37, 0.19, 0.28, +-19.59, -6.88, 3.56, -4.03, 3.2 , -1.26, -0. , -0.54, 0.06, -1.68, -0.99, -0.25, -0.69, -0.92, -1.18, -2.7 , -0.74, 0.39, 0.23, 0.28, -0.11, -0.01, 0.08, 0.18, 0.16, -0.84, 0.14, 0.92, 0.77, 0.58, -0.55, -0.43, 0.07, 0.51, -0.66, -0.54, -0.64, -0.03, 0.27, 0.28, +-18.27, -6.64, 3.51, -3.95, 2.51, -1.73, -1.23, -2.08, 0.02, -1.59, -0.8 , 0.3 , -0.59, -1.79, -1.63, -2.8 , -2.1 , 0.39, 0.53, -0.37, 0.09, 0.34, -0.23, -0. , -0.15, -0.95, 0.55, 0.95, 0.95, 0.27, -0.87, -0.48, 0.21, -0.11, -0.78, -0.01, 0.37, -0.34, 0.15, -0.11, +-16.11, -6.92, 3.46, -2.78, 3.18, -1.97, -0.46, -2.28, -0.05, 0.09, 0.64, 0.92, -0.46, -1.25, -1.24, -2.5 , -2.06, -1.07, 0.92, -0.8 , 0.15, 0.03, 0.02, -0.12, 0.01, -0.73, 0.41, 0.51, 0.45, 0.23, -0.63, -0.35, 0.76, 0.13, -0.74, -0.41, -0.19, 0.02, 0.82, 0.28, +-15.23, -7.11, 3.65, -3.43, 2.29, -2.05, -0.62, -2.5 , -0.59, 0.36, 0.36, 0.46, -0.67, -2.22, -1.72, -2.1 , -2.71, -1.96, 0.79, -0.53, -0.07, -0.68, 0.27, -0.35, -0.1 , -0.26, 0.12, 0.61, -0.1 , 0.06, -0.19, -0.18, 0.46, 0.15, -0.84, -0.52, -0.06, 0.56, 0.51, -0.38, +-14.86, -5.97, 4.23, -4. 
, 1.81, -1.25, -0.08, -3.03, 0.09, 0.9 , 0.76, 0.09, -0.57, -1.51, -1.79, -2.4 , -2.68, -2.2 , 0.48, -0.14, 0.45, 0.19, 1.13, 0.09, -0.44, -0.92, 0.03, 0.6 , -0.28, -0.02, -0.25, -0.36, 0.73, 0.79, -0.1 , -0.64, -0.32, 0.11, -0.36, -0.39, +-14.71, -5.84, 4.19, -4.63, 1.29, -0.86, 0.55, -2.79, -0.13, 1.1 , 0.44, -0.56, -0.01, -1.06, -1.68, -2.74, -2.09, -2.33, 0.23, -0.19, 0.75, 1.04, 1.38, 0.42, 0.06, -0.78, -0.32, 0.52, -0.23, 0.16, -0.57, -0.09, 0.86, 0.54, -0.03, -0.51, -0.5 , 0.28, -0.06, 0.12, +-14.96, -5.79, 4.54, -3.81, 1.32, -0.61, 0.67, -2.61, -0.79, 0.71, 0.31, -0.75, -0.44, -0.73, -1.63, -2.48, -1.74, -1.48, 0.95, -0.69, 0.83, 0.47, 0.13, 0.58, 0.04, -0.08, -0.56, 0.39, 0.15, -0.27, -1.08, -0.16, 0.75, -0.1 , -0.85, -0.55, -0.62, -0.2 , 0.11, 0.82, +-15.02, -5.15, 4.73, -3.72, 1.77, -1.1 , 0.01, -1.68, -0.76, 0.58, 0.56, -0.33, -1.07, -1.15, -0.86, -1.73, -2.08, -1.56, 1.02, -0.4 , 0.24, 0.26, -0.08, 0.04, -1.12, -0.39, -0.94, 0.5 , 0.26, 0.06, -0.36, -0.23, 0.73, 0.45, -0.89, -0.47, -0.39, 0.02, -0.24, 0.54, +-15.79, -6. , 3.39, -4.75, 1.64, -1.14, 0.21, -1.07, -0.32, -0.67, 0.19, -0.12, -1.75, -1.41, -0.91, -0.93, -1.23, -1.53, 0.97, -0.69, 0.18, 0.78, 0.61, 0.64, -0.97, -0.15, 0.02, 0.13, 0.14, -0.29, -0.47, -0.61, -0.36, 0.7 , -0.99, -0.53, -0.01, 0.29, 0.25, 0.63, +-16.02, -5.3 , 4.41, -4.48, 1.44, -1.95, -0.11, -0.64, -0.01, -0.24, -0.04, -0.41, -1.5 , -1.55, -0.88, -0.85, -1.31, -1.79, 0.28, -1.1 , -0.41, 0.95, 0.54, 0.23, -0.44, -0.2 , 0.22, 1.45, 0.48, -0.5 , 0.03, 0.24, 0.14, 0.82, -0.77, -0.65, 0.11, -0.16, -0.43, -0.14, +-13.64, -4.42, 5.16, -3.28, 0.83, -2.48, 0.53, -1.11, -0.25, -0.16, -0.24, 0.04, -1.2 , -1.94, -1.9 , -1.09, -0.92, -2.27, 0.13, -0.68, -0.3 , 1.03, 0.03, -0.12, -1.46, -1.02, -0. 
, 1.1 , 0.15, -0.57, -0.14, -0.04, 0.16, 0.47, -0.05, -0.35, -0.37, -0.38, 0.03, -0.69, +-13.44, -3.74, 4.8 , -2.37, 0.64, -2.17, 0.79, -1.4 , -1.52, -1.36, -0.08, -0.13, -0.84, -1.36, -0.79, -0.74, 0.23, -0.53, 0.21, -0.52, -0.53, 0.62, -0.22, 0.38, -1.06, -1.08, 0.46, 0.35, -0.24, -0.68, -0.2 , 0.04, -0.13, -0.3 , -0.15, -0.45, -0.57, -0.8 , 0.17, -0.56, +-13.07, -3.63, 4.09, -1.67, 0.98, -2.32, 0.12, -0.82, -2.04, -2.17, -0.11, 0.35, -0.14, -1.52, -0.59, -0.78, 0.22, 0.36, 0.3 , -0.87, -0.18, -0.33, -0.28, -0.71, -1.04, -0.59, 0.77, 0.24, 0.3 , -0.78, -0.46, 0.61, -0.22, 0.14, 0.62, -0.4 , -0.86, -0.72, 0.31, 0.43, +-10.39, -2.84, 3.3 , -2.89, -0.34, -4. , -0.16, -1.32, -1.4 , -1.64, -0.29, -0.45, 0.24, -1.18, -0.26, 0.21, -0.04, 0.27, 0.38, -1.44, 0.09, -0.16, -1.15, -1.17, -1.51, -1.67, 0.92, 0.07, 0.4 , -0.21, -0.07, 0.43, -0.57, 0.15, 0.62, -0.72, -0.69, -0.64, 0.1 , 0.8 , +-10.11, -2.13, 2.83, -2.91, -0.56, -4.35, -0.99, -2.07, -1.65, -0.77, 0.66, -0. , 0.15, -0.24, -0.7 , 0.15, -0.41, -0.18, -0.63, -1.76, -0.2 , -0.17, -0.81, -1.14, -1.3 , -1.64, 0.22, 0.34, -0.08, 0.12, -0. 
, 0.22, -0.38, -0.51, -0.39, -0.63, -0.13, -0.3 , -0.28, -0.36, +-12.57, -2.4 , 2.26, -3.4 , -0.74, -3.7 , -1.19, -2.37, -0.45, -0.4 , 0.76, 1.08, -0.12, -0.76, -0.98, -0.5 , -0.66, -0.37, -0.9 , -2.11, -0.91, -1.54, -1.43, -0.88, -1.01, -1.25, 0.15, -0.02, -0.06, 0.22, -0.16, 0.23, -0.5 , -0.5 , -0.53, -0.53, -0.1 , -0.07, -0.21, -0.8 , +-13.4 , -1.89, 2.33, -1.94, 0.55, -2.88, 0.77, -1.89, -1.43, -1.5 , 0.94, 1.13, -0.23, 0.38, -0.41, 0.36, -0.35, -0.14, -0.23, -0.83, -0.87, -1.2 , -1.18, -1.06, -1.02, -0.37, 0.13, 0.55, 0.11, 0.71, -0.17, -0.76, -0.52, -0.35, -0.33, 0.06, 0.69, 0.33, -0.22, -0.14, +-16.04, -1.86, 1.81, -1.42, 0.03, -3.21, 0.67, -2.4 , -1.24, -1.65, 0.73, 1.22, -0.36, 0.58, -0.7 , 0.71, -0.47, -0.02, 0.68, 0.1 , -0.4 , -0.86, -0.96, -0.9 , -0.28, -0.08, -0.38, 0.34, 0.17, 0.51, 0.59, -0.66, -0.91, -0.76, -0.35, -0.22, 0.1 , 0.03, -0.44, 0.38, +-18.52, -1.94, 0.27, -2.45, -0.53, -3.62, -0.66, -3.1 , -1.92, -0.73, 0.61, 1.64, -0.02, 0.93, -0.89, 0.78, -1.01, -0.23, 1.55, 0.06, -0.29, -0.41, -0.39, -0.01, -0.46, -0.17, -0.14, 0.3 , 0.73, 0.57, 0.15, -0.6 , -0.54, -0.15, -0.7 , 0.04, 0.07, 0.17, -0.2 , -0.24, +-20.67, -1.22, -0.67, -2.04, -0.19, -2.09, -0.5 , -3.26, -1.77, 0.46, -0.07, 0.11, -0.63, 0.36, -1.66, 0.91, 0.16, 0.31, 1.89, -1.35, -1.09, -0.3 , -0.9 , -0.35, -0.81, -0.28, 0.1 , 0.22, 0.92, 0.07, 0.3 , -0.34, -0.59, -0.39, -0.35, 0.41, -0.09, 0.47, -0.25, -0.74, +-22.56, 0.22, -0.31, -0.99, -0.92, -1.9 , 0.24, -3.33, -1.97, 0.93, 0.63, 0.2 , 0.27, 1.4 , -1.4 , 1.02, -0.38, -0.69, 0.91, -1.02, -1.11, -0.13, -0.49, -0.58, -1.14, -0.3 , -0.01, -0.12, 0.64, -0.09, 0.23, -0.55, -0.44, 0.05, -0.49, 0.21, -0.3 , 0.25, -0.5 , -0.57, +-25.12, 0.38, -0.51, -0.74, -1.52, -1.66, -0.35, -3.21, -3.29, -0.43, -0.48, -0.52, 0.77, 0.66, -0.93, 1.47, -1.15, -0.98, 0.71, -0.68, -0.72, -0.78, -0.01, -0.27, -1. , -0.62, -0.34, -0.16, 0.72, 0.68, 0.88, -0. 
, 0.33, -0.45, -1.02, 0.38, -0.3 , 0.74, 0.18, 0.05, +-26.65, -0.22, 0.48, 0.36, -2.25, -0.59, 0.8 , -2.28, -3.23, -0.71, -0.15, -0.88, 0.76, -0.42, -1.41, 0.79, -1.39, -0.76, 0.22, -0.17, 0.16, 0.01, -0.97, 0.06, -0.39, -0.98, -1.15, -0.28, 0.68, 0.1 , 0.55, 0.16, -0.41, 0.02, -0.21, 0.41, 0.22, 0.72, 0.02, -0.08, +-28.18, 0.83, 0.31, 0.86, -1.84, -0.5 , 0.37, -1.71, -2.05, -0.67, 0.17, -0.55, 0.65, -0.93, -1.48, 0.49, -0.5 , -0.5 , -0.98, -0.63, -0.24, 0.37, -0.86, 0.2 , 0.13, -0.44, -0.05, 0.26, 0.12, 0.67, 0.43, 0.1 , -0.88, -0.12, -0.65, 0.28, -0.15, -0.24, 0.47, -0.12, +-30.09, 0.59, 0.48, 0.75, -1.7 , 0.55, 1.27, -1.7 , -1.54, 0.41, -0.28, -0.81, 0.4 , -1.04, -1. , 0.39, -0.36, -0.65, -0.53, -0.78, 0.09, 0.91, -0.98, -0.55, 0.7 , 0.43, 0.4 , 0.37, -0.61, -0.25, 0.18, 0.37, -0.2 , -0.04, -0.33, 0. , -0.46, 0.06, 0.23, -0.8 , +-30.94, 1.65, 0.49, 1.19, -1.27, 0.52, 1.53, -0.65, -0.76, -0.09, -0.99, -0.97, 0.01, -1.34, -1.32, -0.17, -0.38, -0.66, -0.29, -0.18, -0.42, 0.3 , -1.34, -0.86, 0.57, -0.09, 0.02, 1.11, -0.31, -0.07, 0.37, 0.39, -0.08, -0.52, -0.75, 0.01, -0.18, 0.45, -0.01, -0.79, +-33.47, 0.84, 0.87, 1.3 , -1.76, 0.52, 1.42, -1.01, -1.5 , -0.05, -0.88, -0.41, 0.08, -0.97, -1.3 , -0.45, -0.37, -0.67, -0.22, -0.09, 0.59, 1.16, -0.96, -0.51, 0.92, -0.13, -0.58, 0.17, -1.19, 0.01, 0.14, -0.22, -0.14, -0.53, 0.18, -0.22, -0.67, 0.05, -0.47, -0.48, +-35.46, 0.35, 0.35, 0.83, -1.86, -0.47, 1.34, -0.84, -2.33, -0.59, -0.88, -0.7 , -0.35, -1.32, -1.41, -0.83, -0.66, -0.33, -0.35, 0.05, -0.81, -0.2 , -1.28, -0.9 , 0.61, -0.39, -0.6 , -0.23, -1.59, -0.03, 0.09, 0.15, 0.06, -0.77, -0.75, 0.18, -0.53, 0.12, -0.06, -0.09, +-36.29, 1.18, 1.48, 1.56, -1.13, -0.4 , 1.08, -0.94, -1.65, -0.76, -0.27, -0.33, 0.16, -0.69, -1.14, -0.71, -0.6 , -0.01, 0.25, -0.39, -0.15, 1.1 , -0.77, -0.01, 0.39, -0.25, -0.28, -0.41, -1.22, -0.31, -0.42, 0.33, 0.57, -0.59, -1.09, 0.35, -0.57, 0.6 , 0.42, -0.22, +-36.83, 1.23, 1.13, 1.07, -1.34, -0.44, 0.27, -1.2 , -1.88, -1.4 , -0.37, 
-0.96, -0.22, -0.38, -0.61, -0.77, 0.06, 0.13, -0.47, -0.67, -0.36, 0.71, -0.18, 1.3 , 1.34, 0.43, -0.3 , -0.4 , -0.62, -0.4 , -0.16, 0.55, 0.5 , -0.01, -0.77, 0.08, -0.96, 0.6 , 0.53, -0.42, +-34.23, 1.7 , 1.22, -0.28, -1.18, 0.2 , -0.24, -1.68, -1.38, -1.42, -0.26, 0.1 , -1.29, 0.21, -1.14, -1.08, 0.33, 0.47, -0.98, -0.66, -1.31, 0.36, -0.03, 0.49, 0.99, 0.5 , -0.25, 0.28, -0.79, -0.05, 0.31, 0.24, 0.83, 0.28, -0.8 , 0.13, -0.33, 0.58, -0.09, -0.58, +-33.07, 2. , 2.13, -0.58, -2.29, -0.25, 0.09, -2.28, -1.65, -1.1 , 0.45, 0.89, -1.05, -0.06, -1.62, -0.6 , 0.56, 0.61, -0.87, -0.88, -1.48, 1.28, 0.9 , 0.52, 0.93, -0.19, -0.55, 0.97, -1.18, -0.65, -0.15, 0.07, 0.75, 0.04, -0.64, 0.61, -0.44, 0.19, -0.01, -0.25, +-33.92, 2.05, 1.2 , -0.36, -1.79, -1.27, -1.4 , -2.7 , -1.18, -0.96, 0.12, 0.06, -1.16, -0.74, -1.71, 0.51, 0.91, 0.74, -0.29, -0.97, -1.16, 1.1 , 1.73, 0.45, 0.49, -0.88, -0.93, 0.82, 0.07, -0.61, -0.66, -0.33, 0.98, 0.47, -0.26, -0.08, -0.85, 0.32, 0.67, 0.17, +-33.89, 1.78, 0.67, -1.01, -1. , -1.34, -1.22, -1.68, -0.82, -1.17, 0.86, 0.44, -2.04, -1.37, -1.3 , 0.62, 0.97, 0.15, -0.32, 0.03, -1.13, 0.1 , 0.7 , 0.99, 0.49, -0.34, -0.83, 0.05, 0.26, -0.5 , -0.14, -0.09, 0.97, 0.84, -0.25, -0.23, -0.71, -0.49, 0.02, -0.49, +-32.95, 1.04, -0.01, -1.73, -1.93, -1.48, -0.84, -1.4 , -1.31, -0.85, 1.2 , -0.73, -1.98, -2.16, -1.91, 0.03, 0.68, -1.36, -1.24, 0.11, -1.92, -0.7 , -0.38, 0.84, 0.77, -0.1 , -0.87, -0.1 , 0.24, -0.08, -0.12, -0.51, 0.07, -0.03, 0.65, -0.35, -1. , -0.12, 0.81, -0.35, +-31.72, 1.3 , -0.02, -0.05, -0.17, 0. , 0.83, -1.11, -0.98, -1.09, 0.99, -1.65, -1.6 , -1.46, -2.18, 0.15, 1.19, -0.99, -0.73, 0.66, -1.97, -0.61, -0.63, 0.71, 1.08, 0.75, -0.78, 1.35, 0.51, 0.45, -0.73, -1.02, 0.24, 0.06, 1.04, -0.55, -1.43, -0.34, 1.23, 0.42, +-31.71, 2.01, -0.12, 0.46, -0.43, -0.89, 0.54, -1.01, -0.23, -0.14, -0.48, -1.97, -1.79, -2.49, -2.97, -0.1 , 1.14, -1.34, -0.64, 0.4 , -2.33, -0.91, -0.35, 0.89, 1.58, 0.26, -1. 
, 0.34, 0.51, -0.04, -1.09, 0.04, 0.24, 0.54, 0.8 , -0.51, -1.01, -0.13, 0.17, 0.3 +}; +// clang-format on + +#endif // MICRO_EXAMPLES_DATA_KWS_H_ diff --git a/micro/examples/classifier/data/mnist.h b/micro/examples/classifier/data/mnist.h new file mode 100644 index 0000000000000000000000000000000000000000..1c0e47d2395af8401a6b3ffb71358c5a00e2c960 --- /dev/null +++ b/micro/examples/classifier/data/mnist.h @@ -0,0 +1,51 @@ +// Copyright 2020 The MACE Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef MICRO_EXAMPLES_DATA_MNIST_H_ +#define MICRO_EXAMPLES_DATA_MNIST_H_ + +// clang-format off +static float data_mnist_4[28*28] = { +0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0., +0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0., +0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0., +0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0., +0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0., +0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0.4,0.1,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0.8,0.4,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0., +0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0.2,1. ,0.2,0. ,0. ,0. ,0. ,0. ,0. ,0.2,1. ,0.5,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0., +0. ,0. ,0. ,0. ,0. 
,0. ,0. ,0. ,0.6,1. ,0.2,0. ,0. ,0. ,0. ,0. ,0. ,0.6,1. ,0.9,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0., +0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0.7,1. ,0.2,0. ,0. ,0. ,0. ,0. ,0.1,0.8,1. ,0.6,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0., +0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,1. ,1. ,0.2,0. ,0. ,0. ,0. ,0. ,0.1,1. ,1. ,0.5,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0., +0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,1. ,1. ,0.2,0. ,0. ,0. ,0. ,0. ,0.2,1. ,0.7,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0., +0. ,0. ,0. ,0. ,0. ,0. ,0. ,0.3,1. ,0.8,0.1,0. ,0. ,0. ,0. ,0. ,0.6,1. ,0.2,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0., +0. ,0. ,0. ,0. ,0. ,0. ,0. ,0.4,1. ,0.8,0. ,0. ,0. ,0. ,0. ,0. ,0.7,1. ,0.2,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0., +0. ,0. ,0. ,0. ,0. ,0. ,0. ,0.4,1. ,0.8,0.1,0. ,0. ,0. ,0. ,0.2,1. ,1. ,0.2,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0., +0. ,0. ,0. ,0. ,0. ,0. ,0. ,0.3,1. ,1. ,0.9,0.7,0.5,0.6,0.2,0.6,1. ,1. ,0.6,0.2,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0., +0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0.5,1. ,1. ,1. ,1. ,1. ,1. ,1. ,1. ,1. ,1. ,0.8,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0., +0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0.1,0.5,0.5,0.5,0.5,0.5,0.9,1. ,0.6,0.1,0.1,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0., +0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0.9,0.9,0.1,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0., +0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0.2,1. ,0.9,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0., +0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0.2,0.9,1. ,0.3,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0., +0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0.3,1. ,0.9,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0., +0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0.8,1. ,0.2,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0., +0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0.1,0.8,1. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0., +0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0.2,1. ,1. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0., +0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0.1,0.9,1. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. 
,0. ,0. ,0. ,0., +0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0., +0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0., +0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. +}; +// clang-format on + +#endif // MICRO_EXAMPLES_DATA_MNIST_H_ diff --git a/micro/examples/classifier/main.cc b/micro/examples/classifier/main.cc new file mode 100644 index 0000000000000000000000000000000000000000..b76cc0b038c11f1b947c9afc31f97a1d57df6514 --- /dev/null +++ b/micro/examples/classifier/main.cc @@ -0,0 +1,52 @@ +// Copyright 2020 The MACE Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include + +#include "data.h" +#include "micro.h" + +namespace micro { +namespace MICRO_MODEL_NAME { + +MaceStatus GetMicroEngineSingleton(MaceMicroEngine **engine); + +} +} // namespace micro + +int main() { + micro::MaceMicroEngine *micro_engine = NULL; + micro::MICRO_MODEL_NAME::GetMicroEngineSingleton(µ_engine); + + micro_engine->RegisterInputData(0, MICRO_DATA_NAME::input, + MICRO_DATA_NAME::input_dims); + micro_engine->Run(); + + float *output_buffer = NULL; + const int32_t *output_dims = NULL; + uint32_t dim_size = 0; + micro_engine->GetOutputData(0, reinterpret_cast(&output_buffer), + &output_dims, &dim_size); + + int32_t output_total_size = 1; + for (int32_t i = 0; i < dim_size; ++i) { + output_total_size *= output_dims[i]; + } + + for (int32_t i = 0; i < output_total_size; ++i) { + printf("%d: %f\n", i, output_buffer[i]); + } + + return 0; +} diff --git a/micro/examples/classifier/mbed-os.lib b/micro/examples/classifier/mbed-os.lib new file mode 100644 index 0000000000000000000000000000000000000000..e36da5dbac97a99819c15e14cb59b9c03860034d --- /dev/null +++ b/micro/examples/classifier/mbed-os.lib @@ -0,0 +1 @@ +https://github.com/ARMmbed/mbed-os/#0db72d0cf26539016efbe38f80d6f2cb7a3d4414 diff --git a/micro/framework/operator.cc b/micro/framework/operator.cc index 3065484ce0a3512d9cce6f7666f26a352e9f9961..9ecdad69fd536d7ebf786d3fd04e528768bf16fc 100644 --- a/micro/framework/operator.cc +++ b/micro/framework/operator.cc @@ -74,6 +74,8 @@ uint32_t Operator::GetInputSize() { } const void *Operator::DoGetInputData(uint32_t idx) { + MACE_ASSERT(idx < GetInputSize()); + const void *data = NULL; const OpIOInfo *input_info = op_context_->input_info(idx); const uint32_t op_def_idx = input_info->op_def_idx_; @@ -94,6 +96,8 @@ const void *Operator::DoGetInputData(uint32_t idx) { } uint32_t Operator::GetInputShapeDimSize(uint32_t idx) { + MACE_ASSERT(idx < GetInputSize()); + uint32_t dim_size = 0; const OpIOInfo *input_info = op_context_->input_info(idx); const 
uint32_t op_def_idx = input_info->op_def_idx_; @@ -115,6 +119,8 @@ uint32_t Operator::GetInputShapeDimSize(uint32_t idx) { } const int32_t *Operator::GetInputShapeDims(uint32_t idx) { + MACE_ASSERT(idx < GetInputSize()); + const int32_t *dims = NULL; const OpIOInfo *input_info = op_context_->input_info(idx); const uint32_t op_def_idx = input_info->op_def_idx_; @@ -138,14 +144,20 @@ uint32_t Operator::GetOutputSize() { } DataType Operator::GetOutputDataType(uint32_t idx) { + MACE_ASSERT(idx < GetOutputSize()); + return op_def_->output_type(idx); } void *Operator::DoGetOutputData(uint32_t idx) { + MACE_ASSERT(idx < GetOutputSize()); + return engine_config_->tensor_mem_ + op_def_->mem_offset(idx); } uint32_t Operator::GetOutputShapeDimSize(uint32_t idx) { + MACE_ASSERT(idx < GetOutputSize()); + uint32_t dim_size = 0; model::OutputShape *output_shape = const_cast(op_context_->output_resize_shape(idx)); @@ -156,6 +168,8 @@ uint32_t Operator::GetOutputShapeDimSize(uint32_t idx) { } const int32_t *Operator::GetOutputShapeDims(uint32_t idx) { + MACE_ASSERT(idx < GetOutputSize()); + const int32_t *dims = NULL; model::OutputShape *output_shape = const_cast(op_context_->output_resize_shape(idx)); @@ -167,6 +181,8 @@ const int32_t *Operator::GetOutputShapeDims(uint32_t idx) { MaceStatus Operator::ResizeOutputShape(uint32_t idx, uint32_t dim_size, const int32_t *dims) { + MACE_ASSERT(idx < GetOutputSize()); + model::OutputShape *output_shape = const_cast(op_context_->output_resize_shape(idx)); #ifndef MACE_MICRO_NDEBUG @@ -201,6 +217,44 @@ MaceStatus Operator::ResizeOutputShape(uint32_t idx, uint32_t dim_size, return MACE_SUCCESS; } +QuantizeInfo Operator::GetInputQuantizeInfo(uint32_t idx) { + MACE_ASSERT(idx < GetInputSize()); + + QuantizeInfo quantize_info = {0.0f, 0}; + const OpIOInfo *input_info = op_context_->input_info(idx); + const uint32_t op_def_idx = input_info->op_def_idx_; + if (kIdxConstTensor == op_def_idx) { + const model::ConstTensor *const_tensor = + 
engine_config_->net_def_->tensor(input_info->output_idx_); + quantize_info.scale = const_tensor->scale(); + quantize_info.zero = const_tensor->zero_point(); + return quantize_info; + } else if (kIdxModelInput == op_def_idx) { + MACE_ASSERT1(false, "Unexpected, the model input has no quantize info"); + } else { + const model::OperatorDef *pre_op_def = + engine_config_->net_def_->op(op_def_idx); + model::QuantizeActivationInfo quantize_activation_info = + pre_op_def->quantize_info(input_info->output_idx_); + quantize_info.scale = quantize_activation_info.scale(); + quantize_info.zero = quantize_activation_info.zero_point(); + return quantize_info; + } + + return quantize_info; +} + +QuantizeInfo Operator::GetOutputQuantizeInfo(uint32_t idx) { + MACE_ASSERT(idx < GetOutputSize()); + + QuantizeInfo quantize_info; + model::QuantizeActivationInfo quantize_activation_info = + op_def_->quantize_info(idx); + quantize_info.scale = quantize_activation_info.scale(); + quantize_info.zero = quantize_activation_info.zero_point(); + return quantize_info; +} + #ifndef MACE_DEFINE_GET_ARG_BY_NAME_FUNC #define MACE_DEFINE_GET_ARG_BY_NAME_FUNC(T, FUNC) \ template <> \ diff --git a/micro/framework/operator.h b/micro/framework/operator.h index 6269773e02a57637bc1abe3dec256bfa1056d842..a053f78f57d398a675d8364bca5a9a8cad35bba7 100644 --- a/micro/framework/operator.h +++ b/micro/framework/operator.h @@ -17,8 +17,8 @@ #include "micro/base/logging.h" #include "micro/base/types.h" -#include "micro/include/public/micro.h" #include "micro/framework/scratch_buffer.h" +#include "micro/include/public/micro.h" namespace micro { @@ -84,6 +84,9 @@ class Operator { const int32_t *input_dims); MaceStatus ReuseInputBufferForOutput(uint32_t output_idx, uint32_t input_idx); + QuantizeInfo GetInputQuantizeInfo(uint32_t idx); + QuantizeInfo GetOutputQuantizeInfo(uint32_t idx); + template const T *GetInputData(uint32_t idx) { return static_cast(DoGetInputData(idx)); @@ -101,7 +104,7 @@ class Operator { const 
model::OperatorDef *op_def_; MaceMicroEngineConfig *engine_config_; - private: + protected: OpContext *op_context_; }; diff --git a/micro/include/public/micro.h b/micro/include/public/micro.h index 6618b64b346669e8a28a79cde26d72d19e9e4e21..eee2635586550b270afee7c729ce3ea4411291cd 100644 --- a/micro/include/public/micro.h +++ b/micro/include/public/micro.h @@ -17,7 +17,9 @@ #include -#include "micro/include/port/define.h" +#ifndef NULL +#define NULL 0 +#endif namespace micro { @@ -61,7 +63,7 @@ class Graph; class Operator; } // namespace framework -struct MACE_API MaceMicroEngineConfig { +struct MaceMicroEngineConfig { model::NetDef *net_def_; const uint8_t *model_data_; framework::Graph *graph_; @@ -73,7 +75,7 @@ struct MACE_API MaceMicroEngineConfig { uint32_t scratch_buffer_size_; }; -class MACE_API MaceMicroEngine { +class MaceMicroEngine { public: MaceMicroEngine() {} ~MaceMicroEngine() {} diff --git a/micro/include/utils/bfloat16.h b/micro/include/utils/bfloat16.h index b293548d7870350091f6dccbd4bd1b5842fdfb7f..421626cbad729ed40aebbfeb02c0f4a8dafef4bf 100644 --- a/micro/include/utils/bfloat16.h +++ b/micro/include/utils/bfloat16.h @@ -32,7 +32,14 @@ union Sphinx { class BFloat16 { public: - BFloat16(); + BFloat16() {} + + explicit BFloat16(float value) { data_ = Sphinx(value).i >> 16; } + + explicit BFloat16(int value) { + data_ = Sphinx(static_cast(value)).i >> 16; + } + operator float() const { return Sphinx(static_cast(data_ << 16)).f; diff --git a/micro/model/operator_def.cc b/micro/model/operator_def.cc index 31ffa678dafad659fc724cbf586cf739caf444b8..b71d033b450efc13c000798721854abf6f27b86a 100644 --- a/micro/model/operator_def.cc +++ b/micro/model/operator_def.cc @@ -23,11 +23,22 @@ MACE_DEFINE_STRING_FUNC(OperatorDef, name, name_) MACE_DEFINE_STRING_FUNC(OperatorDef, type, type_) MACE_DEFINE_OBJECT_FUNC(OperatorDef, int32_t, device_type) MACE_DEFINE_PTR_ARRAY_FUNC(OperatorDef, Argument, arg, args_) -MACE_DEFINE_PTR_ARRAY_FUNC(OperatorDef, OutputShape, - 
output_shape, output_shapes_) +MACE_DEFINE_PTR_ARRAY_FUNC(OperatorDef, + OutputShape, + output_shape, + output_shapes_) MACE_DEFINE_ARRAY_FUNC(OperatorDef, DataType, output_type, output_types_) +MACE_DEFINE_ARRAY_FUNC(OperatorDef, + QuantizeActivationInfo, + quantize_info, + quantize_info_); // the mem_offset is the mem_id in proto file MACE_DEFINE_ARRAY_FUNC(OperatorDef, int32_t, mem_offset, mem_offsets_) +MACE_DEFINE_OBJECT_FUNC(QuantizeActivationInfo, float, scale); +MACE_DEFINE_OBJECT_FUNC(QuantizeActivationInfo, int32_t, zero_point); +MACE_DEFINE_OBJECT_FUNC(QuantizeActivationInfo, float, minval); +MACE_DEFINE_OBJECT_FUNC(QuantizeActivationInfo, float, maxval); + } // namespace model } // namespace micro diff --git a/micro/model/operator_def.h b/micro/model/operator_def.h index 92695ad90a14fd3482e9818662f53a9f4a35db0a..8ad01ebfbea3d25bc21d7a30b3e4af752041741b 100644 --- a/micro/model/operator_def.h +++ b/micro/model/operator_def.h @@ -23,6 +23,22 @@ namespace micro { namespace model { +class QuantizeActivationInfo { + public: + MACE_DEFINE_HARD_CODE_MAGIC(QuantizeActivationInfo) + + MACE_DECLARE_OBJECT_FUNC(float, scale); + MACE_DECLARE_OBJECT_FUNC(int32_t, zero_point); + MACE_DECLARE_OBJECT_FUNC(float, minval); + MACE_DECLARE_OBJECT_FUNC(float, maxval); + + private: + SerialFloat scale_; + SerialInt32 zero_point_; + SerialFloat minval_; + SerialFloat maxval_; +}; + class OperatorDef : public Serialize { public: MACE_DEFINE_HARD_CODE_MAGIC(OperatorDef) @@ -35,6 +51,7 @@ class OperatorDef : public Serialize { MACE_DECLARE_PTR_ARRAY_FUNC(Argument, arg); MACE_DECLARE_PTR_ARRAY_FUNC(OutputShape, output_shape); MACE_DECLARE_ARRAY_FUNC(DataType, output_type); + MACE_DECLARE_ARRAY_FUNC(QuantizeActivationInfo, quantize_info); // the mem_offset is the mem_id in proto file MACE_DECLARE_ARRAY_FUNC(int32_t, mem_offset); @@ -48,6 +65,7 @@ class OperatorDef : public Serialize { SerialArray args_; SerialArray output_shapes_; SerialArray output_types_; + SerialArray 
quantize_info_; SerialArray mem_offsets_; }; diff --git a/micro/ops/CMakeLists.txt b/micro/ops/CMakeLists.txt index 43ddce0a11c0fa03b6349f923fb8a588f0769fa8..0825e180074738bf60949484c9aab46af21a57f1 100644 --- a/micro/ops/CMakeLists.txt +++ b/micro/ops/CMakeLists.txt @@ -1,7 +1,6 @@ set(MICRO_OPS_SRCS shape.cc reduce.cc - reshape.cc matmul.cc nhwc/depthwise_conv_2d_ref.cc nhwc/conv_2d_c4_s4.cc @@ -31,12 +30,13 @@ set(MICRO_OPS_SRCS activation.cc ) +add_subdirectory(nhwc) + add_library(micro_ops ${MICRO_OPS_SRCS} ) target_link_libraries(micro_ops - micro_base - micro_framework + PRIVATE micro_base ) diff --git a/micro/ops/eltwise.cc b/micro/ops/eltwise.cc index 98f3897ea96f2b2eaf989e529fa26b6c851dfbbe..975a60cecfc7825dcae055256fe2d2fbda64de57 100644 --- a/micro/ops/eltwise.cc +++ b/micro/ops/eltwise.cc @@ -19,14 +19,6 @@ namespace micro { namespace ops { namespace eltwise { -bool ShapeIsEqual(const int32_t *dims0, - const int32_t *dims1, uint32_t dim_size) { - while (--dim_size > 0) { - if (dims0[dim_size] != dims1[dim_size]) - return false; - } - return true; -} int32_t GetIndex(const int32_t *shape, const int32_t *index, int32_t dim_size) { diff --git a/micro/ops/eltwise.h b/micro/ops/eltwise.h index 263082cca9225438dcaac456a983a4a47510d512..fd08114206b0f4acf1676912a89a3caa8e1fe708 100644 --- a/micro/ops/eltwise.h +++ b/micro/ops/eltwise.h @@ -19,31 +19,13 @@ #include "micro/base/utils.h" #include "micro/framework/operator.h" #include "micro/framework/scratch_buffer.h" +#include "micro/base/types.h" namespace micro { namespace ops { -namespace eltwise { // for redefine -enum Type { - SUM = 0, - SUB = 1, - PROD = 2, - DIV = 3, - MIN = 4, - MAX = 5, - NEG = 6, - ABS = 7, - SQR_DIFF = 8, - POW = 9, - EQUAL = 10, - FLOOR_DIV = 11, - CLIP = 12, - SIGN = 13, - NONE = 14, -}; +namespace eltwise { -bool ShapeIsEqual(const int32_t *dims0, - const int32_t *dims1, uint32_t dim_size); int32_t GetIndex(const int32_t *shape, const int32_t *index, int32_t dim_size); void 
IncreaseIndex(const int32_t *shape, int32_t **index, int32_t dim_size); template @@ -202,9 +184,8 @@ class EltwiseOp : public framework::Operator { if (input1_size == 1) { TensorScalarEltwise(type_, input0_, input1_[0], input0_size, swapped, output_ptr); - } else if (eltwise::ShapeIsEqual(input0_dims_, - input1_shape, - input0_dim_size_)) { + } else if (base::ShapeIsEqual(input0_dims_, input1_shape, + input0_dim_size_)) { TensorEltwise(type_, input0_, input1_, input0_size, swapped, output_ptr); } else if (need_general_broadcast) { diff --git a/micro/ops/matmul.h b/micro/ops/matmul.h index 94d9b03507a8c24064cc5db0e71aa03514c4acca..23cd0d0dd4884a3ec004137ad19b7dd507a0c2a8 100644 --- a/micro/ops/matmul.h +++ b/micro/ops/matmul.h @@ -40,10 +40,8 @@ class MatMulOp : public framework::Operator { uint32_t input_b_dim_size_; const mifloat *bias_; -#ifndef MACE_MICRO_NDEBUG const int32_t *bias_dims_; uint32_t bias_dim_size_; -#endif mifloat *output_; diff --git a/micro/ops/nhwc/CMakeLists.txt b/micro/ops/nhwc/CMakeLists.txt new file mode 100644 index 0000000000000000000000000000000000000000..ce890e12b0f665d3b0338f56b9747c4315ae5025 --- /dev/null +++ b/micro/ops/nhwc/CMakeLists.txt @@ -0,0 +1,3 @@ +if(MACE_MICRO_ENABLE_CMSIS) + add_subdirectory(cmsis_nn) +endif() diff --git a/micro/ops/nhwc/cmsis_nn/CMakeLists.txt b/micro/ops/nhwc/cmsis_nn/CMakeLists.txt new file mode 100644 index 0000000000000000000000000000000000000000..ca6db7303fdf0f4caf07f4717502be4e7f5fb1d7 --- /dev/null +++ b/micro/ops/nhwc/cmsis_nn/CMakeLists.txt @@ -0,0 +1,16 @@ +add_library(micro_ops_nhwc_cmsis_nn + arm_conv_2d_int8.cc + arm_pooling_int8.cc + arm_softmax_int8.cc + arm_mat_mul_int8.cc + arm_eltwise_int8.cc + arm_depthwise_conv_2d_int8.cc + dequantize.cc + quantize.cc + utilities.cc +) + +target_link_libraries(micro_ops_nhwc_cmsis_nn + PRIVATE micro_base + PRIVATE CMSISNN +) diff --git a/micro/ops/nhwc/cmsis_nn/arm_conv_2d_int8.cc b/micro/ops/nhwc/cmsis_nn/arm_conv_2d_int8.cc new file mode 100644 
index 0000000000000000000000000000000000000000..f886be8d7317b0f421f6db6d3e74991a07759c8b --- /dev/null +++ b/micro/ops/nhwc/cmsis_nn/arm_conv_2d_int8.cc @@ -0,0 +1,135 @@ +// Copyright 2020 The MACE Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "micro/ops/nhwc/cmsis_nn/arm_conv_2d_int8.h" + +#include + +#include "micro/base/logger.h" +#include "micro/framework/op_context.h" +#include "micro/framework/scratch_buffer.h" +#include "micro/model/const_tensor.h" +#include "micro/model/net_def.h" +#include "micro/ops/nhwc/cmsis_nn/utilities.h" + +namespace micro { +namespace ops { + +MaceStatus ArmConv2dInt8Op::Compute(int32_t (&output_dims)[4]) { + MACE_ASSERT(filter_dims_[0] == output_dims[3] && + input_dims_[3] == filter_dims_[3]); + + QuantizeInfo input_quantize_info = GetInputQuantizeInfo(INPUT); + QuantizeInfo filter_quantize_info = GetInputQuantizeInfo(FILTER); + QuantizeInfo output_quantize_info = GetOutputQuantizeInfo(OUTPUT); + + double double_multiplier = input_quantize_info.scale * + filter_quantize_info.scale / + output_quantize_info.scale; + int32_t multiplier; + int32_t shift; + QuantizeMultiplier(double_multiplier, &multiplier, &shift); + + cmsis_nn_conv_params conv_params; + /// input_offset is negative + conv_params.input_offset = -input_quantize_info.zero; + conv_params.output_offset = output_quantize_info.zero; + conv_params.activation.min = -128; + conv_params.activation.max = 127; + 
conv_params.stride.w = strides_[1]; + conv_params.stride.h = strides_[0]; + conv_params.padding.w = padding_sizes_[1] / 2; + conv_params.padding.h = padding_sizes_[0] / 2; + conv_params.dilation.w = dilations_[1]; + conv_params.dilation.h = dilations_[0]; + + ScratchBuffer scratch_buffer(engine_config_); + + cmsis_nn_per_channel_quant_params quant_params; + quant_params.multiplier = scratch_buffer.GetBuffer(output_dims[3]); + quant_params.shift = scratch_buffer.GetBuffer(output_dims[3]); + for (int32_t i = 0; i < output_dims[3]; ++i) { + quant_params.multiplier[i] = multiplier; + quant_params.shift[i] = shift; + } + + MACE_ASSERT(input_dims_[0] == 1); + MACE_ASSERT(dilations_[0] == 1 && dilations_[1] == 1); + + cmsis_nn_dims input_dims; + input_dims.n = input_dims_[0]; + input_dims.h = input_dims_[1]; + input_dims.w = input_dims_[2]; + input_dims.c = input_dims_[3]; + const int8_t *input_data = reinterpret_cast(input_); + + cmsis_nn_dims filter_dims; + filter_dims.n = filter_dims_[0]; + filter_dims.h = filter_dims_[1]; + filter_dims.w = filter_dims_[2]; + filter_dims.c = filter_dims_[3]; + const int8_t *filter_data = reinterpret_cast(filter_); + + cmsis_nn_dims bias_dims; + bias_dims.n = 1; + bias_dims.h = 1; + bias_dims.w = 1; + bias_dims.c = output_dims[3]; + int32_t *bias_data = + const_cast(reinterpret_cast(bias_)); + if (bias_data == NULL) { + bias_data = scratch_buffer.GetBuffer(output_dims[3]); + for (int32_t i = 0; i < bias_dims.c; ++i) { + bias_data[i] = 0; + } + } + + cmsis_nn_dims cmn_output_dims; + cmn_output_dims.n = output_dims[0]; + cmn_output_dims.h = output_dims[1]; + cmn_output_dims.w = output_dims[2]; + cmn_output_dims.c = output_dims[3]; + int8_t *output_data = reinterpret_cast(output_); + + cmsis_nn_context cmn_context; + cmn_context.size = arm_convolve_wrapper_s8_get_buffer_size( + &conv_params, &input_dims, &filter_dims, &cmn_output_dims); + if (cmn_context.size > 0) { + cmn_context.buf = scratch_buffer.GetBuffer(cmn_context.size); + } else { 
+ cmn_context.buf = NULL; + } + + arm_status status = arm_convolve_wrapper_s8( + &cmn_context, &conv_params, &quant_params, &input_dims, input_data, + &filter_dims, filter_data, &bias_dims, bias_data, &cmn_output_dims, + output_data); + MACE_ASSERT(status == ARM_MATH_SUCCESS) + << "failed in arm_convolve_wrapper_s8"; + + return MACE_SUCCESS; +} + +MaceStatus ArmConv2dInt8Op::Run() { + int32_t output_dims[4] = {0}; + InitPaddingAndOutputSize(input_dims_, filter_dims_, FLOOR, output_dims); + ResizeOutputShape(0, 4, output_dims); + + MACE_RETURN_IF_ERROR(Compute(output_dims)); + + return MACE_SUCCESS; +} + +} // namespace ops +} // namespace micro diff --git a/micro/ops/nhwc/cmsis_nn/arm_conv_2d_int8.h b/micro/ops/nhwc/cmsis_nn/arm_conv_2d_int8.h new file mode 100644 index 0000000000000000000000000000000000000000..d7448171884c854ec91a1361b3317c40cc6ea017 --- /dev/null +++ b/micro/ops/nhwc/cmsis_nn/arm_conv_2d_int8.h @@ -0,0 +1,35 @@ +// Copyright 2020 The MACE Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#ifndef MICRO_OPS_NHWC_CMSIS_NN_ARM_CONV_2D_INT8_H_ +#define MICRO_OPS_NHWC_CMSIS_NN_ARM_CONV_2D_INT8_H_ + +#include "micro/ops/nhwc/base/conv_2d_base.h" +#include "micro/ops/utils/activation.h" + +namespace micro { +namespace ops { + +class ArmConv2dInt8Op : public Conv2dBase { + public: + virtual MaceStatus Run(); + + private: + MaceStatus Compute(int32_t (&output_dims)[4]); +}; + +} // namespace ops +} // namespace micro + +#endif // MICRO_OPS_NHWC_CMSIS_NN_ARM_CONV_2D_INT8_H_ diff --git a/micro/ops/nhwc/cmsis_nn/arm_depthwise_conv_2d_int8.cc b/micro/ops/nhwc/cmsis_nn/arm_depthwise_conv_2d_int8.cc new file mode 100644 index 0000000000000000000000000000000000000000..e3746fe80d5cf8e9917eb41b262dc8ebceb83eb9 --- /dev/null +++ b/micro/ops/nhwc/cmsis_nn/arm_depthwise_conv_2d_int8.cc @@ -0,0 +1,136 @@ +// Copyright 2020 The MACE Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include "micro/ops/nhwc/cmsis_nn/arm_depthwise_conv_2d_int8.h" + +#include + +#include "micro/base/logger.h" +#include "micro/framework/op_context.h" +#include "micro/framework/scratch_buffer.h" +#include "micro/model/const_tensor.h" +#include "micro/model/net_def.h" +#include "micro/ops/nhwc/cmsis_nn/utilities.h" + +namespace micro { +namespace ops { + +MaceStatus ArmDepthwiseConv2dInt8Op::Compute(int32_t (&output_dims)[4]) { + QuantizeInfo input_quantize_info = GetInputQuantizeInfo(INPUT); + QuantizeInfo filter_quantize_info = GetInputQuantizeInfo(FILTER); + QuantizeInfo output_quantize_info = GetOutputQuantizeInfo(OUTPUT); + + double double_multiplier = input_quantize_info.scale * + filter_quantize_info.scale / + output_quantize_info.scale; + int32_t multiplier; + int32_t shift; + QuantizeMultiplier(double_multiplier, &multiplier, &shift); + + cmsis_nn_dw_conv_params dw_conv_params; + dw_conv_params.ch_mult = filter_dims_[0]; + /// input_offset is negative + dw_conv_params.input_offset = -input_quantize_info.zero; + dw_conv_params.output_offset = output_quantize_info.zero; + dw_conv_params.activation.min = -128; + dw_conv_params.activation.max = 127; + dw_conv_params.stride.w = strides_[1]; + dw_conv_params.stride.h = strides_[0]; + dw_conv_params.padding.w = padding_sizes_[1] / 2; + dw_conv_params.padding.h = padding_sizes_[0] / 2; + dw_conv_params.dilation.w = dilations_[1]; + dw_conv_params.dilation.h = dilations_[0]; + + ScratchBuffer scratch_buffer(engine_config_); + + cmsis_nn_per_channel_quant_params quant_params; + quant_params.multiplier = scratch_buffer.GetBuffer(output_dims[3]); + quant_params.shift = scratch_buffer.GetBuffer(output_dims[3]); + for (int32_t i = 0; i < output_dims[3]; ++i) { + quant_params.multiplier[i] = multiplier; + quant_params.shift[i] = shift; + } + + MACE_ASSERT(input_dims_[0] == 1); + MACE_ASSERT(filter_dims_[0] == 1); + MACE_ASSERT(dilations_[0] == 1 && dilations_[1] == 1); + + cmsis_nn_dims input_dims; + input_dims.n = 
input_dims_[0]; + input_dims.h = input_dims_[1]; + input_dims.w = input_dims_[2]; + input_dims.c = input_dims_[3]; + const int8_t *input_data = reinterpret_cast(input_); + + cmsis_nn_dims filter_dims; + filter_dims.n = filter_dims_[0]; + filter_dims.h = filter_dims_[1]; + filter_dims.w = filter_dims_[2]; + filter_dims.c = filter_dims_[3]; + const int8_t *filter_data = reinterpret_cast(filter_); + + cmsis_nn_dims bias_dims; + bias_dims.n = 1; + bias_dims.h = 1; + bias_dims.w = 1; + bias_dims.c = output_dims[3]; + int32_t *bias_data = + const_cast(reinterpret_cast(bias_)); + if (bias_data == NULL) { + bias_data = scratch_buffer.GetBuffer(output_dims[3]); + for (int32_t i = 0; i < bias_dims.c; ++i) { + bias_data[i] = 0; + } + } + + cmsis_nn_dims cmn_output_dims; + cmn_output_dims.n = output_dims[0]; + cmn_output_dims.h = output_dims[1]; + cmn_output_dims.w = output_dims[2]; + cmn_output_dims.c = filter_dims.c * filter_dims.n; + int8_t *output_data = reinterpret_cast(output_); + + cmsis_nn_context cmn_context; + cmn_context.size = arm_depthwise_conv_wrapper_s8_get_buffer_size( + &dw_conv_params, &input_dims, &filter_dims, &cmn_output_dims); + + if (cmn_context.size > 0) { + cmn_context.buf = scratch_buffer.GetBuffer(cmn_context.size); + } else { + cmn_context.buf = NULL; + } + + arm_status status = arm_depthwise_conv_wrapper_s8( + &cmn_context, &dw_conv_params, &quant_params, &input_dims, input_data, + &filter_dims, filter_data, &bias_dims, bias_data, &cmn_output_dims, + output_data); + MACE_ASSERT(status == ARM_MATH_SUCCESS) + << "failed in arm_depthwise_conv_wrapper_s8"; + + return MACE_SUCCESS; +} + +MaceStatus ArmDepthwiseConv2dInt8Op::Run() { + int32_t output_dims[4] = {0}; + InitPaddingAndOutputSize(input_dims_, filter_dims_, FLOOR, output_dims); + output_dims[3] *= input_dims_[3]; + ResizeOutputShape(0, 4, output_dims); + + MACE_RETURN_IF_ERROR(Compute(output_dims)); + + return MACE_SUCCESS; +} + +} // namespace ops +} // namespace micro diff --git 
a/micro/ops/nhwc/cmsis_nn/arm_depthwise_conv_2d_int8.h b/micro/ops/nhwc/cmsis_nn/arm_depthwise_conv_2d_int8.h new file mode 100644 index 0000000000000000000000000000000000000000..ce41a6ebc143442f002a8146f49bfcf25889ed37 --- /dev/null +++ b/micro/ops/nhwc/cmsis_nn/arm_depthwise_conv_2d_int8.h @@ -0,0 +1,35 @@ +// Copyright 2020 The MACE Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef MICRO_OPS_NHWC_CMSIS_NN_ARM_DEPTHWISE_CONV_2D_INT8_H_ +#define MICRO_OPS_NHWC_CMSIS_NN_ARM_DEPTHWISE_CONV_2D_INT8_H_ + +#include "micro/ops/nhwc/base/depthwise_conv_2d_base.h" +#include "micro/ops/utils/activation.h" + +namespace micro { +namespace ops { + +class ArmDepthwiseConv2dInt8Op : public DepthwiseConv2dBase { + public: + virtual MaceStatus Run(); + + private: + MaceStatus Compute(int32_t (&output_dims)[4]); +}; + +} // namespace ops +} // namespace micro + +#endif // MICRO_OPS_NHWC_CMSIS_NN_ARM_DEPTHWISE_CONV_2D_INT8_H_ diff --git a/micro/ops/nhwc/cmsis_nn/arm_eltwise_int8.cc b/micro/ops/nhwc/cmsis_nn/arm_eltwise_int8.cc new file mode 100644 index 0000000000000000000000000000000000000000..6cb44e35a20410385c6f8143d22b84ba4f696063 --- /dev/null +++ b/micro/ops/nhwc/cmsis_nn/arm_eltwise_int8.cc @@ -0,0 +1,109 @@ +// Copyright 2020 The MACE Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "micro/ops/nhwc/cmsis_nn/arm_eltwise_int8.h" + +#include + +#include "micro/base/logging.h" +#include "micro/base/types.h" +#include "micro/base/utils.h" +#include "micro/ops/nhwc/cmsis_nn/utilities.h" + +namespace micro { +namespace ops { + +MaceStatus ArmEltwiseInt8Op::OnInit() { + MACE_ASSERT(GetInputSize() == 2); + + input0_ = GetInputData(INPUT0); + input0_dims_ = GetInputShapeDims(INPUT0); + input0_dim_size_ = GetInputShapeDimSize(INPUT0); + + input1_ = GetInputData(INPUT1); + input1_dims_ = GetInputShapeDims(INPUT1); + input1_dim_size_ = GetInputShapeDimSize(INPUT1); + + output_ = GetOutputData(OUTPUT); + + type_ = static_cast( + GetArgByName("type", static_cast(NONE))); + coeff_ = GetRepeatArgByName("coeff", &coeff_size_); + + return MACE_SUCCESS; +} + +MaceStatus ArmEltwiseInt8Op::Run() { + MACE_ASSERT1(GetInputSize() == 2, + "ArmEltwiseInt8Op only supports 2 inputs"); + MACE_ASSERT(input0_dim_size_ == input1_dim_size_); + MACE_ASSERT(base::ShapeIsEqual(input0_dims_, input1_dims_, input1_dim_size_)); + + MACE_RETURN_IF_ERROR( + ResizeOutputShape(OUTPUT, input0_dim_size_, input0_dims_)); + + if (type_ == eltwise::SUM) { + QuantizeInfo input_quantize_info0 = GetInputQuantizeInfo(0); + QuantizeInfo input_quantize_info1 = GetInputQuantizeInfo(1); + QuantizeInfo output_quantize_info = GetOutputQuantizeInfo(OUTPUT); + + int32_t input0_offset = -input_quantize_info0.zero; + double input0_scale = input_quantize_info0.scale; + int32_t input1_offset = -input_quantize_info1.zero; + double input1_scale = input_quantize_info1.scale; + 
int32_t output_offset = output_quantize_info.zero; + double output_scale = output_quantize_info.scale; + + int32_t left_shift = 20; + + const double twice_max_input_scale = + 2 * static_cast(base::max(input0_scale, input1_scale)); + const double real_input0_multiplier = + static_cast(input0_scale) / twice_max_input_scale; + const double real_input1_multiplier = + static_cast(input1_scale) / twice_max_input_scale; + const double real_output_multiplier = + twice_max_input_scale / + ((1 << left_shift) * static_cast(output_scale)); + + int32_t input0_multiplier = 0; + int32_t input0_shift = 0; + QuantizeMultiplier(real_input0_multiplier, &input0_multiplier, + &input0_shift); + + int32_t input1_multiplier = 0; + int32_t input1_shift = 0; + QuantizeMultiplier(real_input1_multiplier, &input1_multiplier, + &input1_shift); + + int32_t output_multiplier = 0; + int32_t output_shift = 0; + QuantizeMultiplier(real_output_multiplier, &output_multiplier, + &output_shift); + + int32_t element_size = base::GetShapeSize(input0_dim_size_, input0_dims_); + arm_elementwise_add_s8(input0_, input1_, input0_offset, input0_multiplier, + input0_shift, input1_offset, input1_multiplier, + input1_shift, left_shift, output_, output_offset, + output_multiplier, output_shift, -128, 127, + element_size); + } else { + MACE_ASSERT1(false, "Unsupported ArmEltwiseInt8Op type"); + } + + return MACE_SUCCESS; +} + +} // namespace ops +} // namespace micro diff --git a/micro/ops/nhwc/cmsis_nn/arm_eltwise_int8.h b/micro/ops/nhwc/cmsis_nn/arm_eltwise_int8.h new file mode 100644 index 0000000000000000000000000000000000000000..6e8a0aea8ff6e4e36eab4d19ef968330a67df029 --- /dev/null +++ b/micro/ops/nhwc/cmsis_nn/arm_eltwise_int8.h @@ -0,0 +1,62 @@ +// Copyright 2020 The MACE Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef MICRO_OPS_NHWC_CMSIS_NN_ARM_ELTWISE_INT8_H_ +#define MICRO_OPS_NHWC_CMSIS_NN_ARM_ELTWISE_INT8_H_ + +#include "micro/base/logger.h" +#include "micro/base/logging.h" +#include "micro/base/types.h" +#include "micro/base/utils.h" +#include "micro/framework/op_context.h" +#include "micro/framework/operator.h" +#include "micro/framework/scratch_buffer.h" +#include "micro/model/const_tensor.h" +#include "micro/model/net_def.h" + +namespace micro { +namespace ops { + +class ArmEltwiseInt8Op : public framework::Operator { + public: + MaceStatus OnInit(); + + MaceStatus Run(); + + private: + const int8_t *input0_; + const int32_t *input0_dims_; + uint32_t input0_dim_size_; + + const int8_t *input1_; + const int32_t *input1_dims_; + uint32_t input1_dim_size_; + + int8_t *output_; + + eltwise::Type type_; + const float *coeff_; + uint32_t coeff_size_; + int32_t scalar_input_index_; + bool nchw_; + + MACE_OP_INPUT_TAGS(INPUT0, INPUT1); + MACE_OP_OUTPUT_TAGS(OUTPUT); +}; + +} // namespace ops +} // namespace micro + + +#endif // MICRO_OPS_NHWC_CMSIS_NN_ARM_ELTWISE_INT8_H_ diff --git a/micro/ops/nhwc/cmsis_nn/arm_mat_mul_int8.cc b/micro/ops/nhwc/cmsis_nn/arm_mat_mul_int8.cc new file mode 100644 index 0000000000000000000000000000000000000000..e2dd8fd2dacde6dea88bad682a14643c9521055f --- /dev/null +++ b/micro/ops/nhwc/cmsis_nn/arm_mat_mul_int8.cc @@ -0,0 +1,152 @@ +// Copyright 2020 The MACE Authors. All Rights Reserved. 
+// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "micro/ops/nhwc/cmsis_nn/arm_mat_mul_int8.h" + +#include + +#include "micro/base/logger.h" +#include "micro/base/logging.h" +#include "micro/base/utils.h" +#include "micro/framework/op_context.h" +#include "micro/framework/scratch_buffer.h" +#include "micro/model/argument.h" +#include "micro/model/const_tensor.h" +#include "micro/model/net_def.h" +#include "micro/ops/nhwc/cmsis_nn/utilities.h" + +namespace micro { +namespace ops { + +MaceStatus ArmMatMulInt8Op::OnInit() { + transpose_a_ = GetArgByName("transpose_a", false); + transpose_b_ = GetArgByName("transpose_b", false); + input_a_ = GetInputData(INPUT_A); + input_b_ = GetInputData(INPUT_B); + output_ = GetOutputData(OUTPUT); + + if (GetInputSize() >= 3) { + bias_ = GetInputData(BIAS); + bias_dim_size_ = GetInputShapeDimSize(BIAS); + bias_dims_ = GetInputShapeDims(BIAS); + } else { + bias_ = NULL; + bias_dim_size_ = 0; + bias_dims_ = NULL; + } + + input_a_dim_size_ = GetInputShapeDimSize(INPUT_A); + input_b_dim_size_ = GetInputShapeDimSize(INPUT_B); + + input_a_dims_ = GetInputShapeDims(INPUT_A); + input_b_dims_ = GetInputShapeDims(INPUT_B); + + return MACE_SUCCESS; +} + +MaceStatus ArmMatMulInt8Op::Run() { + MACE_ASSERT(Validate()); + + MACE_ASSERT(input_a_dim_size_ == 2); + MACE_ASSERT(input_b_dim_size_ == 2); + + MACE_ASSERT(input_a_dims_[0] == 1); + + MACE_ASSERT(transpose_b_); + MACE_ASSERT(!transpose_a_); + + const int32_t lhs_rows = 
input_a_dims_[0]; + const int32_t rhs_rows = input_b_dims_[0]; + const int32_t rhs_cols = input_b_dims_[1]; + + const int32_t rhs_t_cols = rhs_rows; + + const int32_t rows = lhs_rows; + const int32_t cols = rhs_t_cols; + + if (bias_ != NULL) { + MACE_ASSERT(bias_dim_size_ == 1); + MACE_ASSERT(bias_dims_[0] == cols); + } + + int32_t *output_dims0 = + ScratchBuffer(engine_config_).GetBuffer(input_a_dim_size_); + + output_dims0[0] = rows; + output_dims0[1] = cols; + + MACE_RETURN_IF_ERROR( + ResizeOutputShape(OUTPUT, input_a_dim_size_, output_dims0)); + + QuantizeInfo input_quantize_info_a = GetInputQuantizeInfo(INPUT_A); + QuantizeInfo input_quantize_info_b = GetInputQuantizeInfo(INPUT_B); + QuantizeInfo output_quantize_info = GetOutputQuantizeInfo(OUTPUT); + + double double_multiplier = input_quantize_info_a.scale * + input_quantize_info_b.scale / + output_quantize_info.scale; + int32_t multiplier; + int32_t shift; + QuantizeMultiplier(double_multiplier, &multiplier, &shift); + + ScratchBuffer scratch_buffer(engine_config_); + + int32_t *bias = NULL; + if (bias_ == NULL) { + bias = scratch_buffer.GetBuffer(cols); + for (int32_t i = 0; i < cols; ++i) { + bias[i] = 0; + } + } else { + bias = const_cast(bias_); + } + + arm_status status = arm_nn_vec_mat_mult_t_s8( + input_a_, input_b_, bias, output_, -input_quantize_info_a.zero, + input_quantize_info_b.zero, output_quantize_info.zero, multiplier, shift, + rhs_cols, rhs_rows, -128, 127); + + MACE_ASSERT(status == ARM_MATH_SUCCESS); + + return MACE_SUCCESS; +} + +bool ArmMatMulInt8Op::Validate() { + const int32_t lhs_rank = input_a_dim_size_; + const int32_t rhs_rank = input_b_dim_size_; + if (input_a_dim_size_ == input_b_dim_size_) { + for (uint32_t i = 0; i < input_a_dim_size_ - 2; ++i) { + MACE_ASSERT1(input_a_dims_[i] == input_b_dims_[i], + "batch dimensions are not equal"); + } + } else { + MACE_ASSERT1(input_a_dim_size_ == 2 || input_b_dim_size_ == 2, + "Either lhs or rhs matrix should has rank 2 " + "for 
non-batched matrix multiplication"); + } + + int32_t lhs_depth = + transpose_a_ ? input_a_dims_[lhs_rank - 2] : input_a_dims_[lhs_rank - 1]; + int32_t rhs_depth = + transpose_b_ ? input_b_dims_[rhs_rank - 1] : input_b_dims_[rhs_rank - 2]; + if (lhs_depth != rhs_depth) { + MACE_ASSERT1(false, "the number of A's column must be equal to B's row "); + return false; + } + + return true; +} + +} // namespace ops +} // namespace micro diff --git a/micro/ops/nhwc/cmsis_nn/arm_mat_mul_int8.h b/micro/ops/nhwc/cmsis_nn/arm_mat_mul_int8.h new file mode 100644 index 0000000000000000000000000000000000000000..5b8bcf5ddeb8d4cd5ed35c6a51b170e245312a7b --- /dev/null +++ b/micro/ops/nhwc/cmsis_nn/arm_mat_mul_int8.h @@ -0,0 +1,55 @@ +// Copyright 2020 The MACE Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#ifndef MICRO_OPS_NHWC_CMSIS_NN_ARM_MAT_MUL_INT8_H_ +#define MICRO_OPS_NHWC_CMSIS_NN_ARM_MAT_MUL_INT8_H_ + +#include "micro/framework/operator.h" + +namespace micro { +namespace ops { +class ArmMatMulInt8Op : public framework::Operator { + public: + MaceStatus OnInit(); + MaceStatus Run(); + + private: + bool Validate(); + + private: + const int8_t *input_a_; + const int32_t *input_a_dims_; + uint32_t input_a_dim_size_; + + const int8_t *input_b_; + const int32_t *input_b_dims_; + uint32_t input_b_dim_size_; + + const int32_t *bias_; + const int32_t *bias_dims_; + uint32_t bias_dim_size_; + + int8_t *output_; + + bool transpose_a_; + bool transpose_b_; + + MACE_OP_INPUT_TAGS(INPUT_A, INPUT_B, BIAS); + MACE_OP_OUTPUT_TAGS(OUTPUT); +}; + +} // namespace ops +} // namespace micro + +#endif // MICRO_OPS_NHWC_CMSIS_NN_ARM_MAT_MUL_INT8_H_ diff --git a/micro/ops/nhwc/cmsis_nn/arm_pooling_int8.cc b/micro/ops/nhwc/cmsis_nn/arm_pooling_int8.cc new file mode 100644 index 0000000000000000000000000000000000000000..7e5851622f2abeddd14813601f5d84a416cd1ed6 --- /dev/null +++ b/micro/ops/nhwc/cmsis_nn/arm_pooling_int8.cc @@ -0,0 +1,123 @@ +// Copyright 2020 The MACE Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include "micro/ops/nhwc/cmsis_nn/arm_pooling_int8.h" + +#include + +#include "micro/base/logging.h" +#include "micro/base/utils.h" +#include "micro/framework/scratch_buffer.h" +#include "micro/include/utils/macros.h" +#include "micro/ops/nhwc/cmsis_nn/utilities.h" + +namespace micro { +namespace ops { + +void ArmPoolingInt8Op::MaxPooling(const mifloat *input, + const int32_t *filter_hw, + const int32_t *stride_hw, + const int32_t *dilation_hw, + const int32_t *pad_hw) { + MACE_UNUSED(dilation_hw); + + cmsis_nn_context ctx; + ctx.buf = NULL; + ctx.size = 0; + + cmsis_nn_pool_params pool_params; + pool_params.activation.min = -128; + pool_params.activation.max = 127; + pool_params.stride.h = stride_hw[0]; + pool_params.stride.w = stride_hw[1]; + pool_params.padding.h = pad_hw[0]; + pool_params.padding.w = pad_hw[1]; + + MACE_ASSERT(input_dims_[0] == 1); + + cmsis_nn_dims input_dims; + input_dims.n = input_dims_[0]; + input_dims.h = input_dims_[1]; + input_dims.w = input_dims_[2]; + input_dims.c = input_dims_[3]; + const int8_t *input_data = reinterpret_cast(input); + + cmsis_nn_dims filter_dims; + filter_dims.h = filter_hw[0]; + filter_dims.w = filter_hw[1]; + + cmsis_nn_dims output_dims; + output_dims.n = output_dims_[0]; + output_dims.h = output_dims_[1]; + output_dims.w = output_dims_[2]; + output_dims.c = output_dims_[3]; + int8_t *output_data = reinterpret_cast(output_); + + arm_max_pool_s8(&ctx, &pool_params, &input_dims, input_data, &filter_dims, + &output_dims, output_data); +} + +void ArmPoolingInt8Op::AvgPooling(const mifloat *input, + const int32_t *filter_hw, + const int32_t *stride_hw, + const int32_t *dilation_hw, + const int32_t *pad_hw) { + MACE_UNUSED(dilation_hw); + + const int32_t out_width = output_dims_[2]; + const int32_t in_channels = input_dims_[3]; + + cmsis_nn_context ctx; + ctx.size = arm_avgpool_s8_get_buffer_size(out_width, in_channels); + ScratchBuffer scratch_buffer(engine_config_); + if (ctx.size > 0) { + ctx.buf = 
scratch_buffer.GetBuffer(ctx.size); + } else { + ctx.buf = NULL; + } + + cmsis_nn_pool_params pool_params; + pool_params.activation.min = -128; + pool_params.activation.max = 127; + pool_params.stride.h = stride_hw[0]; + pool_params.stride.w = stride_hw[1]; + pool_params.padding.h = pad_hw[0]; + pool_params.padding.w = pad_hw[1]; + + MACE_ASSERT(input_dims_[0] == 1); + + cmsis_nn_dims input_dims; + input_dims.n = input_dims_[0]; + input_dims.h = input_dims_[1]; + input_dims.w = input_dims_[2]; + input_dims.c = input_dims_[3]; + const int8_t *input_data = reinterpret_cast(input); + + cmsis_nn_dims filter_dims; + filter_dims.h = filter_hw[0]; + filter_dims.w = filter_hw[1]; + + cmsis_nn_dims output_dims; + output_dims.n = output_dims_[0]; + output_dims.h = output_dims_[1]; + output_dims.w = output_dims_[2]; + output_dims.c = output_dims_[3]; + int8_t *output_data = reinterpret_cast(output_); + + arm_avgpool_s8(&ctx, &pool_params, &input_dims, input_data, &filter_dims, + &output_dims, output_data); +} + +} // namespace ops +} // namespace micro diff --git a/micro/ops/nhwc/cmsis_nn/arm_pooling_int8.h b/micro/ops/nhwc/cmsis_nn/arm_pooling_int8.h new file mode 100644 index 0000000000000000000000000000000000000000..1b6bed3ac2b169c2c5b99c8e26e935915a9c939c --- /dev/null +++ b/micro/ops/nhwc/cmsis_nn/arm_pooling_int8.h @@ -0,0 +1,41 @@ +// Copyright 2020 The MACE Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#ifndef MICRO_OPS_NHWC_CMSIS_NN_ARM_POOLING_INT8_H_ +#define MICRO_OPS_NHWC_CMSIS_NN_ARM_POOLING_INT8_H_ + +#include "micro/model/output_shape.h" +#include "micro/ops/nhwc/base/pooling_base.h" + +namespace micro { +namespace ops { + +class ArmPoolingInt8Op : public PoolingBase { + private: + void MaxPooling(const mifloat *input, + const int32_t *filter_hw, + const int32_t *stride_hw, + const int32_t *dilation_hw, + const int32_t *pad_hw); + void AvgPooling(const mifloat *input, + const int32_t *filter_hw, + const int32_t *stride_hw, + const int32_t *dilation_hw, + const int32_t *pad_hw); +}; + +} // namespace ops +} // namespace micro + +#endif // MICRO_OPS_NHWC_CMSIS_NN_ARM_POOLING_INT8_H_ diff --git a/micro/ops/nhwc/cmsis_nn/arm_softmax_int8.cc b/micro/ops/nhwc/cmsis_nn/arm_softmax_int8.cc new file mode 100644 index 0000000000000000000000000000000000000000..e1d44bf1cdeb0aba53d0e14bf8bf9f1707034179 --- /dev/null +++ b/micro/ops/nhwc/cmsis_nn/arm_softmax_int8.cc @@ -0,0 +1,82 @@ +// Copyright 2020 The MACE Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include "micro/ops/nhwc/cmsis_nn/arm_softmax_int8.h" + +#include + +#include "micro/base/logging.h" +#include "micro/base/utils.h" +#include "micro/framework/op_context.h" +#include "micro/model/net_def.h" +#include "micro/ops/nhwc/cmsis_nn/utilities.h" + +namespace micro { +namespace ops { + +MaceStatus ArmSoftmaxInt8Op::OnInit() { + data_format_ = static_cast( + GetArgByName("data_format", static_cast(NHWC))); + input_ = GetInputData(INPUT); + input_dims_ = GetInputShapeDims(INPUT); + input_dim_size_ = GetInputShapeDimSize(INPUT); + MACE_ASSERT(input_dim_size_ == 2); + + output_ = GetOutputData(OUTPUT); + + bool use_log = GetArgByName("use_log", false); + MACE_ASSERT1(!use_log, "The argument \"use_log\" is unsupported"); + + return MACE_SUCCESS; +} + +MaceStatus ArmSoftmaxInt8Op::Run() { + MACE_RETURN_IF_ERROR(ResizeOutputShape(OUTPUT, input_dim_size_, input_dims_)); + // TODO(ZhangZhimin): Workarounds for AUTO data format + if (NHWC == data_format_ || AUTO == data_format_) { // NHWC + return RunForNHWC(); + } else { + MACE_NOT_IMPLEMENTED; + return MACE_UNSUPPORTED; + } +} + +MaceStatus ArmSoftmaxInt8Op::RunForNHWC() { + int32_t class_size = input_dims_[input_dim_size_ - 1]; + + const int8_t *input_data = reinterpret_cast(input_); + int8_t *output_data = reinterpret_cast(output_); + + int32_t num_rows = input_dims_[0]; + + QuantizeInfo input_quantize_info = GetInputQuantizeInfo(INPUT); + + int kInputDeltaIntBits = 5; + int32_t scale_q = static_cast( + base::min(static_cast(input_quantize_info.scale) * + (1 << (31 - kInputDeltaIntBits)), + (1ll << 31) - 1.0)); + int32_t mult; + int32_t shift; + QuantizeMultiplier(scale_q, &mult, &shift); + int32_t diff_min = -128; + + arm_softmax_s8(input_data, num_rows, class_size, mult, shift, diff_min, + output_data); + + return MACE_SUCCESS; +} + +} // namespace ops +} // namespace micro diff --git a/micro/ops/nhwc/cmsis_nn/arm_softmax_int8.h b/micro/ops/nhwc/cmsis_nn/arm_softmax_int8.h new file mode 100644 index 
0000000000000000000000000000000000000000..00f33863e6ca79e0c2570c11050322671337e318 --- /dev/null +++ b/micro/ops/nhwc/cmsis_nn/arm_softmax_int8.h @@ -0,0 +1,47 @@ +// Copyright 2020 The MACE Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef MICRO_OPS_NHWC_CMSIS_NN_ARM_SOFTMAX_INT8_H_ +#define MICRO_OPS_NHWC_CMSIS_NN_ARM_SOFTMAX_INT8_H_ + +#include "micro/framework/operator.h" + +namespace micro { +namespace ops { + +class ArmSoftmaxInt8Op : public framework::Operator { + public: + MaceStatus OnInit(); + MaceStatus Run(); + + private: + MaceStatus RunForNHWC(); + + private: + const mifloat *input_; + const int32_t *input_dims_; + uint32_t input_dim_size_; + + mifloat *output_; + + DataFormat data_format_; + + MACE_OP_INPUT_TAGS(INPUT); + MACE_OP_OUTPUT_TAGS(OUTPUT); +}; + +} // namespace ops +} // namespace micro + +#endif // MICRO_OPS_NHWC_CMSIS_NN_ARM_SOFTMAX_INT8_H_ diff --git a/micro/ops/nhwc/cmsis_nn/dequantize.cc b/micro/ops/nhwc/cmsis_nn/dequantize.cc new file mode 100644 index 0000000000000000000000000000000000000000..9e4be8bcd872b51f5611feb9c0f5a18e13969971 --- /dev/null +++ b/micro/ops/nhwc/cmsis_nn/dequantize.cc @@ -0,0 +1,56 @@ +// Copyright 2020 The MACE Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "micro/ops/nhwc/cmsis_nn/dequantize.h" + +#include "micro/base/logging.h" +#include "micro/base/utils.h" +#include "micro/framework/op_context.h" +#include "micro/framework/operator.h" +#include "micro/model/net_def.h" + +namespace micro { +namespace ops { + +MaceStatus DequantizeOp::OnInit() { + input_ = GetInputData(INPUT); + input_dims_ = GetInputShapeDims(INPUT); + input_dim_size_ = GetInputShapeDimSize(INPUT); + + output_ = GetOutputData(OUTPUT); + + return MACE_SUCCESS; +} + +MaceStatus DequantizeOp::Run() { + MACE_RETURN_IF_ERROR(ResizeOutputShape(OUTPUT, input_dim_size_, input_dims_)); + + QuantizeInfo input_quantize_info = GetInputQuantizeInfo(INPUT); + + float scale = input_quantize_info.scale; + int32_t zero_point = input_quantize_info.zero; + + int32_t element_size = 1; + for (uint32_t i = 0; i < input_dim_size_; ++i) { + element_size *= input_dims_[i]; + } + for (int32_t i = 0; i < element_size; ++i) { + output_[i] = scale * (input_[i] - zero_point); + } + + return MACE_SUCCESS; +} + +} // namespace ops +} // namespace micro diff --git a/micro/ops/nhwc/cmsis_nn/dequantize.h b/micro/ops/nhwc/cmsis_nn/dequantize.h new file mode 100644 index 0000000000000000000000000000000000000000..9459e2d11779d3157590e5db992ec71faf62fe0a --- /dev/null +++ b/micro/ops/nhwc/cmsis_nn/dequantize.h @@ -0,0 +1,43 @@ +// Copyright 2020 The MACE Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef MICRO_OPS_NHWC_CMSIS_NN_DEQUANTIZE_H_ +#define MICRO_OPS_NHWC_CMSIS_NN_DEQUANTIZE_H_ + +#include "micro/framework/operator.h" + +namespace micro { +namespace ops { + +class DequantizeOp : public framework::Operator { + public: + MaceStatus OnInit(); + MaceStatus Run(); + + private: + const int8_t *input_; + const int32_t *input_dims_; + uint32_t input_dim_size_; + + mifloat *output_; + + MACE_OP_INPUT_TAGS(INPUT); + MACE_OP_OUTPUT_TAGS(OUTPUT); +}; + +} // namespace ops +} // namespace micro + +#endif // MICRO_OPS_NHWC_CMSIS_NN_DEQUANTIZE_H_ + diff --git a/micro/ops/nhwc/cmsis_nn/quantize.cc b/micro/ops/nhwc/cmsis_nn/quantize.cc new file mode 100644 index 0000000000000000000000000000000000000000..406672efd55d71c28360462887cc4376a938071a --- /dev/null +++ b/micro/ops/nhwc/cmsis_nn/quantize.cc @@ -0,0 +1,64 @@ +// Copyright 2020 The MACE Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include "micro/ops/nhwc/cmsis_nn/quantize.h" + +#include <math.h> +#include "micro/base/logging.h" +#include "micro/base/utils.h" + +namespace micro { +namespace ops { + +inline int8_t SaturateInt8(float value) { + int rounded_value = static_cast<int>(value); + if (rounded_value <= -128) { + return -128; + } else if (rounded_value >= 127) { + return 127; + } else { + return static_cast<int8_t>(rounded_value); + } +} + +MaceStatus QuantizeOp::OnInit() { + input_ = GetInputData<mifloat>(INPUT); + input_dims_ = GetInputShapeDims(INPUT); + input_dim_size_ = GetInputShapeDimSize(INPUT); + + output_ = GetOutputData<int8_t>(OUTPUT); + + return MACE_SUCCESS; +} + +MaceStatus QuantizeOp::Run() { + MACE_RETURN_IF_ERROR(ResizeOutputShape(OUTPUT, input_dim_size_, input_dims_)); + QuantizeInfo output_quantize_info = GetOutputQuantizeInfo(OUTPUT); + float recip_scale = 1.0f / output_quantize_info.scale; + int32_t zero_point = output_quantize_info.zero; + + int32_t element_size = 1; + for (uint32_t i = 0; i < input_dim_size_; ++i) { + element_size *= input_dims_[i]; + } + + for (int32_t i = 0; i < element_size; ++i) { + output_[i] = SaturateInt8(roundf(recip_scale * input_[i] + zero_point)); + } + + return MACE_SUCCESS; +} + +} // namespace ops +} // namespace micro diff --git a/micro/ops/nhwc/cmsis_nn/quantize.h b/micro/ops/nhwc/cmsis_nn/quantize.h new file mode 100644 index 0000000000000000000000000000000000000000..c9f7dda2f2bb17fd2dca7d719d081a624dc8cf1f --- /dev/null +++ b/micro/ops/nhwc/cmsis_nn/quantize.h @@ -0,0 +1,42 @@ +// Copyright 2020 The MACE Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef MICRO_OPS_NHWC_CMSIS_NN_QUANTIZE_H_ +#define MICRO_OPS_NHWC_CMSIS_NN_QUANTIZE_H_ + +#include "micro/framework/operator.h" + +namespace micro { +namespace ops { + +class QuantizeOp : public framework::Operator { + public: + MaceStatus OnInit(); + MaceStatus Run(); + + private: + const mifloat *input_; + const int32_t *input_dims_; + uint32_t input_dim_size_; + + int8_t *output_; + + MACE_OP_INPUT_TAGS(INPUT); + MACE_OP_OUTPUT_TAGS(OUTPUT); +}; + +} // namespace ops +} // namespace micro + +#endif // MICRO_OPS_NHWC_CMSIS_NN_QUANTIZE_H_ diff --git a/micro/ops/nhwc/cmsis_nn/utilities.cc b/micro/ops/nhwc/cmsis_nn/utilities.cc new file mode 100644 index 0000000000000000000000000000000000000000..8845ff3c7e33d5a3a1abb81531b79114244eacf7 --- /dev/null +++ b/micro/ops/nhwc/cmsis_nn/utilities.cc @@ -0,0 +1,40 @@ +// Copyright 2020 The MACE Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include "micro/ops/nhwc/cmsis_nn/utilities.h" + +#include <math.h> + +void QuantizeMultiplier(double double_multiplier, + int32_t *quantized_multiplier, + int32_t *shift) { + if (double_multiplier == 0.) { + *quantized_multiplier = 0; + *shift = 0; + return; + } + const double q = frexp(double_multiplier, reinterpret_cast<int *>(shift)); + int64_t q_fixed = static_cast<int64_t>(round(q * (1ll << 31))); + + if (q_fixed == (1ll << 31)) { + q_fixed /= 2; + ++*shift; + } + + if (*shift < -31) { + *shift = 0; + q_fixed = 0; + } + *quantized_multiplier = static_cast<int32_t>(q_fixed); +} diff --git a/micro/ops/nhwc/cmsis_nn/utilities.h b/micro/ops/nhwc/cmsis_nn/utilities.h new file mode 100644 index 0000000000000000000000000000000000000000..4eb7beaf16374e4e35cf290200e94ec9ee14e04d --- /dev/null +++ b/micro/ops/nhwc/cmsis_nn/utilities.h @@ -0,0 +1,24 @@ +// Copyright 2020 The MACE Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+ +#ifndef MICRO_OPS_NHWC_CMSIS_NN_UTILITIES_H_ +#define MICRO_OPS_NHWC_CMSIS_NN_UTILITIES_H_ + +#include "micro/base/types.h" + +void QuantizeMultiplier(double double_multiplier, + int32_t *quantized_multiplier, + int32_t *shift); + +#endif // MICRO_OPS_NHWC_CMSIS_NN_UTILITIES_H_ diff --git a/micro/ops/nhwc/pooling_ref.cc b/micro/ops/nhwc/pooling_ref.cc index 270a7c0f782e9faebbaa5347ca0221e50f266dca..c3f97694e3575f7b35907fc797d5b31a94866cf3 100644 --- a/micro/ops/nhwc/pooling_ref.cc +++ b/micro/ops/nhwc/pooling_ref.cc @@ -49,7 +49,7 @@ void PoolingRefOp::MaxPooling(const mifloat *input, } for (int32_t fh = 0; fh < filter_hw[0]; ++fh) { int32_t inh = inh_addr + dilation_hw[0] * fh; - if (inh < 0 && inh >= in_height) { + if (inh < 0 || inh >= in_height) { continue; } int32_t in_h_base = (in_b_base + inh) * in_width; diff --git a/micro/ops/reshape.cc b/micro/ops/reshape.cc deleted file mode 100644 index 26e80d794197d4ab6be47fa33499b4ca22536baf..0000000000000000000000000000000000000000 --- a/micro/ops/reshape.cc +++ /dev/null @@ -1,108 +0,0 @@ -// Copyright 2020 The MACE Authors. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -#include "micro/ops/reshape.h" - -#include "micro/base/logging.h" -#include "micro/base/utils.h" -#include "micro/framework/scratch_buffer.h" - -namespace micro { -namespace ops { - -namespace { - -MaceStatus ValidShapeData(const int32_t *input_dims, - const uint32_t input_dim_size, - int32_t *shape_data, - const uint32_t shape_data_size) { - MACE_ASSERT( - input_dims != NULL && shape_data != NULL); - int32_t unknown_idx = -1; - int32_t product = 1; - const int32_t input_size = base::GetShapeSize(input_dim_size, input_dims); - - for (uint32_t i = 0; i < shape_data_size; ++i) { - if (shape_data[i] == -1) { - MACE_ASSERT1(unknown_idx == -1, "Only one input size may be -1"); - unknown_idx = i; - shape_data[i] = 1; - } else { - MACE_ASSERT2(shape_data[i] >= 0, "Shape must be non-negative: ", - shape_data[i]); - if (shape_data[i] == 0) { - MACE_ASSERT1(i < input_dim_size, "dims:0 out of input dims' range."); - shape_data[i] = input_dims[i]; - } - product *= shape_data[i]; - } - } - - if (unknown_idx != -1) { - MACE_ASSERT1(product != 0, - "Cannot infer shape if there is zero shape size."); - const int32_t missing = input_size / product; - MACE_ASSERT1(missing * product == input_size, - "Input size not match reshaped tensor size"); - shape_data[unknown_idx] = missing; - } - - return MACE_SUCCESS; -} - -} // namespace - -MaceStatus ReshapeOp::OnInit() { - input_ = GetInputData(INPUT); - input_dims_ = GetInputShapeDims(INPUT); - input_dim_size_ = GetInputShapeDimSize(INPUT); - - shape_ = GetInputData(SHAPE); - shape_dims_ = GetInputShapeDims(SHAPE); - shape_dim_size_ = GetInputShapeDimSize(SHAPE); - - output_ = GetOutputData(OUTPUT); - return MACE_SUCCESS; -} - -MaceStatus ReshapeOp::Run() { - const int32_t input_data_size = - base::GetShapeSize(input_dim_size_, input_dims_); - const int32_t shape_data_size = - base::GetShapeSize(shape_dim_size_, shape_dims_); - - int32_t *shape_data = - ScratchBuffer(engine_config_).GetBuffer(shape_data_size); - 
base::memcpy(shape_data, shape_, shape_data_size * sizeof(int32_t)); - - MACE_RETURN_IF_ERROR(ValidShapeData(input_dims_, input_dim_size_, - shape_data, shape_data_size)); - -#ifndef MACE_MICRO_NDEBUG - const int32_t output_data_size = base::accumulate_multi( - shape_data, 0, static_cast(shape_data_size)); - if (input_data_size != output_data_size) { - LOG(FATAL) << "input_data_size(" << input_data_size - << ") != output_data_size(" << output_data_size - << "), please check the model."; - } -#endif - - // TODO(luxuhui): optimize this method by reusing buffer - base::memcpy(output_, input_, input_data_size * sizeof(mifloat)); - return ResizeOutputShape(OUTPUT, shape_data_size, shape_data); -} - -} // namespace ops -} // namespace micro diff --git a/micro/ops/reshape.h b/micro/ops/reshape.h index 0e907b2f6dad0563ff035a727ee44cec57503766..dfaf73cdad883fadc7c6526b199dc89261d885d2 100644 --- a/micro/ops/reshape.h +++ b/micro/ops/reshape.h @@ -15,17 +15,104 @@ #ifndef MICRO_OPS_RESHAPE_H_ #define MICRO_OPS_RESHAPE_H_ +#include "micro/base/utils.h" #include "micro/framework/operator.h" +#include "micro/framework/scratch_buffer.h" namespace micro { namespace ops { + +namespace internal { + +inline MaceStatus ValidShapeData(const int32_t *input_dims, + const uint32_t input_dim_size, + int32_t *shape_data, + const uint32_t shape_data_size) { + MACE_ASSERT(input_dims != NULL && shape_data != NULL); + int32_t unknown_idx = -1; + int32_t product = 1; + const int32_t input_size = base::GetShapeSize(input_dim_size, input_dims); + + for (uint32_t i = 0; i < shape_data_size; ++i) { + if (shape_data[i] == -1) { + MACE_ASSERT1(unknown_idx == -1, "Only one input size may be -1"); + unknown_idx = i; + shape_data[i] = 1; + } else { + MACE_ASSERT2(shape_data[i] >= 0, + "Shape must be non-negative: ", shape_data[i]); + if (shape_data[i] == 0) { + MACE_ASSERT1(i < input_dim_size, "dims:0 out of input dims' range."); + shape_data[i] = input_dims[i]; + } + product *= shape_data[i]; + } + } + 
+ if (unknown_idx != -1) { + MACE_ASSERT1(product != 0, + "Cannot infer shape if there is zero shape size."); + const int32_t missing = input_size / product; + MACE_ASSERT1(missing * product == input_size, + "Input size not match reshaped tensor size"); + shape_data[unknown_idx] = missing; + } + + return MACE_SUCCESS; +} + +} // namespace internal + + +template <typename T> +class ReshapeOp : public framework::Operator { + public: + typedef T value_type; + + MaceStatus OnInit() { + input_ = GetInputData<T>(INPUT); + input_dims_ = GetInputShapeDims(INPUT); + input_dim_size_ = GetInputShapeDimSize(INPUT); + + shape_ = GetInputData<int32_t>(SHAPE); + shape_dims_ = GetInputShapeDims(SHAPE); + shape_dim_size_ = GetInputShapeDimSize(SHAPE); + + output_ = GetOutputData<T>(OUTPUT); + return MACE_SUCCESS; + } + + MaceStatus Run() { + const int32_t input_data_size = + base::GetShapeSize(input_dim_size_, input_dims_); + const int32_t shape_data_size = + base::GetShapeSize(shape_dim_size_, shape_dims_); + + int32_t *shape_data = + ScratchBuffer(engine_config_).GetBuffer<int32_t>(shape_data_size); + base::memcpy(shape_data, shape_, shape_data_size * sizeof(int32_t)); + + MACE_RETURN_IF_ERROR(internal::ValidShapeData(input_dims_, input_dim_size_, + shape_data, shape_data_size)); + +#ifndef MACE_MICRO_NDEBUG + const int32_t output_data_size = base::accumulate_multi( + shape_data, 0, static_cast<uint32_t>(shape_data_size)); + if (input_data_size != output_data_size) { + LOG(FATAL) << "input_data_size(" << input_data_size + << ") != output_data_size(" << output_data_size + << "), please check the model."; + } +#endif + + // TODO(luxuhui): optimize this method by reusing buffer + base::memcpy(output_, input_, + input_data_size * sizeof(ReshapeOp::value_type)); + return ResizeOutputShape(OUTPUT, shape_data_size, shape_data); + } private: - const mifloat *input_; + const value_type *input_; const int32_t *input_dims_; uint32_t input_dim_size_; @@ -33,7 +120,7 @@ class ReshapeOp : public 
framework::Operator { const int32_t *shape_dims_; uint32_t shape_dim_size_; - mifloat *output_; + value_type *output_; MACE_OP_INPUT_TAGS(INPUT, SHAPE); MACE_OP_OUTPUT_TAGS(OUTPUT); diff --git a/micro/ops/softmax.cc b/micro/ops/softmax.cc index 26a91f9019c15cef32bcdf28f3bcc78fcf90e825..925ed8f9ee7c5d9d9c5bc2aa88e3dce6819eb2c9 100644 --- a/micro/ops/softmax.cc +++ b/micro/ops/softmax.cc @@ -36,7 +36,8 @@ MaceStatus SoftmaxOp::OnInit() { MaceStatus SoftmaxOp::Run() { MACE_RETURN_IF_ERROR(ResizeOutputShape(OUTPUT, input_dim_size_, input_dims_)); - if (NHWC == data_format_) { // NHWC + // TODO(ZhangZhimin): Walkarounds for AUTO data format + if (NHWC == data_format_ || AUTO == data_format_) { // NHWC return RunForNHWC(); } else { MACE_NOT_IMPLEMENTED; diff --git a/micro/pretrained_models/keras/README.md b/micro/pretrained_models/keras/README.md new file mode 100644 index 0000000000000000000000000000000000000000..145ff4fe5eec32b9283199348acde9c0d0596e25 --- /dev/null +++ b/micro/pretrained_models/keras/README.md @@ -0,0 +1,11 @@ +# Tensorflow Keras Models + +MACE Micro supports Keras models of Tensorflow 2.x + +## HAR + +The model is from . + +## MNIST + +The mnist_keras.py depends on tensorflow 2.x and tensorflow_model_optimization. 
You can run this script to generate "mnist.h5" and "mnist-int8.h5" models diff --git a/micro/pretrained_models/keras/har/har-int8.yml b/micro/pretrained_models/keras/har/har-int8.yml new file mode 100644 index 0000000000000000000000000000000000000000..4fe0806c0faf3e35ad65b06cc5c548f4b7bf64da --- /dev/null +++ b/micro/pretrained_models/keras/har/har-int8.yml @@ -0,0 +1,28 @@ +library_name: har +target_abis: [host] +model_graph_format: file +model_data_format: file +models: + har_int8: + platform: keras + model_file_path: https://cdn.cnbj1.fds.api.mi-img.com/mace/miai-models/micro/keras/har/har.h5 + model_sha256_checksum: ec0477b8e489541bb34377c9cabc42ee6cefa8bdf0a9f726e06be1b967ea1dcd + subgraphs: + - input_tensors: + - conv2d_1_input:0 + input_shapes: + - 1,90,3,1 + input_ranges: + - -5,15 + output_tensors: + - dense_3/Softmax:0 + output_shapes: + - 1,6 + runtime: cpu + limit_opencl_kernel_time: 0 + nnlib_graph_mode: 0 + obfuscate: 0 + winograd: 0 + quantize: 1 + quantize_schema: int8 + quantize_range_file: https://cdn.cnbj1.fds.api.mi-img.com/mace/miai-models/micro/keras/har/har.range diff --git a/micro/pretrained_models/keras/har/har.yml b/micro/pretrained_models/keras/har/har.yml new file mode 100644 index 0000000000000000000000000000000000000000..c817b1a8231a457771c03406cffb44a45af705a9 --- /dev/null +++ b/micro/pretrained_models/keras/har/har.yml @@ -0,0 +1,24 @@ +library_name: har +target_abis: [host] +model_graph_format: file +model_data_format: file +models: + har: + platform: keras + model_file_path: https://cdn.cnbj1.fds.api.mi-img.com/mace/miai-models/micro/keras/har/har.h5 + model_sha256_checksum: ec0477b8e489541bb34377c9cabc42ee6cefa8bdf0a9f726e06be1b967ea1dcd + subgraphs: + - input_tensors: + - conv2d_1_input:0 + input_shapes: + - 1,90,3,1 + output_tensors: + - dense_3/Softmax:0 + output_shapes: + - 1,6 + runtime: cpu + data_type: fp32_fp32 + limit_opencl_kernel_time: 0 + nnlib_graph_mode: 0 + obfuscate: 0 + winograd: 0 diff --git 
a/micro/pretrained_models/keras/mnist/mnist-int8.yml b/micro/pretrained_models/keras/mnist/mnist-int8.yml new file mode 100644 index 0000000000000000000000000000000000000000..5693182c8d5fc876c450d113d67dc8a4449d170a --- /dev/null +++ b/micro/pretrained_models/keras/mnist/mnist-int8.yml @@ -0,0 +1,27 @@ +library_name: mnist +target_abis: [host] +model_graph_format: file +model_data_format: file +models: + mnist_int8: + platform: keras + model_file_path: https://cdn.cnbj1.fds.api.mi-img.com/mace/miai-models/micro/keras/mnist/mnist_int8.h5 + model_sha256_checksum: f56ae3b94c114719683c3bc55351f871d371e874d3a4d3224cc5299717e8b7fc + subgraphs: + - input_tensors: + - conv2d_input:0 + input_shapes: + - 1,28,28,1 + input_ranges: + - 0,1 + output_tensors: + - quant_dense_1/Softmax:0 + output_shapes: + - 1,10 + runtime: cpu + limit_opencl_kernel_time: 0 + nnlib_graph_mode: 0 + obfuscate: 0 + winograd: 0 + quantize: 1 + quantize_schema: int8 diff --git a/micro/pretrained_models/keras/mnist/mnist.yml b/micro/pretrained_models/keras/mnist/mnist.yml new file mode 100644 index 0000000000000000000000000000000000000000..8331a240617886953dfbaf185eb9e82b3ccd614a --- /dev/null +++ b/micro/pretrained_models/keras/mnist/mnist.yml @@ -0,0 +1,25 @@ +library_name: mnist +target_abis: [host] +model_graph_format: file +model_data_format: file +models: + mnist: + platform: keras + model_file_path: https://cdn.cnbj1.fds.api.mi-img.com/mace/miai-models/micro/keras/mnist/mnist.h5 + model_sha256_checksum: 85f2ffe02e1b9dd2d6ad3826b91ac134fed15b838bb92a1010f67c19d55b1f65 + subgraphs: + - input_tensors: + - conv2d_input:0 + input_shapes: + - 1,28,28,1 + output_tensors: + - dense_1/Softmax:0 + output_shapes: + - 1,10 + runtime: cpu + data_type: fp32_fp32 + limit_opencl_kernel_time: 0 + nnlib_graph_mode: 0 + obfuscate: 0 + winograd: 0 + quantize: 0 diff --git a/micro/pretrained_models/keras/mnist/mnist_keras.py b/micro/pretrained_models/keras/mnist/mnist_keras.py new file mode 100644 index 
0000000000000000000000000000000000000000..531bb2ecf1670d15fda09fc8691223d340cb0232 --- /dev/null +++ b/micro/pretrained_models/keras/mnist/mnist_keras.py @@ -0,0 +1,83 @@ +# Refer to https://www.tensorflow.org/model_optimization/guide + +import tensorflow.compat.v2 as tf +import tensorflow_datasets as tfds +import tensorflow_model_optimization as tfmot + + +def normalize_img(image, label): + """Normalizes images: `uint8` -> `float32`.""" + return tf.cast(image, tf.float32) / 255.0, label + + +tfds.disable_progress_bar() +tf.enable_v2_behavior() + +(ds_train, ds_test), ds_info = tfds.load( + "mnist", + split=["train", "test"], + shuffle_files=True, + as_supervised=True, + with_info=True, +) + +ds_train = ds_train.map( + normalize_img, num_parallel_calls=tf.data.experimental.AUTOTUNE +) +ds_train = ds_train.cache() +ds_train = ds_train.shuffle(ds_info.splits["train"].num_examples) +ds_train = ds_train.batch(128) +ds_train = ds_train.prefetch(tf.data.experimental.AUTOTUNE) + +ds_test = ds_test.map( + normalize_img, num_parallel_calls=tf.data.experimental.AUTOTUNE +) +ds_test = ds_test.batch(128) +ds_test = ds_test.cache() +ds_test = ds_test.prefetch(tf.data.experimental.AUTOTUNE) + +model = tf.keras.models.Sequential( + [ + tf.keras.layers.Conv2D( + filters=32, kernel_size=3, activation="relu", padding="same" + ), + tf.keras.layers.DepthwiseConv2D( + kernel_size=3, activation="relu", padding="same" + ), + tf.keras.layers.MaxPool2D(pool_size=2), + tf.keras.layers.Flatten(), + tf.keras.layers.Dense(128, activation="relu"), + tf.keras.layers.Dense(10, activation="softmax"), + ] +) +model.compile( + loss="sparse_categorical_crossentropy", + optimizer=tf.keras.optimizers.Adam(0.001), + metrics=["accuracy"], +) + +model.fit( + ds_train, + epochs=6, + validation_data=ds_test, +) + +model.save("mnist.h5") + +quantize_model = tfmot.quantization.keras.quantize_model + +quantization_aware_model = quantize_model(model) + +quantization_aware_model.compile( + optimizer="adam", + 
loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True), + metrics=["accuracy"], +) + +quantization_aware_model.fit( + ds_train, + epochs=6, + validation_data=ds_test, +) + +quantization_aware_model.save("mnist-int8.h5") diff --git a/micro/pretrained_models/tensorflow/README.md b/micro/pretrained_models/tensorflow/README.md new file mode 100644 index 0000000000000000000000000000000000000000..e476f09e433af538e9923b9635572fcf36db8fee --- /dev/null +++ b/micro/pretrained_models/tensorflow/README.md @@ -0,0 +1,5 @@ +# Tensorflow frozen models + +## KWS + +The model is from . diff --git a/micro/pretrained_models/tensorflow/kws/kws-tc_resnet8-bf16.yml b/micro/pretrained_models/tensorflow/kws/kws-tc_resnet8-bf16.yml new file mode 100644 index 0000000000000000000000000000000000000000..9475c63d47af017dccf36a2e28f069a913f88474 --- /dev/null +++ b/micro/pretrained_models/tensorflow/kws/kws-tc_resnet8-bf16.yml @@ -0,0 +1,25 @@ +library_name: kws-tc_resnet8 +target_abis: [host] +model_graph_format: file +model_data_format: file +models: + kws_tc_resnet8_bf16: + platform: tensorflow + model_file_path: https://cdn.cnbj1.fds.api.mi-img.com/mace/miai-models/micro/tensorflow/kws/kws-tc_resnet8.pb + model_sha256_checksum: c552cf79cb64d3c755ae7d867c1c78b13f55f7589d46def1f70ce657c0db0d79 + subgraphs: + - input_tensors: + - input + input_shapes: + - 1,98,40,1 + output_tensors: + - output/softmax + output_shapes: + - 1,12 + runtime: cpu + data_type: bf16_fp32 + limit_opencl_kernel_time: 0 + nnlib_graph_mode: 0 + obfuscate: 0 + winograd: 0 + quantize: 0 diff --git a/micro/pretrained_models/tensorflow/kws/kws-tc_resnet8.yml b/micro/pretrained_models/tensorflow/kws/kws-tc_resnet8.yml new file mode 100644 index 0000000000000000000000000000000000000000..bdf38ffb5a4764425742ed6973d495d87e53efb6 --- /dev/null +++ b/micro/pretrained_models/tensorflow/kws/kws-tc_resnet8.yml @@ -0,0 +1,25 @@ +library_name: kws-tc_resnet8 +target_abis: [host] +model_graph_format: file 
+model_data_format: file +models: + kws_tc_resnet8: + platform: tensorflow + model_file_path: https://cdn.cnbj1.fds.api.mi-img.com/mace/miai-models/micro/tensorflow/kws/kws-tc_resnet8.pb + model_sha256_checksum: c552cf79cb64d3c755ae7d867c1c78b13f55f7589d46def1f70ce657c0db0d79 + subgraphs: + - input_tensors: + - input + input_shapes: + - 1,98,40,1 + output_tensors: + - output/softmax + output_shapes: + - 1,12 + runtime: cpu + data_type: fp32_fp32 + limit_opencl_kernel_time: 0 + nnlib_graph_mode: 0 + obfuscate: 0 + winograd: 0 + quantize: 0 diff --git a/micro/test/CMakeLists.txt b/micro/test/CMakeLists.txt index 27d7f069bd132ef7c7ffe0bd53d704d1cbe67e55..3552a56650dd86636da9a137e37476cc1b70892d 100644 --- a/micro/test/CMakeLists.txt +++ b/micro/test/CMakeLists.txt @@ -1,7 +1,6 @@ add_subdirectory(ccutils) if(NOT HEXAGON) - include(${PROJECT_SOURCE_DIR}/third_party/googletest/googletest.cmake) add_subdirectory(ccunit) endif() diff --git a/micro/test/ccunit/CMakeLists.txt b/micro/test/ccunit/CMakeLists.txt index f760593893b629e69aea968e1a292da32aab74cb..46a5eac21dfbf7b19cfca2dd7ad96abc5ca1d138 100644 --- a/micro/test/ccunit/CMakeLists.txt +++ b/micro/test/ccunit/CMakeLists.txt @@ -1,5 +1,3 @@ - - add_executable(micro_ops_test micro/ops/stack_test.cc micro/ops/reshape_test.cc @@ -20,25 +18,23 @@ add_executable(micro_ops_test micro/ops/softmax_test.cc micro/ops/bias_add_test.cc micro/ops/expand_dims_test.cc + micro/ops/concat_test.cc ) + +if(MACE_MICRO_ENABLE_CMSIS) + target_link_libraries(micro_ops_test + PRIVATE micro_ops_nhwc_cmsis_nn + ) + target_compile_options(micro_ops_test + PRIVATE "-DMACE_MICRO_ENABLE_CMSIS=ON" + ) +endif() + target_link_libraries(micro_ops_test PRIVATE micro_base - PRIVATE micro_ops_for_test + PRIVATE micro_ops + PRIVATE micro_framework_for_optest PRIVATE micro_ccutils PRIVATE gtest PRIVATE gtest_main ) - -if(MICRO_MODEL_NAME) - add_executable(micro_cc_test - micro/model/net_def_test.cc - micro/framework/graph_test.cc - 
micro/codegen/engine_test.cc - ) - target_link_libraries(micro_cc_test - micro_engine - gtest - gtest_main - ) - target_compile_definitions(micro_cc_test PRIVATE "-DMICRO_MODEL_NAME=${MICRO_MODEL_NAME}") -endif() diff --git a/micro/test/ccunit/micro/codegen/engine_test.cc b/micro/test/ccunit/micro/codegen/engine_test.cc index 60f2841f7426773f79027cd9d0f9307d80e57168..f21c3eb6812369a0888604ab02709aa85b24e15e 100644 --- a/micro/test/ccunit/micro/codegen/engine_test.cc +++ b/micro/test/ccunit/micro/codegen/engine_test.cc @@ -33,8 +33,9 @@ class EngineTest : public ::testing::Test { void OutputAllInfo() { MaceMicroEngine *micro_engine = NULL; - MACE_ASSERT(MICRO_MODEL_NAME::GetMicroEngineSingleton(µ_engine) - == MACE_SUCCESS && micro_engine != NULL); + MACE_ASSERT(MICRO_MODEL_NAME::GetMicroEngineSingleton(µ_engine) == + MACE_SUCCESS && + micro_engine != NULL); float input_buffer[1 * 1 * 128 * 9] = {0}; int32_t input_shape[] = {1, 1, 128, 9}; diff --git a/micro/test/ccunit/micro/ops/eltwise_test.cc b/micro/test/ccunit/micro/ops/eltwise_test.cc index 4d0fe7914f3edb2a796bbeabbe549e8be2f5e5a0..49cf75236447c19cf55d4ff8895d06efca3ef29d 100644 --- a/micro/test/ccunit/micro/ops/eltwise_test.cc +++ b/micro/test/ccunit/micro/ops/eltwise_test.cc @@ -14,8 +14,10 @@ #include "gtest/gtest.h" #include "micro/ops/eltwise.h" +#include "micro/ops/nhwc/cmsis_nn/arm_eltwise_int8.h" #include "micro/ops/gtest_utils.h" #include "micro/ops/substitute_op.h" +#include "micro/ops/test_quantize_utils.h" #include "micro/ops/test_utils.h" namespace micro { @@ -494,6 +496,91 @@ TEST_F(EltwiseOpTest, TensorGeneralBroadcastCPU) { dims1121, output_9, expect_9, dims1123); } +#ifdef MACE_MICRO_ENABLE_CMSIS + +namespace { + +void TestEltwiseQuantInt8(const int32_t *input_dims, + const uint32_t input_dim_size, + eltwise::Type type) { + int32_t shape_size = base::GetShapeSize(input_dim_size, input_dims); + float *input0 = new float[shape_size]; + float *input1 = new float[shape_size]; + 
FillNormalRandomInput(input0, shape_size); + FillNormalRandomInput(input1, shape_size); + float *expect_output = new float[shape_size]; + const uint32_t MAX_OUTPUT_NUM = 10; + int32_t *expect_output_dims = new int32_t[MAX_OUTPUT_NUM]; + + EltwiseOp eltwsie_op; + framework::SubstituteOp substitude_op; + substitude_op.AddInput(input0, input_dims, input_dim_size) + .AddInput(input1, input_dims, input_dim_size) + .AddArg("type", static_cast(type)) + .AddOutput(expect_output, expect_output_dims, MAX_OUTPUT_NUM); + eltwsie_op.Init( + NULL, reinterpret_cast(&substitude_op), NULL); + eltwsie_op.Run(); + uint32_t expect_output_dim_size = substitude_op.GetOutputShapeDimSize(0); + + int8_t *input0_int8 = new int8_t[shape_size]; + int8_t *input1_int8 = new int8_t[shape_size]; + int8_t *output_int8 = new int8_t[shape_size]; + float *output = new float[shape_size]; + int32_t *output_dims = new int32_t[MAX_OUTPUT_NUM]; + QuantizeInfo input_quant_info0; + QuantizeInfo input_quant_info1; + AutoQuantizeInt8(input0, shape_size, input0_int8, &input_quant_info0.scale, + &input_quant_info0.zero); + AutoQuantizeInt8(input1, shape_size, input1_int8, &input_quant_info1.scale, + &input_quant_info1.zero); + QuantizeInfo output_quant_info = {0.0f, 0}; + AdjustRangeInt8(expect_output, shape_size, &output_quant_info.scale, + &output_quant_info.zero); + + ArmEltwiseInt8Op eltwsie_op_int8; + framework::SubstituteOp substitude_op_int8; + substitude_op_int8 + .AddInput(input0_int8, input_dims, input_dim_size, input_quant_info0) + .AddInput(input1_int8, input_dims, input_dim_size, input_quant_info1) + .AddArg("type", static_cast(type)) + .AddOutput(output_int8, output_dims, MAX_OUTPUT_NUM, output_quant_info); + eltwsie_op_int8.Init( + NULL, reinterpret_cast(&substitude_op_int8), + NULL); + eltwsie_op_int8.Run(); + uint32_t output_dim_size = substitude_op_int8.GetOutputShapeDimSize(0); + + Dequantize(output_int8, shape_size, output_quant_info.scale, + output_quant_info.zero, output); + + 
ExpectTensorSimilar(expect_output, expect_output_dims, expect_output_dim_size, + output, output_dims, output_dim_size, 0.1); + + delete[] input0; + delete[] input1; + delete[] expect_output; + delete[] expect_output_dims; + delete[] input0_int8; + delete[] input1_int8; + delete[] output_int8; + delete[] output; + delete[] output_dims; +} + +} // namespace + +TEST_F(EltwiseOpTest, QuantInt8) { + const int32_t input_dims0[4] = {1, 32, 32, 16}; + TestEltwiseQuantInt8(input_dims0, 4, eltwise::SUM); + const int32_t input_dims1[4] = {2, 31, 31, 17}; + TestEltwiseQuantInt8(input_dims1, 4, eltwise::SUM); + const int32_t input_dims2[2] = {1, 31}; + TestEltwiseQuantInt8(input_dims2, 2, eltwise::SUM); +} + +#endif + } // namespace test } // namespace ops } // namespace micro diff --git a/micro/test/ccunit/micro/ops/matmul_test.cc b/micro/test/ccunit/micro/ops/matmul_test.cc index 4661352a97fa0c96b9403cc9d56bdc34e17a6282..86a0a0592d7803c5f20bead5fe279332aca55c56 100644 --- a/micro/test/ccunit/micro/ops/matmul_test.cc +++ b/micro/test/ccunit/micro/ops/matmul_test.cc @@ -15,8 +15,10 @@ #include "gtest/gtest.h" #include "micro/ops/gtest_utils.h" #include "micro/ops/matmul.h" +#include "micro/ops/nhwc/cmsis_nn/arm_mat_mul_int8.h" #include "micro/ops/substitute_op.h" #include "micro/ops/test_utils.h" +#include "micro/ops/test_quantize_utils.h" namespace micro { namespace ops { @@ -94,6 +96,94 @@ TEST_F(MatMulOpTest, SimpleCPU) { Simple2(); } +#ifdef MACE_MICRO_ENABLE_CMSIS + +namespace { + +void TestMatMulQuantInt8(int32_t lhs_rows, int32_t lhs_cols, int32_t rhs_cols) { + uint32_t input0_size = lhs_rows * lhs_cols; + uint32_t input1_size = lhs_cols * rhs_cols; + uint32_t output_size = lhs_rows * rhs_cols; + float *input0 = new float[input0_size]; + float *input1 = new float[input1_size]; + FillNormalRandomInput(input0, input0_size); + FillNormalRandomInput(input1, input1_size); + float *expect_output = new float[output_size]; + const uint32_t MAX_OUTPUT_NUM = 10; + int32_t 
*expect_output_dims = new int32_t[MAX_OUTPUT_NUM]; + + const int32_t input0_dims[2] = {lhs_rows, lhs_cols}; + // mat0 * tranpose(mat1) + const int32_t input1_dims[2] = {rhs_cols, lhs_cols}; + + MatMulOp matmul_op; + framework::SubstituteOp substitude_op; + substitude_op.AddInput(input0, input0_dims, 2) + .AddInput(input1, input1_dims, 2) + .AddArg("transpose_a", false) + .AddArg("transpose_b", true) + .AddOutput(expect_output, expect_output_dims, MAX_OUTPUT_NUM); + matmul_op.Init(NULL, reinterpret_cast(&substitude_op), + NULL); + matmul_op.Run(); + uint32_t expect_output_dim_size = substitude_op.GetOutputShapeDimSize(0); + + int8_t *input0_int8 = new int8_t[input0_size]; + int8_t *input1_int8 = new int8_t[input1_size]; + int8_t *output_int8 = new int8_t[output_size]; + float *output = new float[output_size]; + int32_t *output_dims = new int32_t[MAX_OUTPUT_NUM]; + QuantizeInfo input_quant_info0; + QuantizeInfo input_quant_info1; + AutoQuantizeInt8(input0, input0_size, input0_int8, &input_quant_info0.scale, + &input_quant_info0.zero); + AutoQuantizeInt8Symmetric(input1, input1_size, input1_int8, + &input_quant_info1.scale); + QuantizeInfo output_quant_info = {0.0f, 0}; + AdjustRangeInt8(expect_output, output_size, &output_quant_info.scale, + &output_quant_info.zero); + + ArmMatMulInt8Op matmul_op_int8; + framework::SubstituteOp substitude_op_int8; + substitude_op_int8.AddInput(input0_int8, input0_dims, 2, input_quant_info0) + .AddInput(input1_int8, input1_dims, 2, input_quant_info1) + .AddArg("transpose_a", false) + .AddArg("transpose_b", true) + .AddOutput(output_int8, output_dims, MAX_OUTPUT_NUM, output_quant_info); + matmul_op_int8.Init( + NULL, reinterpret_cast(&substitude_op_int8), + NULL); + matmul_op_int8.Run(); + uint32_t output_dim_size = substitude_op_int8.GetOutputShapeDimSize(0); + + Dequantize(output_int8, output_size, output_quant_info.scale, + output_quant_info.zero, output); + + ExpectTensorSimilar(expect_output, expect_output_dims, 
expect_output_dim_size, + output, output_dims, output_dim_size, 0.1); + + delete[] input0; + delete[] input1; + delete[] expect_output; + delete[] expect_output_dims; + delete[] input0_int8; + delete[] input1_int8; + delete[] output_int8; + delete[] output; + delete[] output_dims; +} + +} // namespace + +TEST_F(MatMulOpTest, QuantInt8) { + TestMatMulQuantInt8(1, 8, 4); + TestMatMulQuantInt8(1, 1001, 63); + // WARNING(ZhangZhimin): Batch inputs is unsupported + // TestMatMulQuantInt8(3, 100, 100); +} + +#endif + } // namespace test } // namespace ops } // namespace micro diff --git a/micro/test/ccunit/micro/ops/nhwc/conv_2d_test.cc b/micro/test/ccunit/micro/ops/nhwc/conv_2d_test.cc index 067420dc0b81cb9649175597600a231bc3a39066..e26b8cae28bf30248f3462e71c78e052ba5c875f 100644 --- a/micro/test/ccunit/micro/ops/nhwc/conv_2d_test.cc +++ b/micro/test/ccunit/micro/ops/nhwc/conv_2d_test.cc @@ -15,8 +15,10 @@ #include "gtest/gtest.h" #include "micro/ops/gtest_utils.h" #include "micro/ops/nhwc/conv_2d_ref.h" +#include "micro/ops/nhwc/cmsis_nn/arm_conv_2d_int8.h" #include "micro/ops/substitute_op.h" #include "micro/ops/test_utils.h" +#include "micro/ops/test_quantize_utils.h" namespace micro { namespace ops { @@ -315,6 +317,141 @@ TEST_F(Conv2dOpTest, CPUConv1x1) { TestConv1x1(); } +#ifdef MACE_MICRO_ENABLE_CMSIS + +namespace { + +void TestConv2dQuantInt8(const int32_t batch, + const int32_t out_channels, + const int32_t in_channels, + const int32_t in_height, + const int32_t in_width, + const int32_t kernel_height, + const int32_t kernel_width, + enum Padding padding_type, + const int32_t stride_height, + const int32_t stride_width, + const int32_t dilation_height, + const int32_t dilation_width) { + uint32_t input0_size = batch * in_height * in_width * in_channels; + uint32_t input1_size = + out_channels * kernel_height * kernel_width * in_channels; + uint32_t max_output_size = batch * out_channels * + (in_height + kernel_height * dilation_height) * + (in_width + 
kernel_width * dilation_width); + int32_t bias_size = out_channels; + float *input0 = new float[input0_size]; + float *input1 = new float[input1_size]; + float *bias = new float[bias_size]; + FillNormalRandomInput(input0, input0_size); + FillNormalRandomInput(input1, input1_size); + FillNormalRandomInput(bias, bias_size); + float *expect_output = new float[max_output_size]; + const uint32_t MAX_OUTPUT_NUM = 10; + int32_t *expect_output_dims = new int32_t[MAX_OUTPUT_NUM]; + + const int32_t input0_dims[4] = {batch, in_height, in_width, in_channels}; + const int32_t input1_dims[4] = {out_channels, kernel_height, kernel_width, + in_channels}; + const int32_t bias_dims[1] = {bias_size}; + + const int32_t strides[2] = {stride_height, stride_width}; + const int32_t dilations[2] = {dilation_height, dilation_width}; + + Conv2dRefOp conv2d_op; + framework::SubstituteOp substitude_op; + substitude_op.AddInput(input0, input0_dims, 4) + .AddInput(input1, input1_dims, 4) + .AddInput(bias, bias_dims, 1) + .AddArg("padding", padding_type) + .AddRepeatArg("strides", strides, 2) + .AddRepeatArg("dilations", dilations, 2) + .AddOutput(expect_output, expect_output_dims, MAX_OUTPUT_NUM); + conv2d_op.Init(NULL, reinterpret_cast(&substitude_op), + NULL); + conv2d_op.Run(); + uint32_t expect_output_dim_size = substitude_op.GetOutputShapeDimSize(0); + uint32_t exepct_output_size = + base::GetShapeSize(expect_output_dim_size, expect_output_dims); + + int8_t *input0_int8 = new int8_t[input0_size]; + int8_t *input1_int8 = new int8_t[input1_size]; + int32_t *bias_int32 = new int32_t[bias_size]; + int8_t *output_int8 = new int8_t[max_output_size]; + float *output = new float[max_output_size]; + int32_t *output_dims = new int32_t[MAX_OUTPUT_NUM]; + QuantizeInfo input_quant_info0; + QuantizeInfo input_quant_info1; + AutoQuantizeInt8(input0, input0_size, input0_int8, &input_quant_info0.scale, + &input_quant_info0.zero); + AutoQuantizeInt8Symmetric(input1, input1_size, input1_int8, + 
&input_quant_info1.scale); + QuantizeInfo output_quant_info = {0.0f, 0}; + AdjustRangeInt8(expect_output, exepct_output_size, &output_quant_info.scale, + &output_quant_info.zero); + float bias_scale = input_quant_info0.scale * input_quant_info1.scale; + QuantizeWithScaleAndZeropoint(bias, bias_size, bias_scale, 0, bias_int32); + + ArmConv2dInt8Op conv2d_op_int8; + framework::SubstituteOp substitude_op_int8; + substitude_op_int8.AddInput(input0_int8, input0_dims, 4, input_quant_info0) + .AddInput(input1_int8, input1_dims, 4, input_quant_info1) + .AddInput(bias_int32, bias_dims, 1) + .AddArg("padding", padding_type) + .AddRepeatArg("strides", strides, 2) + .AddRepeatArg("dilations", dilations, 2) + .AddOutput(output_int8, output_dims, MAX_OUTPUT_NUM, output_quant_info); + conv2d_op_int8.Init( + NULL, reinterpret_cast(&substitude_op_int8), + NULL); + conv2d_op_int8.Run(); + uint32_t output_dim_size = substitude_op_int8.GetOutputShapeDimSize(0); + + uint32_t output_size = base::GetShapeSize(output_dim_size, output_dims); + Dequantize(output_int8, output_size, output_quant_info.scale, + output_quant_info.zero, output); + + ExpectTensorSimilar(expect_output, expect_output_dims, expect_output_dim_size, + output, output_dims, output_dim_size, 0.1); + + delete[] input0; + delete[] input1; + delete[] bias; + delete[] expect_output; + delete[] expect_output_dims; + delete[] input0_int8; + delete[] input1_int8; + delete[] bias_int32; + delete[] output_int8; + delete[] output; + delete[] output_dims; +} + +} // namespace + +TEST_F(Conv2dOpTest, QuantInt8) { + TestConv2dQuantInt8(1, 128, 64, 32, 32, 3, 3, VALID, 1, 1, 1, 1); + TestConv2dQuantInt8(1, 128, 64, 32, 32, 3, 3, SAME, 1, 1, 1, 1); + TestConv2dQuantInt8(1, 128, 64, 32, 32, 3, 3, FULL, 1, 1, 1, 1); + TestConv2dQuantInt8(1, 128, 64, 32, 54, 3, 3, FULL, 1, 1, 1, 1); + TestConv2dQuantInt8(1, 128, 512, 14, 13, 3, 3, SAME, 1, 1, 1, 1); + TestConv2dQuantInt8(1, 128, 64, 14, 13, 5, 5, SAME, 2, 2, 1, 1); + TestConv2dQuantInt8(1, 
128, 257, 28, 28, 3, 3, SAME, 1, 1, 1, 1); + TestConv2dQuantInt8(1, 1, 128, 56, 56, 3, 3, SAME, 2, 2, 1, 1); + TestConv2dQuantInt8(1, 2, 1, 1000, 1000, 4, 3, FULL, 2, 1, 1, 1); + TestConv2dQuantInt8(1, 128, 1, 1000, 1000, 4, 3, FULL, 2, 3, 1, 1); + + // dilations is unsupported + // TestConv2dQuantInt8(1, 128, 64, 32, 32, 3, 3, SAME, 1, 1, 2, 2); + // TestConv2dQuantInt8(1, 128, 64, 32, 32, 3, 3, SAME, 1, 1, 2, 1); + + // batch must be 1 + // TestConv2dQuantInt8(2, 128, 64, 32, 32, 3, 3, SAME, 1, 1, 1, 1); + // TestConv2dQuantInt8(4, 128, 64, 32, 32, 3, 3, SAME, 1, 1, 1, 1); +} + +#endif + } // namespace test } // namespace ops } // namespace micro diff --git a/micro/test/ccunit/micro/ops/nhwc/depthwise_conv_2d_test.cc b/micro/test/ccunit/micro/ops/nhwc/depthwise_conv_2d_test.cc index 3583f4c4f128a7aee1f5db6f91aacb6f5b4a361c..7f62ffef6b3bc879da741d89a5b780551464540c 100644 --- a/micro/test/ccunit/micro/ops/nhwc/depthwise_conv_2d_test.cc +++ b/micro/test/ccunit/micro/ops/nhwc/depthwise_conv_2d_test.cc @@ -15,8 +15,10 @@ #include "gtest/gtest.h" #include "micro/ops/gtest_utils.h" #include "micro/ops/nhwc/depthwise_conv_2d_ref.h" +#include "micro/ops/nhwc/cmsis_nn/arm_depthwise_conv_2d_int8.h" #include "micro/ops/substitute_op.h" #include "micro/ops/test_utils.h" +#include "micro/ops/test_quantize_utils.h" namespace micro { namespace ops { @@ -107,6 +109,146 @@ TEST_F(DepthwiseConv2dOpTest, MuiltiC2CPU) { MultiC2ValidTest(); } +#ifdef MACE_MICRO_ENABLE_CMSIS + +namespace { + +void TestDepthwiseConv2dQuantInt8(const int32_t batch, + const int32_t multiplier, + const int32_t in_channels, + const int32_t in_height, + const int32_t in_width, + const int32_t kernel_height, + const int32_t kernel_width, + enum Padding padding_type, + const int32_t stride_height, + const int32_t stride_width, + const int32_t dilation_height, + const int32_t dilation_width) { + uint32_t input0_size = batch * in_height * in_width * in_channels; + uint32_t input1_size = + multiplier * 
kernel_height * kernel_width * in_channels; + uint32_t max_output_size = batch * multiplier * in_channels * + (in_height + kernel_height * dilation_height) * + (in_width + kernel_width * dilation_width); + int32_t bias_size = multiplier * in_channels; + float *input0 = new float[input0_size]; + float *input1 = new float[input1_size]; + float *bias = new float[bias_size]; + FillNormalRandomInput(input0, input0_size); + FillNormalRandomInput(input1, input1_size); + FillNormalRandomInput(bias, bias_size); + float *expect_output = new float[max_output_size]; + const uint32_t MAX_OUTPUT_NUM = 10; + int32_t *expect_output_dims = new int32_t[MAX_OUTPUT_NUM]; + + const int32_t input0_dims[4] = {batch, in_height, in_width, in_channels}; + const int32_t input1_dims[4] = {multiplier, kernel_height, kernel_width, + in_channels}; + const int32_t bias_dims[1] = {bias_size}; + + const int32_t strides[2] = {stride_height, stride_width}; + const int32_t dilations[2] = {dilation_height, dilation_width}; + + DepthwiseConv2dRefOp depthwise_conv2d_op; + framework::SubstituteOp substitude_op; + substitude_op.AddInput(input0, input0_dims, 4) + .AddInput(input1, input1_dims, 4) + .AddInput(bias, bias_dims, 1) + .AddArg("padding", padding_type) + .AddRepeatArg("strides", strides, 2) + .AddRepeatArg("dilations", dilations, 2) + .AddOutput(expect_output, expect_output_dims, MAX_OUTPUT_NUM); + depthwise_conv2d_op.Init( + NULL, reinterpret_cast(&substitude_op), NULL); + depthwise_conv2d_op.Run(); + uint32_t expect_output_dim_size = substitude_op.GetOutputShapeDimSize(0); + uint32_t exepct_output_size = + base::GetShapeSize(expect_output_dim_size, expect_output_dims); + + int8_t *input0_int8 = new int8_t[input0_size]; + int8_t *input1_int8 = new int8_t[input1_size]; + int32_t *bias_int32 = new int32_t[bias_size]; + int8_t *output_int8 = new int8_t[max_output_size]; + float *output = new float[max_output_size]; + int32_t *output_dims = new int32_t[MAX_OUTPUT_NUM]; + QuantizeInfo 
input_quant_info0; + QuantizeInfo input_quant_info1; + AutoQuantizeInt8(input0, input0_size, input0_int8, &input_quant_info0.scale, + &input_quant_info0.zero); + AutoQuantizeInt8Symmetric(input1, input1_size, input1_int8, + &input_quant_info1.scale); + QuantizeInfo output_quant_info = {0.0f, 0}; + AdjustRangeInt8(expect_output, exepct_output_size, &output_quant_info.scale, + &output_quant_info.zero); + float bias_scale = input_quant_info0.scale * input_quant_info1.scale; + QuantizeWithScaleAndZeropoint(bias, bias_size, bias_scale, 0, bias_int32); + + ArmDepthwiseConv2dInt8Op depthwise_conv2d_op_int8; + framework::SubstituteOp substitude_op_int8; + substitude_op_int8.AddInput(input0_int8, input0_dims, 4, input_quant_info0) + .AddInput(input1_int8, input1_dims, 4, input_quant_info1) + .AddInput(bias_int32, bias_dims, 1) + .AddArg("padding", padding_type) + .AddRepeatArg("strides", strides, 2) + .AddRepeatArg("dilations", dilations, 2) + .AddOutput(output_int8, output_dims, MAX_OUTPUT_NUM, output_quant_info); + depthwise_conv2d_op_int8.Init( + NULL, reinterpret_cast(&substitude_op_int8), + NULL); + depthwise_conv2d_op_int8.Run(); + uint32_t output_dim_size = substitude_op_int8.GetOutputShapeDimSize(0); + + uint32_t output_size = base::GetShapeSize(output_dim_size, output_dims); + Dequantize(output_int8, output_size, output_quant_info.scale, + output_quant_info.zero, output); + + ExpectTensorSimilar(expect_output, expect_output_dims, expect_output_dim_size, + output, output_dims, output_dim_size, 0.1); + + delete[] input0; + delete[] input1; + delete[] bias; + delete[] expect_output; + delete[] expect_output_dims; + delete[] input0_int8; + delete[] input1_int8; + delete[] bias_int32; + delete[] output_int8; + delete[] output; + delete[] output_dims; +} + +} // namespace + +TEST_F(DepthwiseConv2dOpTest, QuantInt8) { + TestDepthwiseConv2dQuantInt8(1, 1, 1024, 7, 7, 3, 3, VALID, 1, 1, 1, 1); + TestDepthwiseConv2dQuantInt8(1, 1, 1024, 7, 7, 3, 3, SAME, 1, 1, 1, 1); + 
TestDepthwiseConv2dQuantInt8(1, 1, 1024, 7, 7, 3, 3, FULL, 1, 1, 1, 1); + + TestDepthwiseConv2dQuantInt8(1, 1, 512, 14, 13, 3, 3, SAME, 1, 1, 1, 1); + TestDepthwiseConv2dQuantInt8(1, 1, 512, 14, 13, 5, 5, SAME, 2, 2, 1, 1); + TestDepthwiseConv2dQuantInt8(1, 1, 256, 28, 28, 3, 3, SAME, 1, 1, 1, 1); + TestDepthwiseConv2dQuantInt8(1, 1, 128, 56, 56, 3, 3, SAME, 2, 2, 1, 1); + + TestDepthwiseConv2dQuantInt8(1, 1, 3, 1000, 1000, 4, 3, FULL, 2, 1, 1, 1); + TestDepthwiseConv2dQuantInt8(1, 1, 3, 1000, 1000, 4, 3, FULL, 2, 3, 1, 1); + + // dilations is unsupported + // TestDepthwiseConv2dQuantInt8(1, 1, 3, 1000, 1000, 3, 3, VALID, 1, 1, 2, 2); + // TestDepthwiseConv2dQuantInt8(1, 1, 3, 1000, 1000, 4, 3, FULL, 1, 1, 3, 5); + // TestDepthwiseConv2dQuantInt8(1, 1, 3, 1000, 1000, 4, 3, FULL, 1, 3, 3, 1); + + // batch must be 1 + // TestDepthwiseConv2dQuantInt8(3, 1, 128, 56, 56, 3, 3, SAME, 2, 2, 1, 1); + + // multiplier must be 1 + // TestDepthwiseConv2dQuantInt8(1, 2, 1024, 7, 7, 3, 3, SAME, 1, 1, 1, 1); + // TestDepthwiseConv2dQuantInt8(1, 2, 1024, 7, 7, 3, 3, SAME, 2, 2, 1, 1); +} + +#endif + } // namespace test } // namespace ops } // namespace micro diff --git a/micro/test/ccunit/micro/ops/nhwc/pooling_test.cc b/micro/test/ccunit/micro/ops/nhwc/pooling_test.cc index d7f7db329a8d98eafe7d65f383f0c72c2f6a3044..74e3f15e8e3fe461b86c6b0228ff1156e3402a80 100644 --- a/micro/test/ccunit/micro/ops/nhwc/pooling_test.cc +++ b/micro/test/ccunit/micro/ops/nhwc/pooling_test.cc @@ -16,7 +16,9 @@ #include "micro/ops/gtest_utils.h" #include "micro/ops/nhwc/pooling_ref.h" #include "micro/ops/nhwc/pooling_s4.h" +#include "micro/ops/nhwc/cmsis_nn/arm_pooling_int8.h" #include "micro/ops/substitute_op.h" +#include "micro/ops/test_quantize_utils.h" #include "micro/ops/test_utils.h" namespace micro { @@ -203,6 +205,134 @@ TEST_F(PoolingOpTest, TestPoolingOpSameAvg) { TestPoolingOpSameAvg(); } +#ifdef MACE_MICRO_ENABLE_CMSIS + +namespace { + +void TestPoolingQuantInt8(const int32_t *input_dims, + const uint32_t 
input_dim_size, + const int32_t *kernels, + const int32_t *strides, + Padding padding, + PoolingType pooling_type) { + int32_t input_size = base::GetShapeSize(input_dim_size, input_dims); + int32_t max_output_size = input_dims[0] * input_dims[3] * + (input_dims[1] + kernels[0]) * + (input_dims[2] + kernels[1]); + + float *input = new float[input_size]; + FillNormalRandomInput(input, input_size); + float *expect_output = new float[max_output_size]; + const uint32_t MAX_OUTPUT_DIM_SIZE = 100; + int32_t *expect_output_dims = new int32_t[MAX_OUTPUT_DIM_SIZE]; + + const int32_t dilations[2] = {1, 1}; + + PoolingRefOp pooling_op; + framework::SubstituteOp substitude_op; + substitude_op.AddInput(input, input_dims, input_dim_size) + .AddRepeatArg("strides", strides, 2) + .AddRepeatArg("kernels", kernels, 2) + .AddRepeatArg("dilations", dilations, 2) + .AddArg("padding", padding) + .AddArg("pooling_type", pooling_type) + .AddOutput(expect_output, expect_output_dims, MAX_OUTPUT_DIM_SIZE); + pooling_op.Init( + NULL, reinterpret_cast(&substitude_op), NULL); + pooling_op.Run(); + uint32_t expect_output_dim_size = substitude_op.GetOutputShapeDimSize(0); + + int8_t *input_int8 = new int8_t[input_size]; + int8_t *output_int8 = new int8_t[max_output_size]; + float *output = new float[max_output_size]; + int32_t *output_dims = new int32_t[MAX_OUTPUT_DIM_SIZE]; + QuantizeInfo input_quant_info; + AutoQuantizeInt8(input, input_size, input_int8, &input_quant_info.scale, + &input_quant_info.zero); + QuantizeInfo output_quant_info = input_quant_info; + + ArmPoolingInt8Op pooling_op_int8; + framework::SubstituteOp substitude_op_int8; + substitude_op_int8 + .AddInput(input_int8, input_dims, input_dim_size, input_quant_info) + .AddRepeatArg("strides", strides, 2) + .AddRepeatArg("kernels", kernels, 2) + .AddRepeatArg("dilations", dilations, 2) + .AddArg("padding", padding) + .AddArg("pooling_type", pooling_type) + .AddOutput(output_int8, output_dims, MAX_OUTPUT_DIM_SIZE, + 
output_quant_info); + pooling_op_int8.Init( + NULL, reinterpret_cast(&substitude_op_int8), + NULL); + pooling_op_int8.Run(); + uint32_t output_dim_size = substitude_op_int8.GetOutputShapeDimSize(0); + + uint32_t output_size = base::GetShapeSize(output_dim_size, output_dims); + Dequantize(output_int8, output_size, output_quant_info.scale, + output_quant_info.zero, output); + + ExpectTensorSimilar(expect_output, expect_output_dims, expect_output_dim_size, + output, output_dims, output_dim_size, 0.1); + + delete[] input; + delete[] expect_output; + delete[] expect_output_dims; + delete[] input_int8; + delete[] output_int8; + delete[] output; + delete[] output_dims; +} + +} // namespace +TEST_F(PoolingOpTest, Quant) { + const int32_t input_dims0[4] = {1, 7, 7, 1024}; + const int32_t kernels0[2] = {7, 7}; + const int32_t strides0[2] = {1, 1}; + TestPoolingQuantInt8(input_dims0, 4, kernels0, strides0, Padding::VALID, + PoolingType::AVG); + TestPoolingQuantInt8(input_dims0, 4, kernels0, strides0, Padding::VALID, + PoolingType::MAX); + TestPoolingQuantInt8(input_dims0, 4, kernels0, strides0, Padding::FULL, + PoolingType::AVG); + TestPoolingQuantInt8(input_dims0, 4, kernels0, strides0, Padding::SAME, + PoolingType::MAX); + const int32_t input_dims1[4] = {1, 3, 3, 2}; + const int32_t kernels1[2] = {3, 3}; + const int32_t strides1[2] = {1, 1}; + TestPoolingQuantInt8(input_dims1, 4, kernels1, strides1, Padding::SAME, + PoolingType::AVG); + const int32_t input_dims2[4] = {1, 3, 3, 2}; + const int32_t kernels2[2] = {2, 3}; + const int32_t strides2[2] = {1, 2}; + TestPoolingQuantInt8(input_dims2, 4, kernels2, strides2, Padding::SAME, + PoolingType::MAX); + // WARNING(ZhangZhimin): Batch inputs is unsupported + // const int32_t input_dims3[4] = {3,15,15,128}; + // const int32_t kernels3[2] = {4, 4}; + // const int32_t strides3[2] = {4, 4}; + // TestPoolingQuantInt8(input_dims3, 4, kernels3, strides3, Padding::SAME, + // PoolingType::AVG); + // const int32_t input_dims4[4] = 
{3,15,15,128}; + // const int32_t kernels4[2] = {4, 4}; + // const int32_t strides4[2] = {4, 4}; + // TestPoolingQuantInt8(input_dims4, 4, kernels4, strides4, Padding::SAME, + // PoolingType::MAX); + const int32_t input_dims5[4] = {1, 31, 31, 127}; + const int32_t kernels5[2] = {2, 2}; + const int32_t strides5[2] = {3, 3}; + TestPoolingQuantInt8(input_dims5, 4, kernels5, strides5, Padding::SAME, + PoolingType::AVG); + const int32_t input_dims6[4] = {1, 31, 31, 127}; + const int32_t kernels6[2] = {2, 2}; + const int32_t strides6[2] = {3, 3}; + TestPoolingQuantInt8(input_dims6, 4, kernels6, strides6, Padding::SAME, + PoolingType::MAX); +} + +#endif + + } // namespace test } // namespace ops } // namespace micro diff --git a/micro/test/ccunit/micro/ops/reshape_test.cc b/micro/test/ccunit/micro/ops/reshape_test.cc index aa05281d2d383b4d0bd909d2f3788515ec329693..cbe77e6e6a596176d7003da943d4bebee94b0b4d 100644 --- a/micro/test/ccunit/micro/ops/reshape_test.cc +++ b/micro/test/ccunit/micro/ops/reshape_test.cc @@ -33,7 +33,7 @@ void TestReshapeOp( T *y, int32_t *y_dims, const uint32_t y_dim_size, const T *e, const int32_t *e_dims, const uint32_t e_dim_size) { - ReshapeOp reshape_op; + ReshapeOp reshape_op; framework::SubstituteOp substitude_op; substitude_op.AddInput(input, input_dims, input_dim_size) .AddInput(shape, shape_dims, 1) diff --git a/micro/test/ccunit/micro/ops/softmax_test.cc b/micro/test/ccunit/micro/ops/softmax_test.cc index 0590256fddded792a04adf72b8c2f63ac4deb198..32facb83535e323cbcebdb56a0808e0a7085acae 100644 --- a/micro/test/ccunit/micro/ops/softmax_test.cc +++ b/micro/test/ccunit/micro/ops/softmax_test.cc @@ -13,9 +13,11 @@ // limitations under the License. 
#include "gtest/gtest.h" -#include "micro/ops/gtest_utils.h" #include "micro/ops/softmax.h" +#include "micro/ops/gtest_utils.h" +#include "micro/ops/nhwc/cmsis_nn/arm_softmax_int8.h" #include "micro/ops/substitute_op.h" +#include "micro/ops/test_quantize_utils.h" #include "micro/ops/test_utils.h" namespace micro { @@ -49,15 +51,89 @@ void Simple(bool use_log = false) { &substitude_op), NULL); softmax_op.Run(); - ExpectTensorNear(output, output_dims, output_dim_size, - expect, expect_dims, output_dim_size, 1e-5); + ExpectTensorNear(output, output_dims, output_dim_size, expect, + expect_dims, output_dim_size, 1e-5); } } // namespace + TEST_F(SoftmaxOpTest, CPUSimple) { Simple(); } TEST_F(SoftmaxOpTest, CPUSimpleUseLog) { Simple(true); } +#ifdef MACE_MICRO_ENABLE_CMSIS + +namespace { + +void TestSoftmaxQuantInt8(const int32_t *input_dims, + const uint32_t input_dim_size, + bool use_log = false) { + int32_t shape_size = base::GetShapeSize(input_dim_size, input_dims); + float *input = new float[shape_size]; + FillNormalRandomInput(input, shape_size); + float *expect_output = new float[shape_size]; + const uint32_t MAX_OUTPUT_NUM = 10; + int32_t *expect_output_dims = new int32_t[MAX_OUTPUT_NUM]; + + SoftmaxOp softmax_op; + framework::SubstituteOp substitude_op; + substitude_op.AddInput(input, input_dims, input_dim_size) + .AddArg("use_log", static_cast(use_log)) + .AddOutput(expect_output, expect_output_dims, MAX_OUTPUT_NUM); + softmax_op.Init( + NULL, reinterpret_cast(&substitude_op), NULL); + softmax_op.Run(); + uint32_t expect_output_dim_size = substitude_op.GetOutputShapeDimSize(0); + + int8_t *input_int8 = new int8_t[shape_size]; + int8_t *output_int8 = new int8_t[shape_size]; + float *output = new float[shape_size]; + int32_t *output_dims = new int32_t[MAX_OUTPUT_NUM]; + QuantizeInfo input_quant_info; + AutoQuantizeInt8(input, shape_size, input_int8, &input_quant_info.scale, + &input_quant_info.zero); + QuantizeInfo output_quant_info = {1.0f / 255.0f, -128}; + + 
ArmSoftmaxInt8Op softmax_op_int8; + framework::SubstituteOp substitude_op_int8; + substitude_op_int8 + .AddInput(input_int8, input_dims, input_dim_size, input_quant_info) + .AddArg("use_log", static_cast(use_log)) + .AddOutput(output_int8, output_dims, MAX_OUTPUT_NUM, output_quant_info); + softmax_op_int8.Init( + NULL, reinterpret_cast(&substitude_op_int8), + NULL); + softmax_op_int8.Run(); + uint32_t output_dim_size = substitude_op_int8.GetOutputShapeDimSize(0); + + Dequantize(output_int8, shape_size, output_quant_info.scale, + output_quant_info.zero, output); + + ExpectTensorSimilar(expect_output, expect_output_dims, expect_output_dim_size, + output, output_dims, output_dim_size, 0.1); + + delete[] input; + delete[] expect_output; + delete[] expect_output_dims; + delete[] input_int8; + delete[] output_int8; + delete[] output; + delete[] output_dims; +} + +} // namespace + +TEST_F(SoftmaxOpTest, QuantInt8) { + const int32_t input_dims0[2] = {5, 10}; + TestSoftmaxQuantInt8(input_dims0, 2); + const int32_t input_dims1[2] = {50, 100}; + TestSoftmaxQuantInt8(input_dims1, 2); + const int32_t input_dims2[2] = {1, 31}; + TestSoftmaxQuantInt8(input_dims2, 2); +} + +#endif + } // namespace test } // namespace ops } // namespace micro diff --git a/micro/test/ccutils/CMakeLists.txt b/micro/test/ccutils/CMakeLists.txt index 8b60050d4a11c16fe6ef8c2e543150524a7c2408..aa9246cfc7688d1ca21753d829e0e1b1e73bc74f 100644 --- a/micro/test/ccutils/CMakeLists.txt +++ b/micro/test/ccutils/CMakeLists.txt @@ -7,6 +7,7 @@ add_library(micro_ccutils target_include_directories(micro_ccutils PUBLIC .) 
target_link_libraries(micro_ccutils micro_base micro_framework_for_optest) +target_compile_options(micro_ccutils PUBLIC "-std=c++11") if(HEXAGON_STUB) add_library(micro_rpc_stub diff --git a/micro/test/ccutils/micro/ops/operator.test.cc b/micro/test/ccutils/micro/ops/operator.test.cc index 578402b3973ae8b3fc95147dce4be67896af994d..267314940c44910fe9a88ccc70c8175491b8d774 100644 --- a/micro/test/ccutils/micro/ops/operator.test.cc +++ b/micro/test/ccutils/micro/ops/operator.test.cc @@ -105,6 +105,16 @@ MaceStatus Operator::ResizeOutputShape(uint32_t idx, uint32_t dim_size, return fake_op_->ResizeOutputShape(idx, dim_size, dims); } +QuantizeInfo Operator::GetInputQuantizeInfo(uint32_t idx) { + return fake_op_->GetInputQuantizeInfo(idx); +} + +QuantizeInfo Operator::GetOutputQuantizeInfo(uint32_t idx) { + return fake_op_->GetOutputQuantizeInfo(idx); +} + + + #ifndef MACE_DEFINE_GET_ARG_BY_NAME_FUNC #define MACE_DEFINE_GET_ARG_BY_NAME_FUNC(T, FUNC) \ template <> \ diff --git a/micro/test/ccutils/micro/ops/substitute_op.cc b/micro/test/ccutils/micro/ops/substitute_op.cc index f65c01ec9f160934b73c01c23de9790d8851d42c..4c8735d1a85d43d4bb214f63b1feedb2488b428d 100644 --- a/micro/test/ccutils/micro/ops/substitute_op.cc +++ b/micro/test/ccutils/micro/ops/substitute_op.cc @@ -24,26 +24,32 @@ namespace framework { SubstituteOp::SubstituteOp() : input_idx_(0), output_idx_(0), arg_idx_(0), repeat_arg_idx_(0) {} -SubstituteOp &SubstituteOp::AddInput( - const void *input, const int32_t *dims, const uint32_t dims_size) { +SubstituteOp &SubstituteOp::AddInput(const void *input, + const int32_t *dims, + const uint32_t dims_size, + QuantizeInfo quant_info) { MACE_ASSERT1(input != NULL || dims != NULL || dims_size == 0, "Invalid param"); MACE_ASSERT1(input_idx_ < kMaxInputNum, "Not enough mem."); inputs_[input_idx_] = input; input_dims_[input_idx_] = dims; input_dim_sizes_[input_idx_] = dims_size; + input_quant_info_[input_idx_] = quant_info; ++input_idx_; return *this; } -SubstituteOp 
&SubstituteOp::AddOutput( - void *output, int32_t *dims, const uint32_t dims_size) { +SubstituteOp &SubstituteOp::AddOutput(void *output, + int32_t *dims, + const uint32_t dims_size, + QuantizeInfo quant_info) { MACE_ASSERT1(output != NULL || dims != NULL || dims_size == 0, "Invalid param"); MACE_ASSERT1(output_idx_ < kMaxOutputNum, "Not enough mem."); outputs_[output_idx_] = output; output_dims_[output_idx_] = dims; output_dim_sizes_[output_idx_] = dims_size; + output_quant_info_[output_idx_] = quant_info; ++output_idx_; return *this; } @@ -86,6 +92,14 @@ const int32_t *SubstituteOp::GetOutputShapeDims(uint32_t idx) { return output_dims_[idx]; } +QuantizeInfo SubstituteOp::GetInputQuantizeInfo(uint32_t idx) { + return input_quant_info_[idx]; +} + +QuantizeInfo SubstituteOp::GetOutputQuantizeInfo(uint32_t idx) { + return output_quant_info_[idx]; +} + MaceStatus SubstituteOp::ResizeOutputShape(uint32_t idx, uint32_t input_dim_size, const int32_t *input_dims) { diff --git a/micro/test/ccutils/micro/ops/substitute_op.h b/micro/test/ccutils/micro/ops/substitute_op.h index 0f5e60d471fb7a6c07bdb31d33d5d03b71ccba56..4b822d7a6b03f0ca90782bbaecc38170b3d42445 100644 --- a/micro/test/ccutils/micro/ops/substitute_op.h +++ b/micro/test/ccutils/micro/ops/substitute_op.h @@ -16,6 +16,7 @@ #define MICRO_TEST_CCUTILS_MICRO_OPS_SUBSTITUTE_OP_H_ #include "micro/base/logging.h" +#include "micro/base/types.h" #include "micro/base/utils.h" #include "micro/include/public/micro.h" @@ -43,9 +44,13 @@ class SubstituteOp { ~SubstituteOp() {} SubstituteOp &AddInput(const void *input, - const int32_t *dims, const uint32_t dims_size); + const int32_t *dims, + const uint32_t dims_size, + QuantizeInfo quant_info = QuantizeInfo{0.0f, 0}); SubstituteOp &AddOutput(void *output, - int32_t *dims, const uint32_t dims_size); + int32_t *dims, + const uint32_t dims_size, + QuantizeInfo quant_info = QuantizeInfo{0.0f, 0}); template SubstituteOp &AddArg(const char *name, T value) { @@ -106,6 +111,9 @@ class 
SubstituteOp { const int32_t *input_dims); MaceStatus ReuseInputBufferForOutput(uint32_t output_idx, uint32_t input_idx); + QuantizeInfo GetInputQuantizeInfo(uint32_t idx); + QuantizeInfo GetOutputQuantizeInfo(uint32_t idx); + template const T *GetInputData(uint32_t idx) { return static_cast(DoGetInputData(idx)); @@ -120,11 +128,13 @@ class SubstituteOp { const void *inputs_[kMaxInputNum]; const int32_t *input_dims_[kMaxInputNum]; uint32_t input_dim_sizes_[kMaxInputNum]; + QuantizeInfo input_quant_info_[kMaxInputNum]; uint32_t input_idx_; void *outputs_[kMaxOutputNum]; int32_t *output_dims_[kMaxOutputNum]; uint32_t output_dim_sizes_[kMaxOutputNum]; + QuantizeInfo output_quant_info_[kMaxOutputNum]; uint32_t output_idx_; // for arg diff --git a/micro/test/ccutils/micro/ops/test_quantize_utils.h b/micro/test/ccutils/micro/ops/test_quantize_utils.h new file mode 100644 index 0000000000000000000000000000000000000000..d15792cf45416826168b3764cc3a9720a1e55c9f --- /dev/null +++ b/micro/test/ccutils/micro/ops/test_quantize_utils.h @@ -0,0 +1,129 @@ +// Copyright 2020 The MACE Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#ifndef MICRO_TEST_CCUTILS_MICRO_OPS_TEST_QUANTIZE_UTILS_H_ +#define MICRO_TEST_CCUTILS_MICRO_OPS_TEST_QUANTIZE_UTILS_H_ + +#include +#include + +#include + +#include "micro/base/logging.h" +#include "micro/common/global_buffer.h" +#include "micro/include/public/micro.h" +#include "micro/port/api.h" + +namespace micro { +namespace ops { +namespace test { + +template +inline Q Saturate(float value) { + int rounded_value = static_cast(value); + if (rounded_value <= std::numeric_limits::lowest()) { + return std::numeric_limits::lowest(); + } else if (rounded_value >= std::numeric_limits::max()) { + return std::numeric_limits::max(); + } else { + return static_cast(rounded_value); + } +} + +inline void FindMinMax(const float *input, + const uint32_t size, + float *min_val, + float *max_val) { + float max_v = base::lowest(); + float min_v = base::highest(); + for (uint32_t i = 0; i < size; ++i) { + max_v = base::max(max_v, input[i]); + min_v = base::min(min_v, input[i]); + } + *min_val = min_v; + *max_val = max_v; +} + +template +inline void QuantizeWithScaleAndZeropoint(const float *input, + const uint32_t size, + float scale, + int32_t zero_point, + Q *output) { + float recip_scale = 1 / scale; + for (uint32_t i = 0; i < size; ++i) { + output[i] = Saturate(roundf(zero_point + recip_scale * input[i])); + } +} + +inline void AdjustRangeInt8(const float *input, + const uint32_t size, + float *scale, + int32_t *zero_point) { + float in_min_data; + float in_max_data; + FindMinMax(input, size, &in_min_data, &in_max_data); + in_max_data = base::max(0.f, in_max_data); + in_min_data = base::min(0.f, in_min_data); + + *scale = (in_max_data - in_min_data) / 255; + *zero_point = int8_t(-in_min_data / *scale - 128); +} + +inline void AdjustRangeInt8Symmetric(const float *input, + const uint32_t size, + float *scale) { + float in_min_data; + float in_max_data; + FindMinMax(input, size, &in_min_data, &in_max_data); + in_max_data = base::max(0.f, in_max_data); + in_min_data = 
base::min(0.f, in_min_data); + + float max_abs = base::max(base::abs(in_max_data), base::abs(in_min_data)); + + *scale = max_abs / 127.0f; +} + +inline void AutoQuantizeInt8(const float *input, + const uint32_t size, + int8_t *output, + float *scale, + int32_t *zero_point) { + AdjustRangeInt8(input, size, scale, zero_point); + QuantizeWithScaleAndZeropoint(input, size, *scale, *zero_point, output); +} + +inline void AutoQuantizeInt8Symmetric(const float *input, + const uint32_t size, + int8_t *output, + float *scale) { + AdjustRangeInt8Symmetric(input, size, scale); + QuantizeWithScaleAndZeropoint(input, size, *scale, 0, output); +} + +inline void Dequantize(const int8_t *input, + const uint32_t size, + const float scale, + const int32_t zero_point, + float *output) { + for (uint32_t i = 0; i < size; ++i) { + output[i] = static_cast(scale * (input[i] - zero_point)); + } +} + +} // namespace test +} // namespace ops +} // namespace micro + +#endif // MICRO_TEST_CCUTILS_MICRO_OPS_TEST_QUANTIZE_UTILS_H_ diff --git a/micro/test/ccutils/micro/ops/test_utils.cc b/micro/test/ccutils/micro/ops/test_utils.cc index 7cbe5163e5383e1bcb0da3be9784991c66846d3d..bb6cd0f3edd287f2540ca3197427a727a9007f4c 100644 --- a/micro/test/ccutils/micro/ops/test_utils.cc +++ b/micro/test/ccutils/micro/ops/test_utils.cc @@ -15,6 +15,8 @@ #include "micro/ops/test_utils.h" +#include + namespace micro { namespace ops { namespace test { @@ -67,6 +69,30 @@ void FillRandomInput(void *input, const int32_t shape_size) { } } +void FillUniformRandomInput(float *input, + const int32_t shape_size, + float low, + float up) { + std::random_device rd; + std::mt19937 gen(rd()); + std::uniform_real_distribution dis(low, up); + for (int n = 0; n < shape_size; ++n) { + input[n] = dis(gen); + } +} + +void FillNormalRandomInput(float *input, + const int32_t shape_size, + float mean, + float std) { + std::random_device rd; + std::mt19937 gen(rd()); + std::normal_distribution dis(mean, std); + for (int n = 0; n < 
shape_size; ++n) { + input[n] = dis(gen); + } +} + } // namespace test } // namespace ops } // namespace micro diff --git a/micro/test/ccutils/micro/ops/test_utils.h b/micro/test/ccutils/micro/ops/test_utils.h index fc64e0b7c33dbe10d1b52afd9c6eb7c737d9326b..91c29025997bfe6107154d18e52a02736a256fe0 100644 --- a/micro/test/ccutils/micro/ops/test_utils.h +++ b/micro/test/ccutils/micro/ops/test_utils.h @@ -38,6 +38,16 @@ T *input = common::test::GetGlobalBuffer()->GetBuffer(shape_size); \ micro::ops::test::FillRandomInput(input, shape_size * sizeof(T)) #endif +void FillUniformRandomInput(float *input, + const int32_t shape_size, + float low = -50.0f, + float up = 50.0f); + +void FillNormalRandomInput(float *input, + const int32_t shape_size, + float mean = 0.0f, + float std = 1.0f); + } // namespace test } // namespace ops } // namespace micro diff --git a/micro/third_party/CMSIS_5 b/micro/third_party/CMSIS_5 new file mode 160000 index 0000000000000000000000000000000000000000..378acfb6490a82ba90e1ffb4bfd4e602668b180a --- /dev/null +++ b/micro/third_party/CMSIS_5 @@ -0,0 +1 @@ +Subproject commit 378acfb6490a82ba90e1ffb4bfd4e602668b180a diff --git a/micro/third_party/gflags b/micro/third_party/gflags new file mode 160000 index 0000000000000000000000000000000000000000..a386bd0f204cf99db253b3e84c56795dea8c397f --- /dev/null +++ b/micro/third_party/gflags @@ -0,0 +1 @@ +Subproject commit a386bd0f204cf99db253b3e84c56795dea8c397f diff --git a/micro/third_party/gflags/COPYING.txt b/micro/third_party/gflags/COPYING.txt deleted file mode 100644 index d15b0c24134de8ce0185ac22cb2dd96e23911fab..0000000000000000000000000000000000000000 --- a/micro/third_party/gflags/COPYING.txt +++ /dev/null @@ -1,28 +0,0 @@ -Copyright (c) 2006, Google Inc. -All rights reserved. 
- -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions are -met: - - * Redistributions of source code must retain the above copyright -notice, this list of conditions and the following disclaimer. - * Redistributions in binary form must reproduce the above -copyright notice, this list of conditions and the following disclaimer -in the documentation and/or other materials provided with the -distribution. - * Neither the name of Google Inc. nor the names of its -contributors may be used to endorse or promote products derived from -this software without specific prior written permission. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT -OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. diff --git a/micro/third_party/gflags/gflags.cmake b/micro/third_party/gflags/gflags.cmake deleted file mode 100644 index 2a4f0343d901f70406c97dfae2c63917381890b3..0000000000000000000000000000000000000000 --- a/micro/third_party/gflags/gflags.cmake +++ /dev/null @@ -1,50 +0,0 @@ -INCLUDE(ExternalProject) - -set(GFLAGS_SRCS_DIR "${MACE_THIRD_PARTY_DIR}/gflags") -set(GFLAGS_INSTALL_DIR "${MACE_THIRD_PARTY_DIR}/install/gflags") -set(GFLAGS_INCLUDE_DIR "${GFLAGS_INSTALL_DIR}/include" CACHE PATH "gflags include directory." 
FORCE) - -if(MSVC) - set(GFLAGS_LIBRARIES "${GFLAGS_INSTALL_DIR}/lib/gflags_static.lib" CACHE FILEPATH "GFLAGS_LIBRARIES" FORCE) -else(MSVC) - set(GFLAGS_LIBRARIES "${GFLAGS_INSTALL_DIR}/lib/libgflags.a" CACHE FILEPATH "GFLAGS_LIBRARIES" FORCE) -endif(MSVC) - -include_directories(SYSTEM ${GFLAGS_INCLUDE_DIR}) - -# Mirror of https://github.com/gflags/gflags/archive/v2.2.2.zip -set(GFLAGS_URL "https://cnbj1.fds.api.xiaomi.com/mace/third-party/gflags/v2.2.2.zip") -set(GFLAGS_HASH "SHA256=19713a36c9f32b33df59d1c79b4958434cb005b5b47dc5400a7a4b078111d9b5") - -ExternalProject_Add( - gflags_gflags - URL_HASH "${GFLAGS_HASH}" - URL "${GFLAGS_URL}" - PREFIX ${GFLAGS_SRCS_DIR} - UPDATE_COMMAND "" - BUILD_BYPRODUCTS ${GFLAGS_LIBRARIES} - CMAKE_ARGS -DCMAKE_INSTALL_PREFIX=${GFLAGS_INSTALL_DIR} - -DCMAKE_CXX_COMPILER_LAUNCHER=${CMAKE_CXX_COMPILER_LAUNCHER} - -DCMAKE_C_COMPILER_LAUNCHER=${CMAKE_C_COMPILER_LAUNCHER} - -DBUILD_STATIC_LIBS=ON - -DBUILD_TESTING=OFF - -DCMAKE_BUILD_TYPE=Release - -DCMAKE_GENERATOR=${CMAKE_GENERATOR} - ${THIRD_PARTY_EXTRA_CMAKE_ARGS} -) - -if(MSVC) - add_custom_command(TARGET gflags_gflags POST_BUILD - COMMAND if $==1 (${CMAKE_COMMAND} -E copy ${GFLAGS_INSTALL_DIR}/lib/gflags_static_debug.lib ${GFLAGS_INSTALL_DIR}/lib/gflags_static.lib) - ) -endif(MSVC) - -add_library(gflags STATIC IMPORTED GLOBAL) -set_property(TARGET gflags PROPERTY IMPORTED_LOCATION ${GFLAGS_LIBRARIES}) -add_dependencies(gflags gflags_gflags) - -if(MSVC) - set_target_properties(gflags - PROPERTIES IMPORTED_LINK_INTERFACE_LIBRARIES - Shlwapi.lib) -endif(MSVC) diff --git a/micro/third_party/googletest b/micro/third_party/googletest new file mode 160000 index 0000000000000000000000000000000000000000..e6e2d3b7614ff4e6017d8968bd4c3f579133666e --- /dev/null +++ b/micro/third_party/googletest @@ -0,0 +1 @@ +Subproject commit e6e2d3b7614ff4e6017d8968bd4c3f579133666e diff --git a/micro/third_party/googletest/LICENSE b/micro/third_party/googletest/LICENSE deleted file mode 100644 index 
1941a11f8ce94389160b458927a29ba217542818..0000000000000000000000000000000000000000 --- a/micro/third_party/googletest/LICENSE +++ /dev/null @@ -1,28 +0,0 @@ -Copyright 2008, Google Inc. -All rights reserved. - -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions are -met: - - * Redistributions of source code must retain the above copyright -notice, this list of conditions and the following disclaimer. - * Redistributions in binary form must reproduce the above -copyright notice, this list of conditions and the following disclaimer -in the documentation and/or other materials provided with the -distribution. - * Neither the name of Google Inc. nor the names of its -contributors may be used to endorse or promote products derived from -this software without specific prior written permission. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT -OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
diff --git a/micro/third_party/googletest/googletest.BUILD b/micro/third_party/googletest/googletest.BUILD deleted file mode 100644 index 4612f3ba6a33621d9810ae63926f7ca7b59489dc..0000000000000000000000000000000000000000 --- a/micro/third_party/googletest/googletest.BUILD +++ /dev/null @@ -1,32 +0,0 @@ -licenses(["notice"]) - -exports_files(["LICENSE"]) - -cc_library( - name = "gtest", - srcs = [ - "googletest/src/gtest-all.cc", - "googlemock/src/gmock-all.cc", - ], - hdrs = glob([ - "**/*.h", - "googletest/src/*.cc", - "googlemock/src/*.cc", - ]), - includes = [ - "googlemock", - "googletest", - "googletest/include", - "googlemock/include", - ], - linkopts = ["-pthread"], - visibility = ["//visibility:public"], -) - -cc_library( - name = "gtest_main", - srcs = ["googlemock/src/gmock_main.cc"], - linkopts = ["-pthread"], - visibility = ["//visibility:public"], - deps = [":gtest"], -) diff --git a/micro/third_party/googletest/googletest.cmake b/micro/third_party/googletest/googletest.cmake deleted file mode 100644 index bb5e02e55cfbf2896e3347d4848710e12ef1bf62..0000000000000000000000000000000000000000 --- a/micro/third_party/googletest/googletest.cmake +++ /dev/null @@ -1,52 +0,0 @@ - enable_testing() - - include(ExternalProject) - - set(GTEST_SOURCES_DIR ${MACE_THIRD_PARTY_DIR}/gtest) - set(GTEST_INSTALL_DIR ${MACE_THIRD_PARTY_DIR}/install/gtest) - set(GTEST_INCLUDE_DIR "${GTEST_INSTALL_DIR}/include" CACHE PATH "gtest include directory." FORCE) - - include_directories(SYSTEM ${GTEST_INCLUDE_DIR}) - - if(MSVC) - set(GTEST_LIBRARIES - "${GTEST_INSTALL_DIR}/lib/gtest.lib" CACHE FILEPATH "gtest libraries." FORCE) - set(GTEST_MAIN_LIBRARIES - "${GTEST_INSTALL_DIR}/lib/gtest_main.lib" CACHE FILEPATH "gtest main libraries." FORCE) - else(MSVC) - set(GTEST_LIBRARIES - "${GTEST_INSTALL_DIR}/lib/libgtest.a" CACHE FILEPATH "gtest libraries." FORCE) - set(GTEST_MAIN_LIBRARIES - "${GTEST_INSTALL_DIR}/lib/libgtest_main.a" CACHE FILEPATH "gtest main libraries." 
FORCE) - endif(MSVC) - - # Mirror of "https://github.com/google/googletest/archive/release-1.8.0.zip" - set(GTEST_URL "https://cnbj1.fds.api.xiaomi.com/mace/third-party/googletest/googletest-release-1.8.0.zip") - set(GTEST_HASH "SHA256=f3ed3b58511efd272eb074a3a6d6fb79d7c2e6a0e374323d1e6bcbcc1ef141bf") - - ExternalProject_Add( - extern_gtest - URL_HASH "${GTEST_HASH}" - URL "${GTEST_URL}" - PREFIX ${GTEST_SOURCES_DIR} - UPDATE_COMMAND "" - BUILD_BYPRODUCTS ${GTEST_LIBRARIES} ${GTEST_MAIN_LIBRARIES} - CMAKE_ARGS -DCMAKE_BUILD_TYPE=${CMAKE_BUILD_TYPE} - -DCMAKE_CXX_COMPILER_LAUNCHER=${CMAKE_CXX_COMPILER_LAUNCHER} - -DCMAKE_C_COMPILER_LAUNCHER=${CMAKE_C_COMPILER_LAUNCHER} - -DCMAKE_INSTALL_PREFIX=${GTEST_INSTALL_DIR} - -DBUILD_GMOCK=ON - -Dgtest_disable_pthreads=ON - -Dgtest_force_shared_crt=ON - -DCMAKE_BUILD_TYPE=${CMAKE_BUILD_TYPE} - -DCMAKE_GENERATOR=${CMAKE_GENERATOR} - ${THIRD_PARTY_EXTRA_CMAKE_ARGS} - ) - - add_library(gtest STATIC IMPORTED GLOBAL) - set_property(TARGET gtest PROPERTY IMPORTED_LOCATION ${GTEST_LIBRARIES}) - add_dependencies(gtest extern_gtest) - - add_library(gtest_main STATIC IMPORTED GLOBAL) - set_property(TARGET gtest_main PROPERTY IMPORTED_LOCATION ${GTEST_MAIN_LIBRARIES}) - add_dependencies(gtest_main extern_gtest) diff --git a/micro/third_party/third_party.cmake b/micro/third_party/third_party.cmake deleted file mode 100644 index a5b0fcdb80c35a94051342d153fefc44c44d2e80..0000000000000000000000000000000000000000 --- a/micro/third_party/third_party.cmake +++ /dev/null @@ -1,42 +0,0 @@ -set(MACE_THIRD_PARTY_DIR "${PROJECT_BINARY_DIR}/third_party" CACHE STRING "Third party libraries download & build directories.") - -# Forwarding the cross compile flags -set(THIRD_PARTY_EXTRA_CMAKE_ARGS - -DCMAKE_C_FLAGS=${MACE_CC_FLAGS} - -DCMAKE_CXX_FLAGS=${MACE_CC_FLAGS} -) - -if(CMAKE_TOOLCHAIN_FILE) - set(THIRD_PARTY_EXTRA_CMAKE_ARGS - ${THIRD_PARTY_EXTRA_CMAKE_ARGS} - -DCMAKE_TOOLCHAIN_FILE=${CMAKE_TOOLCHAIN_FILE} - ) -endif(CMAKE_TOOLCHAIN_FILE) - 
-if(CROSSTOOL_ROOT) - set(THIRD_PARTY_EXTRA_CMAKE_ARGS - ${THIRD_PARTY_EXTRA_CMAKE_ARGS} - -DCROSSTOOL_ROOT=${CROSSTOOL_ROOT} - ) -endif(CROSSTOOL_ROOT) - -if(ANDROID_ABI) - set(THIRD_PARTY_EXTRA_CMAKE_ARGS - ${THIRD_PARTY_EXTRA_CMAKE_ARGS} - -DANDROID_ABI=${ANDROID_ABI} - ) -endif(ANDROID_ABI) - -if(ANDROID_NATIVE_API_LEVEL) - set(THIRD_PARTY_EXTRA_CMAKE_ARGS - ${THIRD_PARTY_EXTRA_CMAKE_ARGS} - -DANDROID_NATIVE_API_LEVEL=${ANDROID_NATIVE_API_LEVEL} - ) -endif(ANDROID_NATIVE_API_LEVEL) - -if(PLATFORM) - set(THIRD_PARTY_EXTRA_CMAKE_ARGS - ${THIRD_PARTY_EXTRA_CMAKE_ARGS} - -DPLATFORM=${PLATFORM} - ) -endif(PLATFORM) diff --git a/micro/tools/CMakeLists.txt b/micro/tools/CMakeLists.txt index 8e52b4b42d053743d3aafe26535ac0d46f833dfb..fd28f7a821feab461f859f38a68c2973a7f6599f 100644 --- a/micro/tools/CMakeLists.txt +++ b/micro/tools/CMakeLists.txt @@ -1,7 +1,7 @@ if(MICRO_MODEL_NAME) - include (${PROJECT_SOURCE_DIR}/third_party/gflags/gflags.cmake) add_executable(micro_run_static micro_run.cc) - target_link_libraries(micro_run_static micro_engine gflags) + target_link_libraries(micro_run_static micro models gflags) + target_compile_options(micro_run_static PRIVATE "-std=c++11") target_compile_definitions(micro_run_static PRIVATE "-DMICRO_MODEL_NAME=${MICRO_MODEL_NAME}") if(NOT ANDROID) target_link_libraries(micro_run_static pthread) diff --git a/micro/tools/build_docker.sh b/micro/tools/build_docker.sh new file mode 100755 index 0000000000000000000000000000000000000000..815e354bbf25c74428b5de65bf3f95f21ab47d7e --- /dev/null +++ b/micro/tools/build_docker.sh @@ -0,0 +1,7 @@ +#! /bin/bash + +cd docker/mace-micro-dev + +docker build . -f mace-micro-dev.dockerfile --tag mace-micro-dev + +cd ../.. 
\ No newline at end of file diff --git a/micro/tools/ci/build_mbed_example.sh b/micro/tools/ci/build_mbed_example.sh new file mode 100755 index 0000000000000000000000000000000000000000..39f10b02714eeb8d348739e6614f2cb4828d3a06 --- /dev/null +++ b/micro/tools/ci/build_mbed_example.sh @@ -0,0 +1,17 @@ +#! /bin/bash + +python tools/python/convert.py --config micro/pretrained_models/tensorflow/kws/kws-tc_resnet8.yml --enable_micro || exit -1 + +./micro/tools/cmake/cmake-build-gcc-arm-none-eabi.sh \ +-DARM_CPU=cortex-m7 \ +-DMACE_MICRO_ENABLE_CMSIS=ON \ +-DMACE_MICRO_ENABLE_HARDFP=OFF || exit -1 + +cp build/micro/gcc-arm-none-eabi/install micro/examples/classifier -r + +cd micro/examples/classifier + +mbed deploy || exit -1 +mbed compile -t GCC_ARM -m NUCLEO_F767ZI -D MICRO_MODEL_NAME=kws_tc_resnet8 -D MICRO_DATA_NAME=kws || exit -1 + +cd ../../.. \ No newline at end of file diff --git a/micro/tools/ci/cross_build.sh b/micro/tools/ci/cross_build.sh new file mode 100755 index 0000000000000000000000000000000000000000..6216e047edcb9254d093b080dd79e6655347954c --- /dev/null +++ b/micro/tools/ci/cross_build.sh @@ -0,0 +1,25 @@ +#! /bin/bash + +git submodule update --init . + +echo "Builds host float32" +rm -rf build/micro +./micro/tools/cmake/cmake-build-host.sh \ +-DMACE_MICRO_ENABLE_TESTS=ON \ +-DMACE_MICRO_ENABLE_CMSIS=ON || exit -1 + +echo "Builds host bfloat16" +rm -rf build/micro +./micro/tools/cmake/cmake-build-host.sh \ +-DMACE_MICRO_ENABLE_BFLOAT16=ON \ +-DMACE_MICRO_ENABLE_TESTS=ON \ +-DMACE_MICRO_ENABLE_CMSIS=ON || exit -1 + +echo "Builds gcc arm cortex-m7" +rm -rf build/micro +./micro/tools/cmake/cmake-build-gcc-arm-none-eabi.sh \ +-DARM_CPU=cortex-m7 \ +-DMACE_MICRO_ENABLE_TESTS=OFF \ +-DMACE_MICRO_ENABLE_CMSIS=ON || exit -1 + +cd .. 
\ No newline at end of file diff --git a/micro/tools/ci/host_build_and_run_examples.sh b/micro/tools/ci/host_build_and_run_examples.sh new file mode 100755 index 0000000000000000000000000000000000000000..2a8a4dce0001bd74410b33ffab20a43779bd9b30 --- /dev/null +++ b/micro/tools/ci/host_build_and_run_examples.sh @@ -0,0 +1,24 @@ +#! /bin/bash + +python tools/python/convert.py --config micro/pretrained_models/tensorflow/kws/kws-tc_resnet8.yml --enable_micro || exit -1 + +rm -rf build/micro +./micro/tools/cmake/cmake-build-host.sh \ +-DMACE_MICRO_ENABLE_EXAMPLES=ON -DMICRO_MODEL_NAME=kws_tc_resnet8 -DMICRO_DATA_NAME=kws \ +-DMACE_MICRO_ENABLE_TESTS=OFF \ +-DMACE_MICRO_ENABLE_CMSIS=OFF || exit -1 + +./build/micro/host/examples/classifier/kws_tc_resnet8 + +python3 tools/python/convert.py --config micro/pretrained_models/keras/mnist/mnist-int8.yml --enable_micro || exit -1 + +rm -rf build/micro +./micro/tools/cmake/cmake-build-host.sh \ +-DMACE_MICRO_ENABLE_CMSIS=ON \ +-DMACE_MICRO_ENABLE_EXAMPLES=ON \ +-DMICRO_MODEL_NAME=mnist_int8 -DMICRO_DATA_NAME=mnist \ +-DMACE_MICRO_ENABLE_TESTS=OFF || exit -1 + +./build/micro/host/examples/classifier/mnist_int8 + +cd .. \ No newline at end of file diff --git a/micro/tools/ci/host_build_and_run_tests.sh b/micro/tools/ci/host_build_and_run_tests.sh new file mode 100755 index 0000000000000000000000000000000000000000..3f152f6d38c853ec15401e3337066b2b6751bb25 --- /dev/null +++ b/micro/tools/ci/host_build_and_run_tests.sh @@ -0,0 +1,16 @@ +#! /bin/bash + +git submodule update --init . + +rm -rf build/micro +./micro/tools/cmake/cmake-build-host.sh \ +-DMACE_MICRO_ENABLE_TESTS=ON \ +-DMACE_MICRO_ENABLE_CMSIS=ON || exit -1 + +echo "MACE Micro ut" +build/micro/host/test/ccunit/micro_ops_test || exit -1 + +echo "MACE Micro benchmark" +build/micro/host/test/ccbenchmark/micro_cc_benchmark || exit -1 + +cd .. 
\ No newline at end of file diff --git a/micro/tools/ci/model_convert.sh b/micro/tools/ci/model_convert.sh new file mode 100755 index 0000000000000000000000000000000000000000..c18acf4e7306007701e6421bf8b177a30b6ba7ba --- /dev/null +++ b/micro/tools/ci/model_convert.sh @@ -0,0 +1,43 @@ +#! /bin/bash + +rm -rf mace-models +rm -rf build/micro + +GIT_SSH_COMMAND="ssh -o UserKnownHostsFile=/dev/null -o StrictHostKeyChecking=no" git clone git@git.n.xiaomi.com:applied-machine-learning/sysml/mace-models.git + +git submodule update --init . || exit -1 + +CONF_FILE=mace-models/micro-models/har-cnn/har-cnn.yml +python tools/python/convert.py --config=${CONF_FILE} --enable_micro || exit -1 +python tools/python/run_micro.py --config $CONF_FILE --build --validate --model_name har_cnn || exit -1 +python tools/python/run_micro.py --config $CONF_FILE --model_name har_cnn --build --benchmark || exit -1 + +CONF_FILE=mace-models/micro-models/har-cnn/har-cnn-bf16.yml +python tools/python/convert.py --config=${CONF_FILE} --enable_micro || exit -1 +python tools/python/run_micro.py --config $CONF_FILE --build --validate --model_name har_cnn || exit -1 + +CONF_FILE=mace-models/micro-models/keras/mnist/mnist.yml +python3 tools/python/convert.py --config=${CONF_FILE} --enable_micro || exit -1 +python3 tools/python/run_micro.py --config $CONF_FILE --build --validate --model_name mnist || exit -1 + +CONF_FILE=mace-models/micro-models/keras/mnist/mnist-int8.yml +python3 tools/python/convert.py --config=${CONF_FILE} --enable_micro || exit -1 +python3 tools/python/run_micro.py --config $CONF_FILE --build --validate --model_name mnist_int8 || exit -1 + +CONF_FILE=mace-models/micro-models/keras/har/har.yml +python3 tools/python/convert.py --config=${CONF_FILE} --enable_micro || exit -1 +python3 tools/python/run_micro.py --config $CONF_FILE --build --validate --model_name har || exit -1 + +CONF_FILE=mace-models/micro-models/keras/har/har-int8.yml +python3 tools/python/convert.py 
--config=${CONF_FILE} --enable_micro || exit -1 +python3 tools/python/run_micro.py --config $CONF_FILE --build --validate --model_name har_int8 || exit -1 + +CONF_FILE=mace-models/micro-models/tensorflow/kws/kws-tc_resnet8.yml +python tools/python/convert.py --config=${CONF_FILE} --enable_micro || exit -1 +python tools/python/run_micro.py --config $CONF_FILE --build --validate --model_name kws_tc_resnet8 || exit -1 + +CONF_FILE=mace-models/micro-models/tensorflow/kws/kws-tc_resnet8-bf16.yml +python tools/python/convert.py --config=${CONF_FILE} --enable_micro || exit -1 +python tools/python/run_micro.py --config $CONF_FILE --build --validate --model_name kws_tc_resnet8_bf16 || exit -1 + +rm -rf mace-models diff --git a/micro/tools/cmake/cmake-build-arm64-v8a-hexagon-stub.sh b/micro/tools/cmake/cmake-build-arm64-v8a-hexagon-stub.sh index d8b5379eb083228ca36e01412277b365ea06cbc8..44011c8d77ed09c17099fe091201b0557b02adbf 100755 --- a/micro/tools/cmake/cmake-build-arm64-v8a-hexagon-stub.sh +++ b/micro/tools/cmake/cmake-build-arm64-v8a-hexagon-stub.sh @@ -1,17 +1,20 @@ #!/bin/bash + if [ -z "$ANDROID_NDK_HOME" ]; then echo "ANDROID_NDK_HOME is undefined"; + exit -1; fi if [ -z "$HEXAGON_SDK_ROOT" ]; then echo "HEXAGON_SDK_ROOT is undefined"; + exit -1; fi -BUILD_DIR=build/cmake-build/arm64-v8a +BUILD_DIR=build/micro/arm64-v8a mkdir -p ${BUILD_DIR} cd ${BUILD_DIR} -cmake ../../.. \ +cmake ../../../micro \ -DANDROID_ABI="arm64-v8a" \ -DCMAKE_TOOLCHAIN_FILE=${ANDROID_NDK_HOME}/build/cmake/android.toolchain.cmake \ -DHEXAGON_SDK_ROOT=${HEXAGON_SDK_ROOT} \ @@ -20,9 +23,10 @@ cmake ../../.. \ -DANDROID_STL=c++_shared \ -DMACE_ENABLE_RPCMEM=ON \ -DCMAKE_INSTALL_PREFIX=install \ + -DMACE_MICRO_ENABLE_EXAMPLES=OFF \ -DHEXAGON_STUB=ON \ $@ || exit 1 -cmake --build . -- -j || exit 1 +cmake --build . --target install --target install -- -j || exit 1 cd ../../.. 
diff --git a/micro/tools/cmake/cmake-build-gcc-arm-none-eabi.sh b/micro/tools/cmake/cmake-build-gcc-arm-none-eabi.sh index 45d098c19d69c8f70f1ae8caf6931d4a86f122c0..1b241cb0a8dc04871ecf2e05a098b2d717cbb97a 100755 --- a/micro/tools/cmake/cmake-build-gcc-arm-none-eabi.sh +++ b/micro/tools/cmake/cmake-build-gcc-arm-none-eabi.sh @@ -1,18 +1,17 @@ #!/bin/bash -if [ -z "$GCC_ARM_ROOT" ]; then - echo "GCC_ARM_ROOT is undefined"; -fi -BUILD_DIR=build/cmake-build/gcc-arm-none-eabi +BUILD_DIR=build/micro/gcc-arm-none-eabi + mkdir -p ${BUILD_DIR} cd ${BUILD_DIR} -cmake ../../.. \ - -DGCC_ARM_ROOT=${GCC_ARM_ROOT} \ +cmake ../../../micro \ -DCMAKE_TOOLCHAIN_FILE=./cmake/toolchain/gcc-arm-none-eabi.cmake \ + -DMACE_MICRO_ENABLE_CMSIS=ON \ -DCMAKE_INSTALL_PREFIX=install \ + -DMACE_MICRO_ENABLE_TESTS=OFF \ $@ || exit 1 -cmake --build . -- -j || exit 1 +cmake --build . --target install -- -j || exit 1 cd ../../.. diff --git a/micro/tools/cmake/cmake-build-hexagon6.sh b/micro/tools/cmake/cmake-build-hexagon6.sh index 1c78408954f84d20a17135720755870fdd7bd044..620f7f6bdada62822390bbc165b70ffe8d0bd00f 100755 --- a/micro/tools/cmake/cmake-build-hexagon6.sh +++ b/micro/tools/cmake/cmake-build-hexagon6.sh @@ -10,17 +10,18 @@ if [ -z "$HEXAGON_SDK_ROOT" ]; then echo "HEXAGON_SDK_ROOT is undefined"; fi -BUILD_DIR=build/cmake-build/hexagon6 +BUILD_DIR=build/micro/hexagon6 mkdir -p ${BUILD_DIR} cd ${BUILD_DIR} -cmake ../../.. \ +cmake ../../../micro \ -DHEXAGON_SDK_ROOT=${HEXAGON_SDK_ROOT} \ -DHEXAGON_TOOLS=${HEXAGON_TOOLS} \ + -DMACE_MICRO_ENABLE_EXAMPLES=OFF \ -DCMAKE_TOOLCHAIN_FILE=./cmake/toolchain/hexagon6.toolchain.cmake \ -DCMAKE_INSTALL_PREFIX=install \ $@ || exit 1 -cmake --build . -- -j || exit 1 +cmake --build . --target install -- -j || exit 1 cd ../../.. 
diff --git a/micro/tools/cmake/cmake-build-hexagon8.sh b/micro/tools/cmake/cmake-build-hexagon8.sh index 7baa821f040d3247f04ea8caf8d5e9992ae10b07..08f32dcfc0c4938547ad988633e42a8bd4fa6aed 100755 --- a/micro/tools/cmake/cmake-build-hexagon8.sh +++ b/micro/tools/cmake/cmake-build-hexagon8.sh @@ -10,16 +10,18 @@ if [ -z "$HEXAGON_SDK_ROOT" ]; then echo "HEXAGON_SDK_ROOT is undefined"; fi -BUILD_DIR=build/cmake-build/hexagon8 -mkdir -p ${BUILD_DIR} && cd ${BUILD_DIR} +BUILD_DIR=build/micro/hexagon8 +mkdir -p ${BUILD_DIR} +cd ${BUILD_DIR} -cmake ../../.. \ +cmake ../../../micro \ -DHEXAGON_SDK_ROOT=${HEXAGON_SDK_ROOT} \ -DHEXAGON_TOOLS=${HEXAGON_TOOLS} \ + -DMACE_MICRO_ENABLE_EXAMPLES=OFF \ -DCMAKE_TOOLCHAIN_FILE=./cmake/toolchain/hexagon8.toolchain.cmake \ -DCMAKE_INSTALL_PREFIX=install \ $@ || exit 1 -cmake --build . -- -j || exit 1 +cmake --build . --target install -- -j || exit 1 cd ../../.. diff --git a/micro/tools/cmake/cmake-build-host.sh b/micro/tools/cmake/cmake-build-host.sh index d9ec5b7bbd8a1886c479473c1cada1446775a151..9f5503255a1ecce8c46862477395c9a96dc3dec8 100755 --- a/micro/tools/cmake/cmake-build-host.sh +++ b/micro/tools/cmake/cmake-build-host.sh @@ -1,12 +1,12 @@ #!/bin/bash -BUILD_DIR=build/cmake-build/host + +BUILD_DIR=build/micro/host mkdir -p ${BUILD_DIR} && cd ${BUILD_DIR} -cmake ../../.. \ - -DMACE_MICRO_ENABLE_TESTS=ON \ +cmake ../../../micro \ -DCMAKE_INSTALL_PREFIX=install \ $@ || exit 1 -cmake --build . -- -j || exit 1 +cmake --build . --target install -- -j || exit 1 cd ../../.. diff --git a/tools/cmake/cmake-generate-proto-py-host.sh b/tools/cmake/cmake-generate-proto-py-host.sh new file mode 100755 index 0000000000000000000000000000000000000000..5573a1f9f6c911e2d24139a59905a795764f933a --- /dev/null +++ b/tools/cmake/cmake-generate-proto-py-host.sh @@ -0,0 +1,10 @@ +#!/bin/bash + +if [[ -z "$BUILD_DIR" ]]; then + BUILD_DIR=build/cmake-build/host +fi + +mkdir -p ${BUILD_DIR} && cd ${BUILD_DIR} +cmake ../../.. 
+make mace_proto_py micro_mem_proto_py -j +cd ../../.. diff --git a/tools/converter.py b/tools/converter.py index c8e38cdffdd5f6198074a3cd6885460c73b853e1..e7a5f05d11ef15d041ba5e6b4d8aab6bc71c7667 100644 --- a/tools/converter.py +++ b/tools/converter.py @@ -61,6 +61,7 @@ PlatformTypeStrs = [ "caffe", "onnx", "megengine", + "keras", "pytorch", ] PlatformType = Enum('PlatformType', [(ele, ele) for ele in PlatformTypeStrs], diff --git a/tools/cpplint.sh b/tools/cpplint.sh index 93e275a0d7df0830b50f89ae81f11ff938b08ddd..07555867d0f96225cbf6c81fe9d80b60136428d4 100755 --- a/tools/cpplint.sh +++ b/tools/cpplint.sh @@ -8,4 +8,12 @@ cpplint --linelength=80 --counting=detailed --root=test/ccutils $(find test/ccut cpplint --linelength=80 --counting=detailed --root=test/ccunit $(find test/ccunit -name "*.h" -or -name "*.cc") cpplint --linelength=80 --counting=detailed --root=test/ccbenchmark $(find test/ccbenchmark -name "*.h" -or -name "*.cc") -cpplint --linelength=80 --counting=detailed $(find ./micro -path ./micro/codegen -prune -o -name "*.h" -or -name "*.cc") +cpplint --linelength=80 --counting=detailed --filter=-build/include_what_you_use $(find micro/base -name "*.h" -or -name "*.cc") +cpplint --linelength=80 --counting=detailed $(find micro/framework -name "*.h" -or -name "*.cc") +cpplint --linelength=80 --counting=detailed $(find micro/include -name "*.h" -or -name "*.cc") +cpplint --linelength=80 --counting=detailed $(find micro/model -name "*.h" -or -name "*.cc") +cpplint --linelength=80 --counting=detailed --filter=-build/include_what_you_use $(find micro/ops -name "*.h" -or -name "*.cc") +cpplint --linelength=80 --counting=detailed $(find micro/port -name "*.h" -or -name "*.cc") +cpplint --linelength=80 --counting=detailed --filter=-build/include_what_you_use $(find micro/test \( -path micro/test/ccbenchmark/codegen -or -path micro/test/ccbaseline/codegen \) -prune -o -name "*.h" -or -name "*.cc") +cpplint --linelength=80 --counting=detailed $(find micro/tools 
-name "*.h" -or -name "*.cc") +cpplint --linelength=80 --counting=detailed --filter=-build/include_subdir $(find micro/examples \( -path micro/examples/classifier/mbed-os -or -path micro/examples/classifier/data -or -path micro/examples/classifier/install -or -path micro/examples/classifier/BUILD \) -prune -name "*.cc" -or -name "*.h") diff --git a/tools/python/convert.py b/tools/python/convert.py index b0ba9a2c54bcfcef334417acdea53a98688f18e4..2d9a4f8b79b16ef9016a70af0dcbc07b408f4d42 100644 --- a/tools/python/convert.py +++ b/tools/python/convert.py @@ -123,6 +123,8 @@ def convert_model(conf, quantize_stat): option.change_concat_ranges = conf[ModelKeys.change_concat_ranges] if ModelKeys.cl_mem_type in conf: option.cl_mem_type = conf[ModelKeys.cl_mem_type] + if ModelKeys.platform in conf: + option.platform = conf[ModelKeys.platform] if ModelKeys.runtime in conf: option.device = conf[ModelKeys.runtime] if option.device == DeviceType.CPU_GPU: @@ -190,6 +192,10 @@ def convert_model(conf, quantize_stat): from transform import megengine_converter converter = megengine_converter.MegengineConverter( option, conf["model_file_path"]) + elif platform == Platform.KERAS: + from transform import keras_converter + converter = keras_converter.KerasConverter( + option, conf["model_file_path"]) elif platform == Platform.PYTORCH: from transform import pytorch_converter converter = pytorch_converter.PytorchConverter( diff --git a/tools/python/micro/jinja2_files/micro_graph_data.h.jinja2 b/tools/python/micro/jinja2_files/micro_graph_data.h.jinja2 index 2b0e6c8c17952060ec34deb72b9cd8320922d5d8..8fb470df5b1904cf06ff3829283609500b435a29 100644 --- a/tools/python/micro/jinja2_files/micro_graph_data.h.jinja2 +++ b/tools/python/micro/jinja2_files/micro_graph_data.h.jinja2 @@ -20,7 +20,7 @@ namespace micro { namespace {{model_tag}} { uint8_t kGraphData[{{ data_size }}] = { - {% for d in embed_data %}{{"0x%02X, " % d }}{%endfor%} + {{ hex_bytes_string }} }; } // namespace {{model_tag}} diff 
--git a/tools/python/micro/jinja2_files/micro_model_data.h.jinja2 b/tools/python/micro/jinja2_files/micro_model_data.h.jinja2 index 4b664b3952141fd7473dfeade39a2a89739dfc3e..f702429ee4db70c88fe65307c1d8f52b3ead0165 100644 --- a/tools/python/micro/jinja2_files/micro_model_data.h.jinja2 +++ b/tools/python/micro/jinja2_files/micro_model_data.h.jinja2 @@ -20,7 +20,7 @@ namespace micro { namespace {{model_tag}} { const uint8_t kModelData[{{ data_size }}] = { - {% for d in embed_data %}{{"0x%02X, " % d }}{%endfor%} + {{ hex_bytes_string }} }; } // namespace {{model_tag}} diff --git a/tools/python/micro/jinja2_files/micro_net_def.h.jinja2 b/tools/python/micro/jinja2_files/micro_net_def.h.jinja2 index 5380ca11ca72b1ec5033a14bfd979ee8ba912562..b9b7380a8aaf6b435215a41f1994daa94d7f4c00 100644 --- a/tools/python/micro/jinja2_files/micro_net_def.h.jinja2 +++ b/tools/python/micro/jinja2_files/micro_net_def.h.jinja2 @@ -20,7 +20,7 @@ namespace micro { namespace {{model_tag}} { uint8_t kNetDef[{{ data_size }}] = { - {% for d in embed_data %}{{"0x%02X, " % d }}{%endfor%} + {{ hex_bytes_string }} }; } // namespace {{model_tag}} diff --git a/tools/python/micro/micro_codegen.py b/tools/python/micro/micro_codegen.py index 4646b62d1e358741f02e6408204b3dfed8aebf5b..bd3e5c15fc6e5c4696b28b328418e776e5aae158 100644 --- a/tools/python/micro/micro_codegen.py +++ b/tools/python/micro/micro_codegen.py @@ -57,14 +57,31 @@ class MicroCodeGen: with open(output_path, "w") as f: f.write(source) + def gen_micro_source_from_array(self, model_tag, embed_data, + jinja_file_name, output_path): + cwd = os.path.dirname(__file__) + j2_env = Environment(loader=FileSystemLoader(cwd), trim_blocks=True) + + template_name = JINJA2_DIR + jinja_file_name + + hex_bytes_string = ", ".join(map(hex, embed_data)) + + source = j2_env.get_template(template_name).render( + model_tag=model_tag, + hex_bytes_string=hex_bytes_string, + data_size=len(embed_data), + ) + with open(output_path, "w") as f: + f.write(source) + def 
gen_net_def_data(self, model_tag, model_def_data, output_path): embed_data = np.frombuffer(model_def_data, dtype=np.uint8) - self.gen_micro_source_from_bytes( - model_tag, embed_data, 'micro_net_def.h.jinja2', output_path) + self.gen_micro_source_from_array(model_tag, embed_data, + 'micro_net_def.h.jinja2', output_path) def gen_graph_data(self, model_tag, graph_data, output_path): embed_data = np.frombuffer(graph_data, dtype=np.uint8) - self.gen_micro_source_from_bytes(model_tag, embed_data, + self.gen_micro_source_from_array(model_tag, embed_data, 'micro_graph_data.h.jinja2', output_path) @@ -82,7 +99,7 @@ class MicroCodeGen: def gen_model_data(self, model_tag, model_param_data, output_path): embed_data = np.frombuffer(model_param_data, dtype=np.uint8) - self.gen_micro_source_from_bytes(model_tag, embed_data, + self.gen_micro_source_from_array(model_tag, embed_data, 'micro_model_data.h.jinja2', output_path) diff --git a/tools/python/micro/micro_op_converter.py b/tools/python/micro/micro_op_converter.py index a2c691eef649d0fe2a8c86b93f86efbbd5820d9f..865b261f6594be6e089a2fd7160c3c5a9d847207 100644 --- a/tools/python/micro/micro_op_converter.py +++ b/tools/python/micro/micro_op_converter.py @@ -33,6 +33,9 @@ class MicroOpConverter: def convert_filters_format(self): arg_format = ConverterUtil.get_arg(self.net_def, MaceKeyword.mace_filter_format_str) + if (arg_format.i == DataFormat.OHWI.value): + return + mace_check(arg_format.i == DataFormat.OIHW.value, "Invalid model") arg_format.i = DataFormat.OHWI.value @@ -40,7 +43,8 @@ class MicroOpConverter: for op in self.net_def.op: # OIHW => OHWI if (op.type == MaceOp.Conv2D.name or - op.type == MaceOp.DepthwiseConv2d.name) and \ + op.type == MaceOp.DepthwiseConv2d.name or + op.type == MaceOp.FullyConnected.name) and \ op.input[1] not in transposed_filter: print("transform filter: %s" % op.type) filter = self._consts[op.input[1]] diff --git a/tools/python/micro/micro_support_ops.py b/tools/python/micro/micro_support_ops.py 
index 5f9bb5f7cfd3f048fd4140d893bd95844d0c10b3..080b7d39792e3a0e7890b0e54622401610e5f527 100644 --- a/tools/python/micro/micro_support_ops.py +++ b/tools/python/micro/micro_support_ops.py @@ -67,6 +67,9 @@ McSupportedOps = [ MaceOp.Eltwise.name, mace_pb2.DT_FLOAT, None), OpDescriptor('micro/ops/eltwise.h', 'EltwiseOp', MaceOp.Eltwise.name, mace_pb2.DT_INT32, None), + OpDescriptor('micro/ops/nhwc/cmsis_nn/arm_eltwise_int8.h', + 'ArmEltwiseInt8Op', + MaceOp.Eltwise.name, mace_pb2.DT_INT8, None), OpDescriptor('micro/ops/activation.h', 'ActivationOp', MaceOp.Activation.name, mace_pb2.DT_FLOAT, DataFormat.NHWC), OpDescriptor('micro/ops/strided_slice.h', 'StridedSliceOp', @@ -92,8 +95,12 @@ McSupportedOps = [ DataFormat.NHWC), OpDescriptor('micro/ops/shape.h', 'ShapeOp', MaceOp.Shape.name, mace_pb2.DT_FLOAT, DataFormat.NHWC), - OpDescriptor('micro/ops/reshape.h', 'ReshapeOp', MaceOp.Reshape.name, + OpDescriptor('micro/ops/reshape.h', 'ReshapeOp', + MaceOp.Reshape.name, mace_pb2.DT_FLOAT, DataFormat.NHWC), + OpDescriptor('micro/ops/reshape.h', 'ReshapeOp', + MaceOp.Reshape.name, + mace_pb2.DT_INT8, DataFormat.NHWC), OpDescriptor('micro/ops/expand_dims.h', 'ExpandDimsOp', MaceOp.ExpandDims.name, mace_pb2.DT_FLOAT, DataFormat.NHWC), OpDescriptor('micro/ops/concat.h', 'ConcatOp', MaceOp.Concat.name, @@ -118,6 +125,36 @@ McSupportedOps = [ 'DepthwiseConv2dKB1S4Op', MaceOp.DepthwiseConv2d.name, mace_pb2.DT_FLOAT, DataFormat.NHWC, 'kb1s4'), + OpDescriptor('micro/ops/nhwc/cmsis_nn/quantize.h', + 'QuantizeOp', + MaceOp.Quantize.name, mace_pb2.DT_INT8, + DataFormat.NHWC), + OpDescriptor('micro/ops/nhwc/cmsis_nn/dequantize.h', + 'DequantizeOp', + MaceOp.Dequantize.name, mace_pb2.DT_INT8, + DataFormat.NHWC), + OpDescriptor('micro/ops/nhwc/cmsis_nn/arm_conv_2d_int8.h', + 'ArmConv2dInt8Op', + MaceOp.Conv2D.name, mace_pb2.DT_INT8, + DataFormat.NHWC), + OpDescriptor('micro/ops/nhwc/cmsis_nn/arm_depthwise_conv_2d_int8.h', + 'ArmDepthwiseConv2dInt8Op', + MaceOp.DepthwiseConv2d.name, 
mace_pb2.DT_INT8, + DataFormat.NHWC), + OpDescriptor('micro/ops/nhwc/cmsis_nn/arm_pooling_int8.h', + 'ArmPoolingInt8Op', + MaceOp.Pooling.name, mace_pb2.DT_INT8, + DataFormat.NHWC), + OpDescriptor('micro/ops/squeeze.h', 'SqueezeOp', MaceOp.Squeeze.name, + mace_pb2.DT_INT8, None), + OpDescriptor('micro/ops/nhwc/cmsis_nn/arm_softmax_int8.h', + 'ArmSoftmaxInt8Op', + MaceOp.Softmax.name, mace_pb2.DT_INT8, + DataFormat.NHWC), + OpDescriptor('micro/ops/nhwc/cmsis_nn/arm_mat_mul_int8.h', + 'ArmMatMulInt8Op', + MaceOp.MatMul.name, mace_pb2.DT_INT8, + DataFormat.NHWC) ] @@ -126,7 +163,9 @@ class OpResolver: self.net_def = pb_model self.op_desc_map = {} self.op_desc_list = [] - if model_conf[ModelKeys.platform] == Platform.TENSORFLOW: + platform = model_conf[ModelKeys.platform] + if platform == Platform.TENSORFLOW or \ + platform == Platform.KERAS: self.default_data_format = DataFormat.NHWC else: self.default_data_format = DataFormat.NCHW @@ -134,7 +173,7 @@ class OpResolver: self.default_data_format) if ModelKeys.quantize in model_conf and \ model_conf[ModelKeys.quantize] == 1: - self.default_data_type = mace_pb2.DT_UINT8 + self.default_data_type = mace_pb2.DT_INT8 else: self.default_data_type = \ model_conf.get(ModelKeys.data_type, mace_pb2.DT_FLOAT) @@ -218,7 +257,7 @@ class OpResolver: if not data_type_match: return False op_tag = self.get_op_tag(op_def) - if op_tag != op_desc.tag: + if (op_desc.tag) and (op_tag != op_desc.tag): return False return True @@ -261,6 +300,7 @@ class OpResolver: "not support op type %s, data type is %s, format is %s" % # noqa (op_def.type, self.get_op_data_type(op_def), self.get_op_data_format(op_def))) + if op_def.type not in self.op_desc_map: self.op_desc_map[op_def.type] = [] else: diff --git a/tools/python/micro/scratch_computer.py b/tools/python/micro/scratch_computer.py index 3edaab3f3415be66643e46f6f5195db9bf8b5f99..7599c32c997d41123df49373e0bdc6f04df4770a 100644 --- a/tools/python/micro/scratch_computer.py +++ 
b/tools/python/micro/scratch_computer.py @@ -22,13 +22,15 @@ from transform.base_converter import MaceOp class ScratchComputer: def __init__(self, net_def, model_conf): self.net_def = net_def + self.model_conf = model_conf if ModelKeys.quantize in model_conf and \ model_conf[ModelKeys.quantize] == 1: self.default_data_type = mace_pb2.DT_UINT8 else: self.default_data_type = mace_pb2.DT_FLOAT self._scratch_map = { - MaceOp.Conv2D: self.scratch_size_no_need, + MaceOp.Conv2D: self.scratch_size_conv, + MaceOp.FullyConnected: self.scratch_size_no_need, MaceOp.Squeeze: self.scratch_size_of_squeeze, MaceOp.Softmax: self.scratch_size_no_need, MaceOp.Eltwise: self.scratch_size_eltwise, @@ -39,7 +41,7 @@ class ScratchComputer: MaceOp.BiasAdd: self.scratch_size_no_need, MaceOp.BatchNorm: self.scratch_size_no_need, MaceOp.Shape: self.scratch_size_no_need, - MaceOp.Reshape: self.scratch_size_no_need, + MaceOp.Reshape: self.scratch_size_of_reshape, MaceOp.ExpandDims: self.scratch_size_of_expand_dims, MaceOp.Concat: self.scratch_size_of_concat, MaceOp.MatMul: self.scratch_size_of_matmul, @@ -47,6 +49,8 @@ class ScratchComputer: MaceOp.DepthwiseConv2d: self.scratch_size_of_depthwise_conv, MaceOp.ArgMax: self.scratch_size_no_need, MaceOp.Cast: self.scratch_size_no_need, + MaceOp.Quantize: self.scratch_size_no_need, + MaceOp.Dequantize: self.scratch_size_no_need, } def compute_size(self): @@ -80,18 +84,55 @@ class ScratchComputer: return 2 elif data_type == mace_pb2.DT_UINT8: return 1 + elif data_type == mace_pb2.DT_INT16: + return 2 + elif data_type == mace_pb2.DT_INT8: + return 1 else: mace_check(False, "Invalid data type: %s" % data_type) + def scratch_size_conv(self, op_def): + if (ModelKeys.quantize in self.model_conf + and self.model_conf[ModelKeys.quantize] == 1): + output_channels = op_def.output_shape[0].dims[3] + cmsis_bias_bytes = \ + self.get_data_bytes(mace_pb2.DT_INT32) * output_channels + + input_dims = self.get_op_input_dims(op_def, 0) + filter_dims = 
self.get_op_input_dims(op_def, 1) + cmsis_nn_buffer_bytes = \ + 2 \ + * input_dims[3] \ + * filter_dims[2] \ + * filter_dims[1] \ + * self.get_data_bytes(mace_pb2.DT_INT16) + + return cmsis_nn_buffer_bytes + cmsis_bias_bytes + else: + return 0 + def scratch_size_of_expand_dims(self, op_def): output_dim_size = len(op_def.output_shape[0].dims) data_type_bytes = self.get_data_bytes(mace_pb2.DT_INT32) return output_dim_size * data_type_bytes def scratch_size_of_matmul(self, op_def): - output_dim_size = len(op_def.output_shape[0].dims) - data_type_bytes = self.get_data_bytes(mace_pb2.DT_INT32) - return output_dim_size * data_type_bytes + if (ModelKeys.quantize in self.model_conf + and self.model_conf[ModelKeys.quantize] == 1): + output_dim_bytes = \ + len(op_def.output_shape[0].dims) \ + * self.get_data_bytes(mace_pb2.DT_INT32) + + cols = op_def.output_shape[0].dims[1] + cmsis_bias_bytes = cols * self.get_data_bytes(mace_pb2.DT_INT32) + + return output_dim_bytes + cmsis_bias_bytes + else: + output_dim_bytes = \ + len(op_def.output_shape[0].dims) \ + * self.get_data_bytes(mace_pb2.DT_INT32) + + return output_dim_bytes def get_op_input_dims(self, op_def, idx): input_name = op_def.input[idx] @@ -107,8 +148,7 @@ class ScratchComputer: def scratch_size_of_pooling(self, op_def): input0_dims = self.get_op_input_dims(op_def, 0) channels = input0_dims[3] - mace_check(channels > 0, - "can not inference pooling's input shape.") + mace_check(channels > 0, "can not inference pooling's input shape.") int_bytes = self.get_data_bytes(mace_pb2.DT_INT32) float_bytes = self.get_data_bytes(mace_pb2.DT_FLOAT) @@ -116,14 +156,30 @@ class ScratchComputer: return channels * (int_bytes + float_bytes) def scratch_size_of_depthwise_conv(self, op_def): - filter_dims = self.get_op_input_dims(op_def, 1) - k_batch = filter_dims[0] - block_size = k_batch - if block_size > 4: - block_size = 4 - k_channels = filter_dims[3] - float_bytes = self.get_data_bytes(mace_pb2.DT_FLOAT) - return block_size * 4 * 
k_channels * float_bytes + if (ModelKeys.quantize in self.model_conf + and self.model_conf[ModelKeys.quantize] == 1): + output_channels = op_def.output_shape[0].dims[3] + cmsis_bias_and_quant_bytes = \ + self.get_data_bytes(mace_pb2.DT_INT32) * output_channels * 3 + + input_dims = self.get_op_input_dims(op_def, 0) + filter_dims = self.get_op_input_dims(op_def, 1) + cmsis_nn_buffer_bytes = \ + input_dims[3] \ + * filter_dims[2] \ + * filter_dims[1] \ + * self.get_data_bytes(mace_pb2.DT_INT16) + + return cmsis_nn_buffer_bytes + cmsis_bias_and_quant_bytes + else: + filter_dims = self.get_op_input_dims(op_def, 1) + k_batch = filter_dims[0] + block_size = k_batch + if block_size > 4: + block_size = 4 + k_channels = filter_dims[3] + float_bytes = self.get_data_bytes(mace_pb2.DT_FLOAT) + return block_size * 4 * k_channels * float_bytes def scratch_size_of_squeeze(self, op_def): input0_dims = self.get_op_input_dims(op_def, 0) @@ -136,3 +192,11 @@ class ScratchComputer: def scratch_size_of_concat(self, op_def): # On a 64bit operating system, one pointer data need 8 bytes return len(op_def.input) * self.get_data_bytes(mace_pb2.DT_INT32) * 3 + + def scratch_size_of_reshape(self, op_def): + shape_dims = self.get_op_input_dims(op_def, 1) + shape_size = 1 + for i in range(len(shape_dims)): + shape_size *= shape_dims[i] + + return shape_size * self.get_data_bytes(mace_pb2.DT_INT32) diff --git a/tools/python/micro_converter.py b/tools/python/micro_converter.py index a6a3dc28fc00e60d03da3e27d7ad93377b7c2578..7e2f16828fea59c65ab214b98610bcfb7079d62b 100644 --- a/tools/python/micro_converter.py +++ b/tools/python/micro_converter.py @@ -33,7 +33,6 @@ from utils.util import mace_check NetDefExcludeFields = { 'OperatorDef': [ - 'quantize_info', 'node_id', 'op_id', 'padding', @@ -48,13 +47,15 @@ class MicroConverter: model_name, offset16=False, write_magic=False): self.model_conf = model_conf data_type = model_conf.get(ModelKeys.data_type, mace_pb2.DT_FLOAT) + # self.net_def.arg + if 
model_conf.get(ModelKeys.quantize_schema) == "int8": + data_type = mace_pb2.DT_INT8 self.net_def = MicroIoConverter.convert(net_def, data_type) self.model_weights = model_weights self.model_name = model_name self.offset16 = offset16 self.write_magic = write_magic self.code_gen = MicroCodeGen() - data_type = model_conf.get(ModelKeys.data_type, mace_pb2.DT_FLOAT) self.np_data_type = data_type_to_np_dt(data_type, np.float32) self.gen_folder = 'micro/codegen/' util.mkdir_p(self.gen_folder) @@ -146,7 +147,15 @@ class MicroConverter: tmp_workspace_file = "WORKSPACE" os.system("mkdir -p %s && touch %s/%s" % (tmp_dir, tmp_dir, tmp_workspace_file)) - tar_command = "tar --exclude=micro/tools --exclude=micro/test " + tar_command = "tar --exclude=micro/tools" + tar_command += " --exclude=micro/test" + tar_command += " --exclude=micro/build" + tar_command += " --exclude=micro/cmake" + tar_command += " --exclude=micro/codegen" + tar_command += " --exclude=micro/dockerfiles" + tar_command += " --exclude=micro/examples" + tar_command += " --exclude=micro/third_party" + tar_command += " --exclude=micro/pretrained_models" tar_command += " ".join(exclude_list) tar_command += " -zcf " + tar_package_path tar_command += " micro -C %s %s" % (tmp_dir, tmp_workspace_file) diff --git a/tools/python/py_proto/__init__.py b/tools/python/py_proto/__init__.py index c2dfd046da287ce117527f29906242ddff101e0f..c4e6822cf5e54f6fd1148c608537912a43d2c80f 100644 --- a/tools/python/py_proto/__init__.py +++ b/tools/python/py_proto/__init__.py @@ -27,6 +27,7 @@ try: device.execute("bazel version") except: # noqa MaceLogger.warning("No bazel, use cmake.") + device.execute("bash tools/cmake/cmake-generate-proto-py-host.sh") else: try: device.execute("bazel build //mace/proto:mace_py") diff --git a/tools/python/quantize/quantize_util.py b/tools/python/quantize/quantize_util.py index 410c049300605718b35eccb5b9ff25a78a4efb6d..83f8b2e01efcaf003421b7f43f41e5af1face9e0 100644 --- 
a/tools/python/quantize/quantize_util.py +++ b/tools/python/quantize/quantize_util.py @@ -71,6 +71,14 @@ class QuantizedData(object): self._maxval = maxval +def adjust_range_int8(in_min, in_max): + in_min = min(0.0, in_min) + in_max = max(0.0, in_max) + scale = (in_max - in_min) / 255 + zero = int(-in_min / scale - 128) + return scale, zero, in_min, in_max + + def adjust_range(in_min, in_max, device, non_zero): if device in [DeviceType.HEXAGON.value, DeviceType.HTA.value]: return adjust_range_for_hexagon(in_min, in_max) @@ -153,6 +161,29 @@ def quantize_with_scale_and_zero(data, scale, zero): return quantized_data +def quantize_int8(data): + np_data = np.array(data).astype(float) + in_min = np_data.min() + in_max = np_data.max() + + in_min = min(0.0, in_min) + in_max = max(0.0, in_max) + max_abs = max(abs(in_min), abs(in_max)) + zero = 0 + scale = max_abs / 127 + + output = np.clip((np.round(zero + np_data / scale).astype(np.int32)), + -127, 127) + + quantized_data = QuantizedData() + quantized_data.data = output + quantized_data.scale = scale + quantized_data.zero = zero + quantized_data.minval = -127 * scale + quantized_data.maxval = 127 * scale + return quantized_data + + def quantize(data, device, non_zero): np_data = np.array(data).astype(float) in_min = np_data.min() diff --git a/tools/python/run_micro.py b/tools/python/run_micro.py index e100db2c45e32385d1950a354b390d65f187fa1e..11b9afa07a996e1d38998f949c47525a5040ff84 100644 --- a/tools/python/run_micro.py +++ b/tools/python/run_micro.py @@ -43,11 +43,15 @@ def join_2d_array(xs): def build_engine(model_name, data_type): mace_check(flags.model_name is not None and len(model_name) > 0, "you should specify model name for build.") - command = "cd micro && tools/cmake/cmake-build-host.sh" \ - " -DMICRO_MODEL_NAME=%s -DCMAKE_BUILD_TYPE=Release" % model_name + command = ("micro/tools/cmake/cmake-build-host.sh" + " -DMICRO_MODEL_NAME=%s -DMACE_MICRO_ENABLE_CMSIS=ON" + " -DCMAKE_BUILD_TYPE=Release" % model_name) if 
data_type == mace_pb2.DT_BFLOAT16: - command += " -DMACE_ENABLE_BFLOAT16=ON" + command += " -DMACE_MICRO_ENABLE_BFLOAT16=ON" print("The current engine's data type is bfloat16.") + else: + command += " -DMACE_MICRO_ENABLE_BFLOAT16=OFF" + device.execute(command) @@ -168,8 +172,9 @@ def run_model_with_conf(flags, args, model_name, model_conf): if flags.vlog_level > 0: envs += ["MACE_CPP_MIN_VLOG_LEVEL=%s" % flags.vlog_level] - target = Target("micro/build/cmake-build/host/tools/micro_run_static", [], - opts=opts, envs=envs) + target = Target("build/micro/host/tools/micro_run_static", [], + opts=opts, + envs=envs) run_target.run_target(target_abi, install_dir, target, device_ids="host") diff --git a/tools/python/run_micro_bazel.py b/tools/python/run_micro_bazel.py deleted file mode 100644 index 9008ec02be7faf0245462595ec153d1f0b8bf87e..0000000000000000000000000000000000000000 --- a/tools/python/run_micro_bazel.py +++ /dev/null @@ -1,292 +0,0 @@ -# Copyright 2020 The MACE Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import argparse -import copy -import numpy as np -import shutil -import tempfile - -from micro_converter import MicroConverter -from py_proto import mace_pb2 -import run_target -from utils import util -from utils import device -from utils import config_parser -from utils.target import Target -from utils.config_parser import ModelKeys -from utils.util import MaceLogger -from utils.util import mace_check -import validate -import layers_validate - - -def join_2d_array(xs): - return ":".join([",".join([str(y) for y in x]) for x in xs]) - - -def build_engine(model_name, data_type): - mace_check(flags.model_name is not None and len(model_name) > 0, - "you should specify model name for build.") - command = "bazel build //micro/tools:micro_run_static" \ - " --config optimization " \ - " --copt \"-DMICRO_MODEL_NAME=%s\"" % model_name - if data_type == mace_pb2.DT_BFLOAT16: - command += " --copt \"-DMACE_ENABLE_BFLOAT16\"" - print("The current engine's data type is bfloat16.") - device.execute(command) - - -def get_model_conf_by_name(flags, conf): - for name, model_conf in conf["models"].items(): - if not flags.model_name or name == flags.model_name: - return model_conf - return None - - -def run_model(flags, args, conf): - model_conf = get_model_conf_by_name(flags, conf) - mace_check(model_conf is not None, "Get model conf failed.") - model_conf = config_parser.normalize_model_config(model_conf) - run_model_with_conf(flags, args, flags.model_name, model_conf) - - -def gen_sub_model_conf(output_config, flags, conf): - model_conf = copy.deepcopy(get_model_conf_by_name(flags, conf)) - model_conf['subgraphs'][0]['output_tensors'] = \ - output_config['output_tensors'] - model_conf['subgraphs'][0]['output_shapes'] = \ - output_config['output_shapes'] - return model_conf - - -def run_layers_validate(flags, args, original_conf): - model_name = flags.model_name - 
original_model_dir = flags.output + "/" + \ - original_conf['library_name'] + "/model" - model_dir = "/tmp/micro_run/model" - device.execute("mkdir -p %s" % model_dir) - device.execute("cp -p %s/%s.pb %s" % - (original_model_dir, model_name, model_dir)) - params_file_path = "%s/%s.data" % (original_model_dir, model_name) - output_configs = layers_validate.get_layers( - model_dir, model_name, flags.layers) - - for i in range(len(output_configs)): - sub_model_conf = gen_sub_model_conf( - output_configs[i], flags, original_conf) - with open(output_configs[i]['model_file_path'], "rb") as model_file: - net_def = mace_pb2.NetDef() - net_def.ParseFromString(model_file.read()) - with open(params_file_path, "rb") as params_file: - weights = bytearray(params_file.read()) - micro_conf = \ - config_parser.normalize_model_config(sub_model_conf) - MicroConverter(micro_conf, net_def, - weights, model_name).gen_code() - build_engine(model_name, micro_conf[ModelKeys.data_type]) - run_model_with_conf(flags, args, model_name, micro_conf) - - -def run_model_with_conf(flags, args, model_name, model_conf): - target_abi = "host" - dev = device.HostDevice("host", target_abi) - install_dir = "/tmp/micro_run/" + model_name - - if ModelKeys.check_tensors in model_conf: - model_conf[ModelKeys.output_tensors] = model_conf[ - ModelKeys.check_tensors] - model_conf[ModelKeys.output_shapes] = model_conf[ - ModelKeys.check_shapes] - - model_args = {"model_name": model_name, - "input_node": ",".join( - model_conf[ModelKeys.input_tensors]), - "input_shape": join_2d_array( - model_conf[ModelKeys.input_shapes]), - "output_node": ",".join( - model_conf[ModelKeys.output_tensors]), - "output_shape": join_2d_array( - model_conf[ModelKeys.output_shapes]), - "input_data_format": ",".join( - [df.name for df in - model_conf[ModelKeys.input_data_formats]]), - "output_data_format": ",".join( - [df.name for df in - model_conf[ModelKeys.output_data_formats]]) - } - - opts = ["--%s=%s" % (arg_key, arg_val) for 
arg_key, arg_val in - model_args.items()] + args - - # generate data start - tmp_dir_name = tempfile.mkdtemp() - input_file_prefix = tmp_dir_name + "/" + model_name - if ModelKeys.validation_inputs_data in model_conf: - input_tensor = model_conf[ModelKeys.input_tensors] - input_data = model_conf[ModelKeys.validation_inputs_data] - mace_check(len(input_tensor) == len(input_data), - "len(input_tensor) != len(validate_data") - - for i in range(len(input_tensor)): - util.download_or_get_file( - model_conf[ModelKeys.validation_inputs_data][i], "", - util.formatted_file_name(input_file_prefix, - input_tensor[i])) - else: - generate_input_data(input_file_prefix, - model_conf[ModelKeys.input_tensors], - model_conf[ModelKeys.input_shapes], - model_conf[ModelKeys.input_ranges], - model_conf[ModelKeys.input_data_types]) - - dev.install(Target(tmp_dir_name), install_dir + "/validate_in") - target_input_file = "%s/validate_in/%s" % ( - install_dir, model_name) - target_output_dir = "%s/validate_out" % install_dir - dev.mkdir(target_output_dir) - target_output_file = target_output_dir + "/" + model_name - opts += ["--input_file=%s" % target_input_file, - "--output_file=%s" % target_output_file] - # generate data end - - envs = [] - if flags.vlog_level > 0: - envs += ["MACE_CPP_MIN_VLOG_LEVEL=%s" % flags.vlog_level] - - target = Target("bazel-bin/micro/tools/micro_run_static", [], - opts=opts, envs=envs) - run_target.run_target(target_abi, install_dir, target, - device_ids="host") - - if flags.validate: - validate_model_file = util.download_or_get_model( - model_conf[ModelKeys.model_file_path], - model_conf[ModelKeys.model_sha256_checksum], - tmp_dir_name) - - validate_weight_file = "" - if ModelKeys.weight_file_path in model_conf: - validate_weight_file = util.download_or_get_model( - model_conf[ModelKeys.weight_file_path], - model_conf[ModelKeys.weight_sha256_checksum], - tmp_dir_name) - - dev.pull(Target(target_output_dir), tmp_dir_name + "/validate_out") - output_file_prefix 
= tmp_dir_name + "/validate_out/" + model_name - validate.validate(model_conf[ModelKeys.platform], - validate_model_file, - validate_weight_file, - input_file_prefix, - output_file_prefix, - model_conf[ModelKeys.input_shapes], - model_conf[ModelKeys.output_shapes], - model_conf[ModelKeys.input_data_formats], - model_conf[ModelKeys.output_data_formats], - model_conf[ModelKeys.input_tensors], - model_conf[ModelKeys.output_tensors], - flags.validate_threshold, - model_conf[ModelKeys.input_data_types], - flags.backend, - "", - "") - shutil.rmtree(tmp_dir_name) - - -def generate_input_data(input_file, input_node, input_shape, input_ranges, - input_data_type): - np.random.seed() - for i in range(len(input_node)): - data = np.random.random(input_shape[i]) * ( - input_ranges[i][1] - input_ranges[i][0]) + input_ranges[i][0] - input_file_name = util.formatted_file_name(input_file, input_node[i]) - MaceLogger.info('Generate input file: %s' % input_file_name) - if input_data_type[i] == mace_pb2.DT_FLOAT: - np_data_type = np.float32 - elif input_data_type[i] == mace_pb2.DT_INT32: - np_data_type = np.int32 - - data.astype(np_data_type).tofile(input_file_name) - - -def parse_args(): - parser = argparse.ArgumentParser() - parser.add_argument( - "--config", - type=str, - default="", - help="yaml conf path" - ) - parser.add_argument( - "--model_name", - type=str, - default="", - help="model name in yaml conf" - ) - parser.add_argument( - "--validate", - action="store_true", - help="enable validate" - ) - parser.add_argument( - "--validate_threshold", - type=float, - default="0.99", - help="validate threshold" - ) - parser.add_argument( - "--layers", - type=str, - default="-1", - help="'start_layer:end_layer' or 'layer', similar to python slice." 
- " Use with --validate flag.") - parser.add_argument( - "--backend", - type=str, - default="tensorflow", - help="onnx backend framework") - parser.add_argument( - "--build", - action="store_true", - help="if build before run" - ) - parser.add_argument( - '--output', - type=str, - default="build", - help="output dir") - parser.add_argument( - '--vlog_level', - type=int, - default="0", - help="vlog level") - - return parser.parse_known_args() - - -if __name__ == "__main__": - flags, args = parse_args() - conf = config_parser.parse(flags.config) - if flags.build or flags.validate: - micro_conf = config_parser.normalize_model_config( - conf[ModelKeys.models][flags.model_name]) - build_engine(flags.model_name, micro_conf[ModelKeys.data_type]) - if flags.validate and flags.layers != "-1": - run_layers_validate(flags, args, conf) - else: - run_model(flags, args, conf) diff --git a/tools/python/transform/base_converter.py b/tools/python/transform/base_converter.py index 3b6279ff411a9bd7fc2832a40706b1dbe4f6ad7f..73a428c7e65316904aff6b6f5b7e0d0d5b078c17 100644 --- a/tools/python/transform/base_converter.py +++ b/tools/python/transform/base_converter.py @@ -19,6 +19,7 @@ from py_proto import mace_pb2 from utils.config_parser import DataFormat from utils.config_parser import DeviceType +from utils.config_parser import Platform # SAME_LOWER: if the amount of paddings to be added is odd, @@ -88,7 +89,8 @@ class FrameworkType(Enum): CAFFE = 1 ONNX = 2 MEGENGINE = 3 - PYTORCH = 4 + KERAS = 4 + PYTORCH = 5 MaceSupportedOps = [ @@ -294,6 +296,7 @@ class MaceKeyword(object): mace_across_ch_str = 'across_channels' mace_apu_16bit_per_tensor = 'mace_apu_16bit_per_tensor' mace_apu_data_type_arg_str = 'apu_data_type' + mace_int8 = 'int8' class TransformerRule(Enum): @@ -344,6 +347,8 @@ class TransformerRule(Enum): TRANSFORM_SINGLE_BN_TO_DEPTHWISE_CONV = 45 TRANSFORM_MUL_MAX_TO_PRELU = 46 TRANSFORM_EXPAND_DIMS_TO_RESHAPE = 47 + QUANTIZE_FOLD_RELU = 48 + TRANSFORM_KERAS_QUANTIZE_INFO = 49 
class ConverterInterface(object): @@ -425,6 +430,7 @@ class ConverterOption(object): self._transformer_option = None self._cl_mem_type = "image" self._quantize_stat = False + self._platform = None @property def input_nodes(self): @@ -482,6 +488,10 @@ class ConverterOption(object): def quantize_stat(self): return self._quantize_stat + @property + def platform(self): + return self._platform + @input_nodes.setter def input_nodes(self, input_nodes): for node in input_nodes.values(): @@ -550,6 +560,10 @@ class ConverterOption(object): def quantize_stat(self, quantize_stat): self._quantize_stat = quantize_stat + @platform.setter + def platform(self, platform): + self._platform = platform + def disable_transpose_filters(self): if TransformerRule.TRANSPOSE_FILTERS in self._transformer_option: self._transformer_option.remove(TransformerRule.TRANSPOSE_FILTERS) @@ -609,7 +623,7 @@ class ConverterOption(object): TransformerRule.UPDATE_DATA_FORMAT, TransformerRule.TRANSPOSE_DATA_FORMAT, # Need to be put after SORT_BY_EXECUTION - TransformerRule.ADD_QUANTIZE_TENSOR_RANGE, + TransformerRule.ADD_QUANTIZE_TENSOR_RANGE ] if self._device == DeviceType.APU.value: self._transformer_option = self._transformer_option + [ @@ -624,12 +638,18 @@ class ConverterOption(object): if self._quantize: self._transformer_option = self._transformer_option + [ # need to be put after ADD_QUANTIZE_TENSOR_RANGE + TransformerRule.QUANTIZE_FOLD_RELU, TransformerRule.QUANTIZE_NODES, TransformerRule.QUANTIZE_WEIGHTS, TransformerRule.SORT_BY_EXECUTION, TransformerRule.CHECK_QUANTIZE_INFO, ] + if self._platform == Platform.KERAS: + self._transformer_option = [ + TransformerRule.TRANSFORM_KERAS_QUANTIZE_INFO + ] + self._transformer_option + class ConverterUtil(object): @staticmethod diff --git a/tools/python/transform/keras_converter.py b/tools/python/transform/keras_converter.py new file mode 100644 index 0000000000000000000000000000000000000000..67c305321d19383f4a344d9c62dd5704df0fa1ed --- /dev/null +++ 
b/tools/python/transform/keras_converter.py @@ -0,0 +1,402 @@ +import sys +import copy + +from enum import Enum +import six + +from py_proto import mace_pb2 +from transform import base_converter +from transform.base_converter import ActivationType +from transform.base_converter import ConverterUtil +from transform.base_converter import DataFormat +from transform.base_converter import EltwiseType +from transform.base_converter import FrameworkType +from transform.base_converter import MaceOp +from transform.base_converter import MaceKeyword +from transform.base_converter import PoolingType +from transform.base_converter import PaddingMode +from transform.base_converter import PadType +from transform.base_converter import ReduceType +from transform.base_converter import RoundMode +from tensorflow import keras +from tensorflow.python.keras.layers import convolutional +from quantize import quantize_util +from utils.util import mace_check + +import tensorflow as tf +import tensorflow_model_optimization as tfmot +from tensorflow_model_optimization.python.core.\ + quantization.keras.quantize_layer import QuantizeLayer +from tensorflow_model_optimization.python.core.\ + quantization.keras.quantize_wrapper import QuantizeWrapper +from tensorflow_model_optimization.python.core.\ + quantization.keras.quantize_annotate import QuantizeAnnotate + +padding_mode = { + "valid": PaddingMode.VALID, + "same": PaddingMode.SAME + # 'full': PaddingMode.FULL +} + + +def dtype2mtype(dtype): + if dtype == "float32": + return mace_pb2.DT_FLOAT + if dtype == "int32": + return mace_pb2.DT_INT32 + if dtype == "int8": + return mace_pb2.INT8 + + mace_check(False, "data type %s not supported" % dtype) + return None + + +def keras_shape2list(shape): + dims = shape.as_list() + for i in range(len(dims)): + if dims[i] is None: + dims[i] = 1 + + return dims + + +def get_input(keras_op): + if hasattr(keras_op, "input_proxy"): + return keras_op.input_proxy + else: + return keras_op.input + + +def 
get_output(keras_op): + if hasattr(keras_op, "output_proxy"): + return keras_op.output_proxy + else: + return keras_op.output + + +activation_type = { + "relu": ActivationType.RELU, + # 'relu6': ActivationType.RELUX, + # 'PReLU': ActivationType.PRELU, + # 'TanH': ActivationType.TANH, + "sigmoid": ActivationType.SIGMOID + # 'Clip': ActivationType.RELUX, +} + + +class KerasConverter(base_converter.ConverterInterface): + """A class for convert tensorflow 2.0 keras h5 model to mace model.""" + + def __init__(self, option, src_model_file): + self._op_converters = { + keras.layers.Flatten: self.convert_flatten, + keras.layers.Dense: self.convert_dense, + keras.layers.Conv2D: self.convert_conv2d, + keras.layers.MaxPooling2D: self.convert_maxpooling2d, + keras.layers.Dropout: self.convert_dropout, + keras.layers.DepthwiseConv2D: self.convert_depthwise_conv2d, + keras.layers.Softmax: self.convert_softmax, + QuantizeLayer: self.convert_quantize_layer, + QuantizeWrapper: self.convert_quantize_wrapper, + } + + self._option = option + self._mace_net_def = mace_pb2.NetDef() + ConverterUtil.set_filter_format(self._mace_net_def, DataFormat.HWIO) + ConverterUtil.add_data_format_arg(self._mace_net_def, DataFormat.NHWC) + + with tfmot.quantization.keras.quantize_scope(): + self._keras_model = keras.models.load_model(src_model_file) + + def run(self): + for op in self._keras_model.layers: + mace_check( + type(op) in self._op_converters, + "Mace does not support keras op type %s yet" % type(op)) + self._op_converters[type(op)](op) + + return self._mace_net_def + + def convert_general_op(self, keras_op): + op = self._mace_net_def.op.add() + op.name = keras_op.name + data_type_arg = op.arg.add() + data_type_arg.name = "T" + data_type_arg.i = dtype2mtype(keras_op.dtype) + framework_type_arg = op.arg.add() + framework_type_arg.name = MaceKeyword.mace_framework_type_str + framework_type_arg.i = FrameworkType.KERAS.value + ConverterUtil.add_data_format_arg(op, DataFormat.NHWC) + + return op 
+ + def convert_general_op_with_input_output(self, keras_op): + op = self._mace_net_def.op.add() + op.name = keras_op.name + data_type_arg = op.arg.add() + data_type_arg.name = "T" + data_type_arg.i = dtype2mtype(keras_op.dtype) + framework_type_arg = op.arg.add() + framework_type_arg.name = MaceKeyword.mace_framework_type_str + framework_type_arg.i = FrameworkType.KERAS.value + ConverterUtil.add_data_format_arg(op, DataFormat.NHWC) + + op.input.append(get_input(keras_op).name) + op.output.append(get_output(keras_op).name) + output_shape = op.output_shape.add() + output_shape.dims.extend(keras_shape2list(get_output(keras_op).shape)) + + return op + + def convert_flatten(self, keras_op): + op = self.convert_general_op_with_input_output(keras_op) + op.type = MaceOp.Reshape.name + + dim_arg = op.arg.add() + dim_arg.name = MaceKeyword.mace_dim_str + dim_arg.ints.extend([0, -1]) + + return op + + def convert_dense(self, keras_op): + op = self.convert_general_op(keras_op) + op.type = MaceOp.MatMul.name + + op.input.append(get_input(keras_op).name) + + # Adds kernel tensor + op.input.append(keras_op.kernel.name) + kernel = self.add_keras_tensor(keras_op.kernel) + + # Adds bias tensor + if keras_op.use_bias: + op.input.append(keras_op.bias.name) + self.add_keras_tensor(keras_op.bias) + + act_op = self.split_activation_op(keras_op, op) + return [op, act_op] + + def convert_conv2d(self, keras_op): + op = self.convert_general_op(keras_op) + op.type = MaceOp.Conv2D.name + op.input.append(get_input(keras_op).name) + + # Adds kernel tensor + op.input.append(keras_op.kernel.name) + kernel = self.add_keras_tensor(keras_op.kernel) + + # Adds bias tensor + if keras_op.use_bias: + op.input.append(keras_op.bias.name) + self.add_keras_tensor(keras_op.bias) + + padding_arg = op.arg.add() + padding_arg.name = MaceKeyword.mace_padding_str + padding_arg.i = padding_mode[keras_op.padding].value + + strides_arg = op.arg.add() + strides_arg.name = MaceKeyword.mace_strides_str + 
strides_arg.ints.extend(keras_op.strides) + + dilation_arg = op.arg.add() + dilation_arg.name = MaceKeyword.mace_dilations_str + dilation_arg.ints.extend(keras_op.dilation_rate) + + act_op = self.split_activation_op(keras_op, op) + return [op, act_op] + + def convert_depthwise_conv2d(self, keras_op): + op = self.convert_general_op(keras_op) + op.type = MaceOp.DepthwiseConv2d.name + op.input.append(get_input(keras_op).name) + + # Adds kernel tensor + op.input.append(keras_op.depthwise_kernel.name) + kernel = self.add_keras_tensor(keras_op.depthwise_kernel) + + # Adds bias tensor + if keras_op.use_bias: + op.input.append(keras_op.bias.name) + self.add_keras_tensor(keras_op.bias) + + padding_arg = op.arg.add() + padding_arg.name = MaceKeyword.mace_padding_str + padding_arg.i = padding_mode[keras_op.padding].value + + strides_arg = op.arg.add() + strides_arg.name = MaceKeyword.mace_strides_str + strides_arg.ints.extend(keras_op.strides) + + dilation_arg = op.arg.add() + dilation_arg.name = MaceKeyword.mace_dilations_str + dilation_arg.ints.extend(keras_op.dilation_rate) + + act_op = self.split_activation_op(keras_op, op) + return [op, act_op] + + def convert_maxpooling2d(self, keras_op): + op = self.convert_general_op_with_input_output(keras_op) + op.type = MaceOp.Pooling.name + + pooling_type_arg = op.arg.add() + pooling_type_arg.name = MaceKeyword.mace_pooling_type_str + pooling_type_arg.i = PoolingType.MAX.value + + padding_arg = op.arg.add() + padding_arg.name = MaceKeyword.mace_padding_str + padding_arg.i = padding_mode[keras_op.padding].value + + strides_arg = op.arg.add() + strides_arg.name = MaceKeyword.mace_strides_str + strides_arg.ints.extend(keras_op.strides) + + kernels_arg = op.arg.add() + kernels_arg.name = MaceKeyword.mace_kernel_str + kernels_arg.ints.extend(keras_op.pool_size) + + return op + + def convert_softmax(self, keras_op): + op = self.convert_general_op_with_input_output(keras_op) + op.type = MaceOp.Softmax.name + + return op + + def 
convert_dropout(self, keras_op): + op = self.convert_general_op_with_input_output(keras_op) + op.type = MaceOp.Identity.name + + return op + + def convert_quantize_layer(self, keras_op): + op = self._mace_net_def.op.add() + op.name = keras_op.name + op.type = MaceOp.Identity.name + op.input.append(get_input(keras_op).name) + op.output.append(get_output(keras_op).name) + output_shape = op.output_shape.add() + output_shape.dims.extend(keras_shape2list(get_output(keras_op).shape)) + + ConverterUtil.add_data_type_arg(op, mace_pb2.DT_FLOAT) + ConverterUtil.add_data_format_arg(op, DataFormat.NHWC) + + output_min = keras_op.weights[0].numpy() + output_max = keras_op.weights[1].numpy() + + self.add_quantize_info(op, output_min, output_max) + + return op + + def convert_quantize_wrapper(self, keras_op_wrapper): + inside_layer = keras_op_wrapper.layer + if isinstance(inside_layer, convolutional.DepthwiseConv2D): + inside_layer.depthwise_kernel = keras_op_wrapper.weights[1] + inside_layer.bias = keras_op_wrapper.weights[0] + elif isinstance(inside_layer, convolutional.Conv): + inside_layer.kernel = keras_op_wrapper.weights[1] + inside_layer.bias = keras_op_wrapper.weights[0] + elif isinstance(inside_layer, keras.layers.Dense): + inside_layer.kernel = keras_op_wrapper.weights[1] + inside_layer.bias = keras_op_wrapper.weights[0] + + # Adds input name for inside layers + inside_layer.input_proxy = keras_op_wrapper.input + inside_layer.output_proxy = keras_op_wrapper.output + + op = self._op_converters[type(inside_layer)](inside_layer) + + if isinstance(inside_layer, (convolutional.Conv, keras.layers.Dense)): + output_min = keras_op_wrapper.weights[6].numpy() + output_max = keras_op_wrapper.weights[7].numpy() + + if not isinstance(op, list): + self.add_quantize_info(op, output_min, output_max) + else: + assert len(op) == 2 + if op[1].type == MaceOp.Softmax.name: + self.add_quantize_info(op[0], output_min, output_max) + else: + self.add_quantize_info(op[1], output_min, output_max) 
+ + return op + + def add_keras_tensor(self, keras_tensor): + tensor = self._mace_net_def.tensors.add() + tensor.name = keras_tensor.name + tensor.dims.extend(keras_tensor.shape) + tensor.data_type = dtype2mtype(keras_tensor.dtype) + tensor.float_data.extend(keras_tensor.numpy().flat) + return tensor + + def split_activation_op(self, keras_op, op): + activation = keras_op.get_config()["activation"] + if "class_name" in activation: + assert activation["class_name"] == "QuantizeAwareActivation" + activation = activation["config"]["activation"] + + if activation == "linear": + op.output.append(get_output(keras_op).name) + output_shape = op.output_shape.add() + output_shape.dims.extend( + keras_shape2list(get_output(keras_op).shape) + ) + + return None + else: + activation_tmp_name = get_output(keras_op).name + "_act" + op.output.append(activation_tmp_name) + output_shape = op.output_shape.add() + output_shape.dims.extend( + keras_shape2list(get_output(keras_op).shape) + ) + + activation_op = self._mace_net_def.op.add() + activation_op.name = keras_op.name + "_act" + if activation == "softmax": + activation_op.type = MaceOp.Softmax.name + else: + activation_op.type = MaceOp.Activation.name + type_arg = activation_op.arg.add() + type_arg.name = MaceKeyword.mace_activation_type_str + type_arg.s = six.b(activation_type[activation].name) + + activation_op.input.append(activation_tmp_name) + activation_op.output.append(get_output(keras_op).name) + output_shape = activation_op.output_shape.add() + output_shape.dims.extend( + keras_shape2list(get_output(keras_op).shape) + ) + + data_type_arg = activation_op.arg.add() + data_type_arg.name = "T" + data_type_arg.i = dtype2mtype(keras_op.dtype) + framework_type_arg = activation_op.arg.add() + framework_type_arg.name = MaceKeyword.mace_framework_type_str + framework_type_arg.i = FrameworkType.KERAS.value + ConverterUtil.add_data_format_arg(activation_op, DataFormat.NHWC) + + return activation_op + + def add_quantize_info(self, op, 
minval, maxval): + quantize_schema = self._option.quantize_schema + if quantize_schema == MaceKeyword.mace_apu_16bit_per_tensor: + maxval = max(abs(minval), abs(maxval)) + minval = -maxval + scale = maxval / 2 ** 15 + zero = 0 + elif quantize_schema == MaceKeyword.mace_int8: + scale, zero, minval, maxval = quantize_util.adjust_range_int8( + minval, maxval + ) + else: + scale, zero, minval, maxval = quantize_util.adjust_range( + minval, maxval, self._option.device, non_zero=False + ) + + quantize_info = op.quantize_info.add() + quantize_info.minval = minval + quantize_info.maxval = maxval + quantize_info.scale = scale + quantize_info.zero_point = zero + + return quantize_info diff --git a/tools/python/transform/transformer.py b/tools/python/transform/transformer.py index c26f275350718dab74ad65ac68106f65c3859f11..c541e885892890e88e0cf691af31b2ec77b63d8c 100644 --- a/tools/python/transform/transformer.py +++ b/tools/python/transform/transformer.py @@ -33,6 +33,7 @@ from transform.base_converter import MaceTransposableDataFormatOps # noqa from transform.base_converter import PaddingMode from transform.base_converter import ReduceType from transform.base_converter import TransformerRule +from utils.config_parser import Platform from quantize import quantize_util from utils.util import mace_check @@ -121,6 +122,10 @@ class Transformer(base_converter.ConverterInterface): self.transform_mul_max_to_prelu, TransformerRule.TRANSFORM_EXPAND_DIMS_TO_RESHAPE: self.transform_expand_dims_to_reshape, + TransformerRule.QUANTIZE_FOLD_RELU: + self.quantize_fold_relu, + TransformerRule.TRANSFORM_KERAS_QUANTIZE_INFO: + self.transform_keras_quantize_info } self._option = option @@ -1010,7 +1015,7 @@ class Transformer(base_converter.ConverterInterface): """Transform global conv to fc should be placed after transposing input/output and filter""" - if self._option.quantize: + if self._option.quantize or self._option.platform == Platform.KERAS: return net = self._model @@ -1119,9 +1124,10 @@ 
class Transformer(base_converter.ConverterInterface): transposed_filter = set() transposed_deconv_filter = set() - if ((self._option.quantize and + if (((self._option.quantize and self._option.device == DeviceType.CPU.value) or - self._option.device == DeviceType.APU.value): + self._option.device == DeviceType.APU.value) and + (not self._option.quantize_schema == MaceKeyword.mace_int8)): print("Transpose filters to OHWI") if filter_format == DataFormat.HWIO: transpose_order = [3, 0, 1, 2] @@ -1310,6 +1316,9 @@ class Transformer(base_converter.ConverterInterface): return False def transform_matmul_to_fc(self): + if self._option.platform == Platform.KERAS: + return + net = self._model filter_format = self.filter_format() for op in net.op: @@ -1701,6 +1710,8 @@ class Transformer(base_converter.ConverterInterface): if self._option.quantize_schema == \ MaceKeyword.mace_apu_16bit_per_tensor: data_type_arg.i = mace_pb2.DT_INT16 + elif self._option.quantize_schema == MaceKeyword.mace_int8: + data_type_arg.i = mace_pb2.DT_INT8 else: data_type_arg.i = mace_pb2.DT_UINT8 elif data_type_arg.i == mace_pb2.DT_UINT8: @@ -1715,6 +1726,13 @@ class Transformer(base_converter.ConverterInterface): or op.type == MaceOp.Dequantize.name, "Only Quantization ops support int16, " "but got %s(%s)" % (op.name, op.type)) + elif data_type_arg.i == mace_pb2.DT_INT8 \ + and self._option.quantize_schema == \ + MaceKeyword.mace_int8: + mace_check(op.type == MaceOp.Quantize.name + or op.type == MaceOp.Dequantize.name, + "Only Quantization ops support int8, " + "but got %s(%s)" % (op.name, op.type)) else: mace_check(op.type == MaceOp.Quantize.name, "Quantization only support float ops, " @@ -1739,6 +1757,8 @@ class Transformer(base_converter.ConverterInterface): if self._option.quantize_schema == \ MaceKeyword.mace_apu_16bit_per_tensor: ConverterUtil.add_data_type_arg(op_def, mace_pb2.DT_INT16) + elif self._option.quantize_schema == MaceKeyword.mace_int8: + ConverterUtil.add_data_type_arg(op_def, 
mace_pb2.DT_INT8) else: ConverterUtil.add_data_type_arg(op_def, mace_pb2.DT_UINT8) ConverterUtil.add_data_format_arg(op_def, input_node.data_format) @@ -1766,6 +1786,8 @@ class Transformer(base_converter.ConverterInterface): if self._option.quantize_schema == \ MaceKeyword.mace_apu_16bit_per_tensor: ConverterUtil.add_data_type_arg(op_def, mace_pb2.DT_INT16) + elif self._option.quantize_schema == MaceKeyword.mace_int8: + ConverterUtil.add_data_type_arg(op_def, mace_pb2.DT_INT8) else: ConverterUtil.add_data_type_arg(op_def, mace_pb2.DT_UINT8) ConverterUtil.add_data_format_arg(op_def, output_node.data_format) @@ -1828,6 +1850,10 @@ class Transformer(base_converter.ConverterInterface): quantized_tensor = \ quantize_util.quantize_int16(tensor.float_data) tensor.data_type = mace_pb2.DT_INT16 + elif self._option.quantize_schema == MaceKeyword.mace_int8: + quantized_tensor = quantize_util.quantize_int8( + tensor.float_data) + tensor.data_type = mace_pb2.DT_INT8 else: non_zero = self._option.device == DeviceType.CPU.value quantized_tensor = quantize_util.quantize(tensor.float_data, @@ -1890,6 +1916,9 @@ class Transformer(base_converter.ConverterInterface): minval = -maxval scale = maxval / 2**15 zero = 0 + elif quantize_schema == MaceKeyword.mace_int8: + scale, zero, minval, maxval = quantize_util.adjust_range_int8( + minval, maxval) else: scale, zero, minval, maxval = \ quantize_util.adjust_range(minval, maxval, self._option.device, @@ -2001,6 +2030,9 @@ class Transformer(base_converter.ConverterInterface): min_val = -max_val scale = max_val / 2**15 zero = 0 + elif quantize_schema == MaceKeyword.mace_int8: + scale, zero, min_val, max_val = \ + quantize_util.adjust_range_int8(min_val, max_val) else: scale, zero, min_val, max_val = \ quantize_util.adjust_range(min_val, max_val, @@ -2042,6 +2074,10 @@ class Transformer(base_converter.ConverterInterface): minval = -maxval scale = maxval / 2**15 zero = 0 + elif quantize_schema == MaceKeyword.mace_int8: + scale, zero, minval, 
maxval = \ + quantize_util.adjust_range_int8( + input_node.range[0], input_node.range[1]) else: scale, zero, minval, maxval = \ quantize_util.adjust_range(input_node.range[0], @@ -2619,3 +2655,38 @@ class Transformer(base_converter.ConverterInterface): del op.arg[:] return True return False + + def quantize_fold_relu(self): + if self._option.quantize_schema != MaceKeyword.mace_int8: + return + + net = self._model + + for op in net.op: + if op.type == MaceOp.Activation.name: + act_type_arg = ConverterUtil.get_arg( + op, MaceKeyword.mace_activation_type_str) + act_type = act_type_arg.s.decode() + + if act_type in ["RELU", "RELUX"]: + producer = self._producer[op.input[0]] + # The type of "producer" is not limited to MatMul, + # you can try other types + if producer.type == MaceOp.MatMul.name: + self.replace_quantize_info(producer, op) + self.safe_remove_node(op, producer) + return True + + return False + + def transform_keras_quantize_info(self): + mace_check(self._option.platform == Platform.KERAS, "For KERAS models") + changed = False + for op in self._model.op: + for i in range(len(op.quantize_info)): + if not op.output[i] in self._quantize_activation_info: + self._quantize_activation_info[op.output[i]] = \ + op.quantize_info[i] + changed = True + + return changed diff --git a/tools/python/utils/config_parser.py b/tools/python/utils/config_parser.py index 36c502c1c8cb323fced11e0ab1b3d22804d16658..5521d4e8e81dc42c8dfffd2f213a6031320fac91 100644 --- a/tools/python/utils/config_parser.py +++ b/tools/python/utils/config_parser.py @@ -151,7 +151,8 @@ class Platform(Enum): CAFFE = 1 ONNX = 2 MEGENGINE = 3 - PYTORCH = 4 + KERAS = 4 + PYTORCH = 5 def parse_platform(str): diff --git a/tools/python/utils/convert_util.py b/tools/python/utils/convert_util.py index ba6a5cce637e1d865dd664ab824e41cd44079012..7b597e3ac346acca039d8e7805965a06aa573977 100644 --- a/tools/python/utils/convert_util.py +++ b/tools/python/utils/convert_util.py @@ -48,6 +48,10 @@ def 
merge_params(net_def, data_type): data = bytearray( np.array(tensor.int32_data).astype(np.uint8).tolist()) tensor.data_size = len(tensor.int32_data) + elif tensor.data_type == mace_pb2.DT_INT8: + data = bytearray( + np.array(tensor.int32_data).astype(np.uint8).tolist()) + tensor.data_size = len(tensor.int32_data) elif tensor.data_type == mace_pb2.DT_FLOAT16: data = bytearray( np.array(tensor.float_data).astype(np.float16).tobytes()) @@ -85,6 +89,8 @@ def merge_params(net_def, data_type): del tensor.int32_data[:] elif tensor.data_type == mace_pb2.DT_UINT8: del tensor.int32_data[:] + elif tensor.data_type == mace_pb2.DT_INT8: + del tensor.int32_data[:] return net_def, model_data @@ -100,5 +106,7 @@ def data_type_to_np_dt(data_type, default_np_dt): return np.uint8 elif data_type == mace_pb2.DT_BFLOAT16: return np.uint16 + elif data_type == mace_pb2.DT_INT8: + return np.int8 else: return np.float32