Commit c299a141 authored by 卢旭辉

Merge branch 'micro_cmsis' into 'master'

feat: Integrate CMSIS5 to MACE Micro

See merge request applied-machine-learning/sysml/mace!1305
......@@ -22,12 +22,14 @@ mace/codegen/version/
mace/codegen/engine/
mace/codegen/lib/
micro/codegen/models/
micro/codegen/engines/
examples/android/macelibrary/src/main/cpp/mace/
examples/android/macelibrary/src/main/cpp/include/
examples/android/macelibrary/src/main/cpp/lib/arm64-v8a/
examples/android/macelibrary/src/main/jniLibs/arm64-v8a/
tools/python/py_proto/*_pb2.py
micro/codegen/models/
micro/codegen/engines/
micro/examples/micro
micro/build
\ No newline at end of file
......@@ -19,7 +19,7 @@ cpplint:
pylint:
stage: linting
script:
- pycodestyle $(find -name "*.py")
- pycodestyle . --filename=*.py --exclude=examples,third_party
build_docs:
stage: build
......@@ -111,14 +111,12 @@ mace_cc_test:
DEVICE_CONF_FILE=generic-mobile-devices/devices.yml
fi
- python tools/bazel_adb_run.py --target="//test/ccunit:mace_cc_test" --device_yml=${DEVICE_CONF_FILE} --run_target=True --stdout_processor=unittest_stdout_processor --target_abis=armeabi-v7a,arm64-v8a --target_socs=$TARGET_SOCS
- python tools/bazel_adb_run.py --target="//micro/test/ccunit:micro_ops_test" --run_target=True --stdout_processor=ops_benchmark_stdout_processor --target_abis=arm64-v8a
mace_cc_benchmark:
stage: test
script:
- if [ -z "$TARGET_SOCS" ]; then TARGET_SOCS=random; fi
- python tools/bazel_adb_run.py --target="//test/ccbenchmark:mace_cc_benchmark" --run_target=True --stdout_processor=ops_benchmark_stdout_processor --target_abis=armeabi-v7a,arm64-v8a --target_socs=$TARGET_SOCS --args="--filter=.*SIGMOID.*"
- python tools/bazel_adb_run.py --target="//micro/test/ccbenchmark:micro_cc_benchmark" --run_target=True --stdout_processor=ops_benchmark_stdout_processor --target_abis=arm64-v8a
only:
- triggers
......@@ -145,14 +143,6 @@ model_tests:
- python tools/converter.py convert --config=${CONF_FILE} --target_socs=$TARGET_SOCS --model_graph_format=code --model_data_format=file
- python tools/converter.py run --config=${CONF_FILE} --target_socs=$TARGET_SOCS --round=1 --validate --model_graph_format=code --model_data_format=file
- python tools/converter.py run --config=${CONF_FILE} --target_socs=$TARGET_SOCS --round=5 --model_graph_format=code --model_data_format=file --benchmark
- CONF_FILE=mace-models/micro-models/har-cnn/har-cnn.yml
- python tools/converter.py convert --config=${CONF_FILE} --enable_micro
- python tools/python/run_micro.py --config $CONF_FILE --build --validate --model_name har_cnn
- python tools/python/run_micro.py --config $CONF_FILE --model_name har_cnn --build --benchmark
- CONF_FILE=mace-models/micro-models/har-cnn/har-cnn-bf16.yml
- python tools/converter.py convert --config=${CONF_FILE} --enable_micro
- python tools/python/run_micro.py --config $CONF_FILE --build --validate --model_name har_cnn
- rm -rf mace-models
quantization_tests:
stage: test
......@@ -206,3 +196,4 @@ micro-child:
trigger:
include:
- 'micro/.gitlab-ci.yml'
strategy: depend
[submodule "micro/third_party/CMSIS_5"]
path = micro/third_party/CMSIS_5
url = https://github.com/ARM-software/CMSIS_5.git
shallow = true
[submodule "micro/third_party/googletest"]
path = micro/third_party/googletest
url = https://github.com/google/googletest.git
shallow = true
[submodule "micro/third_party/gflags"]
path = micro/third_party/gflags
url = https://github.com/gflags/gflags.git
shallow = true
FROM ubuntu:18.04
RUN apt-get update
RUN apt-get install -y wget
RUN apt-get install -y g++ gcc
RUN apt-get install -y gcc-arm-none-eabi
RUN apt-get install -y python3 python3-pip git mercurial
RUN wget https://cdn.cnbj1.fds.api.mi-img.com/mace/third-party/cmake-3.18.3-Linux-x86_64.sh
RUN chmod +x cmake-3.18.3-Linux-x86_64.sh && ./cmake-3.18.3-Linux-x86_64.sh --skip-license --prefix=/usr
RUN python3 -m pip install -U pip
RUN python3 -m pip install jinja2 pyyaml sh numpy six filelock
RUN python3 -m pip install tensorflow==2.3.0 tensorflow_model_optimization
RUN python3 -m pip install mbed-cli
Basic usage for Micro Controllers
==================================
MACE Micro is a lightweight neural network inference engine for MCUs and low-power DSPs.
Currently we support Cortex-M MCUs and Qualcomm Hexagon DSPs. You can get our projects from GitHub.
Build and run an example model
-------------------------------
Get MACE Micro Projects
-----------------------
First, make sure the environment has been set up correctly (refer to :doc:`../installation/env_requirement`).
MACE Micro is a sub-project of MACE, so you can get it from the MACE repository.
The followings are instructions about how to quickly build and run a provided model in
`MACE Model Zoo <https://github.com/XiaoMi/mace-models>`__.
.. code-block:: sh
Here we use the har-cnn model as an example.
git clone https://github.com/XiaoMi/mace.git
# Initialize the required submodules
cd mace && git submodule update --init micro && cd ..
**Commands**
Environment Requirements
------------------------
1. Pull `MACE <https://github.com/XiaoMi/mace>`__ project.
On an Ubuntu 18.04/20.04 PC, perform the following steps.
.. code-block:: sh
git clone https://github.com/XiaoMi/mace.git
cd mace/
git fetch --all --tags --prune
apt-get update
apt-get install -y wget
# Checkout the latest tag (i.e. release version)
tag_name=`git describe --abbrev=0 --tags`
git checkout tags/${tag_name}
apt-get install -y g++
# Required for Cortex-M MCUs
apt-get install -y gcc-arm-none-eabi
apt-get install -y python3 python3-pip
.. note::
python3 -m pip install jinja2 pyyaml sh numpy six filelock
# Install CMake 3.13.0 or above
wget https://cdn.cnbj1.fds.api.mi-img.com/mace/third-party/cmake-3.18.3-Linux-x86_64.sh
chmod +x cmake-3.18.3-Linux-x86_64.sh && ./cmake-3.18.3-Linux-x86_64.sh --skip-license --prefix=/usr
It's highly recommended to use a release version instead of the master branch.
python3 -m pip install -U pip
# The TensorFlow version depends on your model
# TensorFlow 1.x frozen models and TensorFlow 2.x Keras models are both supported
python3 -m pip install tensorflow==2.3.0
python3 -m pip install tensorflow_model_optimization
You can also use Docker as the environment.
2. Pull `MACE Model Zoo <https://github.com/XiaoMi/mace-models>`__ project.
.. code-block:: sh
cd mace/docker/mace-micro-dev
docker build . -f mace-micro-dev.dockerfile --tag mace-micro-dev
cd ../../..
# Maps your workspace to docker container
docker run -ti -v $(pwd):/workspace/ -w /workspace mace-micro-dev
git clone https://github.com/XiaoMi/mace-models.git
Convert a model to C++ code
----------------------------
3. Convert the pre-trained har-cnn model to C++ code.
Here we use a pre-trained model on the MNIST dataset:
.. code-block:: sh
cd path/to/mace
# output lib path: build/har-cnn/model/har_cnn_micro.tar.gz
CONF_FILE=/path/to/mace-models/micro-models/har-cnn/har-cnn.yml
python tools/python/convert.py --config=$CONF_FILE --enable_micro
cd mace
# Convert a TensorFlow 2.x Keras model; this additionally requires python3 and tensorflow==2.x
python3 tools/python/convert.py --config=micro/pretrained_models/keras/mnist/mnist.yml --enable_micro
4. Build the Micro-Controllers engine and models into a library on the host.
Model config file
-----------------
.. code-block:: sh
The following is a complete model config file:
cd micro
./tools/cmake/cmake-build-host.sh
.. code-block:: yaml
.. note::
library_name: har
target_abis: [host]
model_graph_format: file
model_data_format: file
models:
har_int8:
platform: keras
model_file_path: https://cdn.cnbj1.fds.api.mi-img.com/mace/miai-models/micro/keras/har/har.h5
model_sha256_checksum: ec0477b8e489541bb34377c9cabc42ee6cefa8bdf0a9f726e06be1b967ea1dcd
subgraphs:
- input_tensors:
- "conv2d_1_input:0"
input_shapes:
- 1, 90, 3, 1
input_ranges:
- -5, 15
output_tensors:
- "dense_3/Softmax:0"
output_shapes:
- "1, 6"
runtime: cpu
data_type: fp32_fp32
limit_opencl_kernel_time: 0
nnlib_graph_mode: 0
obfuscate: 0
winograd: 0
quantize: 1
quantize_schema: int8
quantize_range_file: /workspace/mace/micro/pretrained_models/keras/har/har.range
- The ABI of the build result ``build/cmake-build/host/libmicro.a`` is host. If you want to run the model on microcontrollers, build the code with the right toolchain, for example:
For the bfloat16 model,
.. code-block:: sh
cd micro
export HEXAGON_SDK_ROOT=/home/user/Qualcomm/Hexagon_SDK/3.4.1
export HEXAGON_TOOLS=/home/user/Qualcomm/HEXAGON_Tools/6.4.06
./tools/cmake/cmake-build-hexagon6.sh
.. code-block:: yaml
5. Run the model on the host.
data_type: bf16_fp32
.. code-block:: sh
For the int8 model,
CONF_FILE=/path/to/mace-models/micro-models/har-cnn/har-cnn.yml
# Run
python tools/python/run_micro.py --config $CONF_FILE --model_name har_cnn --build
.. code-block:: yaml
# Test model run time
python tools/python/run_micro.py --config $CONF_FILE --model_name har_cnn --build --round=100
quantize: 1
quantize_schema: int8
# Required when your model has no quantization info
quantize_range_file: range_file_path
# Validate the correctness by comparing the results against the
# original model and framework, measured with cosine distance for similarity.
python tools/python/run_micro.py --config $CONF_FILE --model_name har_cnn --build --validate
# Validate the layers' correctness.
python tools/python/run_micro.py --config $CONF_FILE --model_name har_cnn --build --validate --layers 0:-1
Build MACE Micro and models libraries
--------------------------------------
Deploy your model into applications
Here we build the MACE Micro engine and models into libraries on a Linux host machine. The CMake build parameters depend on your model config file.
For a float32 model,
.. code-block:: sh
./micro/tools/cmake/cmake-build-host.sh
For a bfloat16 model,
.. code-block:: sh
./micro/tools/cmake/cmake-build-host.sh -DMACE_MICRO_ENABLE_BFLOAT16=ON
.. note::
You can enable only one of float32 and bfloat16.
For an int8 model,
.. code-block:: sh
./micro/tools/cmake/cmake-build-host.sh -DMACE_MICRO_ENABLE_CMSIS=ON
Use libraries directly
-----------------------
After these steps, the necessary libraries and headers can be found in the "build/micro/host/install" directory; you can use them directly.
.. code-block:: sh
# Builds example
g++ micro/examples/classifier/main.cc -DMICRO_MODEL_NAME=mnist -DMICRO_DATA_NAME=mnist -I build/micro/host/install/include/ -L build/micro/host/install/lib/ -lmicro -lmodels -lmicro -o mnist
# Runs the mnist example
./mnist
Code example
------------------------------------
Please refer to ``/mace/micro/tools/micro_run.cc`` for full usage; the following lists the key steps.
The following code is from the mnist example source files, with the main steps annotated:
.. code-block:: cpp
// Include the headers
#include "micro/include/public/micro.h"
// 1. Create MaceMicroEngine instance
MaceMicroEngine *micro_engine = nullptr;
MaceStatus status = har_cnn::GetMicroEngineSingleton(&micro_engine);
// 2. Create and register input buffers
std::vector<std::shared_ptr<char>> inputs;
std::vector<int32_t> input_sizes;
for (size_t i = 0; i < input_shapes.size(); ++i) {
input_sizes.push_back(std::accumulate(input_shapes[i].begin(),
input_shapes[i].end(), sizeof(float),
std::multiplies<int32_t>()));
inputs.push_back(std::shared_ptr<char>(new char[input_sizes[i]],
std::default_delete<char[]>()));
}
// TODO: fill data into input buffers
for (size_t i = 0; i < input_names.size(); ++i) {
micro_engine->RegisterInputData(i, inputs[i].get(),
input_shapes[i].data());
#include "data/mnist.h"
#include <cstdio>
// Include MACE Micro header
#include "micro.h"
namespace micro {
namespace mnist {
// We use a forward declaration to avoid including the model-specific engine header
MaceStatus GetMicroEngineSingleton(MaceMicroEngine **engine);
}
} // namespace micro
// 3. Run the model
MaceStatus status = micro_engine->Run();
int main() {
// Step 1, get the mnist micro engine
micro::MaceMicroEngine *micro_engine = NULL;
micro::MaceStatus status =
micro::mnist::GetMicroEngineSingleton(&micro_engine);
// 4. Get the results
for (size_t i = 0; i < output_names.size(); ++i) {
void *output_buffer = nullptr;
const int32_t *output_dims = nullptr;
// Step 2, set input data
static float *input_data = data_mnist_4;
int32_t input_dims[4] = {1, 28, 28, 1};
micro_engine->RegisterInputData(0, input_data, input_dims);
// Step 3, run the inference
micro_engine->Run();
// Step 4, get output data
float *output_buffer = NULL;
const int32_t *output_dims = NULL;
uint32_t dim_size = 0;
MaceStatus status =
micro_engine->GetOutputData(i, &output_buffer, &output_dims, &dim_size);
// TODO: the result data is in output_buffer; you must not delete output_buffer.
micro_engine->GetOutputData(
0, reinterpret_cast<void **>(&output_buffer), &output_dims, &dim_size);
for (int32_t i = 0; i < output_dims[1]; ++i) {
printf("%d: %f\n", i, output_buffer[i]);
}
return 0;
}
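For a classifier, you usually reduce the scores in ``output_buffer`` to a single predicted class. The following is a minimal sketch (not part of the example above), assuming the two-dimensional ``{1, num_classes}`` output shape used here:
.. code-block:: cpp

// Hypothetical post-processing: pick the class with the highest score.
int32_t predicted = 0;
for (int32_t i = 1; i < output_dims[1]; ++i) {
  if (output_buffer[i] > output_buffer[predicted]) {
    predicted = i;
  }
}
printf("predicted class: %d\n", predicted);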
For more examples, see the "micro/examples" directory.
Performance
-----------
We deployed a `HAR-CNN <https://github.com/Shahnawax/HAR-CNN-Keras>`__ int8 model on the NUCLEO-F767ZI (Cortex-M7) board. Each inference of the HAR-CNN model takes 12 ms.
\ No newline at end of file
Deploy
======
The MACE Micro module is written in C++98 and depends only on ``<cmath>``.
We can write a CMake toolchain file to build the program for a specific platform.
For Cortex-M MCU
----------------
Now we deploy the MNIST classifier example on a NUCLEO-F767ZI development board with Mbed OS.
Install the GCC Arm Embedded compiler from the terminal.
.. code-block:: sh
# Installs gcc arm
sudo apt-get install gcc-arm-none-eabi
Refer to <https://os.mbed.com/docs/mbed-os/v6.3/build-tools/install-and-set-up.html/> to install Mbed OS tools.
Now we can convert the model and build the program,
.. code-block:: sh
python3 tools/python/convert.py --config=micro/pretrained_models/keras/mnist/mnist-int8.yml --enable_micro
./micro/tools/cmake/cmake-build-gcc-arm-none-eabi.sh -DARM_CPU=cortex-m7 -DMACE_MICRO_ENABLE_CMSIS=ON -DMACE_MICRO_ENABLE_HARDFP=OFF
The "-DARM_CPU=cortex-{m7|m4|..}" is a necessary CMake variable for different series of Arm MCUs.
You can use the Mace Micro install package("build/micro/gcc-arm-none-eabi/install") in yourself project. Here we use "mbed-cli" to compile it
.. code-block:: sh
# Copy the MACE Micro install package to the workspace directory
cp build/micro/gcc-arm-none-eabi/install micro/examples/classifier -r
cd micro/examples/classifier
# Compile the program
mbed compile -t GCC_ARM -m NUCLEO_F767ZI -D MICRO_MODEL_NAME=mnist_int8 -D MICRO_DATA_NAME=mnist
# Flash the program to the development board
cp BUILD/NUCLEO_F767ZI/GCC_ARM/classifier.bin /media/$USER/NODE_F767ZI
# Connect to the default COM port
sudo chown $USER:$USER /dev/ttyACM0
mbed sterm
Press the reset (black) button to run the example again.
For Hexagon DSP
---------------
In the micro/cmake/toolchain folder, there are two Hexagon CMake toolchain files for reference. For more details, please see <https://developer.qualcomm.com/software/hexagon-dsp-sdk/dsp-processor/>
\ No newline at end of file
Operator lists
===============
Float32 and bfloat16 operators
* batch_norm
* conv_2d
* depthwise_conv_2d
* pooling
* activation
* argmax
* bias_add
* cast
* concat
* eltwise
* expand_dims
* matmul
* reduce
* reshape
* softmax
* squeeze
* stack
* stride_slice
Int8 operators
* conv_2d
* depthwise_conv_2d
* eltwise
* mat_mul
* pooling
* softmax
* quantize
* dequantize
set(MACE_PROTO_PROTOS mace.proto)
set(MACE_PROTO_SRCS)
set(MACE_PROTO_HDRS)
set(MACE_PROTO_PYTHON_DIR ${PROJECT_SOURCE_DIR}/tools/python/py_proto)
foreach(proto_file ${MACE_PROTO_PROTOS})
macro(generate_proto proto_file)
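# Generates C++ and Python protobuf sources for the given proto file and
# exposes per-proto targets (<name>_proto_cpp, <name>_proto_py).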
get_filename_component(proto_file_abs ${proto_file} ABSOLUTE)
get_filename_component(basename ${proto_file} NAME_WE)
set(PROTO_GENERATED_FILES ${basename}.pb.cc ${basename}.pb.h)
list(APPEND MACE_PROTO_SRCS ${basename}.pb.cc)
list(APPEND MACE_PROTO_HDRS ${basename}.pb.h)
set(${basename}_proto_files ${basename}.pb.cc ${basename}.pb.h)
set(${basename}_proto_srcs ${basename}.pb.cc)
add_custom_command(
OUTPUT ${PROTO_GENERATED_FILES}
OUTPUT ${basename}_proto_files
COMMAND ${PROTOC_BIN} --cpp_out ${CMAKE_CURRENT_BINARY_DIR} -I ${CMAKE_CURRENT_SOURCE_DIR} ${proto_file_abs}
COMMENT "Generating ${PROTO_GENERATED_FILES} from ${proto_file}"
COMMENT "Generating ${basename}_proto_files from ${proto_file}"
DEPENDS protoc_bin
VERBATIM
)
set(PROTO_GENERATED_PY_FILES ${MACE_PROTO_PYTHON_DIR}/${basename}_pb2.py)
set(PROTO_PYTHON_DIR ${PROJECT_SOURCE_DIR}/tools/python/py_proto)
set(PROTO_GENERATED_PY_FILES ${PROTO_PYTHON_DIR}/${basename}_pb2.py)
add_custom_command(
OUTPUT ${PROTO_GENERATED_PY_FILES}
COMMAND ${PROTOC_BIN} --python_out ${MACE_PROTO_PYTHON_DIR} -I ${CMAKE_CURRENT_SOURCE_DIR} ${proto_file_abs}
COMMAND ${PROTOC_BIN} --python_out ${PROTO_PYTHON_DIR} -I ${CMAKE_CURRENT_SOURCE_DIR} ${proto_file_abs}
COMMENT "Generating ${PROTO_GENERATED_PY_FILES} from ${proto_file}"
DEPENDS protoc_bin
VERBATIM
)
endforeach()
add_custom_target(mace_proto_src DEPENDS ${PROTO_GENERATED_FILES}
add_custom_target(${basename}_proto_cpp DEPENDS ${basename}_proto_files
COMMENT "Checking if re-generation is required")
add_custom_target(mace_proto_py ALL DEPENDS ${PROTO_GENERATED_PY_FILES})
add_custom_target(${basename}_proto_py ALL DEPENDS ${PROTO_GENERATED_PY_FILES})
endmacro()
generate_proto(mace.proto)
generate_proto(micro_mem.proto)
add_library(proto ${MACE_PROTO_SRCS})
add_library(proto ${mace_proto_srcs})
add_dependencies(proto mace_proto_cpp)
set_source_files_properties(
${mace_proto_srcs}
PROPERTIES GENERATED TRUE
)
target_link_libraries(proto libprotobuf_lite)
install(TARGETS proto ARCHIVE DESTINATION lib)
......@@ -16,6 +16,7 @@ enum DataType {
DT_FLOAT16 = 5;
DT_BFLOAT16 = 6;
DT_INT16 = 7;
DT_INT8 = 8;
}
enum MemoryType {
......
build
test/**/codegen
before_script:
- git submodule deinit -f .
- git submodule sync
- git submodule update --init .
stages:
- convert
- build
......@@ -6,23 +11,18 @@ stages:
model-convert:
stage: convert
script:
- rm -rf mace-models
- GIT_SSH_COMMAND="ssh -o UserKnownHostsFile=/dev/null -o StrictHostKeyChecking=no" git clone git@github.com:XiaoMi/mace-models.git
- >
- CONF_FILE=mace-models/micro-models/har-cnn/har-cnn.yml
- python tools/python/convert.py --config=${CONF_FILE} --enable_micro
- bash micro/tools/ci/model_convert.sh
artifacts:
paths:
- mace-models
untracked: true
host-build:
cross-build:
stage: build
script:
- cd micro && ./tools/cmake/cmake-build-host.sh -DMICRO_MODEL_NAME=har_cnn
- bash micro/tools/ci/cross_build.sh
- bash micro/tools/ci/host_build_and_run_examples.sh
- bash micro/tools/ci/host_build_and_run_tests.sh
# The mbed-cli protobuf version conflicts with others
# - bash micro/tools/ci/build_mbed_example.sh
host-test:
stage: test
script:
- CONF_FILE=mace-models/micro-models/har-cnn/har-cnn.yml
- python tools/python/run_micro.py --config $CONF_FILE --build --validate --model_name har_cnn
cmake_minimum_required(VERSION 3.7 FATAL_ERROR)
message("CMAKE_VERSION: ${CMAKE_VERSION}")
project(micro C CXX)
cmake_minimum_required(VERSION 3.13 FATAL_ERROR)
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++11")
project(micro C CXX ASM)
# CMSIS_5 requires C99
set(CMAKE_C_STANDARD 99)
add_compile_options("-Wall;-Wextra")
option(MACE_MICRO_ENABLE_CMSIS "Whether to enable cmsis driver" OFF)
option(MACE_MICRO_ENABLE_BFLOAT16 "Whether to enable bfloat16 support" OFF)
option(MACE_MICRO_ENABLE_TESTS "Whether to enable Mace Micro tests" OFF)
option(MACE_MICRO_ENABLE_EXAMPLES "Whether to enable Mace Micro examples" OFF)
if(MACE_MICRO_GCC_ARM)
include(cmake/config_gcc_arm.cmake)
endif()
# Set CMAKE_BUILD_TYPE default value to Release
if(NOT CMAKE_BUILD_TYPE)
set(CMAKE_BUILD_TYPE "Release"
CACHE STRING "Choose the type of build, options are: Debug Release RelWithDebInfo MinSizeRel."
FORCE)
endif()
if(MACE_MICRO_ENABLE_CMSIS)
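# Hook invoked by the CMSIS_5 CMake scripts for each target; overridden here
# to silence warnings coming from the third-party sources.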
function(compilerSpecificCompileOptions PROJECTNAME ROOT)
target_compile_options(${PROJECTNAME}
PRIVATE "-Wno-unused-parameter"
PRIVATE "-Wno-sign-compare"
PRIVATE "-Wno-strict-aliasing"
PRIVATE "-Wno-unused-variable"
)
endfunction()
set(ROOT ${CMAKE_CURRENT_SOURCE_DIR}/third_party/CMSIS_5)
include_directories(${ROOT}/CMSIS/Core/Include)
add_subdirectory(${ROOT}/CMSIS/DSP/Source EXCLUDE_FROM_ALL)
add_subdirectory(${ROOT}/CMSIS/NN/Source EXCLUDE_FROM_ALL)
target_include_directories(CMSISDSP INTERFACE ${ROOT}/CMSIS/Core/Include)
target_include_directories(CMSISNN INTERFACE ${ROOT}/CMSIS/Core/Include)
include_directories(third_party/CMSIS_5/CMSIS/Core/Include)
endif()
if(HEXAGON6)
# Does not work with "-O3"
......@@ -13,10 +53,7 @@ if(MACE_MICRO_ARM_NONE)
add_definitions(-DMACE_MICRO_ARM_NONE)
endif()
option(MACE_ENABLE_BFLOAT16 "Whether to enable bfloat16 support" OFF)
option(MACE_MICRO_ENABLE_TESTS "Whether to enable Mace Micro tests" ON)
if(MACE_ENABLE_BFLOAT16)
if(MACE_MICRO_ENABLE_BFLOAT16)
add_definitions(-DMACE_ENABLE_BFLOAT16)
endif()
......@@ -24,15 +61,12 @@ if(MACE_MICRO_NDEBUG)
add_definitions(-DMACE_MICRO_NDEBUG)
endif()
include(third_party/third_party.cmake)
add_subdirectory(include)
add_subdirectory(port)
add_subdirectory(base)
add_subdirectory(model)
add_subdirectory(framework)
add_subdirectory(ops)
add_subdirectory(codegen)
file(GLOB micro_base_srcs base/*.cc)
file(GLOB micro_codegen_models_srcs codegen/models/**/*.cc)
......@@ -41,15 +75,13 @@ file(GLOB micro_framework_srcs framework/*.cc)
file(GLOB micro_models_srcs model/*.cc)
file(GLOB micro_ops_nhwc_base_srcs ops/nhwc/base/*.cc)
file(GLOB micro_ops_nhwc_srcs ops/nhwc/*.cc)
file(GLOB micro_ops_nhwc_cmsis_nn_srcs ops/nhwc/cmsis_nn/*.cc)
file(GLOB micro_ops_srcs ops/*.cc)
file(GLOB micro_ops_utils_srcs ops/utils/*.cc)
file(GLOB micro_port_srcs port/*.cc)
# To build a single library
add_library(micro
list(APPEND micro_src
${micro_base_srcs}
${micro_codegen_models_srcs}
${micro_codegen_engines_srcs}
${micro_framework_srcs}
${micro_models_srcs}
${micro_ops_srcs}
......@@ -58,22 +90,56 @@ add_library(micro
${micro_ops_utils_srcs}
${micro_port_srcs}
)
target_include_directories(micro PUBLIC ..)
if(MACE_MICRO_ENABLE_CMSIS)
list(APPEND micro_src ${micro_ops_nhwc_cmsis_nn_srcs})
endif()
add_library(micro ${micro_src})
target_include_directories(micro PUBLIC .. PUBLIC include/public)
install(TARGETS micro
ARCHIVE DESTINATION lib
LIBRARY DESTINATION lib
RUNTIME DESTINATION bin
)
install(FILES include/public/micro.h DESTINATION include)
if(MACE_MICRO_ENABLE_CMSIS)
target_link_libraries(micro PRIVATE CMSISNN)
install(TARGETS
CMSISNNReshape
CMSISNNBasicMaths
CMSISNNConcatenation
CMSISNNFullyConnected
CMSISNNConvolutions
CMSISNNActivation
CMSISNNPooling
CMSISNNSoftmax
CMSISNNSupport
ARCHIVE DESTINATION lib
LIBRARY DESTINATION lib
RUNTIME DESTINATION bin
)
endif()
add_subdirectory(codegen)
if(HEXAGON OR HEXAGON_STUB)
include(cmake/find_hexagon_sdk.cmake)
endif()
if(NOT HEXAGON)
if(NOT HEXAGON AND MICRO_MODEL_NAME)
add_subdirectory(third_party/gflags EXCLUDE_FROM_ALL)
add_subdirectory(tools)
endif()
if(MACE_MICRO_ENABLE_TESTS)
add_subdirectory(third_party/googletest EXCLUDE_FROM_ALL)
add_subdirectory(test)
endif(MACE_MICRO_ENABLE_TESTS)
endif()
if(MACE_MICRO_ENABLE_EXAMPLES)
add_subdirectory(examples)
endif()
......@@ -30,7 +30,7 @@ const int32_t kInt8ValueBufferLength = 4;
const int32_t kFloatValueBufferLength = 21;
inline bool IsValidLogLevel(const LogLevel level) {
return level >= CLEAN && level < INVALID_MAX;
return level < INVALID_MAX;
}
char LogLevelToShortStr(LogLevel level) {
......
......@@ -18,6 +18,7 @@
#include <stdint.h>
#include "micro/include/public/micro.h"
#include "micro/include/port/define.h"
namespace micro {
......
......@@ -52,6 +52,35 @@ MACE_MAPPING_DATA_TYPE_AND_ENUM(int32_t, DT_INT32);
MACE_MAPPING_DATA_TYPE_AND_ENUM(BFloat16, DT_BFLOAT16);
#endif
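// Affine quantization parameters: real_value = scale * (quantized_value - zero)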
struct QuantizeInfo {
float scale;
int32_t zero;
};
namespace ops {
namespace eltwise { // for redefine
enum Type {
SUM = 0,
SUB = 1,
PROD = 2,
DIV = 3,
MIN = 4,
MAX = 5,
NEG = 6,
ABS = 7,
SQR_DIFF = 8,
POW = 9,
EQUAL = 10,
FLOOR_DIV = 11,
CLIP = 12,
SIGN = 13,
NONE = 14,
};
} // namespace eltwise
} // namespace ops
} // namespace micro
#endif // MICRO_BASE_TYPES_H_
......@@ -105,5 +105,25 @@ float log(float x) {
return ::log(x);
}
template <typename T>
const T &max(const T &a, const T &b) {
return (a < b) ? b : a;
}
template <typename T>
const T &min(const T &a, const T &b) {
return (a < b) ? a : b;
}
bool ShapeIsEqual(const int32_t *dims0,
const int32_t *dims1, uint32_t dim_size) {
while (dim_size-- > 0) {
if (dims0[dim_size] != dims1[dim_size])
return false;
}
return true;
}
} // namespace base
} // namespace micro
......@@ -26,6 +26,8 @@ uint32_t strlen(const char *str);
int32_t strcmp(const char *str1, const char *str2);
void memcpy(void *dst, const void *src, uint32_t bytes);
int32_t GetShapeSize(uint32_t dim_size, const int32_t *dims);
bool ShapeIsEqual(const int32_t *dims0,
const int32_t *dims1, uint32_t dim_size);
float sqrt(float x);
int32_t ceil(float f);
int32_t floor(float f);
......
if(NOT ARM_CPU)
message(FATAL_ERROR "Please set ARM_CPU (e.g. -DARM_CPU=cortex-m4); it is passed to the compiler as -mcpu=${ARM_CPU}")
endif()
add_compile_options("-mcpu=${ARM_CPU};-mthumb")
add_compile_options("-ffunction-sections;-fdata-sections")
# floating-point ABI
option(MACE_MICRO_ENABLE_HARDFP "Whether to use hard float-point ABI" ON)
if(MACE_MICRO_ENABLE_HARDFP)
add_compile_options("-mfloat-abi=hard")
else()
add_compile_options("-mfloat-abi=softfp")
endif()
# FPU
if (ARM_CPU STREQUAL "cortex-m55" )
add_compile_options("-mfpu=fpv5-d16")
add_link_options("-mfpu=fpv5-d16")
endif()
if (ARM_CPU STREQUAL "cortex-m33" )
add_compile_options("-mfpu=fpv5-sp-d16")
add_link_options("-mfpu=fpv5-sp-d16")
endif()
if (ARM_CPU STREQUAL "cortex-m7" )
add_compile_options("-mfpu=fpv5-d16")
add_link_options("-mfpu=fpv5-d16")
endif()
if (ARM_CPU STREQUAL "cortex-m4" )
add_compile_options("-mfpu=fpv4-sp-d16")
add_link_options("-mfpu=fpv4-sp-d16")
endif()
set(CMAKE_SYSTEM_PROCESSOR arm)
set(CMAKE_C_COMPILER "${GCC_ARM_ROOT}/arm-none-eabi-gcc")
set(CMAKE_CXX_COMPILER "${GCC_ARM_ROOT}/arm-none-eabi-g++")
set(CMAKE_AR "${GCC_ARM_ROOT}/arm-none-eabi-ar" CACHE FILEPATH "Archiver")
set(CMAKE_LINKER "${GCC_ARM_ROOT}/arm-none-eabi-ld")
set(CMAKE_EXE_LINKER_FLAGS "--specs=nosys.specs" CACHE INTERNAL "")
set(MACE_MICRO_ARM_NONE ON)
set(CMAKE_SYSTEM_NAME Generic)
set(CMAKE_SYSTEM_PROCESSOR arm)
set(CMAKE_FIND_ROOT_PATH "${GCC_ARM_ROOT}")
set(CMAKE_FIND_ROOT_PATH_MODE_PROGRAM NEVER)
set(CMAKE_FIND_ROOT_PATH_MODE_LIBRARY ONLY)
set(CMAKE_FIND_ROOT_PATH_MODE_INCLUDE ONLY)
set(CMAKE_FIND_ROOT_PATH_MODE_PACKAGE ONLY)
find_program(CMAKE_C_COMPILER NAMES arm-none-eabi-gcc arm-none-eabi-gcc.exe)
find_program(CMAKE_CXX_COMPILER NAMES arm-none-eabi-g++ arm-none-eabi-g++.exe)
find_program(CMAKE_ASM_COMPILER NAMES arm-none-eabi-gcc arm-none-eabi-gcc.exe)
find_program(CMAKE_AR NAMES arm-none-eabi-gcc-ar arm-none-eabi-gcc-ar.exe)
find_program(CMAKE_CXX_COMPILER_AR NAMES arm-none-eabi-gcc-ar arm-none-eabi-gcc-ar.exe)
find_program(CMAKE_C_COMPILER_AR NAMES arm-none-eabi-gcc-ar arm-none-eabi-gcc-ar.exe)
find_program(CMAKE_LINKER NAMES arm-none-eabi-g++ arm-none-eabi-g++.exe)
find_program(ELF2BIN NAMES arm-none-eabi-objcopy arm-none-eabi-objcopy.exe)
# Force compiler settings
SET(CMAKE_C_COMPILER_WORKS TRUE)
SET(CMAKE_CXX_COMPILER_WORKS TRUE)
set(MACE_MICRO_GCC_ARM ON)
file(GLOB_RECURSE generated_models_srcs models *.cc)
add_library(generated_models
${generated_models_srcs}
)
target_link_libraries(generated_models
micro_framework
micro_include
micro_model
micro_ops
)
file(GLOB_RECURSE micro_engine_srcs engines micro_engine_factory.cc)
add_library(micro_engine
${micro_engine_srcs}
)
target_link_libraries(micro_engine
micro_framework
micro_model
micro_ops
generated_models
)
file(GLOB_RECURSE micro_engine_c_srcs engines micro_engine_c_interface.cc)
add_library(micro_engine_c
# Use ".keep.cc" as a source file when there are no model source files in "models" directory
add_library(models
${generated_models_srcs}
${micro_engine_srcs}
${micro_engine_c_srcs}
)
target_link_libraries(micro_engine_c
micro_engine
target_link_libraries(models
micro
)
install(TARGETS models
ARCHIVE DESTINATION lib
)
\ No newline at end of file
add_subdirectory(classifier)
\ No newline at end of file
mbed-os
BUILD
install
mbed_app.json
__pycache__
mbed_settings.py
\ No newline at end of file
TARGET=NUCLEO_F767ZI
ROOT=.
if(NOT MICRO_MODEL_NAME OR NOT MICRO_DATA_NAME)
message(FATAL_ERROR "MICRO_MODEL_NAME or MICRO_DATA_NAME is undefined")
endif()
add_executable(${MICRO_MODEL_NAME} main.cc)
target_compile_options(${MICRO_MODEL_NAME} PRIVATE "-Wno-error")
target_link_libraries(${MICRO_MODEL_NAME} micro models)
target_compile_definitions(${MICRO_MODEL_NAME} PRIVATE "-DMICRO_MODEL_NAME=${MICRO_MODEL_NAME}")
target_compile_definitions(${MICRO_MODEL_NAME} PRIVATE "-DMICRO_DATA_NAME=${MICRO_DATA_NAME}")
// Copyright 2020 The MACE Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef MICRO_EXAMPLES_CLASSIFIER_DATA_H_
#define MICRO_EXAMPLES_CLASSIFIER_DATA_H_
#include "data/har.h"
#include "data/kws.h"
#include "data/mnist.h"
#include "stdint.h"
namespace mnist {
const float *input = data_mnist_4;
const int32_t input_dims[4] = {1, 28, 28, 1};
} // namespace mnist
namespace har {
const float *input = data_har_standing;
const int32_t input_dims[4] = {1, 90, 3, 1};
} // namespace har
namespace kws {
const float *input = data_kws_yes;
const int32_t input_dims[4] = {1, 98, 40, 1};
} // namespace kws
#endif // MICRO_EXAMPLES_CLASSIFIER_DATA_H_
// Copyright 2020 The MACE Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef MICRO_EXAMPLES_DATA_HAR_H_
#define MICRO_EXAMPLES_DATA_HAR_H_
static float data_har_jogging[270] = {
5.012288, 11.264028, 0.95342433, -0.6946377, 12.680544, 0.50395286,
4.903325, 10.882658, -0.08172209, -0.61291564, 18.496431, 3.0237172,
-1.1849703, 12.108489, 7.205164, 1.3756552, -2.4925237, -6.510526,
-0.61291564, 10.56939, 5.706926, -0.50395286, 13.947236, 7.0553403,
-8.430995, 11.413852, 5.134871, 0.95342433, 1.3756552, 1.6480621,
-8.19945, 19.57244, 2.7240696, 1.4165162, 5.7886477, 2.982856,
-1.879608, -2.982856, -0.29964766, -6.1291566, 6.851035, -8.158588,
5.829509, 18.0061, 8.539958, 6.2789803, 2.982856, 2.9147544,
-1.56634, 8.308413, -1.4573772, 3.5276701, 13.593107, 9.425281,
-2.0294318, -5.706926, -10.18802, 2.7649305, 10.337844, -9.724928,
3.568531, 13.6748295, 1.5390993, -0.50395286, 3.8681788, 3.718355,
-2.3018389, 1.6889231, 0.08172209, -3.568531, 19.57244, 6.510526,
-0.8036005, -3.2961242, -4.630918, 0.50395286, 10.841797, 13.525005,
5.706926, 15.595298, 6.1700177, -8.662541, 7.273266, 4.0180025,
-1.334794, 1.2258313, 2.3699405, -4.5900574, 19.57244, 4.7126403,
3.8681788, 3.759216, 0.84446156, -1.7978859, 1.5390993, 8.730643,
7.668256, 11.264028, -1.3075534, -2.3699405, 14.2877445, 8.281172,
2.7240696, 1.4573772, 0.88532263, -3.5957718, 18.659876, -0.6537767,
3.9499009, 4.140586, 3.990762, 0.46309182, -2.4108016, 2.4108016,
3.7864566, 14.137921, -3.1463003, 3.336985, 19.231932, 6.5513873,
5.6660647, 3.7864566, 0.53119355, 0.23154591, 0.7627395, 0.7627395,
-4.8216033, 19.57244, 8.158588, 1.8387469, -1.1168685, -2.7921712,
-3.2961242, 10.079058, 13.824653, 11.604536, 17.079916, 1.334794,
-3.173541, 14.015338, 5.706926, 0.61291564, 1.1168685, 2.5606253,
-7.8861814, 19.57244, 1.9885708, 3.1463003, 5.243834, 4.671779,
-3.0237172, -4.3312707, -3.336985, -0.08172209, 11.917805, -7.8861814,
-1.0351465, 14.818938, 4.6036777, -2.4516625, 2.5333846, 3.486809,
-1.3756552, 2.070293, -0.19068487, -2.4925237, 19.57244, 6.469665,
1.4573772, -5.243834, -4.372132, -1.4165162, 9.80665, 5.7477865,
-1.2666923, 14.709975, 6.2108784, -3.6774938, 3.173541, 3.7864566,
1.8387469, 2.7649305, -1.7570249, -1.2666923, 19.313654, 6.3198414,
2.4108016, -7.6546354, -6.1291566, -0.61291564, 16.358038, 4.944186,
0.040861044, 17.502148, 2.5333846, -7.6546354, 7.8180795, 4.372132,
-1.2666923, 0.7218784, 0.8036005, -5.012288, 19.57244, 5.5162406,
1.9477097, 2.7921712, 2.070293, -5.053149, 1.6480621, 7.6273947,
9.384419, 13.443283, 1.0351465, -5.434519, 13.211738, 6.4424243,
-0.61291564, 1.879608, 1.4165162, 4.7126403, -6.5513873, -6.0201936,
-1.7570249, 9.302697, -6.428804, -0.9125633, 10.501288, -0.27240697,
2.6014864, 19.381754, 4.440233, 5.7886477, 3.214402, 1.1441092,
-1.9885708, 12.4489975, -2.7240696, 1.4165162, 16.780268, 8.471856,
0.42223078, -8.267551, -7.3549876, -3.568531, 10.95076, -0.8036005,
-4.671779, 11.727119, 0.38136974, -2.1383946, 1.6889231, 3.5276701,
-1.334794, 2.4925237, -0.3405087, -2.9147544, 19.57244, 7.5865335,
3.5276701, -3.9499009, -1.920469, -4.0588636, 10.038197, 14.2877445};
static float data_har_walking[270] = {
-0.99, 11.45, -3.0645783, 1.18, 14.94, -3.718355,
1.27, 13.82, -1.2258313, -0.15, 11.14, -2.1111538,
-1.38, 8.05, -0.84446156, -1.99, 5.94, 0.14982383,
-0.08, 4.94, 0.88532263, -0.27, 4.14, 2.2609777,
-3.26, 6.44, 4.1814466, -5.75, 13.02, 7.273266,
-2.37, 10.65, 8.008764, -0.46, 15.94, 0.7218784,
1.8, 6.13, -1.1168685, -4.75, 10.84, -3.0645783,
-1.46, 8.39, 0.88532263, 1.33, 7.78, -0.46309182,
-3.72, 8.47, -0.7218784, -3.72, 8.47, -0.7218784,
-1.88, 7.63, -0.08172209, -1.12, 9.3, -0.10896278,
-2.37, 10.95, -0.8036005, -4.06, 12.3, -0.7627395,
-3.41, 14.52, -0.7218784, 0.34, 12.22, -3.7864566,
0.76, 15.32, -2.6014864, -0.04, 13.53, -1.1849703,
-0.53, 9.72, -2.1792557, 0.11, 5.52, -1.6480621,
0.38, 4.06, 0.46309182, 0.04, 3.26, 0.14982383,
-3.34, 5.83, 4.862464, -6.05, 13.14, 7.668256,
-0.91, 11.14, 11.073342, -0.5, 16.13, -0.9125633,
-0.27, 7.7, -1.1849703, -3.45, 9.28, -2.1383946,
-2.03, 9.04, -0.53119355, 2.03, 6.89, -0.5720546,
-2.18, 7.5, -1.3756552, -1.8, 7.21, -0.0,
-1.57, 9.96, 0.08172209, -3.21, 12.07, -0.14982383,
-5.09, 12.22, -0.7627395, -2.68, 14.98, -3.173541,
1.99, 12.79, -3.2961242, 0.84, 14.82, -2.2609777,
0.69, 13.21, -2.2609777, -1.08, 9.15, -1.2258313,
-0.95, 4.9, -0.7627395, -0.11, 4.67, 0.19068487,
0.61, 3.49, 0.08172209, -1.84, 5.48, 5.134871,
-5.6, 14.06, 7.3958488, -1.08, 12.03, 8.308413,
1.73, 14.56, 2.9147544, -0.76, 5.94, -5.325556,
-5.6, 12.83, -0.0, 0.04, 6.66, -0.9942854,
1.65, 7.89, -0.6537767, -2.3, 7.93, -2.3426998,
-1.92, 8.24, -0.040861044, -1.42, 9.96, -0.14982383,
-3.72, 11.5, 0.14982383, -4.59, 12.18, -0.5720546,
-2.79, 14.25, -3.2961242, 3.15, 13.02, -3.1054392,
1.46, 14.94, -2.2201166, -2.22, 12.49, -2.1111538,
-1.42, 9.53, -1.607201, -0.11, 6.17, -0.8036005,
0.34, 4.71, 0.10896278, 1.04, 3.49, 0.53119355,
-1.99, 5.05, 3.255263, -6.66, 14.29, 7.082581,
-3.87, 10.04, 9.765789, -1.5, 18.39, -0.6946377,
2.37, 5.01, -0.5720546, -5.24, 10.76, -3.173541,
-1.46, 8.2, 0.53119355, 2.6, 6.97, -0.040861044,
-3.53, 8.85, -1.879608, -1.23, 7.06, -0.23154591,
-1.53, 11.3, 0.23154591, -2.53, 11.65, -0.6946377,
-3.83, 12.34, -0.50395286, -2.96, 13.25, -3.173541,
2.83, 13.25, -3.173541, 0.65, 14.41, -1.1441092,
-0.89, 11.8, -2.6014864, -1.18, 7.21, -1.334794};
static float data_har_standing[270] = {
3.17, 9.28, 1.1441092, 3.3, 9.23, 1.1168685,
3.21, 9.3, 1.1849703, 3.17, 9.28, 1.0760075,
3.17, 9.34, 1.1168685, 3.26, 9.28, 1.1168685,
3.21, 9.3, 1.1168685, 3.21, 9.23, 1.1168685,
3.17, 9.28, 1.1168685, 3.15, 9.28, 1.1849703,
3.17, 9.34, 1.1168685, 3.21, 9.28, 1.1849703,
3.21, 9.3, 1.0760075, 3.15, 9.34, 1.1168685,
3.21, 9.28, 1.0760075, 3.21, 9.34, 1.1441092,
3.26, 9.3, 1.1441092, 3.17, 9.34, 1.1168685,
3.21, 9.3, 1.1168685, 3.21, 9.28, 1.1168685,
3.26, 9.28, 1.1849703, 3.17, 9.3, 1.1168685,
3.21, 9.28, 1.1168685, -1.88, 9.85, -0.23154591,
-0.19, 9.92, -0.5720546, -0.61, 10.27, -0.88532263,
0.76, 10.57, -1.7570249, 0.42, 9.47, -1.1168685,
0.38, 9.47, -1.9477097, -1.04, 10.65, -1.525479,
-1.92, 9.51, -0.5720546, -1.31, 9.85, -0.53119355,
-0.08, 9.92, -1.7570249, 1.73, 9.77, -0.8036005,
1.5, 9.92, -1.4573772, 1.27, 10.5, -1.879608,
0.61, 10.12, -1.9885708, -0.53, 9.77, -1.879608,
-0.42, 9.62, -1.6480621, 0.65, 10.42, -2.2201166,
0.65, 10.42, -2.2201166, 1.61, 9.38, -1.8387469,
1.61, 9.43, -1.525479, 1.61, 9.43, -1.525479,
0.95, 10.27, -1.3075534, 0.19, 10.38, -1.1849703,
0.31, 9.81, -1.4165162, 1.12, 9.62, -1.6889231,
1.23, 9.85, -1.6480621, 1.04, 9.7, -1.8387469,
0.57, 9.89, -2.0294318, 0.65, 9.96, -1.9885708,
0.95, 9.96, -1.7570249, 1.42, 10, -1.7297841,
1.69, 9.89, -1.525479, 1.46, 10, -1.4165162,
0.69, 9.77, -1.6889231, 0.08, 9.96, -1.9477097,
-0.08, 10.19, -2.1111538, 0.38, 9.72, -1.9885708,
0.93, 10.12, -2.1111538, 1.33, 9.62, -1.9885708,
1.08, 9.85, -1.9477097, 0.8, 9.77, -1.7570249,
0.69, 10.34, -1.6889231, 0.72, 9.66, -1.3075534,
0.69, 10, -1.3756552, 0.93, 9.62, -1.4573772,
0.76, 10.12, -1.607201, 0.93, 9.72, -1.7978859,
0.76, 10.23, -1.9885708, 0.76, 9.23, -1.920469,
0.57, 10.34, -2.1383946, 0.99, 9.58, -1.879608,
1.33, 10.04, -1.7978859, 1.61, 9.85, -1.4165162,
0.61, 10.15, -0.88532263, 0.53, 9.58, -1.4573772,
0.15, 10.19, -1.920469, 0.34, 9.85, -1.334794,
0.8, 10.31, -1.7978859, 0.69, 9.53, -1.9477097,
0.8, 9.92, -1.879608, 0.5, 10.04, -1.1849703,
1.12, 9.43, -1.7978859, 1.31, 10.27, -1.2666923,
1.5, 9.77, -1.607201, 0.46, 10.04, -0.9125633,
0.31, 9.85, -1.0760075, 0.61, 10.19, -1.1849703};
#endif // MICRO_EXAMPLES_DATA_HAR_H_
This diff is collapsed.
// Copyright 2020 The MACE Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef MICRO_EXAMPLES_DATA_MNIST_H_
#define MICRO_EXAMPLES_DATA_MNIST_H_
// clang-format off
static float data_mnist_4[28*28] = {
0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0.,
0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0.,
0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0.,
0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0.,
0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0.,
0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0.4,0.1,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0.8,0.4,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0.,
0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0.2,1. ,0.2,0. ,0. ,0. ,0. ,0. ,0. ,0.2,1. ,0.5,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0.,
0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0.6,1. ,0.2,0. ,0. ,0. ,0. ,0. ,0. ,0.6,1. ,0.9,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0.,
0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0.7,1. ,0.2,0. ,0. ,0. ,0. ,0. ,0.1,0.8,1. ,0.6,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0.,
0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,1. ,1. ,0.2,0. ,0. ,0. ,0. ,0. ,0.1,1. ,1. ,0.5,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0.,
0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,1. ,1. ,0.2,0. ,0. ,0. ,0. ,0. ,0.2,1. ,0.7,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0.,
0. ,0. ,0. ,0. ,0. ,0. ,0. ,0.3,1. ,0.8,0.1,0. ,0. ,0. ,0. ,0. ,0.6,1. ,0.2,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0.,
0. ,0. ,0. ,0. ,0. ,0. ,0. ,0.4,1. ,0.8,0. ,0. ,0. ,0. ,0. ,0. ,0.7,1. ,0.2,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0.,
0. ,0. ,0. ,0. ,0. ,0. ,0. ,0.4,1. ,0.8,0.1,0. ,0. ,0. ,0. ,0.2,1. ,1. ,0.2,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0.,
0. ,0. ,0. ,0. ,0. ,0. ,0. ,0.3,1. ,1. ,0.9,0.7,0.5,0.6,0.2,0.6,1. ,1. ,0.6,0.2,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0.,
0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0.5,1. ,1. ,1. ,1. ,1. ,1. ,1. ,1. ,1. ,1. ,0.8,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0.,
0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0.1,0.5,0.5,0.5,0.5,0.5,0.9,1. ,0.6,0.1,0.1,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0.,
0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0.9,0.9,0.1,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0.,
0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0.2,1. ,0.9,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0.,
0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0.2,0.9,1. ,0.3,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0.,
0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0.3,1. ,0.9,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0.,
0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0.8,1. ,0.2,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0.,
0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0.1,0.8,1. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0.,
0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0.2,1. ,1. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0.,
0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0.1,0.9,1. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0.,
0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0.,
0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0.,
0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0. ,0.
};
// clang-format on
#endif // MICRO_EXAMPLES_DATA_MNIST_H_
// Copyright 2020 The MACE Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include <cstdio>
#include "data.h"
#include "micro.h"
namespace micro {
namespace MICRO_MODEL_NAME {
MaceStatus GetMicroEngineSingleton(MaceMicroEngine **engine);
}
} // namespace micro
int main() {
micro::MaceMicroEngine *micro_engine = NULL;
micro::MICRO_MODEL_NAME::GetMicroEngineSingleton(&micro_engine);
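// Register the example input (data and shape come from data.h), run
// inference, then read the first output tensor back.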
micro_engine->RegisterInputData(0, MICRO_DATA_NAME::input,
MICRO_DATA_NAME::input_dims);
micro_engine->Run();
float *output_buffer = NULL;
const int32_t *output_dims = NULL;
uint32_t dim_size = 0;
micro_engine->GetOutputData(0, reinterpret_cast<void **>(&output_buffer),
&output_dims, &dim_size);
int32_t output_total_size = 1;
for (int32_t i = 0; i < dim_size; ++i) {
output_total_size *= output_dims[i];
}
for (int32_t i = 0; i < output_total_size; ++i) {
printf("%d: %f\n", i, output_buffer[i]);
}
return 0;
}
https://github.com/ARMmbed/mbed-os/#0db72d0cf26539016efbe38f80d6f2cb7a3d4414
......@@ -74,6 +74,8 @@ uint32_t Operator::GetInputSize() {
}
const void *Operator::DoGetInputData(uint32_t idx) {
MACE_ASSERT(idx < GetInputSize());
const void *data = NULL;
const OpIOInfo *input_info = op_context_->input_info(idx);
const uint32_t op_def_idx = input_info->op_def_idx_;
......@@ -94,6 +96,8 @@ const void *Operator::DoGetInputData(uint32_t idx) {
}
uint32_t Operator::GetInputShapeDimSize(uint32_t idx) {
MACE_ASSERT(idx < GetInputSize());
uint32_t dim_size = 0;
const OpIOInfo *input_info = op_context_->input_info(idx);
const uint32_t op_def_idx = input_info->op_def_idx_;
......@@ -115,6 +119,8 @@ uint32_t Operator::GetInputShapeDimSize(uint32_t idx) {
}
const int32_t *Operator::GetInputShapeDims(uint32_t idx) {
MACE_ASSERT(idx < GetInputSize());
const int32_t *dims = NULL;
const OpIOInfo *input_info = op_context_->input_info(idx);
const uint32_t op_def_idx = input_info->op_def_idx_;
......@@ -138,14 +144,20 @@ uint32_t Operator::GetOutputSize() {
}
DataType Operator::GetOutputDataType(uint32_t idx) {
MACE_ASSERT(idx < GetOutputSize());
return op_def_->output_type(idx);
}
void *Operator::DoGetOutputData(uint32_t idx) {
MACE_ASSERT(idx < GetOutputSize());
return engine_config_->tensor_mem_ + op_def_->mem_offset(idx);
}
uint32_t Operator::GetOutputShapeDimSize(uint32_t idx) {
MACE_ASSERT(idx < GetOutputSize());
uint32_t dim_size = 0;
model::OutputShape *output_shape =
const_cast<model::OutputShape *>(op_context_->output_resize_shape(idx));
......@@ -156,6 +168,8 @@ uint32_t Operator::GetOutputShapeDimSize(uint32_t idx) {
}
const int32_t *Operator::GetOutputShapeDims(uint32_t idx) {
MACE_ASSERT(idx < GetOutputSize());
const int32_t *dims = NULL;
model::OutputShape *output_shape =
const_cast<model::OutputShape *>(op_context_->output_resize_shape(idx));
......@@ -167,6 +181,8 @@ const int32_t *Operator::GetOutputShapeDims(uint32_t idx) {
MaceStatus Operator::ResizeOutputShape(uint32_t idx, uint32_t dim_size,
const int32_t *dims) {
MACE_ASSERT(idx < GetOutputSize());
model::OutputShape *output_shape =
const_cast<model::OutputShape *>(op_context_->output_resize_shape(idx));
#ifndef MACE_MICRO_NDEBUG
......@@ -201,6 +217,44 @@ MaceStatus Operator::ResizeOutputShape(uint32_t idx, uint32_t dim_size,
return MACE_SUCCESS;
}
QuantizeInfo Operator::GetInputQuantizeInfo(uint32_t idx) {
MACE_ASSERT(idx < GetInputSize());
QuantizeInfo quantize_info = {0.0f, 0};
const OpIOInfo *input_info = op_context_->input_info(idx);
const uint32_t op_def_idx = input_info->op_def_idx_;
if (kIdxConstTensor == op_def_idx) {
const model::ConstTensor *const_tensor =
engine_config_->net_def_->tensor(input_info->output_idx_);
quantize_info.scale = const_tensor->scale();
quantize_info.zero = const_tensor->zero_point();
return quantize_info;
} else if (kIdxModelInput == op_def_idx) {
MACE_ASSERT1(false, "Unexpected, the model input has no quantize info");
} else {
const model::OperatorDef *pre_op_def =
engine_config_->net_def_->op(op_def_idx);
model::QuantizeActivationInfo quantize_activation_info =
pre_op_def->quantize_info(input_info->output_idx_);
quantize_info.scale = quantize_activation_info.scale();
quantize_info.zero = quantize_activation_info.zero_point();
return quantize_info;
}
return quantize_info;
}
QuantizeInfo Operator::GetOutputQuantizeInfo(uint32_t idx) {
MACE_ASSERT(idx < GetOutputSize());
QuantizeInfo quantize_info;
model::QuantizeActivationInfo quantize_activation_info =
op_def_->quantize_info(idx);
quantize_info.scale = quantize_activation_info.scale();
quantize_info.zero = quantize_activation_info.zero_point();
return quantize_info;
}
#ifndef MACE_DEFINE_GET_ARG_BY_NAME_FUNC
#define MACE_DEFINE_GET_ARG_BY_NAME_FUNC(T, FUNC) \
template <> \
......
......@@ -17,8 +17,8 @@
#include "micro/base/logging.h"
#include "micro/base/types.h"
#include "micro/include/public/micro.h"
#include "micro/framework/scratch_buffer.h"
#include "micro/include/public/micro.h"
namespace micro {
......@@ -84,6 +84,9 @@ class Operator {
const int32_t *input_dims);
MaceStatus ReuseInputBufferForOutput(uint32_t output_idx, uint32_t input_idx);
QuantizeInfo GetInputQuantizeInfo(uint32_t idx);
QuantizeInfo GetOutputQuantizeInfo(uint32_t idx);
template<typename T>
const T *GetInputData(uint32_t idx) {
return static_cast<const T *>(DoGetInputData(idx));
......@@ -101,7 +104,7 @@ class Operator {
const model::OperatorDef *op_def_;
MaceMicroEngineConfig *engine_config_;
private:
protected:
OpContext *op_context_;
};
......
......@@ -17,7 +17,9 @@
#include <stdint.h>
#include "micro/include/port/define.h"
#ifndef NULL
#define NULL 0
#endif
namespace micro {
......@@ -61,7 +63,7 @@ class Graph;
class Operator;
} // namespace framework
struct MACE_API MaceMicroEngineConfig {
struct MaceMicroEngineConfig {
model::NetDef *net_def_;
const uint8_t *model_data_;
framework::Graph *graph_;
......@@ -73,7 +75,7 @@ struct MACE_API MaceMicroEngineConfig {
uint32_t scratch_buffer_size_;
};
class MACE_API MaceMicroEngine {
class MaceMicroEngine {
public:
MaceMicroEngine() {}
~MaceMicroEngine() {}
......
......@@ -32,7 +32,14 @@ union Sphinx {
class BFloat16 {
public:
BFloat16();
BFloat16() {}
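// bfloat16 keeps the upper 16 bits of an IEEE-754 float
// (sign, 8-bit exponent, 7-bit mantissa), so conversions are bit shifts.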
explicit BFloat16(float value) { data_ = Sphinx(value).i >> 16; }
explicit BFloat16(int value) {
data_ = Sphinx(static_cast<float>(value)).i >> 16;
}
operator float() const {
return Sphinx(static_cast<uint32_t>(data_ << 16)).f;
......
......@@ -23,11 +23,22 @@ MACE_DEFINE_STRING_FUNC(OperatorDef, name, name_)
MACE_DEFINE_STRING_FUNC(OperatorDef, type, type_)
MACE_DEFINE_OBJECT_FUNC(OperatorDef, int32_t, device_type)
MACE_DEFINE_PTR_ARRAY_FUNC(OperatorDef, Argument, arg, args_)
MACE_DEFINE_PTR_ARRAY_FUNC(OperatorDef, OutputShape,
output_shape, output_shapes_)
MACE_DEFINE_PTR_ARRAY_FUNC(OperatorDef,
OutputShape,
output_shape,
output_shapes_)
MACE_DEFINE_ARRAY_FUNC(OperatorDef, DataType, output_type, output_types_)
MACE_DEFINE_ARRAY_FUNC(OperatorDef,
QuantizeActivationInfo,
quantize_info,
quantize_info_);
// the mem_offset is the mem_id in proto file
MACE_DEFINE_ARRAY_FUNC(OperatorDef, int32_t, mem_offset, mem_offsets_)
MACE_DEFINE_OBJECT_FUNC(QuantizeActivationInfo, float, scale);
MACE_DEFINE_OBJECT_FUNC(QuantizeActivationInfo, int32_t, zero_point);
MACE_DEFINE_OBJECT_FUNC(QuantizeActivationInfo, float, minval);
MACE_DEFINE_OBJECT_FUNC(QuantizeActivationInfo, float, maxval);
} // namespace model
} // namespace micro
......@@ -23,6 +23,22 @@
namespace micro {
namespace model {
class QuantizeActivationInfo {
public:
MACE_DEFINE_HARD_CODE_MAGIC(QuantizeActivationInfo)
MACE_DECLARE_OBJECT_FUNC(float, scale);
MACE_DECLARE_OBJECT_FUNC(int32_t, zero_point);
MACE_DECLARE_OBJECT_FUNC(float, minval);
MACE_DECLARE_OBJECT_FUNC(float, maxval);
private:
SerialFloat scale_;
SerialInt32 zero_point_;
SerialFloat minval_;
SerialFloat maxval_;
};
class OperatorDef : public Serialize {
public:
MACE_DEFINE_HARD_CODE_MAGIC(OperatorDef)
......@@ -35,6 +51,7 @@ class OperatorDef : public Serialize {
MACE_DECLARE_PTR_ARRAY_FUNC(Argument, arg);
MACE_DECLARE_PTR_ARRAY_FUNC(OutputShape, output_shape);
MACE_DECLARE_ARRAY_FUNC(DataType, output_type);
MACE_DECLARE_ARRAY_FUNC(QuantizeActivationInfo, quantize_info);
// the mem_offset is the mem_id in proto file
MACE_DECLARE_ARRAY_FUNC(int32_t, mem_offset);
......@@ -48,6 +65,7 @@ class OperatorDef : public Serialize {
SerialArray<Argument> args_;
SerialArray<OutputShape> output_shapes_;
SerialArray<DataType> output_types_;
SerialArray<QuantizeActivationInfo> quantize_info_;
SerialArray<SerialInt32> mem_offsets_;
};
......
set(MICRO_OPS_SRCS
shape.cc
reduce.cc
reshape.cc
matmul.cc
nhwc/depthwise_conv_2d_ref.cc
nhwc/conv_2d_c4_s4.cc
......@@ -31,12 +30,13 @@ set(MICRO_OPS_SRCS
activation.cc
)
add_subdirectory(nhwc)
add_library(micro_ops
${MICRO_OPS_SRCS}
)
target_link_libraries(micro_ops
micro_base
micro_framework
PRIVATE micro_base
)
......
......@@ -19,14 +19,6 @@
namespace micro {
namespace ops {
namespace eltwise {
bool ShapeIsEqual(const int32_t *dims0,
const int32_t *dims1, uint32_t dim_size) {
while (--dim_size > 0) {
if (dims0[dim_size] != dims1[dim_size])
return false;
}
return true;
}
int32_t GetIndex(const int32_t *shape,
const int32_t *index, int32_t dim_size) {
......
......@@ -19,31 +19,13 @@
#include "micro/base/utils.h"
#include "micro/framework/operator.h"
#include "micro/framework/scratch_buffer.h"
#include "micro/base/types.h"
namespace micro {
namespace ops {
namespace eltwise { // for redefine
enum Type {
SUM = 0,
SUB = 1,
PROD = 2,
DIV = 3,
MIN = 4,
MAX = 5,
NEG = 6,
ABS = 7,
SQR_DIFF = 8,
POW = 9,
EQUAL = 10,
FLOOR_DIV = 11,
CLIP = 12,
SIGN = 13,
NONE = 14,
};
namespace eltwise {
bool ShapeIsEqual(const int32_t *dims0,
const int32_t *dims1, uint32_t dim_size);
int32_t GetIndex(const int32_t *shape, const int32_t *index, int32_t dim_size);
void IncreaseIndex(const int32_t *shape, int32_t **index, int32_t dim_size);
template<typename T>
......@@ -202,9 +184,8 @@ class EltwiseOp : public framework::Operator {
if (input1_size == 1) {
TensorScalarEltwise(type_, input0_, input1_[0],
input0_size, swapped, output_ptr);
} else if (eltwise::ShapeIsEqual(input0_dims_,
input1_shape,
input0_dim_size_)) {
} else if (base::ShapeIsEqual(input0_dims_, input1_shape,
input0_dim_size_)) {
TensorEltwise(type_, input0_, input1_, input0_size,
swapped, output_ptr);
} else if (need_general_broadcast) {
......
......@@ -40,10 +40,8 @@ class MatMulOp : public framework::Operator {
uint32_t input_b_dim_size_;
const mifloat *bias_;
#ifndef MACE_MICRO_NDEBUG
const int32_t *bias_dims_;
uint32_t bias_dim_size_;
#endif
mifloat *output_;
......
if(MACE_MICRO_ENABLE_CMSIS)
add_subdirectory(cmsis_nn)
endif()
add_library(micro_ops_nhwc_cmsis_nn
arm_conv_2d_int8.cc
arm_pooling_int8.cc
arm_softmax_int8.cc
arm_mat_mul_int8.cc
arm_eltwise_int8.cc
arm_depthwise_conv_2d_int8.cc
dequantize.cc
quantize.cc
utilities.cc
)
target_link_libraries(micro_ops_nhwc_cmsis_nn
PRIVATE micro_base
PRIVATE CMSISNN
)
// Copyright 2020 The MACE Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "micro/ops/nhwc/cmsis_nn/arm_conv_2d_int8.h"
#include <arm_nnfunctions.h>
#include "micro/base/logger.h"
#include "micro/framework/op_context.h"
#include "micro/framework/scratch_buffer.h"
#include "micro/model/const_tensor.h"
#include "micro/model/net_def.h"
#include "micro/ops/nhwc/cmsis_nn/utilities.h"
namespace micro {
namespace ops {
MaceStatus ArmConv2dInt8Op::Compute(int32_t (&output_dims)[4]) {
MACE_ASSERT(filter_dims_[0] == output_dims[3] &&
input_dims_[3] == filter_dims_[3]);
QuantizeInfo input_quantize_info = GetInputQuantizeInfo(INPUT);
QuantizeInfo filter_quantize_info = GetInputQuantizeInfo(FILTER);
QuantizeInfo output_quantize_info = GetOutputQuantizeInfo(OUTPUT);
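// Requantization: the effective scale input_scale * filter_scale / output_scale
// is decomposed into a fixed-point multiplier plus a power-of-two shift, the
// form CMSIS-NN consumes (here replicated for every output channel).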
double double_multiplier = input_quantize_info.scale *
filter_quantize_info.scale /
output_quantize_info.scale;
int32_t multiplier;
int32_t shift;
QuantizeMultiplier(double_multiplier, &multiplier, &shift);
cmsis_nn_conv_params conv_params;
/// input_offset is the negated input zero point
conv_params.input_offset = -input_quantize_info.zero;
conv_params.output_offset = output_quantize_info.zero;
conv_params.activation.min = -128;
conv_params.activation.max = 127;
conv_params.stride.w = strides_[1];
conv_params.stride.h = strides_[0];
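// padding_sizes_ holds the total padding per spatial dimension; CMSIS-NN
// expects the per-side padding, hence the division by two.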
conv_params.padding.w = padding_sizes_[1] / 2;
conv_params.padding.h = padding_sizes_[0] / 2;
conv_params.dilation.w = dilations_[1];
conv_params.dilation.h = dilations_[0];
ScratchBuffer scratch_buffer(engine_config_);
cmsis_nn_per_channel_quant_params quant_params;
quant_params.multiplier = scratch_buffer.GetBuffer<int32_t>(output_dims[3]);
quant_params.shift = scratch_buffer.GetBuffer<int32_t>(output_dims[3]);
for (int32_t i = 0; i < output_dims[3]; ++i) {
quant_params.multiplier[i] = multiplier;
quant_params.shift[i] = shift;
}
MACE_ASSERT(input_dims_[0] == 1);
MACE_ASSERT(dilations_[0] == 1 && dilations_[1] == 1);
cmsis_nn_dims input_dims;
input_dims.n = input_dims_[0];
input_dims.h = input_dims_[1];
input_dims.w = input_dims_[2];
input_dims.c = input_dims_[3];
const int8_t *input_data = reinterpret_cast<const int8_t *>(input_);
cmsis_nn_dims filter_dims;
filter_dims.n = filter_dims_[0];
filter_dims.h = filter_dims_[1];
filter_dims.w = filter_dims_[2];
filter_dims.c = filter_dims_[3];
const int8_t *filter_data = reinterpret_cast<const int8_t *>(filter_);
cmsis_nn_dims bias_dims;
bias_dims.n = 1;
bias_dims.h = 1;
bias_dims.w = 1;
bias_dims.c = output_dims[3];
int32_t *bias_data =
const_cast<int32_t *>(reinterpret_cast<const int32_t *>(bias_));
if (bias_data == NULL) {
bias_data = scratch_buffer.GetBuffer<int32_t>(output_dims[3]);
for (int32_t i = 0; i < bias_dims.c; ++i) {
bias_data[i] = 0;
}
}
cmsis_nn_dims cmn_output_dims;
cmn_output_dims.n = output_dims[0];
cmn_output_dims.h = output_dims[1];
cmn_output_dims.w = output_dims[2];
cmn_output_dims.c = output_dims[3];
int8_t *output_data = reinterpret_cast<int8_t *>(output_);
cmsis_nn_context cmn_context;
cmn_context.size = arm_convolve_wrapper_s8_get_buffer_size(
&conv_params, &input_dims, &filter_dims, &cmn_output_dims);
if (cmn_context.size > 0) {
cmn_context.buf = scratch_buffer.GetBuffer<int8_t>(cmn_context.size);
} else {
cmn_context.buf = NULL;
}
arm_status status = arm_convolve_wrapper_s8(
&cmn_context, &conv_params, &quant_params, &input_dims, input_data,
&filter_dims, filter_data, &bias_dims, bias_data, &cmn_output_dims,
output_data);
MACE_ASSERT(status == ARM_MATH_SUCCESS)
<< "failed in arm_convolve_wrapper_s8";
return MACE_SUCCESS;
}
MaceStatus ArmConv2dInt8Op::Run() {
int32_t output_dims[4] = {0};
InitPaddingAndOutputSize(input_dims_, filter_dims_, FLOOR, output_dims);
ResizeOutputShape(0, 4, output_dims);
MACE_RETURN_IF_ERROR(Compute(output_dims));
return MACE_SUCCESS;
}
} // namespace ops
} // namespace micro
// Copyright 2020 The MACE Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef MICRO_OPS_NHWC_CMSIS_NN_ARM_CONV_2D_INT8_H_
#define MICRO_OPS_NHWC_CMSIS_NN_ARM_CONV_2D_INT8_H_
#include "micro/ops/nhwc/base/conv_2d_base.h"
#include "micro/ops/utils/activation.h"
namespace micro {
namespace ops {
class ArmConv2dInt8Op : public Conv2dBase {
public:
virtual MaceStatus Run();
private:
MaceStatus Compute(int32_t (&output_dims)[4]);
};
} // namespace ops
} // namespace micro
#endif // MICRO_OPS_NHWC_CMSIS_NN_ARM_CONV_2D_INT8_H_
// Copyright 2020 The MACE Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "micro/ops/nhwc/cmsis_nn/arm_depthwise_conv_2d_int8.h"
#include <arm_nnfunctions.h>
#include "micro/base/logger.h"
#include "micro/framework/op_context.h"
#include "micro/framework/scratch_buffer.h"
#include "micro/model/const_tensor.h"
#include "micro/model/net_def.h"
#include "micro/ops/nhwc/cmsis_nn/utilities.h"
namespace micro {
namespace ops {
MaceStatus ArmDepthwiseConv2dInt8Op::Compute(int32_t (&output_dims)[4]) {
QuantizeInfo input_quantize_info = GetInputQuantizeInfo(INPUT);
QuantizeInfo filter_quantize_info = GetInputQuantizeInfo(FILTER);
QuantizeInfo output_quantize_info = GetOutputQuantizeInfo(OUTPUT);
double double_multiplier = input_quantize_info.scale *
filter_quantize_info.scale /
output_quantize_info.scale;
int32_t multiplier;
int32_t shift;
QuantizeMultiplier(double_multiplier, &multiplier, &shift);
cmsis_nn_dw_conv_params dw_conv_params;
dw_conv_params.ch_mult = filter_dims_[0];
  /// CMSIS-NN expects the negated input zero point here
dw_conv_params.input_offset = -input_quantize_info.zero;
dw_conv_params.output_offset = output_quantize_info.zero;
dw_conv_params.activation.min = -128;
dw_conv_params.activation.max = 127;
dw_conv_params.stride.w = strides_[1];
dw_conv_params.stride.h = strides_[0];
dw_conv_params.padding.w = padding_sizes_[1] / 2;
dw_conv_params.padding.h = padding_sizes_[0] / 2;
dw_conv_params.dilation.w = dilations_[1];
dw_conv_params.dilation.h = dilations_[0];
ScratchBuffer scratch_buffer(engine_config_);
cmsis_nn_per_channel_quant_params quant_params;
quant_params.multiplier = scratch_buffer.GetBuffer<int32_t>(output_dims[3]);
quant_params.shift = scratch_buffer.GetBuffer<int32_t>(output_dims[3]);
for (int32_t i = 0; i < output_dims[3]; ++i) {
quant_params.multiplier[i] = multiplier;
quant_params.shift[i] = shift;
}
MACE_ASSERT(input_dims_[0] == 1);
MACE_ASSERT(filter_dims_[0] == 1);
MACE_ASSERT(dilations_[0] == 1 && dilations_[1] == 1);
cmsis_nn_dims input_dims;
input_dims.n = input_dims_[0];
input_dims.h = input_dims_[1];
input_dims.w = input_dims_[2];
input_dims.c = input_dims_[3];
const int8_t *input_data = reinterpret_cast<const int8_t *>(input_);
cmsis_nn_dims filter_dims;
filter_dims.n = filter_dims_[0];
filter_dims.h = filter_dims_[1];
filter_dims.w = filter_dims_[2];
filter_dims.c = filter_dims_[3];
const int8_t *filter_data = reinterpret_cast<const int8_t *>(filter_);
cmsis_nn_dims bias_dims;
bias_dims.n = 1;
bias_dims.h = 1;
bias_dims.w = 1;
bias_dims.c = output_dims[3];
int32_t *bias_data =
const_cast<int32_t *>(reinterpret_cast<const int32_t *>(bias_));
if (bias_data == NULL) {
bias_data = scratch_buffer.GetBuffer<int32_t>(output_dims[3]);
for (int32_t i = 0; i < bias_dims.c; ++i) {
bias_data[i] = 0;
}
}
cmsis_nn_dims cmn_output_dims;
cmn_output_dims.n = output_dims[0];
cmn_output_dims.h = output_dims[1];
cmn_output_dims.w = output_dims[2];
cmn_output_dims.c = filter_dims.c * filter_dims.n;
int8_t *output_data = reinterpret_cast<int8_t *>(output_);
cmsis_nn_context cmn_context;
cmn_context.size = arm_depthwise_conv_wrapper_s8_get_buffer_size(
&dw_conv_params, &input_dims, &filter_dims, &cmn_output_dims);
if (cmn_context.size > 0) {
cmn_context.buf = scratch_buffer.GetBuffer<int8_t>(cmn_context.size);
} else {
cmn_context.buf = NULL;
}
arm_status status = arm_depthwise_conv_wrapper_s8(
&cmn_context, &dw_conv_params, &quant_params, &input_dims, input_data,
&filter_dims, filter_data, &bias_dims, bias_data, &cmn_output_dims,
output_data);
  MACE_ASSERT(status == ARM_MATH_SUCCESS)
      << "failed in arm_depthwise_conv_wrapper_s8";
return MACE_SUCCESS;
}
MaceStatus ArmDepthwiseConv2dInt8Op::Run() {
int32_t output_dims[4] = {0};
InitPaddingAndOutputSize(input_dims_, filter_dims_, FLOOR, output_dims);
output_dims[3] *= input_dims_[3];
ResizeOutputShape(0, 4, output_dims);
MACE_RETURN_IF_ERROR(Compute(output_dims));
return MACE_SUCCESS;
}
} // namespace ops
} // namespace micro
// Copyright 2020 The MACE Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef MICRO_OPS_NHWC_CMSIS_NN_ARM_DEPTHWISE_CONV_2D_INT8_H_
#define MICRO_OPS_NHWC_CMSIS_NN_ARM_DEPTHWISE_CONV_2D_INT8_H_
#include "micro/ops/nhwc/base/depthwise_conv_2d_base.h"
#include "micro/ops/utils/activation.h"
namespace micro {
namespace ops {
class ArmDepthwiseConv2dInt8Op : public DepthwiseConv2dBase {
public:
virtual MaceStatus Run();
private:
MaceStatus Compute(int32_t (&output_dims)[4]);
};
} // namespace ops
} // namespace micro
#endif // MICRO_OPS_NHWC_CMSIS_NN_ARM_DEPTHWISE_CONV_2D_INT8_H_
// Copyright 2020 The MACE Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "micro/ops/nhwc/cmsis_nn/arm_eltwise_int8.h"
#include <arm_nnfunctions.h>
#include "micro/base/logging.h"
#include "micro/base/types.h"
#include "micro/base/utils.h"
#include "micro/ops/nhwc/cmsis_nn/utilities.h"
namespace micro {
namespace ops {
MaceStatus ArmEltwiseInt8Op::OnInit() {
MACE_ASSERT(GetInputSize() == 2);
input0_ = GetInputData<int8_t>(INPUT0);
input0_dims_ = GetInputShapeDims(INPUT0);
input0_dim_size_ = GetInputShapeDimSize(INPUT0);
input1_ = GetInputData<int8_t>(INPUT1);
input1_dims_ = GetInputShapeDims(INPUT1);
input1_dim_size_ = GetInputShapeDimSize(INPUT1);
output_ = GetOutputData<int8_t>(OUTPUT);
type_ = static_cast<eltwise::Type>(
GetArgByName("type", static_cast<int32_t>(NONE)));
coeff_ = GetRepeatArgByName<float>("coeff", &coeff_size_);
return MACE_SUCCESS;
}
MaceStatus ArmEltwiseInt8Op::Run() {
MACE_ASSERT1(GetInputSize() == 2,
"ArmEltwiseInt8Op only supports 2 inputs");
MACE_ASSERT(input0_dim_size_ == input1_dim_size_);
MACE_ASSERT(base::ShapeIsEqual(input0_dims_, input1_dims_, input1_dim_size_));
MACE_RETURN_IF_ERROR(
ResizeOutputShape(OUTPUT, input0_dim_size_, input0_dims_));
if (type_ == eltwise::SUM) {
QuantizeInfo input_quantize_info0 = GetInputQuantizeInfo(0);
QuantizeInfo input_quantize_info1 = GetInputQuantizeInfo(1);
QuantizeInfo output_quantize_info = GetOutputQuantizeInfo(OUTPUT);
int32_t input0_offset = -input_quantize_info0.zero;
double input0_scale = input_quantize_info0.scale;
int32_t input1_offset = -input_quantize_info1.zero;
double input1_scale = input_quantize_info1.scale;
int32_t output_offset = output_quantize_info.zero;
double output_scale = output_quantize_info.scale;
int32_t left_shift = 20;
const double twice_max_input_scale =
2 * static_cast<double>(base::max(input0_scale, input1_scale));
const double real_input0_multiplier =
static_cast<double>(input0_scale) / twice_max_input_scale;
const double real_input1_multiplier =
static_cast<double>(input1_scale) / twice_max_input_scale;
const double real_output_multiplier =
twice_max_input_scale /
((1 << left_shift) * static_cast<double>(output_scale));
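    // These three multipliers realize the float identity
    //   s_out * (y - z_out) = s0 * (x0 - z0) + s1 * (x1 - z1)
    // in integer arithmetic: dividing by 2 * max(s0, s1) keeps both input
    // multipliers in (0, 1], and the factor of 2^left_shift applied to the
    // inputs is divided back out of the output multiplier, preserving
    // precision through the final requantization to the output scale.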
int32_t input0_multiplier = 0;
int32_t input0_shift = 0;
QuantizeMultiplier(real_input0_multiplier, &input0_multiplier,
&input0_shift);
int32_t input1_multiplier = 0;
int32_t input1_shift = 0;
QuantizeMultiplier(real_input1_multiplier, &input1_multiplier,
&input1_shift);
int32_t output_multiplier = 0;
int32_t output_shift = 0;
QuantizeMultiplier(real_output_multiplier, &output_multiplier,
&output_shift);
int32_t element_size = base::GetShapeSize(input0_dim_size_, input0_dims_);
arm_elementwise_add_s8(input0_, input1_, input0_offset, input0_multiplier,
input0_shift, input1_offset, input1_multiplier,
input1_shift, left_shift, output_, output_offset,
output_multiplier, output_shift, -128, 127,
element_size);
} else {
MACE_ASSERT1(false, "Unsupported ArmEltwiseInt8Op type");
}
return MACE_SUCCESS;
}
} // namespace ops
} // namespace micro
// Copyright 2020 The MACE Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef MICRO_OPS_NHWC_CMSIS_NN_ARM_ELTWISE_INT8_H_
#define MICRO_OPS_NHWC_CMSIS_NN_ARM_ELTWISE_INT8_H_
#include "micro/base/logger.h"
#include "micro/base/logging.h"
#include "micro/base/types.h"
#include "micro/base/utils.h"
#include "micro/framework/op_context.h"
#include "micro/framework/operator.h"
#include "micro/framework/scratch_buffer.h"
#include "micro/model/const_tensor.h"
#include "micro/model/net_def.h"
namespace micro {
namespace ops {
class ArmEltwiseInt8Op : public framework::Operator {
public:
MaceStatus OnInit();
MaceStatus Run();
private:
const int8_t *input0_;
const int32_t *input0_dims_;
uint32_t input0_dim_size_;
const int8_t *input1_;
const int32_t *input1_dims_;
uint32_t input1_dim_size_;
int8_t *output_;
eltwise::Type type_;
const float *coeff_;
uint32_t coeff_size_;
int32_t scalar_input_index_;
bool nchw_;
MACE_OP_INPUT_TAGS(INPUT0, INPUT1);
MACE_OP_OUTPUT_TAGS(OUTPUT);
};
} // namespace ops
} // namespace micro
#endif // MICRO_OPS_NHWC_CMSIS_NN_ARM_ELTWISE_INT8_H_
// Copyright 2020 The MACE Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "micro/ops/nhwc/cmsis_nn/arm_mat_mul_int8.h"
#include <arm_nnfunctions.h>
#include "micro/base/logger.h"
#include "micro/base/logging.h"
#include "micro/base/utils.h"
#include "micro/framework/op_context.h"
#include "micro/framework/scratch_buffer.h"
#include "micro/model/argument.h"
#include "micro/model/const_tensor.h"
#include "micro/model/net_def.h"
#include "micro/ops/nhwc/cmsis_nn/utilities.h"
namespace micro {
namespace ops {
MaceStatus ArmMatMulInt8Op::OnInit() {
transpose_a_ = GetArgByName("transpose_a", false);
transpose_b_ = GetArgByName("transpose_b", false);
input_a_ = GetInputData<int8_t>(INPUT_A);
input_b_ = GetInputData<int8_t>(INPUT_B);
output_ = GetOutputData<int8_t>(OUTPUT);
if (GetInputSize() >= 3) {
bias_ = GetInputData<int32_t>(BIAS);
bias_dim_size_ = GetInputShapeDimSize(BIAS);
bias_dims_ = GetInputShapeDims(BIAS);
} else {
bias_ = NULL;
bias_dim_size_ = 0;
bias_dims_ = NULL;
}
input_a_dim_size_ = GetInputShapeDimSize(INPUT_A);
input_b_dim_size_ = GetInputShapeDimSize(INPUT_B);
input_a_dims_ = GetInputShapeDims(INPUT_A);
input_b_dims_ = GetInputShapeDims(INPUT_B);
return MACE_SUCCESS;
}
MaceStatus ArmMatMulInt8Op::Run() {
MACE_ASSERT(Validate());
MACE_ASSERT(input_a_dim_size_ == 2);
MACE_ASSERT(input_b_dim_size_ == 2);
MACE_ASSERT(input_a_dims_[0] == 1);
MACE_ASSERT(transpose_b_);
MACE_ASSERT(!transpose_a_);
const int32_t lhs_rows = input_a_dims_[0];
const int32_t rhs_rows = input_b_dims_[0];
const int32_t rhs_cols = input_b_dims_[1];
const int32_t rhs_t_cols = rhs_rows;
const int32_t rows = lhs_rows;
const int32_t cols = rhs_t_cols;
if (bias_ != NULL) {
MACE_ASSERT(bias_dim_size_ == 1);
MACE_ASSERT(bias_dims_[0] == cols);
}
int32_t *output_dims0 =
ScratchBuffer(engine_config_).GetBuffer<int32_t>(input_a_dim_size_);
output_dims0[0] = rows;
output_dims0[1] = cols;
MACE_RETURN_IF_ERROR(
ResizeOutputShape(OUTPUT, input_a_dim_size_, output_dims0));
QuantizeInfo input_quantize_info_a = GetInputQuantizeInfo(INPUT_A);
QuantizeInfo input_quantize_info_b = GetInputQuantizeInfo(INPUT_B);
QuantizeInfo output_quantize_info = GetOutputQuantizeInfo(OUTPUT);
double double_multiplier = input_quantize_info_a.scale *
input_quantize_info_b.scale /
output_quantize_info.scale;
int32_t multiplier;
int32_t shift;
QuantizeMultiplier(double_multiplier, &multiplier, &shift);
ScratchBuffer scratch_buffer(engine_config_);
int32_t *bias = NULL;
if (bias_ == NULL) {
bias = scratch_buffer.GetBuffer<int32_t>(cols);
for (int32_t i = 0; i < cols; ++i) {
bias[i] = 0;
}
} else {
bias = const_cast<int32_t *>(bias_);
}
arm_status status = arm_nn_vec_mat_mult_t_s8(
input_a_, input_b_, bias, output_, -input_quantize_info_a.zero,
input_quantize_info_b.zero, output_quantize_info.zero, multiplier, shift,
rhs_cols, rhs_rows, -128, 127);
MACE_ASSERT(status == ARM_MATH_SUCCESS);
return MACE_SUCCESS;
}
bool ArmMatMulInt8Op::Validate() {
const int32_t lhs_rank = input_a_dim_size_;
const int32_t rhs_rank = input_b_dim_size_;
if (input_a_dim_size_ == input_b_dim_size_) {
for (uint32_t i = 0; i < input_a_dim_size_ - 2; ++i) {
MACE_ASSERT1(input_a_dims_[i] == input_b_dims_[i],
"batch dimensions are not equal");
}
} else {
MACE_ASSERT1(input_a_dim_size_ == 2 || input_b_dim_size_ == 2,
"Either lhs or rhs matrix should has rank 2 "
"for non-batched matrix multiplication");
}
int32_t lhs_depth =
transpose_a_ ? input_a_dims_[lhs_rank - 2] : input_a_dims_[lhs_rank - 1];
int32_t rhs_depth =
transpose_b_ ? input_b_dims_[rhs_rank - 1] : input_b_dims_[rhs_rank - 2];
if (lhs_depth != rhs_depth) {
MACE_ASSERT1(false, "the number of A's column must be equal to B's row ");
return false;
}
return true;
}
} // namespace ops
} // namespace micro
// Copyright 2020 The MACE Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef MICRO_OPS_NHWC_CMSIS_NN_ARM_MAT_MUL_INT8_H_
#define MICRO_OPS_NHWC_CMSIS_NN_ARM_MAT_MUL_INT8_H_
#include "micro/framework/operator.h"
namespace micro {
namespace ops {
class ArmMatMulInt8Op : public framework::Operator {
public:
MaceStatus OnInit();
MaceStatus Run();
private:
bool Validate();
private:
const int8_t *input_a_;
const int32_t *input_a_dims_;
uint32_t input_a_dim_size_;
const int8_t *input_b_;
const int32_t *input_b_dims_;
uint32_t input_b_dim_size_;
const int32_t *bias_;
const int32_t *bias_dims_;
uint32_t bias_dim_size_;
int8_t *output_;
bool transpose_a_;
bool transpose_b_;
MACE_OP_INPUT_TAGS(INPUT_A, INPUT_B, BIAS);
MACE_OP_OUTPUT_TAGS(OUTPUT);
};
} // namespace ops
} // namespace micro
#endif // MICRO_OPS_NHWC_CMSIS_NN_ARM_MAT_MUL_INT8_H_
// Copyright 2020 The MACE Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "micro/ops/nhwc/cmsis_nn/arm_pooling_int8.h"
#include <arm_nnfunctions.h>
#include "micro/base/logging.h"
#include "micro/base/utils.h"
#include "micro/framework/scratch_buffer.h"
#include "micro/include/utils/macros.h"
#include "micro/ops/nhwc/cmsis_nn/utilities.h"
namespace micro {
namespace ops {
void ArmPoolingInt8Op::MaxPooling(const mifloat *input,
const int32_t *filter_hw,
const int32_t *stride_hw,
const int32_t *dilation_hw,
const int32_t *pad_hw) {
MACE_UNUSED(dilation_hw);
cmsis_nn_context ctx;
ctx.buf = NULL;
ctx.size = 0;
cmsis_nn_pool_params pool_params;
pool_params.activation.min = -128;
pool_params.activation.max = 127;
pool_params.stride.h = stride_hw[0];
pool_params.stride.w = stride_hw[1];
pool_params.padding.h = pad_hw[0];
pool_params.padding.w = pad_hw[1];
MACE_ASSERT(input_dims_[0] == 1);
cmsis_nn_dims input_dims;
input_dims.n = input_dims_[0];
input_dims.h = input_dims_[1];
input_dims.w = input_dims_[2];
input_dims.c = input_dims_[3];
const int8_t *input_data = reinterpret_cast<const int8_t *>(input);
cmsis_nn_dims filter_dims;
filter_dims.h = filter_hw[0];
filter_dims.w = filter_hw[1];
cmsis_nn_dims output_dims;
output_dims.n = output_dims_[0];
output_dims.h = output_dims_[1];
output_dims.w = output_dims_[2];
output_dims.c = output_dims_[3];
int8_t *output_data = reinterpret_cast<int8_t *>(output_);
arm_max_pool_s8(&ctx, &pool_params, &input_dims, input_data, &filter_dims,
&output_dims, output_data);
}
void ArmPoolingInt8Op::AvgPooling(const mifloat *input,
const int32_t *filter_hw,
const int32_t *stride_hw,
const int32_t *dilation_hw,
const int32_t *pad_hw) {
MACE_UNUSED(dilation_hw);
const int32_t out_width = output_dims_[2];
const int32_t in_channels = input_dims_[3];
cmsis_nn_context ctx;
ctx.size = arm_avgpool_s8_get_buffer_size(out_width, in_channels);
ScratchBuffer scratch_buffer(engine_config_);
if (ctx.size > 0) {
ctx.buf = scratch_buffer.GetBuffer<int8_t>(ctx.size);
} else {
ctx.buf = NULL;
}
cmsis_nn_pool_params pool_params;
pool_params.activation.min = -128;
pool_params.activation.max = 127;
pool_params.stride.h = stride_hw[0];
pool_params.stride.w = stride_hw[1];
pool_params.padding.h = pad_hw[0];
pool_params.padding.w = pad_hw[1];
MACE_ASSERT(input_dims_[0] == 1);
cmsis_nn_dims input_dims;
input_dims.n = input_dims_[0];
input_dims.h = input_dims_[1];
input_dims.w = input_dims_[2];
input_dims.c = input_dims_[3];
const int8_t *input_data = reinterpret_cast<const int8_t *>(input);
cmsis_nn_dims filter_dims;
filter_dims.h = filter_hw[0];
filter_dims.w = filter_hw[1];
cmsis_nn_dims output_dims;
output_dims.n = output_dims_[0];
output_dims.h = output_dims_[1];
output_dims.w = output_dims_[2];
output_dims.c = output_dims_[3];
int8_t *output_data = reinterpret_cast<int8_t *>(output_);
arm_avgpool_s8(&ctx, &pool_params, &input_dims, input_data, &filter_dims,
&output_dims, output_data);
}
} // namespace ops
} // namespace micro
// Copyright 2020 The MACE Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef MICRO_OPS_NHWC_CMSIS_NN_ARM_POOLING_INT8_H_
#define MICRO_OPS_NHWC_CMSIS_NN_ARM_POOLING_INT8_H_
#include "micro/model/output_shape.h"
#include "micro/ops/nhwc/base/pooling_base.h"
namespace micro {
namespace ops {
class ArmPoolingInt8Op : public PoolingBase {
private:
void MaxPooling(const mifloat *input,
const int32_t *filter_hw,
const int32_t *stride_hw,
const int32_t *dilation_hw,
const int32_t *pad_hw);
void AvgPooling(const mifloat *input,
const int32_t *filter_hw,
const int32_t *stride_hw,
const int32_t *dilation_hw,
const int32_t *pad_hw);
};
} // namespace ops
} // namespace micro
#endif // MICRO_OPS_NHWC_CMSIS_NN_ARM_POOLING_INT8_H_
// Copyright 2020 The MACE Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "micro/ops/nhwc/cmsis_nn/arm_softmax_int8.h"
#include <arm_nnfunctions.h>
#include "micro/base/logging.h"
#include "micro/base/utils.h"
#include "micro/framework/op_context.h"
#include "micro/model/net_def.h"
#include "micro/ops/nhwc/cmsis_nn/utilities.h"
namespace micro {
namespace ops {
MaceStatus ArmSoftmaxInt8Op::OnInit() {
data_format_ = static_cast<DataFormat>(
GetArgByName("data_format", static_cast<int32_t>(NHWC)));
input_ = GetInputData<mifloat>(INPUT);
input_dims_ = GetInputShapeDims(INPUT);
input_dim_size_ = GetInputShapeDimSize(INPUT);
MACE_ASSERT(input_dim_size_ == 2);
output_ = GetOutputData<mifloat>(OUTPUT);
bool use_log = GetArgByName("use_log", false);
MACE_ASSERT1(!use_log, "The argument \"use_log\" is unsupported");
return MACE_SUCCESS;
}
MaceStatus ArmSoftmaxInt8Op::Run() {
MACE_RETURN_IF_ERROR(ResizeOutputShape(OUTPUT, input_dim_size_, input_dims_));
// TODO(ZhangZhimin): Workarounds for AUTO data format
if (NHWC == data_format_ || AUTO == data_format_) { // NHWC
return RunForNHWC();
} else {
MACE_NOT_IMPLEMENTED;
return MACE_UNSUPPORTED;
}
}
MaceStatus ArmSoftmaxInt8Op::RunForNHWC() {
int32_t class_size = input_dims_[input_dim_size_ - 1];
const int8_t *input_data = reinterpret_cast<const int8_t *>(input_);
int8_t *output_data = reinterpret_cast<int8_t *>(output_);
int32_t num_rows = input_dims_[0];
QuantizeInfo input_quantize_info = GetInputQuantizeInfo(INPUT);
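  // A reading of the fixed-point setup below (it appears to mirror the
  // TFLite softmax convention): the input scale is expressed with
  // kInputDeltaIntBits = 5 integer bits, scale_q = min(scale * 2^(31 - 5),
  // 2^31 - 1), which QuantizeMultiplier() renormalizes into the Q31
  // multiplier/shift pair that arm_softmax_s8 expects.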
  const int kInputDeltaIntBits = 5;
int32_t scale_q = static_cast<int32_t>(
base::min(static_cast<double>(input_quantize_info.scale) *
(1 << (31 - kInputDeltaIntBits)),
(1ll << 31) - 1.0));
int32_t mult;
int32_t shift;
QuantizeMultiplier(scale_q, &mult, &shift);
int32_t diff_min = -128;
arm_softmax_s8(input_data, num_rows, class_size, mult, shift, diff_min,
output_data);
return MACE_SUCCESS;
}
} // namespace ops
} // namespace micro
// Copyright 2020 The MACE Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef MICRO_OPS_NHWC_CMSIS_NN_ARM_SOFTMAX_INT8_H_
#define MICRO_OPS_NHWC_CMSIS_NN_ARM_SOFTMAX_INT8_H_
#include "micro/framework/operator.h"
namespace micro {
namespace ops {
class ArmSoftmaxInt8Op : public framework::Operator {
public:
MaceStatus OnInit();
MaceStatus Run();
private:
MaceStatus RunForNHWC();
private:
const mifloat *input_;
const int32_t *input_dims_;
uint32_t input_dim_size_;
mifloat *output_;
DataFormat data_format_;
MACE_OP_INPUT_TAGS(INPUT);
MACE_OP_OUTPUT_TAGS(OUTPUT);
};
} // namespace ops
} // namespace micro
#endif // MICRO_OPS_NHWC_CMSIS_NN_ARM_SOFTMAX_INT8_H_
// Copyright 2020 The MACE Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "micro/ops/nhwc/cmsis_nn/dequantize.h"
#include "micro/base/logging.h"
#include "micro/base/utils.h"
#include "micro/framework/op_context.h"
#include "micro/framework/operator.h"
#include "micro/model/net_def.h"
namespace micro {
namespace ops {
MaceStatus DequantizeOp::OnInit() {
input_ = GetInputData<int8_t>(INPUT);
input_dims_ = GetInputShapeDims(INPUT);
input_dim_size_ = GetInputShapeDimSize(INPUT);
output_ = GetOutputData<mifloat>(OUTPUT);
return MACE_SUCCESS;
}
MaceStatus DequantizeOp::Run() {
MACE_RETURN_IF_ERROR(ResizeOutputShape(OUTPUT, input_dim_size_, input_dims_));
QuantizeInfo input_quantize_info = GetInputQuantizeInfo(INPUT);
float scale = input_quantize_info.scale;
int32_t zero_point = input_quantize_info.zero;
int32_t element_size = 1;
for (uint32_t i = 0; i < input_dim_size_; ++i) {
element_size *= input_dims_[i];
}
for (int32_t i = 0; i < element_size; ++i) {
output_[i] = scale * (input_[i] - zero_point);
}
return MACE_SUCCESS;
}
} // namespace ops
} // namespace micro
// Copyright 2020 The MACE Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef MICRO_OPS_NHWC_CMSIS_NN_DEQUANTIZE_H_
#define MICRO_OPS_NHWC_CMSIS_NN_DEQUANTIZE_H_
#include "micro/framework/operator.h"
namespace micro {
namespace ops {
class DequantizeOp : public framework::Operator {
public:
MaceStatus OnInit();
MaceStatus Run();
private:
const int8_t *input_;
const int32_t *input_dims_;
uint32_t input_dim_size_;
mifloat *output_;
MACE_OP_INPUT_TAGS(INPUT);
MACE_OP_OUTPUT_TAGS(OUTPUT);
};
} // namespace ops
} // namespace micro
#endif // MICRO_OPS_NHWC_CMSIS_NN_DEQUANTIZE_H_
// Copyright 2020 The MACE Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "micro/ops/nhwc/cmsis_nn/quantize.h"
#include <cmath>
#include "micro/base/logging.h"
#include "micro/base/utils.h"
namespace micro {
namespace ops {
inline int8_t SaturateInt8(float value) {
int rounded_value = static_cast<int>(value);
if (rounded_value <= -128) {
return -128;
} else if (rounded_value >= 127) {
return 127;
} else {
return static_cast<int8_t>(rounded_value);
}
}
MaceStatus QuantizeOp::OnInit() {
input_ = GetInputData<mifloat>(INPUT);
input_dims_ = GetInputShapeDims(INPUT);
input_dim_size_ = GetInputShapeDimSize(INPUT);
output_ = GetOutputData<int8_t>(OUTPUT);
return MACE_SUCCESS;
}
MaceStatus QuantizeOp::Run() {
MACE_RETURN_IF_ERROR(ResizeOutputShape(OUTPUT, input_dim_size_, input_dims_));
QuantizeInfo output_quantize_info = GetOutputQuantizeInfo(OUTPUT);
float recip_scale = 1.0f / output_quantize_info.scale;
int32_t zero_point = output_quantize_info.zero;
int32_t element_size = 1;
for (uint32_t i = 0; i < input_dim_size_; ++i) {
element_size *= input_dims_[i];
}
for (int32_t i = 0; i < element_size; ++i) {
output_[i] = SaturateInt8(roundf(recip_scale * input_[i] + zero_point));
}
return MACE_SUCCESS;
}
} // namespace ops
} // namespace micro
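// Worked example of the mapping above (illustrative values, not taken from a
// model): with scale = 0.05f and zero = -3, an input of 0.5f quantizes to
// roundf(0.5f / 0.05f + (-3)) = 7, and DequantizeOp recovers
// 0.05f * (7 - (-3)) = 0.5f. Inputs outside
// [scale * (-128 - zero), scale * (127 - zero)] saturate in SaturateInt8().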
// Copyright 2020 The MACE Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef MICRO_OPS_NHWC_CMSIS_NN_QUANTIZE_H_
#define MICRO_OPS_NHWC_CMSIS_NN_QUANTIZE_H_
#include "micro/framework/operator.h"
namespace micro {
namespace ops {
class QuantizeOp : public framework::Operator {
public:
MaceStatus OnInit();
MaceStatus Run();
private:
const mifloat *input_;
const int32_t *input_dims_;
uint32_t input_dim_size_;
int8_t *output_;
MACE_OP_INPUT_TAGS(INPUT);
MACE_OP_OUTPUT_TAGS(OUTPUT);
};
} // namespace ops
} // namespace micro
#endif // MICRO_OPS_NHWC_CMSIS_NN_QUANTIZE_H_
// Copyright 2020 The MACE Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "micro/ops/nhwc/cmsis_nn/utilities.h"
#include <math.h>
void QuantizeMultiplier(double double_multiplier,
int32_t *quantized_multiplier,
int32_t *shift) {
if (double_multiplier == 0.) {
*quantized_multiplier = 0;
*shift = 0;
return;
}
  int shift_int = 0;  // frexp() requires a plain int exponent
  const double q = frexp(double_multiplier, &shift_int);
  *shift = shift_int;
int64_t q_fixed = static_cast<int64_t>(round(q * (1ll << 31)));
if (q_fixed == (1ll << 31)) {
q_fixed /= 2;
++*shift;
}
if (*shift < -31) {
*shift = 0;
q_fixed = 0;
}
*quantized_multiplier = static_cast<int32_t>(q_fixed);
}
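// Worked example (approximate values): for double_multiplier = 0.00392,
// frexp() returns q = 0.50176 with *shift = -7, and
// q_fixed = round(q * 2^31) = 1077521395, so multiplying by 0.00392 becomes
// a Q31 multiplication by q_fixed followed by an arithmetic right shift of
// 31 - (*shift) = 38 bits in the downstream CMSIS-NN requantization.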
// Copyright 2020 The MACE Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef MICRO_OPS_NHWC_CMSIS_NN_UTILITIES_H_
#define MICRO_OPS_NHWC_CMSIS_NN_UTILITIES_H_
#include "micro/base/types.h"
void QuantizeMultiplier(double double_multiplier,
int32_t *quantized_multiplier,
int32_t *shift);
#endif // MICRO_OPS_NHWC_CMSIS_NN_UTILITIES_H_
......@@ -49,7 +49,7 @@ void PoolingRefOp::MaxPooling(const mifloat *input,
}
for (int32_t fh = 0; fh < filter_hw[0]; ++fh) {
int32_t inh = inh_addr + dilation_hw[0] * fh;
if (inh < 0 && inh >= in_height) {
if (inh < 0 || inh >= in_height) {
continue;
}
int32_t in_h_base = (in_b_base + inh) * in_width;
......
// Copyright 2020 The MACE Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "micro/ops/reshape.h"
#include "micro/base/logging.h"
#include "micro/base/utils.h"
#include "micro/framework/scratch_buffer.h"
namespace micro {
namespace ops {
namespace {
MaceStatus ValidShapeData(const int32_t *input_dims,
const uint32_t input_dim_size,
int32_t *shape_data,
const uint32_t shape_data_size) {
MACE_ASSERT(
input_dims != NULL && shape_data != NULL);
int32_t unknown_idx = -1;
int32_t product = 1;
const int32_t input_size = base::GetShapeSize(input_dim_size, input_dims);
for (uint32_t i = 0; i < shape_data_size; ++i) {
if (shape_data[i] == -1) {
MACE_ASSERT1(unknown_idx == -1, "Only one input size may be -1");
unknown_idx = i;
shape_data[i] = 1;
} else {
MACE_ASSERT2(shape_data[i] >= 0, "Shape must be non-negative: ",
shape_data[i]);
if (shape_data[i] == 0) {
        MACE_ASSERT1(i < input_dim_size,
                     "A dim of 0 must be within the input dims' range.");
shape_data[i] = input_dims[i];
}
product *= shape_data[i];
}
}
if (unknown_idx != -1) {
    MACE_ASSERT1(product != 0,
                 "Cannot infer the -1 dim when another dim is 0.");
const int32_t missing = input_size / product;
    MACE_ASSERT1(missing * product == input_size,
                 "Input size does not match the reshaped tensor size");
shape_data[unknown_idx] = missing;
}
return MACE_SUCCESS;
}
} // namespace
MaceStatus ReshapeOp::OnInit() {
input_ = GetInputData<mifloat>(INPUT);
input_dims_ = GetInputShapeDims(INPUT);
input_dim_size_ = GetInputShapeDimSize(INPUT);
shape_ = GetInputData<int32_t>(SHAPE);
shape_dims_ = GetInputShapeDims(SHAPE);
shape_dim_size_ = GetInputShapeDimSize(SHAPE);
output_ = GetOutputData<mifloat>(OUTPUT);
return MACE_SUCCESS;
}
MaceStatus ReshapeOp::Run() {
const int32_t input_data_size =
base::GetShapeSize(input_dim_size_, input_dims_);
const int32_t shape_data_size =
base::GetShapeSize(shape_dim_size_, shape_dims_);
int32_t *shape_data =
ScratchBuffer(engine_config_).GetBuffer<int32_t>(shape_data_size);
base::memcpy(shape_data, shape_, shape_data_size * sizeof(int32_t));
MACE_RETURN_IF_ERROR(ValidShapeData(input_dims_, input_dim_size_,
shape_data, shape_data_size));
#ifndef MACE_MICRO_NDEBUG
const int32_t output_data_size = base::accumulate_multi(
shape_data, 0, static_cast<uint32_t>(shape_data_size));
if (input_data_size != output_data_size) {
LOG(FATAL) << "input_data_size(" << input_data_size
<< ") != output_data_size(" << output_data_size
<< "), please check the model.";
}
#endif
// TODO(luxuhui): optimize this method by reusing buffer
base::memcpy(output_, input_, input_data_size * sizeof(mifloat));
return ResizeOutputShape(OUTPUT, shape_data_size, shape_data);
}
} // namespace ops
} // namespace micro
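// Example of the shape rules in ValidShapeData() (illustrative shapes): for
// an input of dims {1, 90, 3, 1} and shape_data {0, -1}, the 0 entry copies
// input_dims[0] = 1 and the -1 entry is inferred as the remaining
// 90 * 3 * 1 = 270 elements, yielding an output shape of {1, 270}. At most
// one -1 is allowed, and the known dims must divide the input size exactly.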
......@@ -15,17 +15,104 @@
#ifndef MICRO_OPS_RESHAPE_H_
#define MICRO_OPS_RESHAPE_H_
#include "micro/base/utils.h"
#include "micro/framework/operator.h"
#include "micro/framework/scratch_buffer.h"
namespace micro {
namespace ops {
namespace internal {
inline MaceStatus ValidShapeData(const int32_t *input_dims,
const uint32_t input_dim_size,
int32_t *shape_data,
const uint32_t shape_data_size) {
MACE_ASSERT(input_dims != NULL && shape_data != NULL);
int32_t unknown_idx = -1;
int32_t product = 1;
const int32_t input_size = base::GetShapeSize(input_dim_size, input_dims);
for (uint32_t i = 0; i < shape_data_size; ++i) {
if (shape_data[i] == -1) {
MACE_ASSERT1(unknown_idx == -1, "Only one input size may be -1");
unknown_idx = i;
shape_data[i] = 1;
} else {
MACE_ASSERT2(shape_data[i] >= 0,
"Shape must be non-negative: ", shape_data[i]);
if (shape_data[i] == 0) {
        MACE_ASSERT1(i < input_dim_size,
                     "A dim of 0 must be within the input dims' range.");
shape_data[i] = input_dims[i];
}
product *= shape_data[i];
}
}
if (unknown_idx != -1) {
    MACE_ASSERT1(product != 0,
                 "Cannot infer the -1 dim when another dim is 0.");
const int32_t missing = input_size / product;
    MACE_ASSERT1(missing * product == input_size,
                 "Input size does not match the reshaped tensor size");
shape_data[unknown_idx] = missing;
}
return MACE_SUCCESS;
}
} // namespace internal
template <typename T>
class ReshapeOp : public framework::Operator {
public:
MaceStatus OnInit();
MaceStatus Run();
typedef T value_type;
MaceStatus OnInit() {
input_ = GetInputData<ReshapeOp::value_type>(INPUT);
input_dims_ = GetInputShapeDims(INPUT);
input_dim_size_ = GetInputShapeDimSize(INPUT);
shape_ = GetInputData<int32_t>(SHAPE);
shape_dims_ = GetInputShapeDims(SHAPE);
shape_dim_size_ = GetInputShapeDimSize(SHAPE);
output_ = GetOutputData<ReshapeOp::value_type>(OUTPUT);
return MACE_SUCCESS;
}
MaceStatus Run() {
const int32_t input_data_size =
base::GetShapeSize(input_dim_size_, input_dims_);
const int32_t shape_data_size =
base::GetShapeSize(shape_dim_size_, shape_dims_);
int32_t *shape_data =
ScratchBuffer(engine_config_).GetBuffer<int32_t>(shape_data_size);
base::memcpy(shape_data, shape_, shape_data_size * sizeof(int32_t));
MACE_RETURN_IF_ERROR(internal::ValidShapeData(input_dims_, input_dim_size_,
shape_data, shape_data_size));
#ifndef MACE_MICRO_NDEBUG
const int32_t output_data_size = base::accumulate_multi(
shape_data, 0, static_cast<uint32_t>(shape_data_size));
if (input_data_size != output_data_size) {
LOG(FATAL) << "input_data_size(" << input_data_size
<< ") != output_data_size(" << output_data_size
<< "), please check the model.";
}
#endif
// TODO(luxuhui): optimize this method by reusing buffer
base::memcpy(output_, input_,
input_data_size * sizeof(ReshapeOp::value_type));
return ResizeOutputShape(OUTPUT, shape_data_size, shape_data);
}
private:
const mifloat *input_;
const value_type *input_;
const int32_t *input_dims_;
uint32_t input_dim_size_;
......@@ -33,7 +120,7 @@ class ReshapeOp : public framework::Operator {
const int32_t *shape_dims_;
uint32_t shape_dim_size_;
mifloat *output_;
value_type *output_;
MACE_OP_INPUT_TAGS(INPUT, SHAPE);
MACE_OP_OUTPUT_TAGS(OUTPUT);
......
......@@ -36,7 +36,8 @@ MaceStatus SoftmaxOp::OnInit() {
MaceStatus SoftmaxOp::Run() {
MACE_RETURN_IF_ERROR(ResizeOutputShape(OUTPUT, input_dim_size_, input_dims_));
if (NHWC == data_format_) { // NHWC
  // TODO(ZhangZhimin): Workarounds for AUTO data format
if (NHWC == data_format_ || AUTO == data_format_) { // NHWC
return RunForNHWC();
} else {
MACE_NOT_IMPLEMENTED;
......
# TensorFlow Keras Models
MACE Micro supports Keras models from TensorFlow 2.x.
## HAR
The model is from <https://github.com/Shahnawax/HAR-CNN-Keras/>.
## MNIST
The mnist_keras.py script depends on TensorFlow 2.x and the tensorflow_model_optimization package. Run `python mnist_keras.py` to generate the "mnist.h5" and "mnist-int8.h5" models.
library_name: har
target_abis: [host]
model_graph_format: file
model_data_format: file
models:
har_int8:
platform: keras
model_file_path: https://cdn.cnbj1.fds.api.mi-img.com/mace/miai-models/micro/keras/har/har.h5
model_sha256_checksum: ec0477b8e489541bb34377c9cabc42ee6cefa8bdf0a9f726e06be1b967ea1dcd
subgraphs:
- input_tensors:
- conv2d_1_input:0
input_shapes:
- 1,90,3,1
input_ranges:
- -5,15
output_tensors:
- dense_3/Softmax:0
output_shapes:
- 1,6
runtime: cpu
limit_opencl_kernel_time: 0
nnlib_graph_mode: 0
obfuscate: 0
winograd: 0
quantize: 1
quantize_schema: int8
quantize_range_file: https://cdn.cnbj1.fds.api.mi-img.com/mace/miai-models/micro/keras/har/har.range
library_name: har
target_abis: [host]
model_graph_format: file
model_data_format: file
models:
har:
platform: keras
model_file_path: https://cdn.cnbj1.fds.api.mi-img.com/mace/miai-models/micro/keras/har/har.h5
model_sha256_checksum: ec0477b8e489541bb34377c9cabc42ee6cefa8bdf0a9f726e06be1b967ea1dcd
subgraphs:
- input_tensors:
- conv2d_1_input:0
input_shapes:
- 1,90,3,1
output_tensors:
- dense_3/Softmax:0
output_shapes:
- 1,6
runtime: cpu
data_type: fp32_fp32
limit_opencl_kernel_time: 0
nnlib_graph_mode: 0
obfuscate: 0
winograd: 0
library_name: mnist
target_abis: [host]
model_graph_format: file
model_data_format: file
models:
mnist_int8:
platform: keras
model_file_path: https://cdn.cnbj1.fds.api.mi-img.com/mace/miai-models/micro/keras/mnist/mnist_int8.h5
model_sha256_checksum: f56ae3b94c114719683c3bc55351f871d371e874d3a4d3224cc5299717e8b7fc
subgraphs:
- input_tensors:
- conv2d_input:0
input_shapes:
- 1,28,28,1
input_ranges:
- 0,1
output_tensors:
- quant_dense_1/Softmax:0
output_shapes:
- 1,10
runtime: cpu
limit_opencl_kernel_time: 0
nnlib_graph_mode: 0
obfuscate: 0
winograd: 0
quantize: 1
quantize_schema: int8
library_name: mnist
target_abis: [host]
model_graph_format: file
model_data_format: file
models:
mnist:
platform: keras
model_file_path: https://cdn.cnbj1.fds.api.mi-img.com/mace/miai-models/micro/keras/mnist/mnist.h5
model_sha256_checksum: 85f2ffe02e1b9dd2d6ad3826b91ac134fed15b838bb92a1010f67c19d55b1f65
subgraphs:
- input_tensors:
- conv2d_input:0
input_shapes:
- 1,28,28,1
output_tensors:
- dense_1/Softmax:0
output_shapes:
- 1,10
runtime: cpu
data_type: fp32_fp32
limit_opencl_kernel_time: 0
nnlib_graph_mode: 0
obfuscate: 0
winograd: 0
quantize: 0
# Refer to https://www.tensorflow.org/model_optimization/guide
import tensorflow.compat.v2 as tf
import tensorflow_datasets as tfds
import tensorflow_model_optimization as tfmot
def normalize_img(image, label):
"""Normalizes images: `uint8` -> `float32`."""
return tf.cast(image, tf.float32) / 255.0, label
tfds.disable_progress_bar()
tf.enable_v2_behavior()
(ds_train, ds_test), ds_info = tfds.load(
"mnist",
split=["train", "test"],
shuffle_files=True,
as_supervised=True,
with_info=True,
)
ds_train = ds_train.map(
normalize_img, num_parallel_calls=tf.data.experimental.AUTOTUNE
)
ds_train = ds_train.cache()
ds_train = ds_train.shuffle(ds_info.splits["train"].num_examples)
ds_train = ds_train.batch(128)
ds_train = ds_train.prefetch(tf.data.experimental.AUTOTUNE)
ds_test = ds_test.map(
normalize_img, num_parallel_calls=tf.data.experimental.AUTOTUNE
)
ds_test = ds_test.batch(128)
ds_test = ds_test.cache()
ds_test = ds_test.prefetch(tf.data.experimental.AUTOTUNE)
model = tf.keras.models.Sequential(
[
tf.keras.layers.Conv2D(
filters=32, kernel_size=3, activation="relu", padding="same"
),
tf.keras.layers.DepthwiseConv2D(
kernel_size=3, activation="relu", padding="same"
),
tf.keras.layers.MaxPool2D(pool_size=2),
tf.keras.layers.Flatten(),
tf.keras.layers.Dense(128, activation="relu"),
tf.keras.layers.Dense(10, activation="softmax"),
]
)
model.compile(
loss="sparse_categorical_crossentropy",
optimizer=tf.keras.optimizers.Adam(0.001),
metrics=["accuracy"],
)
model.fit(
ds_train,
epochs=6,
validation_data=ds_test,
)
model.save("mnist.h5")
quantize_model = tfmot.quantization.keras.quantize_model
quantization_aware_model = quantize_model(model)
quantization_aware_model.compile(
optimizer="adam",
    # The model ends in a softmax layer, so the loss consumes probabilities,
    # not logits (from_logits=True here would apply softmax twice).
    loss=tf.keras.losses.SparseCategoricalCrossentropy(),
metrics=["accuracy"],
)
quantization_aware_model.fit(
ds_train,
epochs=6,
validation_data=ds_test,
)
quantization_aware_model.save("mnist-int8.h5")
# TensorFlow frozen models
## KWS
The model is from <https://github.com/hyperconnect/TC-ResNet/>.
library_name: kws-tc_resnet8
target_abis: [host]
model_graph_format: file
model_data_format: file
models:
kws_tc_resnet8_bf16:
platform: tensorflow
model_file_path: https://cdn.cnbj1.fds.api.mi-img.com/mace/miai-models/micro/tensorflow/kws/kws-tc_resnet8.pb
model_sha256_checksum: c552cf79cb64d3c755ae7d867c1c78b13f55f7589d46def1f70ce657c0db0d79
subgraphs:
- input_tensors:
- input
input_shapes:
- 1,98,40,1
output_tensors:
- output/softmax
output_shapes:
- 1,12
runtime: cpu
data_type: bf16_fp32
limit_opencl_kernel_time: 0
nnlib_graph_mode: 0
obfuscate: 0
winograd: 0
quantize: 0
library_name: kws-tc_resnet8
target_abis: [host]
model_graph_format: file
model_data_format: file
models:
kws_tc_resnet8:
platform: tensorflow
model_file_path: https://cdn.cnbj1.fds.api.mi-img.com/mace/miai-models/micro/tensorflow/kws/kws-tc_resnet8.pb
model_sha256_checksum: c552cf79cb64d3c755ae7d867c1c78b13f55f7589d46def1f70ce657c0db0d79
subgraphs:
- input_tensors:
- input
input_shapes:
- 1,98,40,1
output_tensors:
- output/softmax
output_shapes:
- 1,12
runtime: cpu
data_type: fp32_fp32
limit_opencl_kernel_time: 0
nnlib_graph_mode: 0
obfuscate: 0
winograd: 0
quantize: 0
add_subdirectory(ccutils)
if(NOT HEXAGON)
include(${PROJECT_SOURCE_DIR}/third_party/googletest/googletest.cmake)
add_subdirectory(ccunit)
endif()
......
add_executable(micro_ops_test
micro/ops/stack_test.cc
micro/ops/reshape_test.cc
......@@ -20,25 +18,23 @@ add_executable(micro_ops_test
micro/ops/softmax_test.cc
micro/ops/bias_add_test.cc
micro/ops/expand_dims_test.cc
micro/ops/concat_test.cc
)
if(MACE_MICRO_ENABLE_CMSIS)
target_link_libraries(micro_ops_test
PRIVATE micro_ops_nhwc_cmsis_nn
)
target_compile_options(micro_ops_test
PRIVATE "-DMACE_MICRO_ENABLE_CMSIS=ON"
)
endif()
target_link_libraries(micro_ops_test
PRIVATE micro_base
PRIVATE micro_ops_for_test
PRIVATE micro_ops
PRIVATE micro_framework_for_optest
PRIVATE micro_ccutils
PRIVATE gtest
PRIVATE gtest_main
)
if(MICRO_MODEL_NAME)
add_executable(micro_cc_test
micro/model/net_def_test.cc
micro/framework/graph_test.cc
micro/codegen/engine_test.cc
)
target_link_libraries(micro_cc_test
micro_engine
gtest
gtest_main
)
target_compile_definitions(micro_cc_test PRIVATE "-DMICRO_MODEL_NAME=${MICRO_MODEL_NAME}")
endif()
......@@ -33,8 +33,9 @@ class EngineTest : public ::testing::Test {
void OutputAllInfo() {
MaceMicroEngine *micro_engine = NULL;
MACE_ASSERT(MICRO_MODEL_NAME::GetMicroEngineSingleton(&micro_engine)
== MACE_SUCCESS && micro_engine != NULL);
MACE_ASSERT(MICRO_MODEL_NAME::GetMicroEngineSingleton(&micro_engine) ==
MACE_SUCCESS &&
micro_engine != NULL);
float input_buffer[1 * 1 * 128 * 9] = {0};
int32_t input_shape[] = {1, 1, 128, 9};
......
......@@ -14,8 +14,10 @@
#include "gtest/gtest.h"
#include "micro/ops/eltwise.h"
#include "micro/ops/nhwc/cmsis_nn/arm_eltwise_int8.h"
#include "micro/ops/gtest_utils.h"
#include "micro/ops/substitute_op.h"
#include "micro/ops/test_quantize_utils.h"
#include "micro/ops/test_utils.h"
namespace micro {
......@@ -494,6 +496,91 @@ TEST_F(EltwiseOpTest, TensorGeneralBroadcastCPU) {
dims1121, output_9, expect_9, dims1123);
}
#ifdef MACE_MICRO_ENABLE_CMSIS
namespace {
void TestEltwiseQuantInt8(const int32_t *input_dims,
const uint32_t input_dim_size,
eltwise::Type type) {
int32_t shape_size = base::GetShapeSize(input_dim_size, input_dims);
float *input0 = new float[shape_size];
float *input1 = new float[shape_size];
FillNormalRandomInput(input0, shape_size);
FillNormalRandomInput(input1, shape_size);
float *expect_output = new float[shape_size];
const uint32_t MAX_OUTPUT_NUM = 10;
int32_t *expect_output_dims = new int32_t[MAX_OUTPUT_NUM];
  EltwiseOp<float> eltwise_op;
framework::SubstituteOp substitude_op;
substitude_op.AddInput(input0, input_dims, input_dim_size)
.AddInput(input1, input_dims, input_dim_size)
.AddArg("type", static_cast<int>(type))
.AddOutput(expect_output, expect_output_dims, MAX_OUTPUT_NUM);
  eltwise_op.Init(
      NULL, reinterpret_cast<framework::OpContext *>(&substitude_op), NULL);
  eltwise_op.Run();
uint32_t expect_output_dim_size = substitude_op.GetOutputShapeDimSize(0);
int8_t *input0_int8 = new int8_t[shape_size];
int8_t *input1_int8 = new int8_t[shape_size];
int8_t *output_int8 = new int8_t[shape_size];
float *output = new float[shape_size];
int32_t *output_dims = new int32_t[MAX_OUTPUT_NUM];
QuantizeInfo input_quant_info0;
QuantizeInfo input_quant_info1;
AutoQuantizeInt8(input0, shape_size, input0_int8, &input_quant_info0.scale,
&input_quant_info0.zero);
AutoQuantizeInt8(input1, shape_size, input1_int8, &input_quant_info1.scale,
&input_quant_info1.zero);
QuantizeInfo output_quant_info = {0.0f, 0};
AdjustRangeInt8(expect_output, shape_size, &output_quant_info.scale,
&output_quant_info.zero);
  ArmEltwiseInt8Op eltwise_op_int8;
framework::SubstituteOp substitude_op_int8;
substitude_op_int8
.AddInput(input0_int8, input_dims, input_dim_size, input_quant_info0)
.AddInput(input1_int8, input_dims, input_dim_size, input_quant_info1)
.AddArg("type", static_cast<int>(type))
.AddOutput(output_int8, output_dims, MAX_OUTPUT_NUM, output_quant_info);
  eltwise_op_int8.Init(
      NULL, reinterpret_cast<framework::OpContext *>(&substitude_op_int8),
      NULL);
  eltwise_op_int8.Run();
uint32_t output_dim_size = substitude_op_int8.GetOutputShapeDimSize(0);
Dequantize(output_int8, shape_size, output_quant_info.scale,
output_quant_info.zero, output);
ExpectTensorSimilar(expect_output, expect_output_dims, expect_output_dim_size,
output, output_dims, output_dim_size, 0.1);
delete[] input0;
delete[] input1;
delete[] expect_output;
delete[] expect_output_dims;
delete[] input0_int8;
delete[] input1_int8;
delete[] output_int8;
delete[] output;
delete[] output_dims;
}
} // namespace
TEST_F(EltwiseOpTest, QuantInt8) {
const int32_t input_dims0[4] = {1, 32, 32, 16};
TestEltwiseQuantInt8(input_dims0, 4, eltwise::SUM);
const int32_t input_dims1[4] = {2, 31, 31, 17};
TestEltwiseQuantInt8(input_dims1, 4, eltwise::SUM);
const int32_t input_dims2[2] = {1, 31};
TestEltwiseQuantInt8(input_dims2, 2, eltwise::SUM);
}
#endif
} // namespace test
} // namespace ops
} // namespace micro
......@@ -15,8 +15,10 @@
#include "gtest/gtest.h"
#include "micro/ops/gtest_utils.h"
#include "micro/ops/matmul.h"
#include "micro/ops/nhwc/cmsis_nn/arm_mat_mul_int8.h"
#include "micro/ops/substitute_op.h"
#include "micro/ops/test_utils.h"
#include "micro/ops/test_quantize_utils.h"
namespace micro {
namespace ops {
......@@ -94,6 +96,94 @@ TEST_F(MatMulOpTest, SimpleCPU) {
Simple2();
}
#ifdef MACE_MICRO_ENABLE_CMSIS
namespace {
void TestMatMulQuantInt8(int32_t lhs_rows, int32_t lhs_cols, int32_t rhs_cols) {
uint32_t input0_size = lhs_rows * lhs_cols;
uint32_t input1_size = lhs_cols * rhs_cols;
uint32_t output_size = lhs_rows * rhs_cols;
float *input0 = new float[input0_size];
float *input1 = new float[input1_size];
FillNormalRandomInput(input0, input0_size);
FillNormalRandomInput(input1, input1_size);
float *expect_output = new float[output_size];
const uint32_t MAX_OUTPUT_NUM = 10;
int32_t *expect_output_dims = new int32_t[MAX_OUTPUT_NUM];
const int32_t input0_dims[2] = {lhs_rows, lhs_cols};
  // mat0 * transpose(mat1)
const int32_t input1_dims[2] = {rhs_cols, lhs_cols};
MatMulOp matmul_op;
framework::SubstituteOp substitude_op;
substitude_op.AddInput(input0, input0_dims, 2)
.AddInput(input1, input1_dims, 2)
.AddArg("transpose_a", false)
.AddArg("transpose_b", true)
.AddOutput(expect_output, expect_output_dims, MAX_OUTPUT_NUM);
matmul_op.Init(NULL, reinterpret_cast<framework::OpContext *>(&substitude_op),
NULL);
matmul_op.Run();
uint32_t expect_output_dim_size = substitude_op.GetOutputShapeDimSize(0);
int8_t *input0_int8 = new int8_t[input0_size];
int8_t *input1_int8 = new int8_t[input1_size];
int8_t *output_int8 = new int8_t[output_size];
float *output = new float[output_size];
int32_t *output_dims = new int32_t[MAX_OUTPUT_NUM];
QuantizeInfo input_quant_info0;
QuantizeInfo input_quant_info1;
AutoQuantizeInt8(input0, input0_size, input0_int8, &input_quant_info0.scale,
&input_quant_info0.zero);
AutoQuantizeInt8Symmetric(input1, input1_size, input1_int8,
&input_quant_info1.scale);
QuantizeInfo output_quant_info = {0.0f, 0};
AdjustRangeInt8(expect_output, output_size, &output_quant_info.scale,
&output_quant_info.zero);
ArmMatMulInt8Op matmul_op_int8;
framework::SubstituteOp substitude_op_int8;
substitude_op_int8.AddInput(input0_int8, input0_dims, 2, input_quant_info0)
.AddInput(input1_int8, input1_dims, 2, input_quant_info1)
.AddArg("transpose_a", false)
.AddArg("transpose_b", true)
.AddOutput(output_int8, output_dims, MAX_OUTPUT_NUM, output_quant_info);
matmul_op_int8.Init(
NULL, reinterpret_cast<framework::OpContext *>(&substitude_op_int8),
NULL);
matmul_op_int8.Run();
uint32_t output_dim_size = substitude_op_int8.GetOutputShapeDimSize(0);
Dequantize(output_int8, output_size, output_quant_info.scale,
output_quant_info.zero, output);
ExpectTensorSimilar(expect_output, expect_output_dims, expect_output_dim_size,
output, output_dims, output_dim_size, 0.1);
delete[] input0;
delete[] input1;
delete[] expect_output;
delete[] expect_output_dims;
delete[] input0_int8;
delete[] input1_int8;
delete[] output_int8;
delete[] output;
delete[] output_dims;
}
} // namespace
TEST_F(MatMulOpTest, QuantInt8) {
TestMatMulQuantInt8(1, 8, 4);
TestMatMulQuantInt8(1, 1001, 63);
  // WARNING(ZhangZhimin): Batch inputs are unsupported
// TestMatMulQuantInt8(3, 100, 100);
}
#endif
} // namespace test
} // namespace ops
} // namespace micro
......@@ -15,8 +15,10 @@
#include "gtest/gtest.h"
#include "micro/ops/gtest_utils.h"
#include "micro/ops/nhwc/conv_2d_ref.h"
#include "micro/ops/nhwc/cmsis_nn/arm_conv_2d_int8.h"
#include "micro/ops/substitute_op.h"
#include "micro/ops/test_utils.h"
#include "micro/ops/test_quantize_utils.h"
namespace micro {
namespace ops {
......@@ -315,6 +317,141 @@ TEST_F(Conv2dOpTest, CPUConv1x1) {
TestConv1x1();
}
#ifdef MACE_MICRO_ENABLE_CMSIS
namespace {
void TestConv2dQuantInt8(const int32_t batch,
const int32_t out_channels,
const int32_t in_channels,
const int32_t in_height,
const int32_t in_width,
const int32_t kernel_height,
const int32_t kernel_width,
enum Padding padding_type,
const int32_t stride_height,
const int32_t stride_width,
const int32_t dilation_height,
const int32_t dilation_width) {
uint32_t input0_size = batch * in_height * in_width * in_channels;
uint32_t input1_size =
out_channels * kernel_height * kernel_width * in_channels;
uint32_t max_output_size = batch * out_channels *
(in_height + kernel_height * dilation_height) *
(in_width + kernel_width * dilation_width);
int32_t bias_size = out_channels;
float *input0 = new float[input0_size];
float *input1 = new float[input1_size];
float *bias = new float[bias_size];
FillNormalRandomInput(input0, input0_size);
FillNormalRandomInput(input1, input1_size);
FillNormalRandomInput(bias, bias_size);
float *expect_output = new float[max_output_size];
const uint32_t MAX_OUTPUT_NUM = 10;
int32_t *expect_output_dims = new int32_t[MAX_OUTPUT_NUM];
const int32_t input0_dims[4] = {batch, in_height, in_width, in_channels};
const int32_t input1_dims[4] = {out_channels, kernel_height, kernel_width,
in_channels};
const int32_t bias_dims[1] = {bias_size};
const int32_t strides[2] = {stride_height, stride_width};
const int32_t dilations[2] = {dilation_height, dilation_width};
Conv2dRefOp conv2d_op;
framework::SubstituteOp substitude_op;
substitude_op.AddInput(input0, input0_dims, 4)
.AddInput(input1, input1_dims, 4)
.AddInput(bias, bias_dims, 1)
.AddArg("padding", padding_type)
.AddRepeatArg("strides", strides, 2)
.AddRepeatArg("dilations", dilations, 2)
.AddOutput(expect_output, expect_output_dims, MAX_OUTPUT_NUM);
conv2d_op.Init(NULL, reinterpret_cast<framework::OpContext *>(&substitude_op),
NULL);
conv2d_op.Run();
uint32_t expect_output_dim_size = substitude_op.GetOutputShapeDimSize(0);
uint32_t expect_output_size =
base::GetShapeSize(expect_output_dim_size, expect_output_dims);
int8_t *input0_int8 = new int8_t[input0_size];
int8_t *input1_int8 = new int8_t[input1_size];
int32_t *bias_int32 = new int32_t[bias_size];
int8_t *output_int8 = new int8_t[max_output_size];
float *output = new float[max_output_size];
int32_t *output_dims = new int32_t[MAX_OUTPUT_NUM];
QuantizeInfo input_quant_info0;
QuantizeInfo input_quant_info1;
AutoQuantizeInt8(input0, input0_size, input0_int8, &input_quant_info0.scale,
&input_quant_info0.zero);
AutoQuantizeInt8Symmetric(input1, input1_size, input1_int8,
&input_quant_info1.scale);
QuantizeInfo output_quant_info = {0.0f, 0};
AdjustRangeInt8(expect_output, expect_output_size, &output_quant_info.scale,
&output_quant_info.zero);
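// The int32 bias uses scale = input_scale * filter_scale with zero_point 0,
// so the quantized bias adds directly onto the int32 accumulator of
// input * filter products; this follows the usual CMSIS-NN/TFLite int8
// convolution convention.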
float bias_scale = input_quant_info0.scale * input_quant_info1.scale;
QuantizeWithScaleAndZeropoint(bias, bias_size, bias_scale, 0, bias_int32);
ArmConv2dInt8Op conv2d_op_int8;
framework::SubstituteOp substitude_op_int8;
substitude_op_int8.AddInput(input0_int8, input0_dims, 4, input_quant_info0)
.AddInput(input1_int8, input1_dims, 4, input_quant_info1)
.AddInput(bias_int32, bias_dims, 1)
.AddArg("padding", padding_type)
.AddRepeatArg("strides", strides, 2)
.AddRepeatArg("dilations", dilations, 2)
.AddOutput(output_int8, output_dims, MAX_OUTPUT_NUM, output_quant_info);
conv2d_op_int8.Init(
NULL, reinterpret_cast<framework::OpContext *>(&substitude_op_int8),
NULL);
conv2d_op_int8.Run();
uint32_t output_dim_size = substitude_op_int8.GetOutputShapeDimSize(0);
uint32_t output_size = base::GetShapeSize(output_dim_size, output_dims);
Dequantize(output_int8, output_size, output_quant_info.scale,
output_quant_info.zero, output);
ExpectTensorSimilar(expect_output, expect_output_dims, expect_output_dim_size,
output, output_dims, output_dim_size, 0.1);
delete[] input0;
delete[] input1;
delete[] bias;
delete[] expect_output;
delete[] expect_output_dims;
delete[] input0_int8;
delete[] input1_int8;
delete[] bias_int32;
delete[] output_int8;
delete[] output;
delete[] output_dims;
}
} // namespace
TEST_F(Conv2dOpTest, QuantInt8) {
TestConv2dQuantInt8(1, 128, 64, 32, 32, 3, 3, VALID, 1, 1, 1, 1);
TestConv2dQuantInt8(1, 128, 64, 32, 32, 3, 3, SAME, 1, 1, 1, 1);
TestConv2dQuantInt8(1, 128, 64, 32, 32, 3, 3, FULL, 1, 1, 1, 1);
TestConv2dQuantInt8(1, 128, 64, 32, 54, 3, 3, FULL, 1, 1, 1, 1);
TestConv2dQuantInt8(1, 128, 512, 14, 13, 3, 3, SAME, 1, 1, 1, 1);
TestConv2dQuantInt8(1, 128, 64, 14, 13, 5, 5, SAME, 2, 2, 1, 1);
TestConv2dQuantInt8(1, 128, 257, 28, 28, 3, 3, SAME, 1, 1, 1, 1);
TestConv2dQuantInt8(1, 1, 128, 56, 56, 3, 3, SAME, 2, 2, 1, 1);
TestConv2dQuantInt8(1, 2, 1, 1000, 1000, 4, 3, FULL, 2, 1, 1, 1);
TestConv2dQuantInt8(1, 128, 1, 1000, 1000, 4, 3, FULL, 2, 3, 1, 1);
// dilations are unsupported
// TestConv2dQuantInt8(1, 128, 64, 32, 32, 3, 3, SAME, 1, 1, 2, 2);
// TestConv2dQuantInt8(1, 128, 64, 32, 32, 3, 3, SAME, 1, 1, 2, 1);
// batch must be 1
// TestConv2dQuantInt8(2, 128, 64, 32, 32, 3, 3, SAME, 1, 1, 1, 1);
// TestConv2dQuantInt8(4, 128, 64, 32, 32, 3, 3, SAME, 1, 1, 1, 1);
}
#endif
} // namespace test
} // namespace ops
} // namespace micro
......@@ -15,8 +15,10 @@
#include "gtest/gtest.h"
#include "micro/ops/gtest_utils.h"
#include "micro/ops/nhwc/depthwise_conv_2d_ref.h"
#include "micro/ops/nhwc/cmsis_nn/arm_depthwise_conv_2d_int8.h"
#include "micro/ops/substitute_op.h"
#include "micro/ops/test_utils.h"
#include "micro/ops/test_quantize_utils.h"
namespace micro {
namespace ops {
......@@ -107,6 +109,146 @@ TEST_F(DepthwiseConv2dOpTest, MuiltiC2CPU) {
MultiC2ValidTest();
}
#ifdef MACE_MICRO_ENABLE_CMSIS
namespace {
void TestDepthwiseConv2dQuantInt8(const int32_t batch,
const int32_t multiplier,
const int32_t in_channels,
const int32_t in_height,
const int32_t in_width,
const int32_t kernel_height,
const int32_t kernel_width,
enum Padding padding_type,
const int32_t stride_height,
const int32_t stride_width,
const int32_t dilation_height,
const int32_t dilation_width) {
uint32_t input0_size = batch * in_height * in_width * in_channels;
uint32_t input1_size =
multiplier * kernel_height * kernel_width * in_channels;
uint32_t max_output_size = batch * multiplier * in_channels *
(in_height + kernel_height * dilation_height) *
(in_width + kernel_width * dilation_width);
int32_t bias_size = multiplier * in_channels;
float *input0 = new float[input0_size];
float *input1 = new float[input1_size];
float *bias = new float[bias_size];
FillNormalRandomInput(input0, input0_size);
FillNormalRandomInput(input1, input1_size);
FillNormalRandomInput(bias, bias_size);
float *expect_output = new float[max_output_size];
const uint32_t MAX_OUTPUT_NUM = 10;
int32_t *expect_output_dims = new int32_t[MAX_OUTPUT_NUM];
const int32_t input0_dims[4] = {batch, in_height, in_width, in_channels};
const int32_t input1_dims[4] = {multiplier, kernel_height, kernel_width,
in_channels};
const int32_t bias_dims[1] = {bias_size};
const int32_t strides[2] = {stride_height, stride_width};
const int32_t dilations[2] = {dilation_height, dilation_width};
DepthwiseConv2dRefOp depthwise_conv2d_op;
framework::SubstituteOp substitude_op;
substitude_op.AddInput(input0, input0_dims, 4)
.AddInput(input1, input1_dims, 4)
.AddInput(bias, bias_dims, 1)
.AddArg("padding", padding_type)
.AddRepeatArg("strides", strides, 2)
.AddRepeatArg("dilations", dilations, 2)
.AddOutput(expect_output, expect_output_dims, MAX_OUTPUT_NUM);
depthwise_conv2d_op.Init(
NULL, reinterpret_cast<framework::OpContext *>(&substitude_op), NULL);
depthwise_conv2d_op.Run();
uint32_t expect_output_dim_size = substitude_op.GetOutputShapeDimSize(0);
uint32_t expect_output_size =
base::GetShapeSize(expect_output_dim_size, expect_output_dims);
int8_t *input0_int8 = new int8_t[input0_size];
int8_t *input1_int8 = new int8_t[input1_size];
int32_t *bias_int32 = new int32_t[bias_size];
int8_t *output_int8 = new int8_t[max_output_size];
float *output = new float[max_output_size];
int32_t *output_dims = new int32_t[MAX_OUTPUT_NUM];
QuantizeInfo input_quant_info0;
QuantizeInfo input_quant_info1;
AutoQuantizeInt8(input0, input0_size, input0_int8, &input_quant_info0.scale,
&input_quant_info0.zero);
AutoQuantizeInt8Symmetric(input1, input1_size, input1_int8,
&input_quant_info1.scale);
QuantizeInfo output_quant_info = {0.0f, 0};
AdjustRangeInt8(expect_output, expect_output_size, &output_quant_info.scale,
&output_quant_info.zero);
float bias_scale = input_quant_info0.scale * input_quant_info1.scale;
QuantizeWithScaleAndZeropoint(bias, bias_size, bias_scale, 0, bias_int32);
ArmDepthwiseConv2dInt8Op depthwise_conv2d_op_int8;
framework::SubstituteOp substitude_op_int8;
substitude_op_int8.AddInput(input0_int8, input0_dims, 4, input_quant_info0)
.AddInput(input1_int8, input1_dims, 4, input_quant_info1)
.AddInput(bias_int32, bias_dims, 1)
.AddArg("padding", padding_type)
.AddRepeatArg("strides", strides, 2)
.AddRepeatArg("dilations", dilations, 2)
.AddOutput(output_int8, output_dims, MAX_OUTPUT_NUM, output_quant_info);
depthwise_conv2d_op_int8.Init(
NULL, reinterpret_cast<framework::OpContext *>(&substitude_op_int8),
NULL);
depthwise_conv2d_op_int8.Run();
uint32_t output_dim_size = substitude_op_int8.GetOutputShapeDimSize(0);
uint32_t output_size = base::GetShapeSize(output_dim_size, output_dims);
Dequantize(output_int8, output_size, output_quant_info.scale,
output_quant_info.zero, output);
ExpectTensorSimilar(expect_output, expect_output_dims, expect_output_dim_size,
output, output_dims, output_dim_size, 0.1);
delete[] input0;
delete[] input1;
delete[] bias;
delete[] expect_output;
delete[] expect_output_dims;
delete[] input0_int8;
delete[] input1_int8;
delete[] bias_int32;
delete[] output_int8;
delete[] output;
delete[] output_dims;
}
} // namespace
TEST_F(DepthwiseConv2dOpTest, QuantInt8) {
TestDepthwiseConv2dQuantInt8(1, 1, 1024, 7, 7, 3, 3, VALID, 1, 1, 1, 1);
TestDepthwiseConv2dQuantInt8(1, 1, 1024, 7, 7, 3, 3, SAME, 1, 1, 1, 1);
TestDepthwiseConv2dQuantInt8(1, 1, 1024, 7, 7, 3, 3, FULL, 1, 1, 1, 1);
TestDepthwiseConv2dQuantInt8(1, 1, 512, 14, 13, 3, 3, SAME, 1, 1, 1, 1);
TestDepthwiseConv2dQuantInt8(1, 1, 512, 14, 13, 5, 5, SAME, 2, 2, 1, 1);
TestDepthwiseConv2dQuantInt8(1, 1, 256, 28, 28, 3, 3, SAME, 1, 1, 1, 1);
TestDepthwiseConv2dQuantInt8(1, 1, 128, 56, 56, 3, 3, SAME, 2, 2, 1, 1);
TestDepthwiseConv2dQuantInt8(1, 1, 3, 1000, 1000, 4, 3, FULL, 2, 1, 1, 1);
TestDepthwiseConv2dQuantInt8(1, 1, 3, 1000, 1000, 4, 3, FULL, 2, 3, 1, 1);
// dilations are unsupported
// TestDepthwiseConv2dQuantInt8(1, 1, 3, 1000, 1000, 3, 3, VALID, 1, 1, 2, 2);
// TestDepthwiseConv2dQuantInt8(1, 1, 3, 1000, 1000, 4, 3, FULL, 1, 1, 3, 5);
// TestDepthwiseConv2dQuantInt8(1, 1, 3, 1000, 1000, 4, 3, FULL, 1, 3, 3, 1);
// batch must be 1
// TestDepthwiseConv2dQuantInt8(3, 1, 128, 56, 56, 3, 3, SAME, 2, 2, 1, 1);
// multiplier must be 1
// TestDepthwiseConv2dQuantInt8(1, 2, 1024, 7, 7, 3, 3, SAME, 1, 1, 1, 1);
// TestDepthwiseConv2dQuantInt8(1, 2, 1024, 7, 7, 3, 3, SAME, 2, 2, 1, 1);
}
#endif
} // namespace test
} // namespace ops
} // namespace micro
......@@ -16,7 +16,9 @@
#include "micro/ops/gtest_utils.h"
#include "micro/ops/nhwc/pooling_ref.h"
#include "micro/ops/nhwc/pooling_s4.h"
#include "micro/ops/nhwc/cmsis_nn/arm_pooling_int8.h"
#include "micro/ops/substitute_op.h"
#include "micro/ops/test_quantize_utils.h"
#include "micro/ops/test_utils.h"
namespace micro {
......@@ -203,6 +205,134 @@ TEST_F(PoolingOpTest, TestPoolingOpSameAvg) {
TestPoolingOpSameAvg();
}
#ifdef MACE_MICRO_ENABLE_CMSIS
namespace {
void TestPoolingQuantInt8(const int32_t *input_dims,
const uint32_t input_dim_size,
const int32_t *kernels,
const int32_t *strides,
Padding padding,
PoolingType pooling_type) {
int32_t input_size = base::GetShapeSize(input_dim_size, input_dims);
int32_t max_output_size = input_dims[0] * input_dims[3] *
(input_dims[1] + kernels[0]) *
(input_dims[2] + kernels[1]);
float *input = new float[input_size];
FillNormalRandomInput(input, input_size);
float *expect_output = new float[max_output_size];
const uint32_t MAX_OUTPUT_DIM_SIZE = 100;
int32_t *expect_output_dims = new int32_t[MAX_OUTPUT_DIM_SIZE];
const int32_t dilations[2] = {1, 1};
PoolingRefOp pooling_op;
framework::SubstituteOp substitude_op;
substitude_op.AddInput(input, input_dims, input_dim_size)
.AddRepeatArg("strides", strides, 2)
.AddRepeatArg("kernels", kernels, 2)
.AddRepeatArg("dilations", dilations, 2)
.AddArg("padding", padding)
.AddArg("pooling_type", pooling_type)
.AddOutput(expect_output, expect_output_dims, MAX_OUTPUT_DIM_SIZE);
pooling_op.Init(
NULL, reinterpret_cast<framework::OpContext *>(&substitude_op), NULL);
pooling_op.Run();
uint32_t expect_output_dim_size = substitude_op.GetOutputShapeDimSize(0);
int8_t *input_int8 = new int8_t[input_size];
int8_t *output_int8 = new int8_t[max_output_size];
float *output = new float[max_output_size];
int32_t *output_dims = new int32_t[MAX_OUTPUT_DIM_SIZE];
QuantizeInfo input_quant_info;
AutoQuantizeInt8(input, input_size, input_int8, &input_quant_info.scale,
&input_quant_info.zero);
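// The output reuses the input quantization parameters: max pooling only
// selects existing values and average pooling stays within the input range,
// and the CMSIS-NN pooling kernels assume identical input/output scale and
// zero point.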
QuantizeInfo output_quant_info = input_quant_info;
ArmPoolingInt8Op pooling_op_int8;
framework::SubstituteOp substitude_op_int8;
substitude_op_int8
.AddInput(input_int8, input_dims, input_dim_size, input_quant_info)
.AddRepeatArg("strides", strides, 2)
.AddRepeatArg("kernels", kernels, 2)
.AddRepeatArg("dilations", dilations, 2)
.AddArg("padding", padding)
.AddArg("pooling_type", pooling_type)
.AddOutput(output_int8, output_dims, MAX_OUTPUT_DIM_SIZE,
output_quant_info);
pooling_op_int8.Init(
NULL, reinterpret_cast<framework::OpContext *>(&substitude_op_int8),
NULL);
pooling_op_int8.Run();
uint32_t output_dim_size = substitude_op_int8.GetOutputShapeDimSize(0);
uint32_t output_size = base::GetShapeSize(output_dim_size, output_dims);
Dequantize(output_int8, output_size, output_quant_info.scale,
output_quant_info.zero, output);
ExpectTensorSimilar(expect_output, expect_output_dims, expect_output_dim_size,
output, output_dims, output_dim_size, 0.1);
delete[] input;
delete[] expect_output;
delete[] expect_output_dims;
delete[] input_int8;
delete[] output_int8;
delete[] output;
delete[] output_dims;
}
} // namespace
TEST_F(PoolingOpTest, Quant) {
const int32_t input_dims0[4] = {1, 7, 7, 1024};
const int32_t kernels0[2] = {7, 7};
const int32_t strides0[2] = {1, 1};
TestPoolingQuantInt8(input_dims0, 4, kernels0, strides0, Padding::VALID,
PoolingType::AVG);
TestPoolingQuantInt8(input_dims0, 4, kernels0, strides0, Padding::VALID,
PoolingType::MAX);
TestPoolingQuantInt8(input_dims0, 4, kernels0, strides0, Padding::FULL,
PoolingType::AVG);
TestPoolingQuantInt8(input_dims0, 4, kernels0, strides0, Padding::SAME,
PoolingType::MAX);
const int32_t input_dims1[4] = {1, 3, 3, 2};
const int32_t kernels1[2] = {3, 3};
const int32_t strides1[2] = {1, 1};
TestPoolingQuantInt8(input_dims1, 4, kernels1, strides1, Padding::SAME,
PoolingType::AVG);
const int32_t input_dims2[4] = {1, 3, 3, 2};
const int32_t kernels2[2] = {2, 3};
const int32_t strides2[2] = {1, 2};
TestPoolingQuantInt8(input_dims2, 4, kernels2, strides2, Padding::SAME,
PoolingType::MAX);
// WARNING(ZhangZhimin): Batched inputs are unsupported
// const int32_t input_dims3[4] = {3, 15, 15, 128};
// const int32_t kernels3[2] = {4, 4};
// const int32_t strides3[2] = {4, 4};
// TestPoolingQuantInt8(input_dims3, 4, kernels3, strides3, Padding::SAME,
// PoolingType::AVG);
// const int32_t input_dims4[4] = {3, 15, 15, 128};
// const int32_t kernels4[2] = {4, 4};
// const int32_t strides4[2] = {4, 4};
// TestPoolingQuantInt8(input_dims4, 4, kernels4, strides4, Padding::SAME,
// PoolingType::MAX);
const int32_t input_dims5[4] = {1, 31, 31, 127};
const int32_t kernels5[2] = {2, 2};
const int32_t strides5[2] = {3, 3};
TestPoolingQuantInt8(input_dims5, 4, kernels5, strides5, Padding::SAME,
PoolingType::AVG);
const int32_t input_dims6[4] = {1, 31, 31, 127};
const int32_t kernels6[2] = {2, 2};
const int32_t strides6[2] = {3, 3};
TestPoolingQuantInt8(input_dims6, 4, kernels6, strides6, Padding::SAME,
PoolingType::MAX);
}
#endif
} // namespace test
} // namespace ops
} // namespace micro
......@@ -33,7 +33,7 @@ void TestReshapeOp(
T *y, int32_t *y_dims, const uint32_t y_dim_size,
const T *e, const int32_t *e_dims, const uint32_t e_dim_size) {
ReshapeOp reshape_op;
ReshapeOp<T> reshape_op;
framework::SubstituteOp substitude_op;
substitude_op.AddInput(input, input_dims, input_dim_size)
.AddInput(shape, shape_dims, 1)
......
......@@ -13,9 +13,11 @@
// limitations under the License.
#include "gtest/gtest.h"
#include "micro/ops/gtest_utils.h"
#include "micro/ops/softmax.h"
#include "micro/ops/gtest_utils.h"
#include "micro/ops/nhwc/cmsis_nn/arm_softmax_int8.h"
#include "micro/ops/substitute_op.h"
#include "micro/ops/test_quantize_utils.h"
#include "micro/ops/test_utils.h"
namespace micro {
......@@ -49,15 +51,89 @@ void Simple(bool use_log = false) {
&substitude_op), NULL);
softmax_op.Run();
ExpectTensorNear<float>(output, output_dims, output_dim_size,
expect, expect_dims, output_dim_size, 1e-5);
ExpectTensorNear<float>(output, output_dims, output_dim_size, expect,
expect_dims, output_dim_size, 1e-5);
}
} // namespace
TEST_F(SoftmaxOpTest, CPUSimple) { Simple(); }
TEST_F(SoftmaxOpTest, CPUSimpleUseLog) { Simple(true); }
#ifdef MACE_MICRO_ENABLE_CMSIS
namespace {
void TestSoftmaxQuantInt8(const int32_t *input_dims,
const uint32_t input_dim_size,
bool use_log = false) {
int32_t shape_size = base::GetShapeSize(input_dim_size, input_dims);
float *input = new float[shape_size];
FillNormalRandomInput(input, shape_size);
float *expect_output = new float[shape_size];
const uint32_t MAX_OUTPUT_NUM = 10;
int32_t *expect_output_dims = new int32_t[MAX_OUTPUT_NUM];
SoftmaxOp softmax_op;
framework::SubstituteOp substitude_op;
substitude_op.AddInput(input, input_dims, input_dim_size)
.AddArg("use_log", static_cast<int>(use_log))
.AddOutput(expect_output, expect_output_dims, MAX_OUTPUT_NUM);
softmax_op.Init(
NULL, reinterpret_cast<framework::OpContext *>(&substitude_op), NULL);
softmax_op.Run();
uint32_t expect_output_dim_size = substitude_op.GetOutputShapeDimSize(0);
int8_t *input_int8 = new int8_t[shape_size];
int8_t *output_int8 = new int8_t[shape_size];
float *output = new float[shape_size];
int32_t *output_dims = new int32_t[MAX_OUTPUT_NUM];
QuantizeInfo input_quant_info;
AutoQuantizeInt8(input, shape_size, input_int8, &input_quant_info.scale,
&input_quant_info.zero);
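// Softmax outputs lie in [0, 1], so a fixed output quantization of
// scale = 1/255 and zero_point = -128 maps [0, 1] exactly onto the int8
// range: e.g. a probability of 1.0 encodes as -128 + 1.0 / (1/255) = 127.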
QuantizeInfo output_quant_info = {1.0f / 255.0f, -128};
ArmSoftmaxInt8Op softmax_op_int8;
framework::SubstituteOp substitude_op_int8;
substitude_op_int8
.AddInput(input_int8, input_dims, input_dim_size, input_quant_info)
.AddArg("use_log", static_cast<int>(use_log))
.AddOutput(output_int8, output_dims, MAX_OUTPUT_NUM, output_quant_info);
softmax_op_int8.Init(
NULL, reinterpret_cast<framework::OpContext *>(&substitude_op_int8),
NULL);
softmax_op_int8.Run();
uint32_t output_dim_size = substitude_op_int8.GetOutputShapeDimSize(0);
Dequantize(output_int8, shape_size, output_quant_info.scale,
output_quant_info.zero, output);
ExpectTensorSimilar(expect_output, expect_output_dims, expect_output_dim_size,
output, output_dims, output_dim_size, 0.1);
delete[] input;
delete[] expect_output;
delete[] expect_output_dims;
delete[] input_int8;
delete[] output_int8;
delete[] output;
delete[] output_dims;
}
} // namespace
TEST_F(SoftmaxOpTest, QuantInt8) {
const int32_t input_dims0[2] = {5, 10};
TestSoftmaxQuantInt8(input_dims0, 2);
const int32_t input_dims1[2] = {50, 100};
TestSoftmaxQuantInt8(input_dims1, 2);
const int32_t input_dims2[2] = {1, 31};
TestSoftmaxQuantInt8(input_dims2, 2);
}
#endif
} // namespace test
} // namespace ops
} // namespace micro
......@@ -7,6 +7,7 @@ add_library(micro_ccutils
target_include_directories(micro_ccutils PUBLIC .)
target_link_libraries(micro_ccutils micro_base micro_framework_for_optest)
target_compile_options(micro_ccutils PUBLIC "-std=c++11")
if(HEXAGON_STUB)
add_library(micro_rpc_stub
......
......@@ -105,6 +105,16 @@ MaceStatus Operator::ResizeOutputShape(uint32_t idx, uint32_t dim_size,
return fake_op_->ResizeOutputShape(idx, dim_size, dims);
}
QuantizeInfo Operator::GetInputQuantizeInfo(uint32_t idx) {
return fake_op_->GetInputQuantizeInfo(idx);
}
QuantizeInfo Operator::GetOutputQuantizeInfo(uint32_t idx) {
return fake_op_->GetOutputQuantizeInfo(idx);
}
#ifndef MACE_DEFINE_GET_ARG_BY_NAME_FUNC
#define MACE_DEFINE_GET_ARG_BY_NAME_FUNC(T, FUNC) \
template <> \
......
......@@ -24,26 +24,32 @@ namespace framework {
SubstituteOp::SubstituteOp()
: input_idx_(0), output_idx_(0), arg_idx_(0), repeat_arg_idx_(0) {}
SubstituteOp &SubstituteOp::AddInput(
const void *input, const int32_t *dims, const uint32_t dims_size) {
SubstituteOp &SubstituteOp::AddInput(const void *input,
const int32_t *dims,
const uint32_t dims_size,
QuantizeInfo quant_info) {
MACE_ASSERT1(input != NULL || dims != NULL || dims_size == 0,
"Invalid param");
MACE_ASSERT1(input_idx_ < kMaxInputNum, "Not enough mem.");
inputs_[input_idx_] = input;
input_dims_[input_idx_] = dims;
input_dim_sizes_[input_idx_] = dims_size;
input_quant_info_[input_idx_] = quant_info;
++input_idx_;
return *this;
}
SubstituteOp &SubstituteOp::AddOutput(
void *output, int32_t *dims, const uint32_t dims_size) {
SubstituteOp &SubstituteOp::AddOutput(void *output,
int32_t *dims,
const uint32_t dims_size,
QuantizeInfo quant_info) {
MACE_ASSERT1(output != NULL || dims != NULL || dims_size == 0,
"Invalid param");
MACE_ASSERT1(output_idx_ < kMaxOutputNum, "Not enough mem.");
outputs_[output_idx_] = output;
output_dims_[output_idx_] = dims;
output_dim_sizes_[output_idx_] = dims_size;
output_quant_info_[output_idx_] = quant_info;
++output_idx_;
return *this;
}
......@@ -86,6 +92,14 @@ const int32_t *SubstituteOp::GetOutputShapeDims(uint32_t idx) {
return output_dims_[idx];
}
QuantizeInfo SubstituteOp::GetInputQuantizeInfo(uint32_t idx) {
return input_quant_info_[idx];
}
QuantizeInfo SubstituteOp::GetOutputQuantizeInfo(uint32_t idx) {
return output_quant_info_[idx];
}
MaceStatus SubstituteOp::ResizeOutputShape(uint32_t idx,
uint32_t input_dim_size,
const int32_t *input_dims) {
......
......@@ -16,6 +16,7 @@
#define MICRO_TEST_CCUTILS_MICRO_OPS_SUBSTITUTE_OP_H_
#include "micro/base/logging.h"
#include "micro/base/types.h"
#include "micro/base/utils.h"
#include "micro/include/public/micro.h"
......@@ -43,9 +44,13 @@ class SubstituteOp {
~SubstituteOp() {}
SubstituteOp &AddInput(const void *input,
const int32_t *dims, const uint32_t dims_size);
const int32_t *dims,
const uint32_t dims_size,
QuantizeInfo quant_info = QuantizeInfo{0.0f, 0});
SubstituteOp &AddOutput(void *output,
int32_t *dims, const uint32_t dims_size);
int32_t *dims,
const uint32_t dims_size,
QuantizeInfo quant_info = QuantizeInfo{0.0f, 0});
template<typename T>
SubstituteOp &AddArg(const char *name, T value) {
......@@ -106,6 +111,9 @@ class SubstituteOp {
const int32_t *input_dims);
MaceStatus ReuseInputBufferForOutput(uint32_t output_idx, uint32_t input_idx);
QuantizeInfo GetInputQuantizeInfo(uint32_t idx);
QuantizeInfo GetOutputQuantizeInfo(uint32_t idx);
template<typename T>
const T *GetInputData(uint32_t idx) {
return static_cast<const T *>(DoGetInputData(idx));
......@@ -120,11 +128,13 @@ class SubstituteOp {
const void *inputs_[kMaxInputNum];
const int32_t *input_dims_[kMaxInputNum];
uint32_t input_dim_sizes_[kMaxInputNum];
QuantizeInfo input_quant_info_[kMaxInputNum];
uint32_t input_idx_;
void *outputs_[kMaxOutputNum];
int32_t *output_dims_[kMaxOutputNum];
uint32_t output_dim_sizes_[kMaxOutputNum];
QuantizeInfo output_quant_info_[kMaxOutputNum];
uint32_t output_idx_;
// for arg
......
// Copyright 2020 The MACE Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef MICRO_TEST_CCUTILS_MICRO_OPS_TEST_QUANTIZE_UTILS_H_
#define MICRO_TEST_CCUTILS_MICRO_OPS_TEST_QUANTIZE_UTILS_H_
#include <math.h>
#include <stdint.h>
#include <limits>
#include "micro/base/logging.h"
#include "micro/common/global_buffer.h"
#include "micro/include/public/micro.h"
#include "micro/port/api.h"
namespace micro {
namespace ops {
namespace test {
template <typename Q>
inline Q Saturate(float value) {
int rounded_value = static_cast<int>(value);
if (rounded_value <= std::numeric_limits<Q>::lowest()) {
return std::numeric_limits<Q>::lowest();
} else if (rounded_value >= std::numeric_limits<Q>::max()) {
return std::numeric_limits<Q>::max();
} else {
return static_cast<Q>(rounded_value);
}
}
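// Illustrative examples: Saturate<int8_t>(200.0f) == 127 and
// Saturate<int8_t>(-300.0f) == -128. Note the cast truncates rather than
// rounds, so callers are expected to pre-round, as
// QuantizeWithScaleAndZeropoint does below via roundf.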
inline void FindMinMax(const float *input,
const uint32_t size,
float *min_val,
float *max_val) {
float max_v = base::lowest();
float min_v = base::highest();
for (uint32_t i = 0; i < size; ++i) {
max_v = base::max(max_v, input[i]);
min_v = base::min(min_v, input[i]);
}
*min_val = min_v;
*max_val = max_v;
}
template <typename Q>
inline void QuantizeWithScaleAndZeropoint(const float *input,
const uint32_t size,
float scale,
int32_t zero_point,
Q *output) {
float recip_scale = 1 / scale;
for (uint32_t i = 0; i < size; ++i) {
output[i] = Saturate<Q>(roundf(zero_point + recip_scale * input[i]));
}
}
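// Worked example: with scale = 0.1f and zero_point = 5, an input of 1.0f
// quantizes to Saturate<Q>(roundf(5 + 10.0f)) == 15.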
inline void AdjustRangeInt8(const float *input,
const uint32_t size,
float *scale,
int32_t *zero_point) {
float in_min_data;
float in_max_data;
FindMinMax(input, size, &in_min_data, &in_max_data);
in_max_data = base::max(0.f, in_max_data);
in_min_data = base::min(0.f, in_min_data);
*scale = (in_max_data - in_min_data) / 255;
*zero_point = int8_t(-in_min_data / *scale - 128);
}
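// The zero-extended float range [min, max] is spread over 256 levels and
// zero_point is the int8 code for 0.0f. E.g. for a range of [-1.0f, 3.0f]:
// scale = 4 / 255 ~= 0.0157 and zero_point = int8_t(63.75f - 128) == -64.
// Note this assumes the input is not all zeros; otherwise scale is 0 and
// the division for zero_point is undefined.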
inline void AdjustRangeInt8Symmetric(const float *input,
const uint32_t size,
float *scale) {
float in_min_data;
float in_max_data;
FindMinMax(input, size, &in_min_data, &in_max_data);
in_max_data = base::max(0.f, in_max_data);
in_min_data = base::min(0.f, in_min_data);
float max_abs = base::max(base::abs(in_max_data), base::abs(in_min_data));
*scale = max_abs / 127.0f;
}
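// Symmetric quantization pins zero_point to 0 and scales by the largest
// absolute value, so an int8 weight w dequantizes to simply w * scale.
// CMSIS-NN int8 kernels generally expect symmetric filter weights, which
// is why these tests quantize weights with AutoQuantizeInt8Symmetric and
// activations with AutoQuantizeInt8.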
inline void AutoQuantizeInt8(const float *input,
const uint32_t size,
int8_t *output,
float *scale,
int32_t *zero_point) {
AdjustRangeInt8(input, size, scale, zero_point);
QuantizeWithScaleAndZeropoint(input, size, *scale, *zero_point, output);
}
inline void AutoQuantizeInt8Symmetric(const float *input,
const uint32_t size,
int8_t *output,
float *scale) {
AdjustRangeInt8Symmetric(input, size, scale);
QuantizeWithScaleAndZeropoint(input, size, *scale, 0, output);
}
inline void Dequantize(const int8_t *input,
const uint32_t size,
const float scale,
const int32_t zero_point,
float *output) {
for (uint32_t i = 0; i < size; ++i) {
output[i] = static_cast<float>(scale * (input[i] - zero_point));
}
}
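// Worked example: with scale = 0.05f and zero_point = -10, the int8 value
// 30 dequantizes to 0.05f * (30 - (-10)) == 2.0f.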
} // namespace test
} // namespace ops
} // namespace micro
#endif // MICRO_TEST_CCUTILS_MICRO_OPS_TEST_QUANTIZE_UTILS_H_
......@@ -15,6 +15,8 @@
#include "micro/ops/test_utils.h"
#include <random>
namespace micro {
namespace ops {
namespace test {
......@@ -67,6 +69,30 @@ void FillRandomInput(void *input, const int32_t shape_size) {
}
}
void FillUniformRandomInput(float *input,
const int32_t shape_size,
float low,
float up) {
std::random_device rd;
std::mt19937 gen(rd());
std::uniform_real_distribution<float> dis(low, up);
for (int n = 0; n < shape_size; ++n) {
input[n] = dis(gen);
}
}
void FillNormalRandomInput(float *input,
const int32_t shape_size,
float mean,
float std) {
std::random_device rd;
std::mt19937 gen(rd());
std::normal_distribution<float> dis(mean, std);
for (int n = 0; n < shape_size; ++n) {
input[n] = dis(gen);
}
}
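// Both fillers seed from std::random_device, so every run draws fresh
// inputs; if a quantized test fails intermittently, switching to a fixed
// seed (e.g. std::mt19937 gen(42)) makes the failure reproducible.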
} // namespace test
} // namespace ops
} // namespace micro
......
......@@ -38,6 +38,16 @@ T *input = common::test::GetGlobalBuffer()->GetBuffer<T>(shape_size); \
micro::ops::test::FillRandomInput(input, shape_size * sizeof(T))
#endif
void FillUniformRandomInput(float *input,
const int32_t shape_size,
float low = -50.0f,
float up = 50.0f);
void FillNormalRandomInput(float *input,
const int32_t shape_size,
float mean = 0.0f,
float std = 1.0f);
} // namespace test
} // namespace ops
} // namespace micro
......
Subproject commit 378acfb6490a82ba90e1ffb4bfd4e602668b180a
Subproject commit a386bd0f204cf99db253b3e84c56795dea8c397f
Copyright (c) 2006, Google Inc.
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above
copyright notice, this list of conditions and the following disclaimer
in the documentation and/or other materials provided with the
distribution.
* Neither the name of Google Inc. nor the names of its
contributors may be used to endorse or promote products derived from
this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
INCLUDE(ExternalProject)
set(GFLAGS_SRCS_DIR "${MACE_THIRD_PARTY_DIR}/gflags")
set(GFLAGS_INSTALL_DIR "${MACE_THIRD_PARTY_DIR}/install/gflags")
set(GFLAGS_INCLUDE_DIR "${GFLAGS_INSTALL_DIR}/include" CACHE PATH "gflags include directory." FORCE)
if(MSVC)
set(GFLAGS_LIBRARIES "${GFLAGS_INSTALL_DIR}/lib/gflags_static.lib" CACHE FILEPATH "GFLAGS_LIBRARIES" FORCE)
else(MSVC)
set(GFLAGS_LIBRARIES "${GFLAGS_INSTALL_DIR}/lib/libgflags.a" CACHE FILEPATH "GFLAGS_LIBRARIES" FORCE)
endif(MSVC)
include_directories(SYSTEM ${GFLAGS_INCLUDE_DIR})
# Mirror of https://github.com/gflags/gflags/archive/v2.2.2.zip
set(GFLAGS_URL "https://cnbj1.fds.api.xiaomi.com/mace/third-party/gflags/v2.2.2.zip")
set(GFLAGS_HASH "SHA256=19713a36c9f32b33df59d1c79b4958434cb005b5b47dc5400a7a4b078111d9b5")
ExternalProject_Add(
gflags_gflags
URL_HASH "${GFLAGS_HASH}"
URL "${GFLAGS_URL}"
PREFIX ${GFLAGS_SRCS_DIR}
UPDATE_COMMAND ""
BUILD_BYPRODUCTS ${GFLAGS_LIBRARIES}
CMAKE_ARGS -DCMAKE_INSTALL_PREFIX=${GFLAGS_INSTALL_DIR}
-DCMAKE_CXX_COMPILER_LAUNCHER=${CMAKE_CXX_COMPILER_LAUNCHER}
-DCMAKE_C_COMPILER_LAUNCHER=${CMAKE_C_COMPILER_LAUNCHER}
-DBUILD_STATIC_LIBS=ON
-DBUILD_TESTING=OFF
-DCMAKE_BUILD_TYPE=Release
-DCMAKE_GENERATOR=${CMAKE_GENERATOR}
${THIRD_PARTY_EXTRA_CMAKE_ARGS}
)
if(MSVC)
add_custom_command(TARGET gflags_gflags POST_BUILD
COMMAND if $<CONFIG:Debug>==1 (${CMAKE_COMMAND} -E copy ${GFLAGS_INSTALL_DIR}/lib/gflags_static_debug.lib ${GFLAGS_INSTALL_DIR}/lib/gflags_static.lib)
)
endif(MSVC)
add_library(gflags STATIC IMPORTED GLOBAL)
set_property(TARGET gflags PROPERTY IMPORTED_LOCATION ${GFLAGS_LIBRARIES})
add_dependencies(gflags gflags_gflags)
if(MSVC)
set_target_properties(gflags
PROPERTIES IMPORTED_LINK_INTERFACE_LIBRARIES
Shlwapi.lib)
endif(MSVC)
Subproject commit e6e2d3b7614ff4e6017d8968bd4c3f579133666e
Copyright 2008, Google Inc.
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above
copyright notice, this list of conditions and the following disclaimer
in the documentation and/or other materials provided with the
distribution.
* Neither the name of Google Inc. nor the names of its
contributors may be used to endorse or promote products derived from
this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
licenses(["notice"])
exports_files(["LICENSE"])
cc_library(
name = "gtest",
srcs = [
"googletest/src/gtest-all.cc",
"googlemock/src/gmock-all.cc",
],
hdrs = glob([
"**/*.h",
"googletest/src/*.cc",
"googlemock/src/*.cc",
]),
includes = [
"googlemock",
"googletest",
"googletest/include",
"googlemock/include",
],
linkopts = ["-pthread"],
visibility = ["//visibility:public"],
)
cc_library(
name = "gtest_main",
srcs = ["googlemock/src/gmock_main.cc"],
linkopts = ["-pthread"],
visibility = ["//visibility:public"],
deps = [":gtest"],
)
enable_testing()
include(ExternalProject)
set(GTEST_SOURCES_DIR ${MACE_THIRD_PARTY_DIR}/gtest)
set(GTEST_INSTALL_DIR ${MACE_THIRD_PARTY_DIR}/install/gtest)
set(GTEST_INCLUDE_DIR "${GTEST_INSTALL_DIR}/include" CACHE PATH "gtest include directory." FORCE)
include_directories(SYSTEM ${GTEST_INCLUDE_DIR})
if(MSVC)
set(GTEST_LIBRARIES
"${GTEST_INSTALL_DIR}/lib/gtest.lib" CACHE FILEPATH "gtest libraries." FORCE)
set(GTEST_MAIN_LIBRARIES
"${GTEST_INSTALL_DIR}/lib/gtest_main.lib" CACHE FILEPATH "gtest main libraries." FORCE)
else(MSVC)
set(GTEST_LIBRARIES
"${GTEST_INSTALL_DIR}/lib/libgtest.a" CACHE FILEPATH "gtest libraries." FORCE)
set(GTEST_MAIN_LIBRARIES
"${GTEST_INSTALL_DIR}/lib/libgtest_main.a" CACHE FILEPATH "gtest main libraries." FORCE)
endif(MSVC)
# Mirror of "https://github.com/google/googletest/archive/release-1.8.0.zip"
set(GTEST_URL "https://cnbj1.fds.api.xiaomi.com/mace/third-party/googletest/googletest-release-1.8.0.zip")
set(GTEST_HASH "SHA256=f3ed3b58511efd272eb074a3a6d6fb79d7c2e6a0e374323d1e6bcbcc1ef141bf")
ExternalProject_Add(
extern_gtest
URL_HASH "${GTEST_HASH}"
URL "${GTEST_URL}"
PREFIX ${GTEST_SOURCES_DIR}
UPDATE_COMMAND ""
BUILD_BYPRODUCTS ${GTEST_LIBRARIES} ${GTEST_MAIN_LIBRARIES}
CMAKE_ARGS -DCMAKE_BUILD_TYPE=${CMAKE_BUILD_TYPE}
-DCMAKE_CXX_COMPILER_LAUNCHER=${CMAKE_CXX_COMPILER_LAUNCHER}
-DCMAKE_C_COMPILER_LAUNCHER=${CMAKE_C_COMPILER_LAUNCHER}
-DCMAKE_INSTALL_PREFIX=${GTEST_INSTALL_DIR}
-DBUILD_GMOCK=ON
-Dgtest_disable_pthreads=ON
-Dgtest_force_shared_crt=ON
-DCMAKE_BUILD_TYPE=${CMAKE_BUILD_TYPE}
-DCMAKE_GENERATOR=${CMAKE_GENERATOR}
${THIRD_PARTY_EXTRA_CMAKE_ARGS}
)
add_library(gtest STATIC IMPORTED GLOBAL)
set_property(TARGET gtest PROPERTY IMPORTED_LOCATION ${GTEST_LIBRARIES})
add_dependencies(gtest extern_gtest)
add_library(gtest_main STATIC IMPORTED GLOBAL)
set_property(TARGET gtest_main PROPERTY IMPORTED_LOCATION ${GTEST_MAIN_LIBRARIES})
add_dependencies(gtest_main extern_gtest)