diff --git a/deploy/tvm/.gitignore b/deploy/tvm/.gitignore new file mode 100644 index 0000000000000000000000000000000000000000..faeba235a6894f0bd28aab23dea5f4f559071846 --- /dev/null +++ b/deploy/tvm/.gitignore @@ -0,0 +1,5 @@ +include/inputs.h +include/outputs.h + +__pycache__/ +build/ \ No newline at end of file diff --git a/deploy/tvm/Makefile b/deploy/tvm/Makefile new file mode 100644 index 0000000000000000000000000000000000000000..cf7d375b7e54c7781768db39274d9b3f7128812b --- /dev/null +++ b/deploy/tvm/Makefile @@ -0,0 +1,129 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +# Makefile to build demo + +# Setup build environment +BUILD_DIR := build + +ARM_CPU = ARMCM55 +ETHOSU_PATH = /opt/arm/ethosu +CMSIS_PATH ?= ${ETHOSU_PATH}/cmsis +ETHOSU_PLATFORM_PATH ?= ${ETHOSU_PATH}/core_platform +STANDALONE_CRT_PATH := $(abspath $(BUILD_DIR))/runtime +CORSTONE_300_PATH = ${ETHOSU_PLATFORM_PATH}/targets/corstone-300 +PKG_COMPILE_OPTS = -g -Wall -O2 -Wno-incompatible-pointer-types -Wno-format -mcpu=cortex-m55 -mthumb -mfloat-abi=hard -std=gnu99 +CMAKE ?= cmake +CC = arm-none-eabi-gcc +AR = arm-none-eabi-ar +RANLIB = arm-none-eabi-ranlib +PKG_CFLAGS = ${PKG_COMPILE_OPTS} \ + -I${STANDALONE_CRT_PATH}/include \ + -I${STANDALONE_CRT_PATH}/src/runtime/crt/include \ + -I${PWD}/include \ + -I${CORSTONE_300_PATH} \ + -I${CMSIS_PATH}/Device/ARM/${ARM_CPU}/Include/ \ + -I${CMSIS_PATH}/CMSIS/Core/Include \ + -I${CMSIS_PATH}/CMSIS/NN/Include \ + -I${CMSIS_PATH}/CMSIS/DSP/Include \ + -I$(abspath $(BUILD_DIR))/codegen/host/include +CMSIS_NN_CMAKE_FLAGS = -DCMAKE_TOOLCHAIN_FILE=$(abspath $(BUILD_DIR))/../arm-none-eabi-gcc.cmake \ + -DTARGET_CPU=cortex-m55 \ + -DBUILD_CMSIS_NN_FUNCTIONS=YES +PKG_LDFLAGS = -lm -specs=nosys.specs -static -T corstone300.ld + +$(ifeq VERBOSE,1) +QUIET ?= +$(else) +QUIET ?= @ +$(endif) + +DEMO_MAIN = src/demo_bare_metal.c +CODEGEN_SRCS = $(wildcard $(abspath $(BUILD_DIR))/codegen/host/src/*.c) +CODEGEN_OBJS = $(subst .c,.o,$(CODEGEN_SRCS)) +CMSIS_STARTUP_SRCS = $(wildcard ${CMSIS_PATH}/Device/ARM/${ARM_CPU}/Source/*.c) +UART_SRCS = $(wildcard ${CORSTONE_300_PATH}/*.c) + +demo: $(BUILD_DIR)/demo + +$(BUILD_DIR)/stack_allocator.o: $(STANDALONE_CRT_PATH)/src/runtime/crt/memory/stack_allocator.c + $(QUIET)mkdir -p $(@D) + $(QUIET)$(CC) -c $(PKG_CFLAGS) -o $@ $^ + +$(BUILD_DIR)/crt_backend_api.o: $(STANDALONE_CRT_PATH)/src/runtime/crt/common/crt_backend_api.c + $(QUIET)mkdir -p $(@D) + $(QUIET)$(CC) -c $(PKG_CFLAGS) -o $@ $^ + +# Build generated code +$(BUILD_DIR)/libcodegen.a: $(CODEGEN_SRCS) + $(QUIET)cd $(abspath $(BUILD_DIR)/codegen/host/src) && $(CC) -c $(PKG_CFLAGS) $(CODEGEN_SRCS) + $(QUIET)$(AR) -cr $(abspath $(BUILD_DIR)/libcodegen.a) $(CODEGEN_OBJS) + $(QUIET)$(RANLIB) $(abspath $(BUILD_DIR)/libcodegen.a) + +# Build CMSIS startup code +${BUILD_DIR}/libcmsis_startup.a: $(CMSIS_STARTUP_SRCS) + $(QUIET)mkdir -p $(abspath $(BUILD_DIR)/libcmsis_startup) + $(QUIET)cd $(abspath $(BUILD_DIR)/libcmsis_startup) && $(CC) -c $(PKG_CFLAGS) -D${ARM_CPU} $^ + $(QUIET)$(AR) -cr $(abspath $(BUILD_DIR)/libcmsis_startup.a) $(abspath $(BUILD_DIR))/libcmsis_startup/*.o + $(QUIET)$(RANLIB) $(abspath $(BUILD_DIR)/libcmsis_startup.a) + +CMSIS_SHA_FILE=${CMSIS_PATH}/977abe9849781a2e788b02282986480ff4e25ea6.sha +ifneq ("$(wildcard $(CMSIS_SHA_FILE))","") +${BUILD_DIR}/cmsis_nn/Source/libcmsis-nn.a: + $(QUIET)mkdir -p $(@D) + $(QUIET)cd $(CMSIS_PATH)/CMSIS/NN && $(CMAKE) -B $(abspath $(BUILD_DIR)/cmsis_nn) $(CMSIS_NN_CMAKE_FLAGS) + $(QUIET)cd $(abspath $(BUILD_DIR)/cmsis_nn) && $(MAKE) all +else +# Build CMSIS-NN +${BUILD_DIR}/cmsis_nn/Source/SoftmaxFunctions/libCMSISNNSoftmax.a: + $(QUIET)mkdir -p $(@D) + $(QUIET)cd $(CMSIS_PATH)/CMSIS/NN && $(CMAKE) -B $(abspath $(BUILD_DIR)/cmsis_nn) $(CMSIS_NN_CMAKE_FLAGS) + $(QUIET)cd $(abspath $(BUILD_DIR)/cmsis_nn) && $(MAKE) all +endif + +# Build demo application +ifneq ("$(wildcard $(CMSIS_SHA_FILE))","") +$(BUILD_DIR)/demo: $(DEMO_MAIN) $(UART_SRCS) $(BUILD_DIR)/stack_allocator.o $(BUILD_DIR)/crt_backend_api.o \ + ${BUILD_DIR}/libcodegen.a ${BUILD_DIR}/libcmsis_startup.a ${BUILD_DIR}/cmsis_nn/Source/libcmsis-nn.a + $(QUIET)mkdir -p $(@D) + $(QUIET)$(CC) $(PKG_CFLAGS) $(FREERTOS_FLAGS) -o $@ -Wl,--whole-archive $^ -Wl,--no-whole-archive $(PKG_LDFLAGS) +else +$(BUILD_DIR)/demo: $(DEMO_MAIN) $(UART_SRCS) $(BUILD_DIR)/stack_allocator.o $(BUILD_DIR)/crt_backend_api.o \ + ${BUILD_DIR}/libcodegen.a ${BUILD_DIR}/libcmsis_startup.a \ + ${BUILD_DIR}/cmsis_nn/Source/SoftmaxFunctions/libCMSISNNSoftmax.a \ + ${BUILD_DIR}/cmsis_nn/Source/FullyConnectedFunctions/libCMSISNNFullyConnected.a \ + ${BUILD_DIR}/cmsis_nn/Source/SVDFunctions/libCMSISNNSVDF.a \ + ${BUILD_DIR}/cmsis_nn/Source/ReshapeFunctions/libCMSISNNReshape.a \ + ${BUILD_DIR}/cmsis_nn/Source/ActivationFunctions/libCMSISNNActivation.a \ + ${BUILD_DIR}/cmsis_nn/Source/NNSupportFunctions/libCMSISNNSupport.a \ + ${BUILD_DIR}/cmsis_nn/Source/ConcatenationFunctions/libCMSISNNConcatenation.a \ + ${BUILD_DIR}/cmsis_nn/Source/BasicMathFunctions/libCMSISNNBasicMaths.a \ + ${BUILD_DIR}/cmsis_nn/Source/ConvolutionFunctions/libCMSISNNConvolutions.a \ + ${BUILD_DIR}/cmsis_nn/Source/PoolingFunctions/libCMSISNNPooling.a + $(QUIET)mkdir -p $(@D) + $(QUIET)$(CC) $(PKG_CFLAGS) $(FREERTOS_FLAGS) -o $@ -Wl,--whole-archive $^ -Wl,--no-whole-archive $(PKG_LDFLAGS) +endif + +clean: + $(QUIET)rm -rf $(BUILD_DIR)/codegen + +cleanall: + $(QUIET)rm -rf $(BUILD_DIR) + +.SUFFIXES: + +.DEFAULT: demo diff --git a/deploy/tvm/README.md b/deploy/tvm/README.md new file mode 100644 index 0000000000000000000000000000000000000000..ecd635cc42ba277f8b6ff9bf1a8fa586378bf4d2 --- /dev/null +++ b/deploy/tvm/README.md @@ -0,0 +1,100 @@ + + + + + + + + + + + + + + + + + +Running PaddleOCR text recognition model via TVM on bare metal Arm(R) Cortex(R)-M55 CPU and CMSIS-NN +=============================================================== + +This folder contains an example of how to use TVM to run a PaddleOCR model +on bare metal Cortex(R)-M55 CPU and CMSIS-NN. + +Prerequisites +------------- +If the demo is run in the ci_cpu Docker container provided with TVM, then the following +software will already be installed. + +If the demo is not run in the ci_cpu Docker container, then you will need the following: +- Software required to build and run the demo (These can all be installed by running + tvm/docker/install/ubuntu_install_ethosu_driver_stack.sh.) + - [Fixed Virtual Platform (FVP) based on Arm(R) Corstone(TM)-300 software](https://developer.arm.com/tools-and-software/open-source-software/arm-platforms-software/arm-ecosystem-fvps) + - [cmake 3.19.5](https://github.com/Kitware/CMake/releases/) + - [GCC toolchain from Arm(R)](https://developer.arm.com/-/media/Files/downloads/gnu-rm/10-2020q4/gcc-arm-none-eabi-10-2020-q4-major-x86_64-linux.tar.bz2) + - [Arm(R) Ethos(TM)-U NPU driver stack](https://review.mlplatform.org) + - [CMSIS](https://github.com/ARM-software/CMSIS_5) +- The python libraries listed in the requirements.txt of this directory + - These can be installed by running the following from the current directory: + ```bash + pip install -r ./requirements.txt + ``` + +You will also need TVM which can either be: + - Built from source (see [Install from Source](https://tvm.apache.org/docs/install/from_source.html)) + - When building from source, the following need to be set in config.cmake: + - set(USE_CMSISNN ON) + - set(USE_MICRO ON) + - set(USE_LLVM ON) + - Installed from TLCPack nightly(see [TLCPack](https://tlcpack.ai/)) + +You will need to update your PATH environment variable to include the path to cmake 3.19.5 and the FVP. +For example if you've installed these in ```/opt/arm``` , then you would do the following: +```bash +export PATH=/opt/arm/FVP_Corstone_SSE-300/models/Linux64_GCC-6.4:/opt/arm/cmake/bin:$PATH +``` + +Running the demo application +---------------------------- +Type the following command to run the bare metal text recognition application ([src/demo_bare_metal.c](./src/demo_bare_metal.c)): + +```bash +./run_demo.sh +``` + +If the Ethos(TM)-U platform and/or CMSIS have not been installed in /opt/arm/ethosu then +the locations for these can be specified as arguments to run_demo.sh, for example: + +```bash +./run_demo.sh --cmsis_path /home/tvm-user/cmsis \ +--ethosu_platform_path /home/tvm-user/ethosu/core_platform +``` + +This will: +- Download a PaddleOCR text recognition model +- Use tvmc to compile the text recognition model for Cortex(R)-M55 CPU and CMSIS-NN +- Create a C header file inputs.c containing the image data as a C array +- Create a C header file outputs.c containing a C array where the output of inference will be stored +- Build the demo application +- Run the demo application on a Fixed Virtual Platform (FVP) based on Arm(R) Corstone(TM)-300 software +- The application will report the text on the image and the corresponding score. + +Using your own image +-------------------- +The create_image.py script takes a single argument on the command line which is the path of the +image to be converted into an array of bytes for consumption by the model. + +The demo can be modified to use an image of your choice by changing the following line in run_demo.sh + +```bash +python3 ./convert_image.py path/to/image +``` + +Model description +----------------- +In this demo, the model we use is an English recognition model based on [PP-OCRv3](https://github.com/PaddlePaddle/PaddleOCR/blob/dygraph/doc/doc_ch/PP-OCRv3_introduction.md). PP-OCRv3 is the third version of the PP-OCR series model released by [PaddleOCR](https://github.com/PaddlePaddle/PaddleOCR). This series of models has the following features: + - PP-OCRv3: ultra-lightweight OCR system: detection (3.6M) + direction classifier (1.4M) + recognition (12M) = 17.0M + - Support more than 80 kinds of multi-language recognition models, including English, Chinese, French, German, Arabic, Korean, Japanese and so on. For details + - Support vertical text recognition, and long text recognition + +The text recognition model in PP-OCRv3 supports more than 80 languages. In the process of model development, since Arm(R) Cortex(R)-M55 CPU does not support rnn operator, we delete the unsupported operator based on the PP-OCRv3 text recognition model to obtain the current model. \ No newline at end of file diff --git a/deploy/tvm/arm-none-eabi-gcc.cmake b/deploy/tvm/arm-none-eabi-gcc.cmake new file mode 100644 index 0000000000000000000000000000000000000000..415b3139be1b7f891c017dff0dc299b67f7ef2fe --- /dev/null +++ b/deploy/tvm/arm-none-eabi-gcc.cmake @@ -0,0 +1,79 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +if (__TOOLCHAIN_LOADED) + return() +endif() +set(__TOOLCHAIN_LOADED TRUE) + +set(CMAKE_SYSTEM_NAME Generic) +set(CMAKE_C_COMPILER "arm-none-eabi-gcc") +set(CMAKE_CXX_COMPILER "arm-none-eabi-g++") +set(CMAKE_SYSTEM_PROCESSOR "cortex-m55" CACHE STRING "Select Arm(R) Cortex(R)-M architecture. (cortex-m0, cortex-m3, cortex-m33, cortex-m4, cortex-m55, cortex-m7, etc)") + +set(CMAKE_TRY_COMPILE_TARGET_TYPE STATIC_LIBRARY) + +SET(CMAKE_FIND_ROOT_PATH_MODE_PROGRAM NEVER) +SET(CMAKE_FIND_ROOT_PATH_MODE_LIBRARY ONLY) +SET(CMAKE_FIND_ROOT_PATH_MODE_INCLUDE ONLY) + +set(CMAKE_C_STANDARD 99) +set(CMAKE_CXX_STANDARD 14) + +# The system processor could for example be set to cortex-m33+nodsp+nofp. +set(__CPU_COMPILE_TARGET ${CMAKE_SYSTEM_PROCESSOR}) +string(REPLACE "+" ";" __CPU_FEATURES ${__CPU_COMPILE_TARGET}) +list(POP_FRONT __CPU_FEATURES CMAKE_SYSTEM_PROCESSOR) + +string(FIND ${__CPU_COMPILE_TARGET} "+" __OFFSET) +if(__OFFSET GREATER_EQUAL 0) + string(SUBSTRING ${__CPU_COMPILE_TARGET} ${__OFFSET} -1 CPU_FEATURES) +endif() + +# Add -mcpu to the compile options to override the -mcpu the CMake toolchain adds +add_compile_options(-mcpu=${__CPU_COMPILE_TARGET}) + +# Set floating point unit +if("${__CPU_COMPILE_TARGET}" MATCHES "\\+fp") + set(FLOAT hard) +elseif("${__CPU_COMPILE_TARGET}" MATCHES "\\+nofp") + set(FLOAT soft) +elseif("${CMAKE_SYSTEM_PROCESSOR}" STREQUAL "cortex-m33" OR + "${CMAKE_SYSTEM_PROCESSOR}" STREQUAL "cortex-m55") + set(FLOAT hard) +else() + set(FLOAT soft) +endif() + +add_compile_options(-mfloat-abi=${FLOAT}) +add_link_options(-mfloat-abi=${FLOAT}) + +# Link target +add_link_options(-mcpu=${__CPU_COMPILE_TARGET}) +add_link_options(-Xlinker -Map=output.map) + +# +# Compile options +# +set(cxx_flags "-fno-unwind-tables;-fno-rtti;-fno-exceptions") + +add_compile_options("-Wall;-Wextra;-Wsign-compare;-Wunused;-Wswitch-default;\ +-Wdouble-promotion;-Wredundant-decls;-Wshadow;-Wnull-dereference;\ +-Wno-format-extra-args;-Wno-unused-function;-Wno-unused-label;\ +-Wno-missing-field-initializers;-Wno-return-type;-Wno-format;-Wno-int-conversion" + "$<$:${cxx_flags}>" +) diff --git a/deploy/tvm/convert_image.py b/deploy/tvm/convert_image.py new file mode 100755 index 0000000000000000000000000000000000000000..747ab29e4ad2b577890298626da756b7bac6047c --- /dev/null +++ b/deploy/tvm/convert_image.py @@ -0,0 +1,102 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +import os +import pathlib +import re +import sys +import cv2 +import math +from PIL import Image +import numpy as np + +def resize_norm_img(img, image_shape, padding=True): + imgC, imgH, imgW = image_shape + h = img.shape[0] + w = img.shape[1] + if not padding: + resized_image = cv2.resize( + img, (imgW, imgH), interpolation=cv2.INTER_LINEAR) + resized_w = imgW + else: + ratio = w / float(h) + if math.ceil(imgH * ratio) > imgW: + resized_w = imgW + else: + resized_w = int(math.ceil(imgH * ratio)) + resized_image = cv2.resize(img, (resized_w, imgH)) + resized_image = resized_image.astype('float32') + if image_shape[0] == 1: + resized_image = resized_image / 255 + resized_image = resized_image[np.newaxis, :] + else: + resized_image = resized_image.transpose((2, 0, 1)) / 255 + resized_image -= 0.5 + resized_image /= 0.5 + padding_im = np.zeros((imgC, imgH, imgW), dtype=np.float32) + padding_im[:, :, 0:resized_w] = resized_image + return padding_im + + +def create_header_file(name, tensor_name, tensor_data, output_path): + """ + This function generates a header file containing the data from the numpy array provided. + """ + file_path = pathlib.Path(f"{output_path}/" + name).resolve() + # Create header file with npy_data as a C array + raw_path = file_path.with_suffix(".h").resolve() + with open(raw_path, "w") as header_file: + header_file.write( + "\n" + + f"const size_t {tensor_name}_len = {tensor_data.size};\n" + + f'__attribute__((section(".data.tvm"), aligned(16))) float {tensor_name}[] = ' + ) + + header_file.write("{") + for i in np.ndindex(tensor_data.shape): + header_file.write(f"{tensor_data[i]}, ") + header_file.write("};\n\n") + + +def create_headers(image_name): + """ + This function generates C header files for the input and output arrays required to run inferences + """ + img_path = os.path.join("./", f"{image_name}") + + # Resize image to 32x320 + img = cv2.imread(img_path) + img = resize_norm_img(img, [3,32,320]) + img_data = img.astype("float32") + + # # Add the batch dimension, as we are expecting 4-dimensional input: NCHW. + img_data = np.expand_dims(img_data, axis=0) + + # Create input header file + create_header_file("inputs", "input", img_data, "./include") + # Create output header file + output_data = np.zeros([7760], np.float) + create_header_file( + "outputs", + "output", + output_data, + "./include", + ) + + +if __name__ == "__main__": + create_headers(sys.argv[1]) diff --git a/deploy/tvm/corstone300.ld b/deploy/tvm/corstone300.ld new file mode 100644 index 0000000000000000000000000000000000000000..e52b23da33601b8b858f11902e6553687cfbf352 --- /dev/null +++ b/deploy/tvm/corstone300.ld @@ -0,0 +1,295 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +/*------------------ Reference System Memories ------------- + +===================+============+=======+============+============+ + | Memory | Address | Size | CPU Access | NPU Access | + +===================+============+=======+============+============+ + | ITCM | 0x00000000 | 512KB | Yes (RO) | No | + +-------------------+------------+-------+------------+------------+ + | DTCM | 0x20000000 | 512KB | Yes (R/W) | No | + +-------------------+------------+-------+------------+------------+ + | SSE-300 SRAM | 0x21000000 | 2MB | Yes (R/W) | Yes (R/W) | + +-------------------+------------+-------+------------+------------+ + | Data SRAM | 0x01000000 | 2MB | Yes (R/W) | Yes (R/W) | + +-------------------+------------+-------+------------+------------+ + | DDR | 0x60000000 | 32MB | Yes (R/W) | Yes (R/W) | + +-------------------+------------+-------+------------+------------+ */ + +/*---------------------- ITCM Configuration ---------------------------------- + Flash Configuration + Flash Base Address <0x0-0xFFFFFFFF:8> + Flash Size (in Bytes) <0x0-0xFFFFFFFF:8> + + -----------------------------------------------------------------------------*/ +__ROM_BASE = 0x00000000; +__ROM_SIZE = 0x00080000; + +/*--------------------- DTCM RAM Configuration ---------------------------- + RAM Configuration + RAM Base Address <0x0-0xFFFFFFFF:8> + RAM Size (in Bytes) <0x0-0xFFFFFFFF:8> + + -----------------------------------------------------------------------------*/ +__RAM_BASE = 0x20000000; +__RAM_SIZE = 0x00080000; + +/*----------------------- Data SRAM Configuration ------------------------------ + Data SRAM Configuration + DATA_SRAM Base Address <0x0-0xFFFFFFFF:8> + DATA_SRAM Size (in Bytes) <0x0-0xFFFFFFFF:8> + + -----------------------------------------------------------------------------*/ +__DATA_SRAM_BASE = 0x01000000; +__DATA_SRAM_SIZE = 0x00200000; + +/*--------------------- Embedded SRAM Configuration ---------------------------- + SRAM Configuration + SRAM Base Address <0x0-0xFFFFFFFF:8> + SRAM Size (in Bytes) <0x0-0xFFFFFFFF:8> + + -----------------------------------------------------------------------------*/ +__SRAM_BASE = 0x21000000; +__SRAM_SIZE = 0x00200000; + +/*--------------------- Stack / Heap Configuration ---------------------------- + Stack / Heap Configuration + Stack Size (in Bytes) <0x0-0xFFFFFFFF:8> + Heap Size (in Bytes) <0x0-0xFFFFFFFF:8> + + -----------------------------------------------------------------------------*/ +__STACK_SIZE = 0x00008000; +__HEAP_SIZE = 0x00008000; + +/*--------------------- Embedded RAM Configuration ---------------------------- + DDR Configuration + DDR Base Address <0x0-0xFFFFFFFF:8> + DDR Size (in Bytes) <0x0-0xFFFFFFFF:8> + + -----------------------------------------------------------------------------*/ +__DDR_BASE = 0x60000000; +__DDR_SIZE = 0x02000000; + +/* + *-------------------- <<< end of configuration section >>> ------------------- + */ + +MEMORY +{ + ITCM (rx) : ORIGIN = __ROM_BASE, LENGTH = __ROM_SIZE + DTCM (rwx) : ORIGIN = __RAM_BASE, LENGTH = __RAM_SIZE + DATA_SRAM (rwx) : ORIGIN = __DATA_SRAM_BASE, LENGTH = __DATA_SRAM_SIZE + SRAM (rwx) : ORIGIN = __SRAM_BASE, LENGTH = __SRAM_SIZE + DDR (rwx) : ORIGIN = __DDR_BASE, LENGTH = __DDR_SIZE +} + +/* Linker script to place sections and symbol values. Should be used together + * with other linker script that defines memory regions ITCM and RAM. + * It references following symbols, which must be defined in code: + * Reset_Handler : Entry of reset handler + * + * It defines following symbols, which code can use without definition: + * __exidx_start + * __exidx_end + * __copy_table_start__ + * __copy_table_end__ + * __zero_table_start__ + * __zero_table_end__ + * __etext + * __data_start__ + * __preinit_array_start + * __preinit_array_end + * __init_array_start + * __init_array_end + * __fini_array_start + * __fini_array_end + * __data_end__ + * __bss_start__ + * __bss_end__ + * __end__ + * end + * __HeapLimit + * __StackLimit + * __StackTop + * __stack + */ +ENTRY(Reset_Handler) + +SECTIONS +{ + /* .ddr is placed before .text so that .rodata.tvm is encountered before .rodata* */ + .ddr : + { + . = ALIGN (16); + *(.rodata.tvm) + . = ALIGN (16); + *(.data.tvm); + . = ALIGN(16); + } > DDR + + .text : + { + KEEP(*(.vectors)) + *(.text*) + + KEEP(*(.init)) + KEEP(*(.fini)) + + /* .ctors */ + *crtbegin.o(.ctors) + *crtbegin?.o(.ctors) + *(EXCLUDE_FILE(*crtend?.o *crtend.o) .ctors) + *(SORT(.ctors.*)) + *(.ctors) + + /* .dtors */ + *crtbegin.o(.dtors) + *crtbegin?.o(.dtors) + *(EXCLUDE_FILE(*crtend?.o *crtend.o) .dtors) + *(SORT(.dtors.*)) + *(.dtors) + + *(.rodata*) + + KEEP(*(.eh_frame*)) + } > ITCM + + .ARM.extab : + { + *(.ARM.extab* .gnu.linkonce.armextab.*) + } > ITCM + + __exidx_start = .; + .ARM.exidx : + { + *(.ARM.exidx* .gnu.linkonce.armexidx.*) + } > ITCM + __exidx_end = .; + + .copy.table : + { + . = ALIGN(4); + __copy_table_start__ = .; + LONG (__etext) + LONG (__data_start__) + LONG (__data_end__ - __data_start__) + /* Add each additional data section here */ + __copy_table_end__ = .; + } > ITCM + + .zero.table : + { + . = ALIGN(4); + __zero_table_start__ = .; + __zero_table_end__ = .; + } > ITCM + + /** + * Location counter can end up 2byte aligned with narrow Thumb code but + * __etext is assumed by startup code to be the LMA of a section in DTCM + * which must be 4byte aligned + */ + __etext = ALIGN (4); + + .sram : + { + . = ALIGN(16); + } > SRAM AT > SRAM + + .data : AT (__etext) + { + __data_start__ = .; + *(vtable) + *(.data) + *(.data.*) + + . = ALIGN(4); + /* preinit data */ + PROVIDE_HIDDEN (__preinit_array_start = .); + KEEP(*(.preinit_array)) + PROVIDE_HIDDEN (__preinit_array_end = .); + + . = ALIGN(4); + /* init data */ + PROVIDE_HIDDEN (__init_array_start = .); + KEEP(*(SORT(.init_array.*))) + KEEP(*(.init_array)) + PROVIDE_HIDDEN (__init_array_end = .); + + + . = ALIGN(4); + /* finit data */ + PROVIDE_HIDDEN (__fini_array_start = .); + KEEP(*(SORT(.fini_array.*))) + KEEP(*(.fini_array)) + PROVIDE_HIDDEN (__fini_array_end = .); + + KEEP(*(.jcr*)) + . = ALIGN(4); + /* All data end */ + __data_end__ = .; + + } > DTCM + + .bss.noinit (NOLOAD): + { + . = ALIGN(16); + *(.bss.noinit.*) + . = ALIGN(16); + } > SRAM AT > SRAM + + .bss : + { + . = ALIGN(4); + __bss_start__ = .; + *(.bss) + *(.bss.*) + *(COMMON) + . = ALIGN(4); + __bss_end__ = .; + } > DTCM AT > DTCM + + .data_sram : + { + . = ALIGN(16); + } > DATA_SRAM + + .heap (COPY) : + { + . = ALIGN(8); + __end__ = .; + PROVIDE(end = .); + . = . + __HEAP_SIZE; + . = ALIGN(8); + __HeapLimit = .; + } > DTCM + + .stack (ORIGIN(DTCM) + LENGTH(DTCM) - __STACK_SIZE) (COPY) : + { + . = ALIGN(8); + __StackLimit = .; + . = . + __STACK_SIZE; + . = ALIGN(8); + __StackTop = .; + } > DTCM + PROVIDE(__stack = __StackTop); + + /* Check if data + stack exceeds DTCM limit */ + ASSERT(__StackLimit >= __bss_end__, "region DTCM overflowed with stack") +} diff --git a/deploy/tvm/imgs_words_en/word_10.png b/deploy/tvm/imgs_words_en/word_10.png new file mode 100644 index 0000000000000000000000000000000000000000..07370f757ea83d5e3c5b1f7498b1f95d3aec2d18 Binary files /dev/null and b/deploy/tvm/imgs_words_en/word_10.png differ diff --git a/deploy/tvm/imgs_words_en/word_116.png b/deploy/tvm/imgs_words_en/word_116.png new file mode 100644 index 0000000000000000000000000000000000000000..fd000ff60b0a7a87c8c16eb1409aa0548228e3f9 Binary files /dev/null and b/deploy/tvm/imgs_words_en/word_116.png differ diff --git a/deploy/tvm/include/crt_config.h b/deploy/tvm/include/crt_config.h new file mode 100644 index 0000000000000000000000000000000000000000..4b9ccca02b269b289d029d2b791b37a88743148b --- /dev/null +++ b/deploy/tvm/include/crt_config.h @@ -0,0 +1,26 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +#ifndef TVM_RUNTIME_CRT_CONFIG_H_ +#define TVM_RUNTIME_CRT_CONFIG_H_ + +/*! Log level of the CRT runtime */ +#define TVM_CRT_LOG_LEVEL TVM_CRT_LOG_LEVEL_DEBUG + +#endif // TVM_RUNTIME_CRT_CONFIG_H_ diff --git a/deploy/tvm/include/tvm_runtime.h b/deploy/tvm/include/tvm_runtime.h new file mode 100644 index 0000000000000000000000000000000000000000..2b59d9347027c3b6f0af6ced19bdfbdb37a7f940 --- /dev/null +++ b/deploy/tvm/include/tvm_runtime.h @@ -0,0 +1,55 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +#include +#include +#include +#include +#include + +#ifdef __cplusplus +extern "C" { +#endif + +void __attribute__((noreturn)) TVMPlatformAbort(tvm_crt_error_t error_code) { + printf("TVMPlatformAbort: %d\n", error_code); + printf("EXITTHESIM\n"); + exit(-1); +} + +tvm_crt_error_t TVMPlatformMemoryAllocate(size_t num_bytes, DLDevice dev, void** out_ptr) { + return kTvmErrorFunctionCallNotImplemented; +} + +tvm_crt_error_t TVMPlatformMemoryFree(void* ptr, DLDevice dev) { + return kTvmErrorFunctionCallNotImplemented; +} + +void TVMLogf(const char* msg, ...) { + va_list args; + va_start(args, msg); + vfprintf(stdout, msg, args); + va_end(args); +} + +TVM_DLL int TVMFuncRegisterGlobal(const char* name, TVMFunctionHandle f, int override) { return 0; } + +#ifdef __cplusplus +} +#endif diff --git a/deploy/tvm/requirements.txt b/deploy/tvm/requirements.txt new file mode 100644 index 0000000000000000000000000000000000000000..1bf86ed1107cb18bf1ea6ac8eb2cc1214d654b60 --- /dev/null +++ b/deploy/tvm/requirements.txt @@ -0,0 +1,3 @@ +paddlepaddle +numpy +opencv-python \ No newline at end of file diff --git a/deploy/tvm/run_demo.sh b/deploy/tvm/run_demo.sh new file mode 100755 index 0000000000000000000000000000000000000000..dd8e8f163d56899005f6a2811e2ed398b8498fc8 --- /dev/null +++ b/deploy/tvm/run_demo.sh @@ -0,0 +1,152 @@ +#!/bin/bash +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +set -e +set -u +set -o pipefail + +# Show usage +function show_usage() { + cat <&2 + show_usage >&2 + exit 1 + fi + ;; + + --ethosu_platform_path) + if [ $# -gt 1 ] + then + export ETHOSU_PLATFORM_PATH="$2" + shift 2 + else + echo 'ERROR: --ethosu_platform_path requires a non-empty argument' >&2 + show_usage >&2 + exit 1 + fi + ;; + + --fvp_path) + if [ $# -gt 1 ] + then + export PATH="$2/models/Linux64_GCC-6.4:$PATH" + shift 2 + else + echo 'ERROR: --fvp_path requires a non-empty argument' >&2 + show_usage >&2 + exit 1 + fi + ;; + + --cmake_path) + if [ $# -gt 1 ] + then + export CMAKE="$2" + shift 2 + else + echo 'ERROR: --cmake_path requires a non-empty argument' >&2 + show_usage >&2 + exit 1 + fi + ;; + + -*|--*) + echo "Error: Unknown flag: $1" >&2 + show_usage >&2 + exit 1 + ;; + esac +done + + +# Directories +script_dir="$( cd "$( dirname "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )" + + +# Make build directory +rm -rf build +make cleanall +mkdir -p build +cd build + +wget https://paddleocr.bj.bcebos.com/tvm/ocr_en.tar +tar -xf ocr_en.tar + +# Compile model for Arm(R) Cortex(R)-M55 CPU and CMSIS-NN +# An alternative to using "python3 -m tvm.driver.tvmc" is to call +# "tvmc" directly once TVM has been pip installed. +python3 -m tvm.driver.tvmc compile --target=cmsis-nn,c \ + --target-cmsis-nn-mcpu=cortex-m55 \ + --target-c-mcpu=cortex-m55 \ + --runtime=crt \ + --executor=aot \ + --executor-aot-interface-api=c \ + --executor-aot-unpacked-api=1 \ + --pass-config tir.usmp.enable=1 \ + --pass-config tir.usmp.algorithm=hill_climb \ + --pass-config tir.disable_storage_rewrite=1 \ + --pass-config tir.disable_vectorize=1 ocr_en/inference.pdmodel \ + --output-format=mlf \ + --model-format=paddle \ + --module-name=rec \ + --input-shapes x:[1,3,32,320] \ + --output=rec.tar +tar -xf rec.tar + +# Create C header files +cd .. +python3 ./convert_image.py imgs_words_en/word_116.png + +# Build demo executable +cd ${script_dir} +echo ${script_dir} +make + +# Run demo executable on the FVP +FVP_Corstone_SSE-300_Ethos-U55 -C cpu0.CFGDTCMSZ=15 \ +-C cpu0.CFGITCMSZ=15 -C mps3_board.uart0.out_file=\"-\" -C mps3_board.uart0.shutdown_tag=\"EXITTHESIM\" \ +-C mps3_board.visualisation.disable-visualisation=1 -C mps3_board.telnetterminal0.start_telnet=0 \ +-C mps3_board.telnetterminal1.start_telnet=0 -C mps3_board.telnetterminal2.start_telnet=0 -C mps3_board.telnetterminal5.start_telnet=0 \ +./build/demo \ No newline at end of file diff --git a/deploy/tvm/src/demo_bare_metal.c b/deploy/tvm/src/demo_bare_metal.c new file mode 100644 index 0000000000000000000000000000000000000000..3f5f1bc4b0fd152c37343689db77a1165de0f393 --- /dev/null +++ b/deploy/tvm/src/demo_bare_metal.c @@ -0,0 +1,78 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +#include +#include +#include + +#include "uart.h" + +// Header files generated by convert_image.py +#include "inputs.h" +#include "outputs.h" + + +int main(int argc, char** argv) { + char dict[]={"#0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~!\"#$%&'()*+,-./ "}; + int char_dict_nums = 97; + uart_init(); + printf("Starting ocr rec inference\n"); + struct tvmgen_rec_outputs rec_outputs = { + .output = output, + }; + struct tvmgen_rec_inputs rec_inputs = { + .x = input, + }; + + tvmgen_rec_run(&rec_inputs, &rec_outputs); + + // post process + int char_nums = output_len / char_dict_nums; + + int last_index = 0; + float score = 0.f; + int count = 0; + + printf("text: "); + for (int i = 0; i < char_nums; i++) { + int argmax_idx = 0; + float max_value = 0.0f; + for (int j = 0; j < char_dict_nums; j++){ + if (output[i * char_dict_nums + j] > max_value){ + max_value = output[i * char_dict_nums + j]; + argmax_idx = j; + } + } + if (argmax_idx > 0 && (!(i > 0 && argmax_idx == last_index))) { + score += max_value; + count += 1; + // printf("%d,%f,%c\n", argmax_idx, max_value, dict[argmax_idx]); + printf("%c", dict[argmax_idx]); + } + last_index = argmax_idx; + } + score /= count; + printf(", score: %f\n", score); + + // The FVP will shut down when it receives "EXITTHESIM" on the UART + printf("EXITTHESIM\n"); + while (1 == 1) + ; + return 0; +}