未验证 提交 54a75ecb 编写于 作者: X xiebaiyuan 提交者: GitHub

remove paddle mobile old project , never say good bye (#4421)

* remove paddle mobile old project.  never say good bye

* test=develop
上级 4f53ecaa

要显示的变更太多。

To preserve performance only 1000 of 1000+ files are displayed.
...@@ -16,12 +16,6 @@ cmake_minimum_required(VERSION 3.0) ...@@ -16,12 +16,6 @@ cmake_minimum_required(VERSION 3.0)
set(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} "${CMAKE_CURRENT_SOURCE_DIR}/cmake") set(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} "${CMAKE_CURRENT_SOURCE_DIR}/cmake")
include(lite_utils) include(lite_utils)
lite_option(WITH_PADDLE_MOBILE "Use the paddle-mobile legacy build" OFF)
if (WITH_PADDLE_MOBILE)
add_subdirectory(mobile)
return()
endif(WITH_PADDLE_MOBILE)
set(PADDLE_SOURCE_DIR ${CMAKE_CURRENT_SOURCE_DIR}) set(PADDLE_SOURCE_DIR ${CMAKE_CURRENT_SOURCE_DIR})
set(PADDLE_BINARY_DIR ${CMAKE_CURRENT_BINARY_DIR}) set(PADDLE_BINARY_DIR ${CMAKE_CURRENT_BINARY_DIR})
set(CMAKE_CXX_STANDARD 11) set(CMAKE_CXX_STANDARD 11)
......
---
Language: Cpp
BasedOnStyle: Google
Standard: Cpp11
...
Checks: >
*
-android-*
-bugprone-bool-pointer-implicit-conversion
-cert-env33-c
-cert-dcl50-cpp
-cert-dcl59-cpp
-cppcoreguidelines-*
-fuchsia-*
-google-*
google-default-arguments
google-explicit-constructor
google-runtime-member-string-references
google-runtime-operator
-hicpp-braces-around-statements
-hicpp-named-parameter
-hicpp-no-array-decay
-hicpp-no-assembler
-hicpp-no-malloc
-hicpp-function-size
-hicpp-special-member-functions
-hicpp-vararg
-llvm-*
-objc-*
-readability-else-after-return
-readability-implicit-bool-conversion
-readability-named-parameter
-readability-simplify-boolean-expr
-readability-braces-around-statements
-readability-identifier-naming
-readability-function-size
-readability-redundant-member-init
-misc-bool-pointer-implicit-conversion
-misc-definitions-in-headers
-misc-unused-alias-decls
-misc-unused-parameters
-misc-unused-using-decls
-modernize-use-using
-modernize-use-default-member-init
-clang-diagnostic-*
-clang-analyzer-*
WarningsAsErrors: '*'
HeaderFilterRegex: ''
AnalyzeTemporaryDtors: false
FormatStyle: none
User: allonli
CheckOptions:
- key: google-readability-braces-around-statements.ShortStatementLines
value: '1'
- key: google-readability-function-size.StatementThreshold
value: '800'
- key: google-readability-namespace-comments.ShortNamespaceLines
value: '10'
- key: google-readability-namespace-comments.SpacesBeforeComments
value: '2'
- key: modernize-loop-convert.MaxCopySize
value: '16'
- key: modernize-loop-convert.MinConfidence
value: reasonable
- key: modernize-loop-convert.NamingStyle
value: CamelCase
- key: modernize-pass-by-value.IncludeStyle
value: llvm
- key: modernize-replace-auto-ptr.IncludeStyle
value: llvm
- key: modernize-use-nullptr.NullMacros
value: 'NULL'
opencl_kernels.cpp
# Prerequisites
*.d
# Compiled Object files
*.slo
*.lo
*.o
*.obj
# Precompiled Headers
*.gch
*.pch
# Compiled Dynamic libraries
*.so
*.dylib
*.dll
# Fortran module files
*.mod
*.smod
# Compiled Static libraries
*.lai
*.la
*.lib
*.a
# Executables
*.exe
*.out
*.app
.DS_Store
build/
.idea/
CMakeCache.txt
CMakeFiles/
Makefile
cmake_install.cmake
*.cbp
paddle-mobile.cbp
.idea
compile_commands.json
cmake-build-debug/
cmake-build-release/
test/models/
test/images/
# Emacs intermediate files
*~
# CMake building directory
build
# clion building directories
cmake-build-debug
cmake-build-release
# ios
tools/libomp.a
# ios demo
demo/ios/PaddleMobileDemo/PaddleMobileDemo/googlenet_combine/
demo/ios/PaddleMobileDemo/PaddleMobileDemo/*.jpg
demo/ios/PaddleMobileDemo/PaddleMobileDemo/PaddleMobile/*.a
*.xcuserstate
/tools/quantification/quantify
# metal
Podfile.lock
metal/Pods/
SwiftProtobuf.framework
paddle-mobile.xcworkspace
metal/models/
metal/images/
*.a
metal/paddle-mobile/paddle-mobile/CPU/libpaddle-mobile.a
*.xcuserdatad/
*/xcuserdata/
/venv/
metal/paddle-mobile-demo/paddle-mobile-demo/images
metal/paddle-mobile-demo/paddle-mobile-demo/models
metal/paddle-mobile-demo/paddle-mobile-demo/Resources
metal/paddle-mobile-demo/paddle-mobile-demo/Resources/images
metal/paddle-mobile-demo/paddle-mobile-demo/Resources/models
metal/MobileNetDemo/MobileNetDemo/Resources
third_party/opencl/OpenCL-Headers
# pre-commit hook configuration.
# Every hook restricts itself through `files:` to sources under mobile/src/
# (most of them also cover mobile/test/) and additionally excludes lite/.
repos:
- repo: https://github.com/Lucas-C/pre-commit-hooks.git
sha: v1.0.1
hooks:
# Note: remove-crlf only covers mobile/src/, unlike the hooks below.
- id: remove-crlf
files: ^(mobile/src/).*\.(md|py|mm|swift|java|c|cc|cxx|cpp|cu|h|hpp|hxx)$
exclude: ^(lite/)
- id: remove-tabs
files: ^(mobile/test/|mobile/src/).*\.(md|py|mm|swift|java|c|cc|cxx|cpp|cu|h|hpp|hxx)$
exclude: ^(lite/)
- repo: https://github.com/pre-commit/pre-commit-hooks
sha: 5bf6c09bfa1297d3692cadd621ef95f1284e33c0
hooks:
- id: check-added-large-files
exclude: ^(lite/)
- id: check-merge-conflict
exclude: ^(lite/)
- id: check-symlinks
exclude: ^(lite/)
# The negative lookahead is meant to skip paths that contain "tar.gz".
- id: detect-private-key
files: (?!.*tar.gz)^.*$
exclude: ^(lite/)
- id: end-of-file-fixer
files: ^(mobile/test/|mobile/src/).*\.(md|py|mm|swift|java|c|cc|cxx|cpp|h|hpp|hxx)$
exclude: ^(lite/)
- id: trailing-whitespace
files: ^(mobile/test/|mobile/src/).*\.(md|py|mm|swift|java|c|cc|cxx|cpp|h|hpp|hxx)$
exclude: ^(lite/)
# Local hook: runs the repository's copyright-header script.
# NOTE(review): the `exclude` alternatives below are separated by " | " with
# literal spaces, which become part of each alternative -- the pattern
# probably does not exclude what was intended. Verify before relying on it.
- repo: local
hooks:
- id: copyright
name: copyright
entry: python ./mobile/tools/pre-commit.hooks/copyright.hook
language: system
files: ^(mobile/test/|mobile/src/).*\.(c|cc|cxx|cpp|h|hpp|hxx|py)$
exclude: (?!.*third_party)^.*$ | (?!.*book)^.*$ | ^(lite/)
# Local hook: reformats C/C++ sources in place via the repository's wrapper.
- repo: local
hooks:
- id: clang-format
name: clang-format
description: Format files with ClangFormat.
entry: bash ./mobile/tools/pre-commit.hooks/clang-format.hook -i
language: system
files: ^(mobile/test/|mobile/src/).*\.(c|cc|cxx|cpp|h|hpp|hxx)$
exclude: ^(lite/)
# Local hook: cpplint style check.
# NOTE(review): besides the " | " spacing issue noted for the copyright hook,
# this pattern contains "$i" and a bare "*\.pb\.cpp" alternative, both of
# which look like typos -- confirm the intended exclusions.
- repo: local
hooks:
- id: cpplint
name: cpplint
description: Check C++ code style using cpplint.
entry: bash ./mobile/tools/pre-commit.hooks/cpplint.hook
language: system
files: ^(mobile/test/|mobile/src/).*\.(c|cc|cxx|cpp|h|hpp|hxx)$
exclude: (?!.*third_party)^.*$ | (?!.*book)^.*$i | *\.pb\.cpp | ^(lite/)
#
# Disabled clang-tidy hook, kept for reference:
#- repo: local
# hooks:
# - id: clang-tidy
# name: clang-tidy
# description: Check C++ code style using clang-tidy.
# entry: bash ./tools/pre-commit.hooks/.clang-tidy.hook -i
# language: system
# files: (src).*\.(c|cc|cxx|cpp|h|hpp|hxx)$
# Travis CI configuration: the only job step is the pre-commit style check.
language: cpp
cache: ccache
sudo: required
dist: trusty
os:
- linux
addons:
apt:
packages:
- git
- python
- python-pip
- python2.7-dev
- libc6-i386
- curl
compiler:
- clang
before_install:
- sudo pip install -U virtualenv pre-commit pip
# NOTE(review): a "download and install recent cmake" step was announced here
# but no such command follows -- confirm whether it is still needed.
# The script section defines a perl-based `timeout` helper (alarm + exec) and
# runs the pre-commit job under a 600 second limit. Exit status 0 and 142 are
# both accepted; 142 is presumably the status reported when the alarm signal
# terminates the job (128 + SIGALRM), i.e. a timeout is not treated as failure.
script:
- |
function timeout() { perl -e 'alarm shift; exec @ARGV' "$@"; }
- |
timeout 600 .travis/pre-commit-job.sh # 10min timeout
RESULT=$?; if [ $RESULT -eq 0 ] || [ $RESULT -eq 142 ]; then true; else exit 1; fi;
notifications:
email:
on_success: change
on_failure: always
#!/bin/bash
# CI entry point: installs the pre-commit hooks and runs all of them against
# every file in the repository. Any failure prints a hint on stderr and exits
# with status 1.

# Printed whenever the script exits before the final `trap : 0` is reached.
function abort(){
    echo "Your change doesn't follow Paddle-Mobile's code style" 1>&2
    echo "Please use pre-commit to auto-format your code." 1>&2
    exit 1
}

# Run abort() on any exit (signal 0 == EXIT) and stop at the first failing command.
trap 'abort' 0
set -e

# Move to the repository root: this script lives one directory below it.
# Quoted so that checkout paths containing spaces work.
cd "$(dirname "$0")"
cd ..
export PATH=/usr/bin:$PATH
pre-commit install

if ! pre-commit run -a ; then
    # Show what the hooks changed to make the CI log actionable.
    ls -lh
    git diff --exit-code
    exit 1
fi

# Success: disarm the EXIT trap so abort() does not fire.
trap : 0
cmake_minimum_required(VERSION 3.0.0)

# ---- Basic build options -------------------------------------------------
# OpenMP support defaults to OFF on every platform (iOS included).
option(USE_OPENMP "build with openmp support" OFF)
option(USE_EXCEPTION "build with exception" ON)
option(WITH_LOGGING "print logging for debug" OFF)
# Turn WITH_SYMBOL off when using the JNI or the iOS io layer.
option(WITH_SYMBOL "build with all symbols" ON)
option(WITH_PROFILE "print op profile for debug" OFF)
option(WITH_TEST "build with unit tests" ON)

# ---- Target platform selection: CPU, GPU_CL, FPGA --------------------------
option(CPU "build with arm CPU support" ON)
option(GPU_CL "build with OpenCL support" ON)
option(FPGA "build with FPGA support" OFF)

# The FPGA flavour options are only declared when FPGA support is requested.
if(FPGA)
  option(FPGAV1 "build with fpga v1 support" ON)
  option(FPGAV2 "build with fpga v2 support" OFF)
  option(FPGAKD "build with fpga KD support" OFF)
endif()

project(paddle-mobile)
# source code
# Recursively collect every implementation file under src/ into
# PADDLE_MOBILE_CC and every .h header into PADDLE_MOBILE_H. The platform
# sections further down prune these lists.
file(GLOB_RECURSE PADDLE_MOBILE_CC src/*.cc src/*.cpp src/*.c src/*.mm)
file(GLOB_RECURSE PADDLE_MOBILE_H src/*.h)
include_directories(src/)
# build flags
# Optimisation flags go in front of any pre-existing CMAKE_CXX_FLAGS and
# -Wno-attributes goes after them.
set(CMAKE_CXX_FLAGS "-O3 -s -DNDEBUG ${CMAKE_CXX_FLAGS} -Wno-attributes")
# Language standard: gnu++11 with libc++ plus ARM/Objective-C flags and
# embedded bitcode on iOS, plain c++11 everywhere else. In both cases the new
# flags are prepended to the flags assembled so far.
if(IS_IOS)
set(CMAKE_CXX_FLAGS "-mfpu=neon -marm -fobjc-abi-version=2 -fobjc-arc \
-std=gnu++11 -stdlib=libc++ -isysroot ${CMAKE_OSX_SYSROOT} ${CMAKE_CXX_FLAGS}")
add_compile_options(-fembed-bitcode)
else()
set(CMAKE_CXX_FLAGS "-std=c++11 ${CMAKE_CXX_FLAGS}")
endif()
# ---- Optional features -----------------------------------------------------
# OpenMP: compiler switch plus the macro the sources test for.
if(USE_OPENMP)
  set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fopenmp")
  add_definitions(-DPADDLE_MOBILE_USE_OPENMP)
endif()

# Debug logging.
if(WITH_LOGGING)
  message(STATUS "Debugging mode")
  add_definitions(-DPADDLE_MOBILE_DEBUG)
endif()

# Hide symbols by default unless a build with all symbols was requested.
if(NOT WITH_SYMBOL)
  add_definitions(-fvisibility=hidden)
  add_definitions(-fvisibility-inlines-hidden)
endif()

# C++ exceptions on or off.
if(USE_EXCEPTION)
  message(STATUS "Use exception")
  add_definitions(-DENABLE_EXCEPTION)
  add_definitions(-fexceptions)
else()
  add_definitions(-fno-exceptions)
endif()

# Per-operator profiling output.
if(WITH_PROFILE)
  add_definitions(-DPADDLE_MOBILE_PROFILE)
endif()

# ---- Platform control --------------------------------------------------------
# ARM Linux builds pull in their own platform settings.
if(ARM_LINUX)
  include("${CMAKE_CURRENT_LIST_DIR}/tools/arm-platform.cmake")
endif()
# ---- ARM CPU backend -----------------------------------------------------------
if(CPU)
  add_definitions(-DPADDLE_MOBILE_CPU)
else()
  # CPU backend disabled: drop the ARM kernel sources and headers.
  file(GLOB_RECURSE _tmp_list src/operators/kernel/arm/*.cpp src/operators/kernel/arm/*.cc)
  foreach(arm_source ${_tmp_list})
    list(REMOVE_ITEM PADDLE_MOBILE_CC ${arm_source})
  endforeach()

  file(GLOB_RECURSE _tmp_list_h src/operators/kernel/arm/*.h)
  foreach(arm_header ${_tmp_list_h})
    list(REMOVE_ITEM PADDLE_MOBILE_H ${arm_header})
  endforeach()
endif()

# ---- OpenCL GPU backend --------------------------------------------------------
if(GPU_CL)
  # Backend macro and the OpenCL API version the headers should expose.
  add_definitions(-DPADDLE_MOBILE_CL)
  add_definitions(-DCL_TARGET_OPENCL_VERSION=220)

  # Pick the bundled OpenCL library that matches the Android ABI.
  if(ANDROID_ABI STREQUAL "arm64-v8a")
    set(_opencl_lib_name libOpenCL-64.so)
  else()
    set(_opencl_lib_name libOpenCL.so)
  endif()
  link_libraries(${CMAKE_CURRENT_LIST_DIR}/third_party/opencl/${_opencl_lib_name})

  include_directories(third_party/opencl/OpenCL-Headers)
else()
  # OpenCL backend disabled: drop its framework and kernel sources.
  file(GLOB_RECURSE _tmp_list src/framework/cl/*.cpp src/operators/kernel/cl/*.cpp)
  foreach(cl_source ${_tmp_list})
    list(REMOVE_ITEM PADDLE_MOBILE_CC ${cl_source})
  endforeach()

  file(GLOB_RECURSE _tmp_list_h src/framework/cl/*.h)
  foreach(cl_header ${_tmp_list_h})
    list(REMOVE_ITEM PADDLE_MOBILE_H ${cl_header})
  endforeach()
endif()
# ---- FPGA backend --------------------------------------------------------------
# Prunes PADDLE_MOBILE_CC / PADDLE_MOBILE_H depending on the FPGA flavour
# (FPGAV1, FPGAV2, FPGAKD). Every removal loops over the glob result so that
# an empty glob never calls list(REMOVE_ITEM) without a value.
if(FPGA)
  # Drop the generic math sources (and any .cc file under the FPGA kernel
  # directory) together with the math headers ...
  file(GLOB_RECURSE _tmp_list src/operators/math/*.cpp src/operators/math/*.cc src/operators/kernel/fpga/*.cc)
  foreach(f ${_tmp_list})
    list(REMOVE_ITEM PADDLE_MOBILE_CC ${f})
  endforeach()

  file(GLOB_RECURSE _tmp_list_h src/operators/math/*.h)
  foreach(f ${_tmp_list_h})
    list(REMOVE_ITEM PADDLE_MOBILE_H ${f})
  endforeach()

  # ... then add back the few math files that are still wanted.
  # The header list variable is PADDLE_MOBILE_H: CMake variable names are
  # case-sensitive, so appending to "PADDLE_MOBILE_h" silently filled an
  # unrelated variable and the headers never reached the target.
  # Absolute paths are used for the headers so they match the entries
  # produced by file(GLOB_RECURSE) and can be removed again by later
  # list(REMOVE_ITEM ... ${CMAKE_CURRENT_SOURCE_DIR}/...) calls.
  list(APPEND PADDLE_MOBILE_CC src/operators/math/softmax.cpp)
  list(APPEND PADDLE_MOBILE_H ${CMAKE_CURRENT_SOURCE_DIR}/src/operators/math/softmax.h)
  list(APPEND PADDLE_MOBILE_H ${CMAKE_CURRENT_SOURCE_DIR}/src/operators/math/math_func_neon.h)

  # FPGA V1: keep V1, remove the V2 and KD implementations.
  if(FPGAV1)
    add_definitions(-DPADDLE_MOBILE_FPGA)
    message("FPGA_V1 enabled")
    add_definitions(-DPADDLE_MOBILE_FPGA_V1)

    file(GLOB_RECURSE _tmp_list src/operators/kernel/fpga/V2/*.cpp src/fpga/V2/*.cpp)
    foreach(f ${_tmp_list})
      list(REMOVE_ITEM PADDLE_MOBILE_CC ${f})
    endforeach()
    file(GLOB_RECURSE _tmp_list src/operators/kernel/fpga/V2/*.h src/fpga/V2/*.h)
    foreach(f ${_tmp_list})
      list(REMOVE_ITEM PADDLE_MOBILE_H ${f})
    endforeach()

    file(GLOB_RECURSE _tmp_list src/operators/kernel/fpga/KD/*.cpp src/fpga/KD/*.cpp)
    foreach(f ${_tmp_list})
      list(REMOVE_ITEM PADDLE_MOBILE_CC ${f})
    endforeach()
    file(GLOB_RECURSE _tmp_list src/operators/kernel/fpga/KD/*.h src/operators/kernel/fpga/KD/*.hpp
         src/fpga/KD/*.h src/fpga/KD/*.hpp)
    foreach(f ${_tmp_list})
      list(REMOVE_ITEM PADDLE_MOBILE_H ${f})
    endforeach()
  endif()

  # FPGA V2: keep V2, remove the V1 and KD implementations.
  if(FPGAV2)
    add_definitions(-DPADDLE_MOBILE_FPGA)
    message("FPGA_V2 enabled")
    add_definitions(-DPADDLE_MOBILE_FPGA_V2)

    file(GLOB_RECURSE _tmp_list src/operators/kernel/fpga/V1/*.cpp src/fpga/V1/*.cpp)
    foreach(f ${_tmp_list})
      list(REMOVE_ITEM PADDLE_MOBILE_CC ${f})
    endforeach()
    file(GLOB_RECURSE _tmp_list src/operators/kernel/fpga/V1/*.h src/fpga/V1/*.h)
    foreach(f ${_tmp_list})
      list(REMOVE_ITEM PADDLE_MOBILE_H ${f})
    endforeach()

    file(GLOB_RECURSE _tmp_list src/operators/kernel/fpga/KD/*.cpp src/fpga/KD/*.cpp)
    foreach(f ${_tmp_list})
      list(REMOVE_ITEM PADDLE_MOBILE_CC ${f})
    endforeach()
    file(GLOB_RECURSE _tmp_list src/operators/kernel/fpga/KD/*.h src/operators/kernel/fpga/KD/*.hpp
         src/fpga/KD/*.h src/fpga/KD/*.hpp)
    foreach(f ${_tmp_list})
      list(REMOVE_ITEM PADDLE_MOBILE_H ${f})
    endforeach()
  endif()

  # FPGA KD: remove the V1 and V2 implementations and add the
  # central-arm-func sources and headers.
  if(FPGAKD)
    message("FPGAKD enabled")
    add_definitions(-DPADDLE_MOBILE_FPGA_KD)

    file(GLOB_RECURSE _tmp_list src/operators/kernel/fpga/V1/*.cpp src/fpga/V1/*.cpp)
    foreach(f ${_tmp_list})
      list(REMOVE_ITEM PADDLE_MOBILE_CC ${f})
    endforeach()
    file(GLOB_RECURSE _tmp_list src/operators/kernel/fpga/V1/*.h src/fpga/V1/*.h)
    foreach(f ${_tmp_list})
      list(REMOVE_ITEM PADDLE_MOBILE_H ${f})
    endforeach()

    file(GLOB_RECURSE _tmp_list src/operators/kernel/fpga/V2/*.cpp src/fpga/V2/*.cpp)
    foreach(f ${_tmp_list})
      list(REMOVE_ITEM PADDLE_MOBILE_CC ${f})
    endforeach()
    file(GLOB_RECURSE _tmp_list src/operators/kernel/fpga/V2/*.h src/fpga/V2/*.h)
    foreach(f ${_tmp_list})
      list(REMOVE_ITEM PADDLE_MOBILE_H ${f})
    endforeach()

    file(GLOB_RECURSE _tmp_list src/operators/kernel/central-arm-func/*.h)
    foreach(f ${_tmp_list})
      list(APPEND PADDLE_MOBILE_H ${f})
    endforeach()
    file(GLOB_RECURSE _tmp_list src/operators/kernel/central-arm-func/*.cpp)
    foreach(f ${_tmp_list})
      list(APPEND PADDLE_MOBILE_CC ${f})
    endforeach()
  endif()
else()
  # FPGA disabled: remove every FPGA kernel and driver source/header.
  file(GLOB_RECURSE _tmp_list src/operators/kernel/fpga/*.cpp src/operators/kernel/fpga/*.cc)
  foreach(f ${_tmp_list})
    list(REMOVE_ITEM PADDLE_MOBILE_CC ${f})
  endforeach()
  file(GLOB_RECURSE _tmp_list_h src/operators/kernel/fpga/*.h)
  foreach(f ${_tmp_list_h})
    list(REMOVE_ITEM PADDLE_MOBILE_H ${f})
  endforeach()

  file(GLOB_RECURSE _tmp_list src/fpga/*.cpp src/fpga/*.cc)
  foreach(f ${_tmp_list})
    list(REMOVE_ITEM PADDLE_MOBILE_CC ${f})
  endforeach()
  file(GLOB_RECURSE _tmp_list_h src/fpga/*.h)
  foreach(f ${_tmp_list_h})
    list(REMOVE_ITEM PADDLE_MOBILE_H ${f})
  endforeach()
endif()
# ---- Android-only pieces ---------------------------------------------------------
if(ANDROID_NDK_TOOLCHAIN_INCLUDED)
  # The Android log library is requested through the compiler flags.
  set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -llog")
else()
  # Not an Android NDK build: leave out the JNI binding and the NEON math header.
  list(REMOVE_ITEM PADDLE_MOBILE_H
       ${CMAKE_CURRENT_SOURCE_DIR}/src/io/jni/paddle_mobile_jni.h)
  list(REMOVE_ITEM PADDLE_MOBILE_CC
       ${CMAKE_CURRENT_SOURCE_DIR}/src/io/jni/paddle_mobile_jni.cpp)
  list(REMOVE_ITEM PADDLE_MOBILE_H
       ${CMAKE_CURRENT_SOURCE_DIR}/src/operators/math/math_func_neon.h)
endif()

# ---- iOS-only pieces -------------------------------------------------------------
if(NOT IS_IOS)
  # Not an iOS build: leave out the Objective-C++ io layer.
  list(REMOVE_ITEM PADDLE_MOBILE_H
       ${CMAKE_CURRENT_SOURCE_DIR}/src/io/ios_io/PaddleMobileCPU.h)
  list(REMOVE_ITEM PADDLE_MOBILE_CC
       ${CMAKE_CURRENT_SOURCE_DIR}/src/io/ios_io/PaddleMobileCPU.mm)
  list(REMOVE_ITEM PADDLE_MOBILE_H
       ${CMAKE_CURRENT_SOURCE_DIR}/src/io/ios_io/op_symbols.h)
endif()

# ---- Generator settings and output locations ---------------------------------------
set(CMAKE_VERBOSE_MAKEFILE ON)
set(CMAKE_EXPORT_COMPILE_COMMANDS ON)
# Archives, shared libraries and executables all land in "build".
set(CMAKE_ARCHIVE_OUTPUT_DIRECTORY build)
set(CMAKE_LIBRARY_OUTPUT_DIRECTORY build)
set(CMAKE_RUNTIME_OUTPUT_DIRECTORY build)
# ---- NET selection -------------------------------------------------------------
# NET is a cache string choosing which network/operator set is built. Its
# default follows the enabled FPGA flavour. Because set(... CACHE ...) without
# FORCE never overwrites an existing cache entry, a value supplied by the user
# (e.g. -DNET=googlenet) always wins.
if(FPGAV1)
  set(NET "FPGA_NET_V1" CACHE STRING "select net type")
elseif(FPGAV2)
  set(NET "FPGA_NET_V2" CACHE STRING "select net type")
elseif(FPGAKD)
  set(NET "FPGA_OPS_KD" CACHE STRING "select net type")
else()
  set(NET "default" CACHE STRING "select net type")
endif()

# Choices offered by cmake-gui / ccmake. "FPGA_OPS_KD" is listed as well so the
# default chosen for FPGAKD builds is one of the advertised values.
set_property(CACHE NET PROPERTY STRINGS "default" "googlenet" "mobilenet" "yolo" "squeezenet" "FPGA_NET_V1" "FPGA_NET_V2" "FPGA_OPS_KD" "NLP" "op")

# Operator selection, driven by NET, lives in a separate module.
include("${CMAKE_CURRENT_LIST_DIR}/tools/op.cmake")
# ---- Library target ------------------------------------------------------------
# Produces the "paddle-mobile" target:
#   * Android NDK and generic builds: a shared library.
#   * iOS: a static library; with OpenMP enabled the compiled archive is
#     merged with the bundled tools/libomp.a into libpaddle-mobile.a.
if(ANDROID_NDK_TOOLCHAIN_INCLUDED)
  # NOTE(review): CMAKE_CXX_FLAGS is assembled in this file as a single
  # space-separated string, so this de-duplication looks like a no-op --
  # confirm before removing it.
  list(REMOVE_DUPLICATES CMAKE_CXX_FLAGS)
  add_library(paddle-mobile SHARED ${PADDLE_MOBILE_CC} ${PADDLE_MOBILE_H})
elseif(IS_IOS)
  if(USE_OPENMP)
    # Stage 0 compiles the sources; the custom target below merges the result
    # with libomp.a.
    add_library(paddle-mobile-stage0 STATIC ${PADDLE_MOBILE_CC} ${PADDLE_MOBILE_H})
    # The custom target must not list itself in DEPENDS; the ordering against
    # the stage-0 library is expressed with add_dependencies() below.
    add_custom_target(paddle-mobile ALL
      COMMAND libtool -static -o ${CMAKE_BINARY_DIR}/libpaddle-mobile.a ${CMAKE_CURRENT_LIST_DIR}/tools/libomp.a $<TARGET_FILE:paddle-mobile-stage0>
      WORKING_DIRECTORY ${CMAKE_BINARY_DIR}
      VERBATIM
    )
    add_dependencies(paddle-mobile paddle-mobile-stage0)
  else()
    add_library(paddle-mobile STATIC ${PADDLE_MOBILE_CC} ${PADDLE_MOBILE_H})
  endif()
else()
  add_library(paddle-mobile SHARED ${PADDLE_MOBILE_CC} ${PADDLE_MOBILE_H})
endif()
# ---- Unit tests ------------------------------------------------------------------
# Tests are built when both WITH_TEST and WITH_SYMBOL are on (never for iOS);
# when that combination is off, FPGA builds still add them.
if(WITH_TEST AND WITH_SYMBOL)
  if(NOT IS_IOS)
    add_subdirectory(test)
  endif()
elseif(FPGA)
  add_subdirectory(test)
endif()

# To combine third-party static libraries into the paddle-mobile shared
# object, uncomment the following block:
# target_link_libraries(
# paddle-mobile
# -Wl,--whole-archive
# "path_to_third_party_static_library"
# -Wl,--no-whole-archive
# )
# 贡献代码
欢迎您对Paddle-Mobile项目的贡献。
我们诚挚的感谢你的贡献,这个文档描述了我们的工作方式和工作流程。Paddle-Mobile在PaddlePaddle org下,和服务器版本的Paddle工程的代码规范基本相同,开发者也可以同时参考Paddle的相关文档。
## Workflow
Paddle-Mobile 开发中使用到的几种模型在这个链接下载 [点我](https://mms-mis.cdn.bcebos.com/paddle-mobile/models.zip).
之后是贡献代码的主要流程。
### Fork
* Paddle-Mobile采用Pull Request的方式提交代码,禁止直接push,所有的代码都需要人工review。首先要fork一份Paddle-Mobile的代码 ["Fork" button](https://help.github.com/articles/fork-a-repo/).
* 跳转到[Paddle-Mobile](https://github.com/PaddlePaddle/paddle-mobile) GitHub首页,然后单击 `Fork` 按钮,生成自己目录下的仓库,比如 <https://github.com/你的用户名/paddle-mobile>
### Clone(克隆)
将远程仓库 clone 到本地:
```bash
➜ git clone https://github.com/你的用户名/paddle-mobile
➜ cd paddle-mobile
```
### 创建本地分支
Paddle-Mobile 和Paddle一样,目前使用[Git流分支模型](http://nvie.com/posts/a-successful-git-branching-model/)进行开发,测试,发行和维护,具体请参考 [Paddle 分支规范](https://github.com/PaddlePaddle/Paddle/blob/develop/doc/design/releasing_process.md#paddle-分支规范)
所有的 feature 和 bug fix 的开发工作都应该在一个新的分支上完成,一般从 `develop` 分支上创建新分支。
使用 `git checkout -b` 创建并切换到新分支。
```bash
➜ git checkout -b my-cool-stuff
```
值得注意的是,在 checkout 之前,需要保持当前分支目录 clean,否则会把 untracked 的文件也带到新分支上,这可以通过 `git status` 查看。
### 使用 `pre-commit` 钩子
Paddle 开发人员使用 [pre-commit](http://pre-commit.com/) 工具来管理 Git 预提交钩子。 它可以帮助我们格式化源代码(C++,Python),在提交(commit)前自动检查一些基本事宜(如每个文件只有一个 EOL,Git 中不要添加大文件等)。
`pre-commit`测试是 Travis-CI 中单元测试的一部分,不满足钩子的 PR 不能被提交到 Paddle,首先安装并在当前目录运行它:
```bash
pip install pre-commit
pre-commit run -a -v
```
Paddle-Mobile 使用 `clang-format` 来调整 C/C++ 源代码格式,在格式化代码时不同的`clang-format`版本会有不同的表现形态,和Paddle不同的是,Paddle-Mobile开发人员使用的是更新的5.0版本的llvm工具集。所以为了防止无法通过CI,请确保 `clang-format` 版本是 5.0 版本。
> 另外:通过`pip install pre-commit`和`conda install -c conda-forge pre-commit`安装的`yapf`稍有不同,Paddle 开发人员使用的是`pip install pre-commit`。
## 开始开发
在本例中,我删除了 README.md 中的一行,并创建了一个新文件。
通过 `git status` 查看当前状态,这会提示当前目录的一些变化,同时也可以通过 `git diff` 查看文件具体被修改的内容。
```bash
➜ git status
On branch test
Changes not staged for commit:
(use "git add <file>..." to update what will be committed)
(use "git checkout -- <file>..." to discard changes in working directory)
modified: README.md
Untracked files:
(use "git add <file>..." to include in what will be committed)
test
no changes added to commit (use "git add" and/or "git commit -a")
```
## 构建
paddle-mobile是为了移动端版本开发的,而移动端大多以arm平台为主。所以我们要交叉编译到arm平台。以cpu为例:
1. 安装NDK最新版
2. 配置ANDROID_NDK和NDK_ROOT环境变量
3. 开发,并写单元测试
4. sh build.sh
## 提交(commit)
接下来我们取消对 README.md 文件的改变,然后提交新添加的 test 文件。
```bash
➜ git checkout -- README.md
➜ git status
On branch test
Untracked files:
(use "git add <file>..." to include in what will be committed)
test
nothing added to commit but untracked files present (use "git add" to track)
➜ git add test
```
Git 每次提交代码,都需要写提交说明,这可以让其他人知道这次提交做了哪些改变,这可以通过`git commit` 完成。
```bash
▶ pre-commit run -a -v
[remove-crlf] CRLF end-lines remover........................................Passed
[remove-tabs] Tabs remover..................................................Passed
[check-added-large-files] Check for added large files.......................Passed
[check-merge-conflict] Check for merge conflicts............................Passed
[check-symlinks] Check for broken symlinks..................................Passed
[detect-private-key] Detect Private Key.....................................Passed
[end-of-file-fixer] Fix End of Files........................................Passed
[trailing-whitespace] Trim Trailing Whitespace..............................Passed
[copyright] copyright.......................................................Passed
[clang-format] clang-format.................................................Passed
[cpplint] cpplint...........................................................Passed
hookid: cpplint
Ignoring build_bak.sh; not a valid file name (c, cc, h, hpp, c++, h++, cu, cpp, hxx, cxx, cuh)
Done processing build_bak.sh
Ignoring build_bak.sh; not a valid file name (c, cc, h, hpp, c++, h++, cu, cpp, hxx, cxx, cuh)
Done processing build_bak.sh
```
## 保持本地仓库最新
在准备发起 Pull Request 之前,需要同步原仓库(<https://github.com/PaddlePaddle/paddle-mobile>)最新的代码。
首先通过 `git remote` 查看当前远程仓库的名字。
```bash
➜ git remote
origin
➜ git remote -v
origin https://github.com/USERNAME/paddle-mobile (fetch)
origin https://github.com/USERNAME/paddle-mobile (push)
```
这里 origin 是我们 clone 的远程仓库的名字,也就是自己用户名下的 paddle-mobile,接下来我们创建一个原始 paddle-mobile 仓库的远程主机,命名为 upstream。
```bash
➜ git remote add upstream https://github.com/PaddlePaddle/paddle-mobile
➜ git remote
origin
upstream
```
获取 upstream 的最新代码并更新当前分支。
```bash
➜ git fetch upstream
➜ git pull upstream develop
```
## Push 到远程仓库
将本地的修改推送到 GitHub 上,也就是 https://github.com/USERNAME/paddle-mobile。
```bash
# 推送到远程仓库 origin 的 my-cool-stuff 分支上
➜ git push origin my-cool-stuff
```
## 建立 Issue 并完成 Pull Request
建立一个 Issue 描述问题,并记录它的编号。
切换到所建分支,然后点击 `New pull request`
在 PR 的描述说明中,填写 `resolve #Issue编号` 可以在这个 PR 被 merge 后,自动关闭对应的 Issue
> 具体请见 <https://help.github.com/articles/closing-issues-via-commit-messages/>
## review
在接到PR后,可以看到该pr页面内正在运行CI。如果运行出现问题,可以点Details进入Travis平台上看详细内容。
![](http://otkwwi4x8.bkt.clouddn.com/2018-06-20-15294833030073.jpg)
可以在travis上看到更加详细的信息。
![](http://otkwwi4x8.bkt.clouddn.com/2018-06-20-15294833651326.jpg)
接下来等待 review,如果有需要修改的地方,参照上述步骤更新 origin 中的对应分支即可。
![](http://otkwwi4x8.bkt.clouddn.com/2018-06-20-15294877166787.jpg)
之后就可以提交代码了
## 删除远程分支
在 PR 被 merge 进主仓库后,我们可以在 PR 的页面删除远程仓库的分支。
<img width="775" alt="screen shot 2017-04-26 at 9 18 24 pm" src="https://cloud.githubusercontent.com/assets/11692045/25436457/e4cdd472-2ac5-11e7-9272-badc76c4a23e.png">
也可以使用 `git push origin :分支名` 删除远程分支,如:
```bash
➜ git push origin :my-cool-stuff
```
## 删除本地分支
最后,删除本地分支。
```bash
# 切换到 develop 分支
➜ git checkout develop
# 删除 my-cool-stuff 分支
➜ git branch -D my-cool-stuff
```
至此,我们就完成了一次代码贡献的过程。
## 提交代码的一些约定
为了使评审人在评审代码时更好地专注于代码本身,请您每次提交代码时,遵守以下约定:
1. 请保证Travis-CI 中单元测试能顺利通过。如果没过,说明提交的代码存在问题,评审人一般不做评审。
2. 提交Pull Request前:
- 请注意commit的数量:
- 原因:如果仅仅修改一个文件但提交了十几个commit,每个commit只做了少量的修改,这会给评审人带来很大困扰。评审人需要逐一查看每个commit才能知道做了哪些修改,且不排除commit之间的修改存在相互覆盖的情况。
- 建议:每次提交时,保持尽量少的commit,可以通过`git commit --amend`补充上次的commit。对已经Push到远程仓库的多个commit,可以参考[squash commits after push](http://stackoverflow.com/questions/5667884/how-to-squash-commits-in-git-after-they-have-been-pushed)
- 请注意每个commit的名称:应能反映当前commit的内容,不能太随意。
3. 如果解决了某个Issue的问题,请在该Pull Request的**第一个**评论框中加上:`fix #issue_number`,这样当该Pull Request被合并后,会自动关闭对应的Issue。关键词包括:close, closes, closed, fix, fixes, fixed, resolve, resolves, resolved,请选择合适的词汇。详细可参考[Closing issues via commit messages](https://help.github.com/articles/closing-issues-via-commit-messages)
此外,在回复评审人意见时,请您遵守以下约定:
1. 评审人的每个意见都必须回复(这是开源社区的基本礼貌,别人帮了忙,应该说谢谢):
- 对评审意见同意且按其修改完的,给个简单的`Done`即可;
- 对评审意见不同意的,请给出您自己的反驳理由。
2. 如果评审意见比较多:
- 请给出总体的修改情况。
- 请采用[start a review](https://help.github.com/articles/reviewing-proposed-changes-in-a-pull-request/)进行回复,而非直接回复的方式。原因是每个回复都会发送一封邮件,会造成邮件灾难。
# Development image for paddle-mobile: Ubuntu 16.04 with the ARM cross
# toolchain, clang-format 5.0, pre-commit, CMake 3.10 and the Android NDK r17c.
FROM ubuntu:16.04

# Replace the apt sources with a template, then fill in the mirror and release.
RUN echo '\
deb <mirror> <version> main restricted universe multiverse\n\
deb <mirror> <version>-updates main restricted universe multiverse\n\
deb <mirror> <version>-backports main restricted universe multiverse\n\
deb <mirror> <version>-security main restricted universe multiverse\n'\
> /etc/apt/sources.list
# "-i -e" instead of "-ie": GNU sed reads "-ie" as in-place editing with
# backup suffix "e", which leaves a stray /etc/apt/sources.liste behind.
RUN sed -i -e 's|<mirror>|http://mirrors.tuna.tsinghua.edu.cn/ubuntu/|' /etc/apt/sources.list
RUN sed -i -e 's|<version>|xenial|' /etc/apt/sources.list

# Update, install and clean up in ONE layer: a cached "apt-get update" layer
# would otherwise be reused with a stale package index, and cleaning in a
# later layer does not shrink the image.
RUN apt-get update && apt-get upgrade -y && \
    apt-get install -y --no-install-recommends \
        curl \
        unzip \
        git \
        make \
        cmake-curses-gui \
        python \
        python-pip \
        python-setuptools \
        clang-format-5.0 \
        graphviz \
        g++-arm-linux-gnueabi \
        gcc-arm-linux-gnueabi && \
    apt-get autoremove -y && apt-get clean

# Expose clang-format 5.0 under the unversioned name.
RUN ln -s clang-format-5.0 /usr/bin/clang-format

# Python tooling, installed from the TUNA PyPI mirror.
RUN pip install -i https://pypi.tuna.tsinghua.edu.cn/simple --upgrade pip
RUN pip install -i https://pypi.tuna.tsinghua.edu.cn/simple wheel
RUN pip install -i https://pypi.tuna.tsinghua.edu.cn/simple pre-commit

# CMake 3.10: unpack into /opt and redirect cmake/ccmake to it, keeping the
# distribution binaries as *.bak. The tarball is deleted in the same layer.
RUN curl -O https://mms-res.cdn.bcebos.com/cmake-3.10.3-Linux-x86_64.tar.gz && \
    tar xzf cmake-3.10.3-Linux-x86_64.tar.gz && \
    rm cmake-3.10.3-Linux-x86_64.tar.gz && \
    mv cmake-3.10.3-Linux-x86_64 /opt/cmake-3.10 && \
    mv /usr/bin/cmake /usr/bin/cmake.bak && ln -s /opt/cmake-3.10/bin/cmake /usr/bin/cmake && \
    mv /usr/bin/ccmake /usr/bin/ccmake.bak && ln -s /opt/cmake-3.10/bin/ccmake /usr/bin/ccmake

# Android NDK r17c: download, unpack into /opt and delete the archive in the
# same layer so the zip does not stay in the image.
RUN cd /tmp && curl -O https://dl.google.com/android/repository/android-ndk-r17c-linux-x86_64.zip && \
    cd /opt && unzip /tmp/android-ndk-r17c-linux-x86_64.zip && \
    rm /tmp/android-ndk-r17c-linux-x86_64.zip
ENV NDK_ROOT=/opt/android-ndk-r17c
Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved
Apache License
Version 2.0, January 2004
http://www.apache.org/licenses/
TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
1. Definitions.
"License" shall mean the terms and conditions for use, reproduction,
and distribution as defined by Sections 1 through 9 of this document.
"Licensor" shall mean the copyright owner or entity authorized by
the copyright owner that is granting the License.
"Legal Entity" shall mean the union of the acting entity and all
other entities that control, are controlled by, or are under common
control with that entity. For the purposes of this definition,
"control" means (i) the power, direct or indirect, to cause the
direction or management of such entity, whether by contract or
otherwise, or (ii) ownership of fifty percent (50%) or more of the
outstanding shares, or (iii) beneficial ownership of such entity.
"You" (or "Your") shall mean an individual or Legal Entity
exercising permissions granted by this License.
"Source" form shall mean the preferred form for making modifications,
including but not limited to software source code, documentation
source, and configuration files.
"Object" form shall mean any form resulting from mechanical
transformation or translation of a Source form, including but
not limited to compiled object code, generated documentation,
and conversions to other media types.
"Work" shall mean the work of authorship, whether in Source or
Object form, made available under the License, as indicated by a
copyright notice that is included in or attached to the work
(an example is provided in the Appendix below).
"Derivative Works" shall mean any work, whether in Source or Object
form, that is based on (or derived from) the Work and for which the
editorial revisions, annotations, elaborations, or other modifications
represent, as a whole, an original work of authorship. For the purposes
of this License, Derivative Works shall not include works that remain
separable from, or merely link (or bind by name) to the interfaces of,
the Work and Derivative Works thereof.
"Contribution" shall mean any work of authorship, including
the original version of the Work and any modifications or additions
to that Work or Derivative Works thereof, that is intentionally
submitted to Licensor for inclusion in the Work by the copyright owner
or by an individual or Legal Entity authorized to submit on behalf of
the copyright owner. For the purposes of this definition, "submitted"
means any form of electronic, verbal, or written communication sent
to the Licensor or its representatives, including but not limited to
communication on electronic mailing lists, source code control systems,
and issue tracking systems that are managed by, or on behalf of, the
Licensor for the purpose of discussing and improving the Work, but
excluding communication that is conspicuously marked or otherwise
designated in writing by the copyright owner as "Not a Contribution."
"Contributor" shall mean Licensor and any individual or Legal Entity
on behalf of whom a Contribution has been received by Licensor and
subsequently incorporated within the Work.
2. Grant of Copyright License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
copyright license to reproduce, prepare Derivative Works of,
publicly display, publicly perform, sublicense, and distribute the
Work and such Derivative Works in Source or Object form.
3. Grant of Patent License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
(except as stated in this section) patent license to make, have made,
use, offer to sell, sell, import, and otherwise transfer the Work,
where such license applies only to those patent claims licensable
by such Contributor that are necessarily infringed by their
Contribution(s) alone or by combination of their Contribution(s)
with the Work to which such Contribution(s) was submitted. If You
institute patent litigation against any entity (including a
cross-claim or counterclaim in a lawsuit) alleging that the Work
or a Contribution incorporated within the Work constitutes direct
or contributory patent infringement, then any patent licenses
granted to You under this License for that Work shall terminate
as of the date such litigation is filed.
4. Redistribution. You may reproduce and distribute copies of the
Work or Derivative Works thereof in any medium, with or without
modifications, and in Source or Object form, provided that You
meet the following conditions:
(a) You must give any other recipients of the Work or
Derivative Works a copy of this License; and
(b) You must cause any modified files to carry prominent notices
stating that You changed the files; and
(c) You must retain, in the Source form of any Derivative Works
that You distribute, all copyright, patent, trademark, and
attribution notices from the Source form of the Work,
excluding those notices that do not pertain to any part of
the Derivative Works; and
(d) If the Work includes a "NOTICE" text file as part of its
distribution, then any Derivative Works that You distribute must
include a readable copy of the attribution notices contained
within such NOTICE file, excluding those notices that do not
pertain to any part of the Derivative Works, in at least one
of the following places: within a NOTICE text file distributed
as part of the Derivative Works; within the Source form or
documentation, if provided along with the Derivative Works; or,
within a display generated by the Derivative Works, if and
wherever such third-party notices normally appear. The contents
of the NOTICE file are for informational purposes only and
do not modify the License. You may add Your own attribution
notices within Derivative Works that You distribute, alongside
or as an addendum to the NOTICE text from the Work, provided
that such additional attribution notices cannot be construed
as modifying the License.
You may add Your own copyright statement to Your modifications and
may provide additional or different license terms and conditions
for use, reproduction, or distribution of Your modifications, or
for any such Derivative Works as a whole, provided Your use,
reproduction, and distribution of the Work otherwise complies with
the conditions stated in this License.
5. Submission of Contributions. Unless You explicitly state otherwise,
any Contribution intentionally submitted for inclusion in the Work
by You to the Licensor shall be under the terms and conditions of
this License, without any additional terms or conditions.
Notwithstanding the above, nothing herein shall supersede or modify
the terms of any separate license agreement you may have executed
with Licensor regarding such Contributions.
6. Trademarks. This License does not grant permission to use the trade
names, trademarks, service marks, or product names of the Licensor,
except as required for reasonable and customary use in describing the
origin of the Work and reproducing the content of the NOTICE file.
7. Disclaimer of Warranty. Unless required by applicable law or
agreed to in writing, Licensor provides the Work (and each
Contributor provides its Contributions) on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
implied, including, without limitation, any warranties or conditions
of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
PARTICULAR PURPOSE. You are solely responsible for determining the
appropriateness of using or redistributing the Work and assume any
risks associated with Your exercise of permissions under this License.
8. Limitation of Liability. In no event and under no legal theory,
whether in tort (including negligence), contract, or otherwise,
unless required by applicable law (such as deliberate and grossly
negligent acts) or agreed to in writing, shall any Contributor be
liable to You for damages, including any direct, indirect, special,
incidental, or consequential damages of any character arising as a
result of this License or out of the use or inability to use the
Work (including but not limited to damages for loss of goodwill,
work stoppage, computer failure or malfunction, or any and all
other commercial damages or losses), even if such Contributor
has been advised of the possibility of such damages.
9. Accepting Warranty or Additional Liability. While redistributing
the Work or Derivative Works thereof, You may choose to offer,
and charge a fee for, acceptance of support, warranty, indemnity,
or other liability obligations and/or rights consistent with this
License. However, in accepting such obligations, You may act only
on Your own behalf and on Your sole responsibility, not on behalf
of any other Contributor, and only if You agree to indemnify,
defend, and hold each Contributor harmless for any liability
incurred by, or claims asserted against, such Contributor by reason
of your accepting any such warranty or additional liability.
END OF TERMS AND CONDITIONS
APPENDIX: How to apply the Apache License to your work.
To apply the Apache License to your work, attach the following
boilerplate notice, with the fields enclosed by brackets "[]"
replaced with your own identifying information. (Don't include
the brackets!) The text should be enclosed in the appropriate
comment syntax for the file format. We also recommend that a
file or class name and description of purpose be included on the
same "printed page" as the copyright notice for easier
identification within third-party archives.
Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
# Paddle-Mobile
[![Build Status](https://travis-ci.org/PaddlePaddle/paddle-mobile.svg?branch=develop&longCache=true&style=flat-square)](https://travis-ci.org/PaddlePaddle/paddle-mobile)
[![Documentation Status](https://img.shields.io/badge/中文文档-最新-brightgreen.svg)](https://github.com/PaddlePaddle/Paddle-Lite/blob/develop/mobile/doc)
[![License](https://img.shields.io/badge/license-Apache%202-blue.svg)](LICENSE)
<!--[![Release](https://img.shields.io/github/release/PaddlePaddle/Paddle-Mobile.svg)](https://github.com/PaddlePaddle/Paddle-Mobile/releases)
[![License](https://img.shields.io/badge/license-Apache%202-blue.svg)](LICENSE)-->
Welcome to the Paddle-Mobile GitHub project. Paddle-Mobile is a project of PaddlePaddle as well as a deep learning framework for embedded platforms.
欢迎来到 Paddle-Mobile GitHub 项目。Paddle-Mobile是PaddlePaddle组织下的项目,是一个致力于嵌入式平台的深度学习的框架。
## Features
- high performance in support of ARM CPU
- support Mali GPU
- support Adreno GPU
- support the realization of GPU Metal on Apple devices
- support implementation on ZU5、ZU9 and other FPGA-based development boards
- support implementation on Raspberry Pi and other arm-linux development boards
## Features
- 高性能支持ARM CPU
- 支持Mali GPU
- 支持Adreno GPU
- 支持苹果设备的GPU Metal实现
- 支持ZU5、ZU9等FPGA开发板
- 支持树莓派等arm-linux开发板
## Demo
- [ANDROID](https://github.com/xiebaiyuan/paddle-mobile-demo)
### 原Demo目录
[https://github.com/PaddlePaddle/Paddle-Lite/tree/develop/mobile/demo](https://github.com/PaddlePaddle/Paddle-Lite/tree/develop/mobile/demo)
## Documentation
### Documentation of design
If you want to know more details about the documentation of paddle-mobile design, please refer to the link as follows. There are many previous designs and discussion: [issue](https://github.com/PaddlePaddle/Paddle-Lite/issues).
[link of documentation of design](https://github.com/PaddlePaddle/Paddle-Lite/blob/develop/mobile/doc/design_doc.md)
### Documentation of development
Documentation of development is mainly about building, running and other tasks. As a developer, you can use it together with the contribution documents.
* [iOS](https://github.com/PaddlePaddle/Paddle-Lite/blob/develop/mobile/doc/development_ios.md)
* [Android_CPU](https://github.com/PaddlePaddle/Paddle-Lite/blob/develop/mobile/doc/development_android.md)
* [Android_GPU](https://github.com/PaddlePaddle/Paddle-Lite/blob/develop/mobile/doc/development_android_GPU.md)
* [FPGA](https://github.com/PaddlePaddle/Paddle-Lite/blob/develop/mobile/doc/development_fpga.md)
* [ARM_LINUX](https://github.com/PaddlePaddle/Paddle-Lite/blob/develop/mobile/doc/development_arm_linux.md)
### How to contribute your documents
- [tutorial link to contribute documents](https://github.com/PaddlePaddle/Paddle-Lite/blob/develop/mobile/CONTRIBUTING.md)
- The main procedure for contributing code is covered in the document above. If you run into other problems during the procedure, please report them as an [issue](https://github.com/PaddlePaddle/Paddle-Lite/issues). We will deal with it as quickly as possible.
## 文档
### 设计文档
关于paddle-mobile设计文档在下面链接中,如果想了解更多内容。[issue](https://github.com/PaddlePaddle/Paddle-Lite/issues)中会有很多早期的设计和讨论过程。
[设计文档链接](https://github.com/PaddlePaddle/Paddle-Lite/blob/develop/mobile/doc/design_doc.md)
### 开发文档
开发文档主要是关于编译、运行等问题。做为开发者,它可以和贡献文档共同结合使用。
* [iOS](https://github.com/PaddlePaddle/Paddle-Lite/blob/develop/mobile/doc/development_ios.md)
* [Android_CPU](https://github.com/PaddlePaddle/Paddle-Lite/blob/develop/mobile/doc/development_android.md)
* [Android_GPU](https://github.com/PaddlePaddle/Paddle-Lite/blob/develop/mobile/doc/development_android_GPU.md)
* [FPGA](https://github.com/PaddlePaddle/Paddle-Lite/blob/develop/mobile/doc/development_fpga.md)
* [ARM_LINUX](https://github.com/PaddlePaddle/Paddle-Lite/blob/develop/mobile/doc/development_arm_linux.md)
### 贡献文档
- [贡献文档链接](https://github.com/PaddlePaddle/Paddle-Lite/blob/develop/mobile/CONTRIBUTING.md)
- 上面文档中涵盖了主要的贡献代码流程,如果在实践中您还遇到了其他问题,可以发[issue](https://github.com/PaddlePaddle/Paddle-Lite/issues)。我们看到后会尽快处理。
## Acquisition of Models
At present Paddle-Mobile only supports models trained with Paddle fluid. If your model comes from a different framework, it must be converted before it can be run.
### 1. Use Paddle Fluid directly to train
This is the most reliable method and is the recommended one.
### 2. Transform Caffe to Paddle Fluid model
[caffe2fluid](https://github.com/PaddlePaddle/models/tree/develop/PaddleCV/caffe2fluid)
### 3. ONNX
ONNX stands for Open Neural Network Exchange. The project aims to make different neural network development frameworks interoperable.
Besides directly using fluid models trained by PaddlePaddle, you can also obtain certain Paddle fluid models through ONNX conversion.
At present, work on ONNX support is also under way at Baidu. The related conversion project can be found here:
[https://github.com/PaddlePaddle/paddle-onnx](https://github.com/PaddlePaddle/paddle-onnx)
### 4. Download parts of testing models and testing pictures
[http://mms-graph.bj.bcebos.com/paddle-mobile%2FmodelsAndImages.zip](http://mms-graph.bj.bcebos.com/paddle-mobile%2FmodelsAndImages.zip)
- input data generated by tools from `tools/python/imagetools`.
## 模型获得
目前Paddle-Mobile仅支持Paddle fluid训练的模型。如果你手中的模型是不同种类的模型,需要进行模型转换才可以运行。
### 1. 直接使用Paddle Fluid训练
该方式最为可靠,推荐方式
### 2. caffe转为Paddle Fluid模型
[caffe2fluid](https://github.com/PaddlePaddle/models/tree/develop/PaddleCV/caffe2fluid)
### 3. ONNX
ONNX全称为“Open Neural Network Exchange”,即“开放的神经网络切换”。该项目的目的是让不同的神经网络开发框架做到互通互用。
除直接使用PaddlePaddle训练fluid版本的模型外,还可以通过onnx转换得到个别Paddle fluid模型。
目前,百度也在做onnx支持工作。相关转换项目在这里:
[https://github.com/PaddlePaddle/paddle-onnx](https://github.com/PaddlePaddle/paddle-onnx)
### 4. 部分测试模型和测试图片下载
[http://mms-graph.bj.bcebos.com/paddle-mobile%2FmodelsAndImages.zip](http://mms-graph.bj.bcebos.com/paddle-mobile%2FmodelsAndImages.zip)
- 测试输入数据可由本仓库下的脚本`tools/python/imagetools`生成。
## Communication
- [Github Issues](https://github.com/PaddlePaddle/Paddle/issues): bug reports, feature requests, install issues, usage issues, etc.
- QQ discussion group: 696965088 (Paddle-Mobile).
- [Forums](http://ai.baidu.com/forum/topic/list/168?pageNo=1): discuss implementations, research, etc.
## 交流与反馈
- 欢迎您通过[Github Issues](https://github.com/PaddlePaddle/Paddle/issues)来提交问题、报告与建议
- QQ群: 696965088 (Paddle-Mobile)
- [论坛](http://ai.baidu.com/forum/topic/list/168): 欢迎大家在PaddlePaddle论坛分享在使用PaddlePaddle中遇到的问题和经验, 营造良好的论坛氛围
## Old version Mobile-Deep-Learning
Original MDL(Mobile-Deep-Learning) project has been transferred to [Mobile-Deep-Learning](https://github.com/allonli/mobile-deep-learning)
## 旧版 Mobile-Deep-Learning
原MDL(Mobile-Deep-Learning)工程被迁移到了这里 [Mobile-Deep-Learning](https://github.com/allonli/mobile-deep-learning)
## Copyright and License
[Apache-2.0 license](LICENSE).
|mobilenet arm v7|1线程|2线程|4线程|
|------------|----|-----|-----|
|麒麟970(ms)|108.180|63.935|37.545|
|麒麟960(ms)|108.588|63.073|36.822|
|高通845(ms)|85.952|48.890|28.641|
|高通835(ms)|105.434|62.752|37.131|
|||||
|mobilenetssd arm v7|1线程|2线程|4线程|
|麒麟970(ms)|212.686|127.205|77.485|
|麒麟960(ms)|212.641|125.338|75.250|
|高通845(ms)|182.863|95.671|56.857|
|高通835(ms)|213.849|127.717|77.006|
|||||
|googlenet(v1) arm v7|1线程|2线程|4线程|
|麒麟970(ms)|335.288|234.559|161.295|
|麒麟960(ms)|354.443|232.642|157.815|
|高通845(ms)|282.007|173.146|122.148|
|高通835(ms)|341.250|233.354|158.554|
|||||
|squeezenet arm v7|1线程|2线程|4线程|
|麒麟970(ms)|83.726|57.944|36.923|
|麒麟960(ms)|85.835|55.762|36.496|
|高通845(ms)|71.301|41.618|28.785|
|高通835(ms)|82.407|56.176|36.455|
|||||
|yolo arm v7|1线程|2线程|4线程|
|麒麟970(ms)|129.658|79.993|49.969|
|麒麟960(ms)|130.208|78.791|48.390|
|高通845(ms)|109.244|61.736|40.600|
|高通835(ms)|130.402|80.863|50.359|
测试机型信息:
麒麟970:荣耀v10 (2.36GHz * 4 + 1.8GHz * 4)
麒麟960:华为mate9 (2.36GHz * 4 + 1.8GHz * 4)
骁龙835:小米6 (2.45GHz * 4 + 1.9GHz * 4)
骁龙845:OPPO FindX (2.80GHz * 4 + 1.8GHz * 4)
|mobilenetfssd|速度|
|------------|-----|
|A9(ms)|33.78|
|A10(ms)|24.05|
|A11(ms)|17.15|
|||
|genet|速度|
|A9(ms) |3.49|
|A10(ms)|2.54|
|A11(ms)|1.43|
## Demo 下载路径
- [ANDROID](http://mms-graph.bj.bcebos.com/paddle-mobile%2FPaddleMobile_Android.zip)
- [IOS](http://mms-graph.bj.bcebos.com/paddle-mobile%2FPaddleMobileDemo_iOS.zip)
- 原demo亦可使用getDemo.sh进行下载
```
sh getDemo.sh
```
#!/usr/bin/env bash
# Downloads the prebuilt Android and iOS demo archives, unpacks them into the
# current directory, then deletes the archives and the __MACOSX folder.
#
# The archive names contain a literal "%2F"; the same text is used both as the
# last URL component and as the local file name passed to unzip/rm.
# Plain `for` lists are used (no arrays) so the script also runs under `sh`.

for archive in paddle-mobile%2FPaddleMobile_Android.zip paddle-mobile%2FPaddleMobileDemo_iOS.zip; do
  wget "http://mms-graph.bj.bcebos.com/${archive}"
done

for archive in paddle-mobile%2FPaddleMobile_Android.zip paddle-mobile%2FPaddleMobileDemo_iOS.zip; do
  unzip "${archive}"
done

for archive in paddle-mobile%2FPaddleMobile_Android.zip paddle-mobile%2FPaddleMobileDemo_iOS.zip; do
  rm -rf "${archive}"
done

# Remove the metadata directory left behind after unpacking.
rm -rf __MACOSX
# 环境搭建
## 使用 docker
### 1. 安装 docker
安装 docker 的方式,参考官方文档 [https://docs.docker.com/install/](https://docs.docker.com/install/)
### 2. 使用 docker 搭建构建环境
首先进入 paddle-mobile 的目录下,执行 `docker build`
以 Linux/Mac 为例 (windows 建议在 'Docker Quickstart Terminal' 中执行)
```
$ docker build -t paddle-mobile:dev - < Dockerfile
```
使用 `docker images` 可以看到我们新建的 image
```
$ docker images
REPOSITORY TAG IMAGE ID CREATED SIZE
paddle-mobile dev 33b146787711 45 hours ago 372MB
```
### 3. 使用 docker 构建
进入 paddle-mobile 目录,执行 docker run
```
$ docker run -it --mount type=bind,source=$PWD,target=/paddle-mobile paddle-mobile:dev
root@5affd29d4fc5:/ # cd /paddle-mobile
###
### paddle-mobile 支持 arm 架构下的各种平台,包括 android 以及 linux 等,可以使用不同的
### toolchain 文件生成满足需要的 makefile
###
# 生成构建 android 产出的 Makefile
root@5affd29d4fc5:/ # rm CMakeCache.txt
root@5affd29d4fc5:/ # cmake -DCMAKE_TOOLCHAIN_FILE=tools/toolchains/arm-android-neon.cmake
# 生成构建 linux 产出的 Makefile
root@5affd29d4fc5:/ # rm CMakeCache.txt
root@5affd29d4fc5:/ # cmake -DCMAKE_TOOLCHAIN_FILE=tools/toolchains/arm-linux-gnueabi.cmake
```
### 4. 设置编译选项
可以通过 ccmake 设置编译选项
```
root@5affd29d4fc5:/ # ccmake .
Page 1 of 1
CMAKE_ASM_FLAGS
CMAKE_ASM_FLAGS_DEBUG
CMAKE_ASM_FLAGS_RELEASE
CMAKE_BUILD_TYPE
CMAKE_INSTALL_PREFIX /usr/local
CMAKE_TOOLCHAIN_FILE /paddle-mobile/tools/toolchains/arm-android-neon.cmake
CPU ON
DEBUGING ON
FPGA OFF
LOG_PROFILE ON
NET googlenet
USE_EXCEPTION ON
USE_OPENMP OFF
```
修改选项后,按 `c`, `g` 更新 Makefile
### 5. 构建
使用 make 命令进行构建
```
root@5affd29d4fc5:/ # make
```
### 6. 查看构建产出
构建产出可以在 host 机器上查看,在 paddle-mobile 的目录下,build 以及 test/build 下,可以使用 adb 指令或者 scp 传输到 device 上执行
## 不使用 docker
不使用 docker 的方法,可以直接用 cmake 生成 makefile 后构建。使用 ndk 构建 android 应用需要正确设置 NDK_ROOT。构建 linux 应用需要安装 arm-linux-gnueabi-gcc 或者类似的交叉编译工具,可能需要设置 CC,CXX 环境变量,或者在 tools/toolchains/ 中修改 arm-linux-gnueabi.cmake,或者增加自己需要的 toolchain file。
# paddle-mobile 设计文档
#### 以下是 paddle-mobile 代码的执行流程图:
![执行流程图](http://mms-graph.bj.bcebos.com/paddle-mobile/git_images/flow_chart.png)
#### 主要分为: Loader 模块、 Program 模块、 Executor 模块、 op 模块、 kernel 模块、scope variable Tensor 模块
#### 下面展开说一下各个模块的作用以及设计思路
### 一. Loader
先来看一下模型, 模型分为两种结构:
一种为参数文件是散开的, 如下图, 红框为模型结构的 protobuf 文件, 其余为参数文件
![模型描述](http://mms-graph.bj.bcebos.com/paddle-mobile/git_images/model_desc.png)
另一种为参数文件结合在一起的, 如下图, 红框内为模型结构描述的 protobuf 文件, 另一个文件为结合在一起的参数文件
![模型描述combined](http://mms-graph.bj.bcebos.com/paddle-mobile/git_images/model_desc_combined.png)
loader 模块的作用是将模型结构信息 load 进内存, 将红框内的 protobuf 文件 load 进内存, 并对模型结构进行优化(如将几个细粒度的 op 融合成 粗粒度的 op, 如将 conv、 add、 batchnorm、 relu 融合为 conv\_add\_batchnorm\_relu).
方便进行算法优化.
__那么为什么融合在一起能够做算法优化 ?__
如果未融合的 conv add batchnorm relu 运算是这样的
```
[n]
[conv_res] = conv([n])
for &res in conv_res {
res = add_biase(res)
}
for &res in conv_res {
res = batchnorm(res)
}
for &res in conv_res {
res = relu(res)
}
```
融合后的 conv\_add\_batchnorm\_relu 运算是这样的:
```
[n]
[conv_res] = conv([n])
for &res in conv_res {
res = relu(batchnorm(add_biase(res)))
}
```
由于 conv 可以转换为两个大矩阵相乘, 更进一步可以分为若干个一行一列的小矩阵相乘, 那最终的运算是这样的:
```
[n]
for &res in [res] {
res = relu(batchnorm(add_biase(A * B)))
}
其中 A 和 B 为 1 * k 和 k * 1 矩阵
```
### 二. Program
program 为 loader 模块的结果, 包含了优化前的模型结构对象, 以及优化后的模型结构对象, 此模块基本对应着 paddle 模型的结构, 关于paddle 模型的一些概念的定义, 详细设计可以参考 [program.md](https://github.com/PaddlePaddle/Paddle/blob/develop/doc/fluid/design/concepts/program.md), 以下是一个简单的概况:
* programDesc 中包含着若干个(googlenet mobilenet yolo squeezenet resnet 常见的模型只有一个)可以嵌套的 block, blocks中的第一个block中的某个 op 可能会执行 blocks 中后边 block 中的一系列 op 运算(只有多个block才会有此概念)
* block 包含着 ops 和 vars
* ops 为一系列 op 的描述, 描述着每个 op 的类型, 输入输出, 所需参数
* vars 里包含的为所有 op 运算所需的参数描述
### 三. Executor
executor 主要是用于 op 运算的上层调度操作, 主要有两个操作, executor 实例化 和 暴露给上层的 predict 方法
* executor 实例化过程中, 主要进行了这几个操作
1. 根据 loader 产出的 program 初始化 operator 对象
2. 分配所有需要用到的内存, 包括每个op 的输入输出, 权重参数, 目前模型的权重参数文件的内存格式为 NCHW, op 的输入输出中间矩阵参数也是 NCHW 格式
3. 调用每个 op 的 init 方法, init 方法是每个 op 实现者进行参数预处理的地方, 有助于减少 predict 的耗时
* predict, 主要用于拿到外部的输入, 顺序调用 op 的 run 方法进行运算, 并返回最终的结果.
### 四. op
关于 op 模块代码的详细设计可以参考 [operator部分代码设计](https://github.com/PaddlePaddle/paddle-mobile/issues/300), operator主要包含一个kernel用于运算、一个 param 用于存储属性, operator 主要有三个操作, Init、RunImp、InferShape
* Init: Init 函数主要用于参数预处理, 如对 batchNorm 参数进行预处理, 可以将 batchNorm 运算转化为 a * x + b 形式的运算, 这个函数也会调用, kernel 的 Init 函数对 kernel 进行初始化
* RunImp: RunImp 函数会调用自己的kernel 的 compute 方法进行运算
* InferShape: InferShape 函数会根据输入和参数得出输出的形状, 这个函数会在 executor 实例化时, 内存初始化前调用
每个 operator 都需要进行注册才可以被使用, 以 conv 为例, 需在 conv_op.cpp 底部这样写:
```c++
// 三个平台都注册了 conv op
namespace ops = paddle_mobile::operators;
#ifdef PADDLE_MOBILE_CPU
USE_OP_CPU(conv2d);
REGISTER_OPERATOR_CPU(conv2d, ops::ConvOp);
#endif
#ifdef PADDLE_MOBILE_FPGA
USE_OP_FPGA(conv2d);
REGISTER_OPERATOR_FPGA(conv2d, ops::ConvOp);
#endif
```
__一个关于包大小的优化__:
每个 operator 都由一个宏控制编译, 如 conv_op.h(除了 conv_op.h , conv_op.cpp、conv_kernel.h、conv_kernel.cpp 也都需要加此宏控制)
```c++
#ifdef CONV_OP //这个宏控制着 conv_op 是否被编译, 除了 conv_op.h , conv_op.cpp、conv_kernel.h conv_kernel.cpp 也都需要加此宏控制
#pragma once
#include <string>
#include "framework/operator.h"
#include "operators/kernel/conv_kernel.h"
namespace paddle_mobile {
namespace operators {
using std::string;
template <typename DeviceType, typename T>
class ConvOp
//impl
};
} // namespace operators
} // namespace paddle_mobile
#endif
```
这样做的目的是为了根据不同类型的网络编译特定的op, 在 cmake 中已经配置好不同网络编译的宏, 如果你要进行编译支持 yolo 的模型, 仅需执行:
```sh
cd tools
sh build.sh android yolo
```
这样只会编译 yolo 所包含的四种 op, 极大的减小了包体积和编译时间
### 五. kernel
kernel 为 op 的底层运算实现, 主要有两个函数, Init 和 Compute, 分别用来初始化、预处理 和 运算操作, 值得提出的是, kernel 会根据泛型特化到不同的平台, 如图所示:
![设备特化](http://mms-graph.bj.bcebos.com/paddle-mobile/git_images/devices.png)
不同平台的 kernel 实现, 为同一个 kernel 类不同泛型的特化实现, 目前有三个平台, arm、mali、fpga, 图中的 central-arm-func\ 目录为 op kernel 的 arm 实现, 它承担了 arm\ 目录下 kernel 的底层实现, 同时 arm 处理器作为中央处理器, central-arm-func\ 也可以作为其他协处理器的底层实现, 如: fpga 的某一个 op kernel 还没有 fpga 协处理器的实现, 就可以直接调用使用这里的 arm 实现.
__如果你有兴趣新增一个协处理器实现, 就可以在此添加一个 kernel 目录, 提供协处理器实现, 如果某个 kernel 你没有实现完, 你也可以直接使用 arm 实现__
### 六. scope variable Tensor
* scope 用来存储管理所需用到的所有 variable(用来存储不同类型的对象, 主要是矩阵Tensor, 也就是说 scope 管理着 op 运算过程中所有参数矩阵, 输入输出矩阵), 可以将 scope 理解为一个 map, 这里在 map 上封了一层 scope 的概念是为了方便内存管理
* variable 可以用来存储不同类型的对象, paddle-mobile 里主要用它来存储矩阵 Tensor
* tensor 代表着矩阵, 通过泛型可以用来存储不同类型的矩阵, 但需要注意的是, 存入和取出时的类型必须保持一致, 如果类型不一致, 使用 inline const T \*data() const 获取指针会不能通过类型检查, 通过 inline T \*mutable_data() 获取指针会重新分配内存, 以下是关于 Tensor 的一些小概念:
1. DDim: 用来存储矩阵的维度信息.
2. Slice(): 这个函数用来获取 N 维 (NCHW中的 N) 上切片
3. 当实例化未分配内存时, 调用 inline T *mutable_data() 会分配内存
# Android开发文档
用户可通过如下两种方式进行编译:
- 基于macOS 、Linux交叉编译
- 基于Docker容器编译
## 基于macOS 、Linux交叉编译
需要: NDK17及以上、cmake 3.0及以上
### 执行编译
在paddle-mobile根目录中,执行以下命令:
```shell
cd tools
sh build.sh android
# 如果想编译只支持某些特定网络的库 (可以控制包体积, 编译出来的库就只包含了支持这些特定模型的算子), 可以使用
sh build.sh android mobilenet googlenet
# 当然这些网络是需要在 cmakelist 中配置的(https://github.com/PaddlePaddle/paddle-mobile/blob/73769e7d05ef4820a115ad3fb9b1ca3f55179d03/CMakeLists.txt#L216), 目前配置了几个常见模型
```
执行完毕后,生成的`so`位于`build/release/`目录中:
- jni 头文件位于 [https://github.com/PaddlePaddle/paddle-mobile/tree/develop/src/io/jni](https://github.com/PaddlePaddle/paddle-mobile/tree/develop/src/io/jni)
- c++ 头文件位于 [https://github.com/PaddlePaddle/paddle-mobile/blob/develop/src/io/paddle_inference_api.h](https://github.com/PaddlePaddle/paddle-mobile/blob/develop/src/io/paddle_inference_api.h)
单测可执行文件位于`test/build`目录中。
如果有环境问题, 可以看接下来的环节
### 环境配置
##### 下载Android NDK
如果你的电脑安装了Android Studio, 可以在 Android Studio 中直接下载安装`NDK`或者可以在 [https://developer.android.com/ndk/](https://developer.android.com/ndk/) 这里自行下载,也可以通过以下命令获取:
- Mac平台
```shell
wget https://dl.google.com/android/repository/android-ndk-r17b-darwin-x86_64.zip
unzip android-ndk-r17b-darwin-x86_64.zip
```
- Linux平台
```shell
wget https://dl.google.com/android/repository/android-ndk-r17b-linux-x86_64.zip
unzip android-ndk-r17b-linux-x86_64.zip
```
##### 设置环境变量
工程中自带的独立工具链会根据环境变量`NDK_ROOT`查找NDK,因此需要配置环境变量:
```shell
export NDK_ROOT="path to ndk"
```
##### 安装 CMake
- Mac平台
mac 平台下可以使用`homebrew`安装
```shell
brew install cmake
```
- Linux平台
linux 下可以使用`apt-get`进行安装
```shell
apt-get install cmake
```
##### Tips:
如果想要获得体积更小的库,可选择编译支持指定模型结构的库。
如执行如下命令:
```shell
sh build.sh android googlenet
```
会得到一个支持googlenet的体积更小的库。
## 基于Docker容器编译
### 1. 安装 docker
安装 docker 的方式,参考官方文档 [https://docs.docker.com/install/](https://docs.docker.com/install/)
### 2. 使用 docker 搭建构建环境
首先进入 paddle-mobile 的目录下,执行 `docker build`
以 Linux/Mac 为例 (windows 建议在 'Docker Quickstart Terminal' 中执行)
```shell
$ docker build -t paddle-mobile:dev - < Dockerfile
```
使用 `docker images` 可以看到我们新建的 image
```shell
$ docker images
REPOSITORY TAG IMAGE ID CREATED SIZE
paddle-mobile dev 33b146787711 45 hours ago 372MB
```
### 3. 使用 docker 构建
进入 paddle-mobile 目录,执行 docker run
```shell
$ docker run -it --mount type=bind,source=$PWD,target=/paddle-mobile paddle-mobile:dev
root@5affd29d4fc5:/ # cd /paddle-mobile
# 生成构建 android 产出的 Makefile
root@5affd29d4fc5:/ # rm CMakeCache.txt
root@5affd29d4fc5:/ # cmake -DCMAKE_TOOLCHAIN_FILE=tools/toolchains/arm-android-neon.cmake
# 生成构建 linux 产出的 Makefile
root@5affd29d4fc5:/ # rm CMakeCache.txt
root@5affd29d4fc5:/ # cmake -DCMAKE_TOOLCHAIN_FILE=tools/toolchains/arm-linux-gnueabi.cmake
```
### 4. 设置编译选项
可以通过 ccmake 设置编译选项
```
root@5affd29d4fc5:/ # ccmake .
Page 1 of 1
CMAKE_ASM_FLAGS
CMAKE_ASM_FLAGS_DEBUG
CMAKE_ASM_FLAGS_RELEASE
CMAKE_BUILD_TYPE
CMAKE_INSTALL_PREFIX /usr/local
CMAKE_TOOLCHAIN_FILE /paddle-mobile/tools/toolchains/arm-android-neon.cmake
CPU ON
DEBUGING ON
FPGA OFF
LOG_PROFILE ON
MALI_GPU OFF
NET googlenet
USE_EXCEPTION ON
USE_OPENMP OFF
```
修改选项后,按 `c`, `g` 更新 Makefile
### 5. 构建
使用 make 命令进行构建
```
root@5affd29d4fc5:/ # make
```
### 6. 查看构建产出
构建产出可以在 host 机器上查看,在 paddle-mobile 的目录下,build 以及`test/build`下,可以使用`adb`指令或`scp`传输到`device`上执行
## 测试
在编译完成后,我们提供了自动化的测试脚本,帮助用户将运行单测文件所需要的模型及库文件push到Android设备
执行下面的脚本,该脚本会下载测试需要的 [mobilenet和test_image_1x3x224x224_float(预处理过的 NCHW 文件) 文件](http://mms-graph.bj.bcebos.com/paddle-mobile/opencl_test_src.zip),在项目下的`test`目录创建模型和图片文件夹,并将`mobilenet`复制到`paddle-mobile/test/models`目录下,将`test_image_1x3x224x224_float`复制到`paddle-mobile/test/images`目录下
```shell
cd tools
sh ./prepare_images_and_models.sh
```
* 执行下面命令将可执行文件和预测需要的文件部署到手机
```shell
cd tools/android-debug-script
sh push2android.sh
```
* mobilenet cpu模型预测结果
假设mobilenet和`test_image_1x3x224x224_float`文件已经推送到手机上,执行下面命令进行mobilenet cpu的预测
```shell
adb shell
cd /data/local/tmp/bin/
export LD_LIBRARY_PATH=.
./test-mobilenet
```
## paddle-mobile GPU开发文档
编译环境配置方法请参考`development_android.md`文档
1. 下载 paddle-mobile
```shell
git clone https://github.com/PaddlePaddle/paddle-mobile.git
adb pull /system/vendor/lib/libOpenCL.so paddle-mobile/third_party/opencl
# 修改paddle-mobile/CMakeLists.txt文件,执行如下操作:
# option(GPU_CL "opencl gpu" OFF)->option(GPU_CL "opencl gpu" ON)
cd paddle-mobile/tools
sh build.sh android
```
2. 将单测可执行文件和模型部署到手机
执行下面的脚本,该脚本会下载测试需要的 [mobilenet和test_image_1x3x224x224_float(预处理过的 NCHW 文件) 文件](http://mms-graph.bj.bcebos.com/paddle-mobile/opencl_test_src.zip),在项目下的`test`目录创建模型和图片文件夹,并将`mobilenet`复制到`paddle-mobile/test/models`目录下,将`test_image_1x3x224x224_float`复制到`paddle-mobile/test/images`目录下
```shell
cd tools
sh ./prepare_images_and_models.sh
```
执行下面命令将可执行文件和预测需要的文件部署到手机
```shell
cd ../tools/android-debug-script
sh push2android.sh
```
3.`adb shell`中执行对应的可执行文件(目前只支持mobilenet,后续会支持更多的网络模型)
```shell
adb shell
cd /data/local/tmp/bin/
export LD_LIBRARY_PATH=.
./test-mobilenetgpu
```
4. mobilenet cpu模型预测结果
执行下面命令进行mobilenet cpu的预测
```shell
adb shell
cd /data/local/tmp/bin/
export LD_LIBRARY_PATH=.
./test-mobilenet
```
5. 预测结果
手机型号:小米6(CPU 835,GPU Adreno 540)
mobilenet gpu:预测性能,耗时41ms左右。
mobilenet cpu:
1线程:108ms
2线程:65ms
4线程:38ms
手机型号:OPPO Findx(CPU 845,GPU Adreno 630)
mobilenet gpu:预测性能,耗时27ms左右。
mobilenet cpu:
1线程:90ms
2线程:50ms
4线程:29ms
备注: GPU 在打开log之后, 会大幅增加性能开销,测试benchmark请关闭CmakeList中Log选项
# ARM Linux开发文档
在ARM Linux如Raspberrypi3,或Firefly-RK3399上编译paddle-mobile(**注:暂不支持ARM Linux GPU**)。
## 预先安装
```shell
$ sudo apt update
$ sudo apt-get install -y cmake git
$ git clone https://github.com/PaddlePaddle/paddle-mobile.git
```
## 编译
在paddle-mobile根目录中,执行以下命令:
```shell
# 进入paddle-mobile根目录
$ cd <your-paddle-mobile>
# 可选:开启GPU支持,在CMakeLists.txt开启GPU_CL选项为ON
$ cp /usr/lib/aarch64-linux-gnu/libMali.so ./third_party/opencl/
$ cp /usr/lib/aarch64-linux-gnu/libOpenCL.so ./third_party/opencl/
$ ln -s ./third_party/opencl/libMali.so ./third_party/opencl/
# 编译
$ cd ./tools
$ /bin/bash build.sh arm_linux
```
- 动态库`so`文件位于`<paddle-mobile-repo>/build/release/arm-linux/build`目录;
- 单元测试位于`<paddle-mobile-repo>/test/build`目录,若只编译如`googlenet`,可以执行`bash build.sh arm_linux googlenet`
## 运行
接着刚刚的命令,执行MobileNet模型:
```shell
# 导入编译好的动态库路径到LD_LIBRARY_PATH中
$ cd ../build/release/arm-linux/build
$ export LD_LIBRARY_PATH=.
# 执行MobileNet
# 可选:GPU执行./test-mobilenetgpu
$ cd ../../../../test/build/
$ ./test-mobilenet
# 执行顺利会打印如下日志
load cost :0ms
Max element is 0.985921 at position 954
predict cost :121.462ms
如果结果Nan请查看: test/images/g_test_image_1x3x224x224_banana 是否存在?
```
注意:
1. 如果本地仓库中`test`目录下没有模型,脚本会自动下载官方demo模型并解压;
2. 因为ARM Linux设备算力限制,如果编译卡死,可重启机器后尝试单线程编译(将`tools/build.sh`中`build_for_arm_linux`的编译命令改为`make -j1`),或指定编译某个模型(如googlenet),或扩大系统的swap交换空间。
## 其它
- 若编译中提示有不识别的编译选项等ARM Linux平台的编译问题,可尝试修改`tools/build.sh`中的相关编译参数;
- Android平台请参考Android开发文档.
# FPGA开发文档
FPGA平台的代码分为V1和V2。要复现V1运行的结果,需要准备专门的硬件、底层驱动程序、FPGA工程。这些都在之前的版本[1.1.1](https://github.com/PaddlePaddle/paddle-mobile/releases/tag/1.1.1) 中提供了链接。根据链接的使用说明,可以复现resnet50的推测结果。
后续PaddleMobile版本,不再提供相关的辅助文件。
# iOS开发文档
## CPU
需要: xcode
### 编译
```sh
# 在 paddle-mobile 目录下:
cd tools
sh build.sh ios
# 如果只想编译某个特定模型的 op, 则需执行以下命令
sh build.sh ios googlenet
# 在这个文件夹下, 你可以拿到生成的 .a 库
cd ../build/release/ios/build
```
#### 常见问题:
1. No iOS SDK's found in default search path ...
这个问题是因为 tools/ios-cmake/ios.toolchain.cmake 找不到你最近使用的 iOS SDK 路径, 所以需要自己进行指定,
以我当前的环境为例: 在 tools/ios-cmake/ios.toolchain.cmake 143行前添加我本地的 iOS SDK 路径: set(CMAKE_IOS_SDK_ROOT "/Applications/Xcode.app/Contents/Developer/Platforms/iPhoneOS.platform/Developer/SDKs/iPhoneOS.sdk")
### 集成
```
将上一步生成的:
libpaddle-mobile.a
/src/ios_io/ 下的
PaddleMobileCPU.h
```
拖入工程
#### oc 接口
接口如下:
```
/*
创建对象
*/
- (instancetype)init;
/*
load 模型, 开辟内存
*/
- (BOOL)load:(NSString *)modelPath andWeightsPath:(NSString *)weighsPath;
/*
进行预测, means 和 scale 为训练模型时的预处理参数, 如训练时没有做这些预处理则直接使用 predict
*/
- (NSArray *)predict:(CGImageRef)image dim:(NSArray<NSNumber *> *)dim means:(NSArray<NSNumber *> *)means scale:(float)scale;
/*
进行预测
*/
- (NSArray *)predict:(CGImageRef)image dim:(NSArray<NSNumber *> *)dim;
/*
清理内存
*/
- (void)clear;
```
## GPU
需要: xcode、cocoapods
```
# 在 paddle-mobile 目录下:
cd metal
pod install
open paddle-mobile.xcworkspace
```
# Quantification 模型量化、反量化
## 背景故事
部分网络如AlexNet训练出的模型体积较大,不适宜在移动设备上使用。
## 解决模型过大办法
1. 选用适合移动端的模型结构如:mobilenet、googlenet、 yolo、squeezenet 等;
2. 使用我们提供的量化工具,可以在几乎不影响精度的情况下将float32模型减小至原模型的 1/4;
- - - - -
## 量化工具介绍
### 模型转化工具目录:
- [量化工具目录](https://github.com/PaddlePaddle/paddle-mobile/tree/develop/tools/quantification)
- [模型转化工具](https://github.com/PaddlePaddle/paddle-mobile/blob/develop/tools/quantification/convert.cpp)
#### 使用说明
- [工具使用](https://github.com/PaddlePaddle/paddle-mobile/blob/develop/tools/quantification/README.md)
## 如何读取量化后的模型
load方法中添加了 quantification 参数,默认为false。 如果需要load量化后的模型,按需传参即可。
[我是源代码](https://github.com/PaddlePaddle/paddle-mobile/blob/55302b33ea3bd68c9797d8f65e527544792b8095/src/io/paddle_mobile.h)
```c++
bool Load(const std::string &dirname, bool optimize = false,
bool quantification = false, int batch_size = 1);
```
- - - - -
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include <chrono> // NOLINT
namespace paddle_mobile {

// Time-point type produced by std::chrono::high_resolution_clock::now().
using Time = decltype(std::chrono::high_resolution_clock::now());

// Returns the current reading of the high-resolution clock.
inline Time time() { return std::chrono::high_resolution_clock::now(); }

// Returns the interval from t1 to t2 expressed in milliseconds.
// The interval is first converted to whole microseconds (duration_cast
// truncates), then divided by 1000.0, so the result has microsecond
// resolution. The value is negative when t2 precedes t1.
inline double time_diff(Time t1, Time t2) {
  using Micros = std::chrono::microseconds;
  const Micros elapsed = std::chrono::duration_cast<Micros>(t2 - t1);
  return elapsed.count() / 1000.0;
}

}  // namespace paddle_mobile
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#ifdef ENABLE_EXCEPTION
#include <stdio.h>
#include <stdlib.h>
#include <exception>
#include <string>
#endif
namespace paddle_mobile {

#ifdef ENABLE_EXCEPTION
// Exception thrown by PADDLE_MOBILE_THROW_EXCEPTION and PADDLE_MOBILE_ENFORCE.
// The text returned by what() contains a fixed prefix, the header, the source
// file and line, and the detail string.
struct PaddleMobileException : public std::exception {
  const std::string exception_prefix = "paddle mobile C++ Exception: \n";
  std::string message;

  // header: short category of the failure.
  // detail: already-formatted description of the failure.
  // file / line: location where the exception was raised.
  // The combined text is formatted into a 1500-byte buffer; snprintf
  // truncates anything longer.
  PaddleMobileException(const char *header, const char *detail,
                        const char *file, const int line) {
    char buffer[1500];
    snprintf(buffer, sizeof(buffer),
             "%s| %s \n| [in file] : %s\n| [on line] : %d\n| [detail] : %s\n",
             exception_prefix.c_str(), header, file, line, detail);
    message = std::string(buffer);
  }

  const char *what() const noexcept override { return message.c_str(); }
};

// Formats the printf-style arguments into a 1000-byte buffer and throws a
// PaddleMobileException tagged "Custom Exception".
//
// The expansion is a single braced block and nothing else. A statement placed
// after the block would escape an unbraced `if (cond) MACRO(...);` and run
// whenever `cond` is false, which is why no trailing call follows the block.
// The braced form (rather than do/while) keeps call sites that omit the
// trailing semicolon compiling.
#define PADDLE_MOBILE_THROW_EXCEPTION(...)                                  \
  {                                                                         \
    char buffer[1000];                                                      \
    snprintf(buffer, sizeof(buffer), __VA_ARGS__);                          \
    throw paddle_mobile::PaddleMobileException("Custom Exception", buffer,  \
                                               __FILE__, __LINE__);         \
  }

// Throws a PaddleMobileException tagged "paddle-mobile enforce" when `stat`
// evaluates to false; the remaining arguments are a printf-style message.
#define PADDLE_MOBILE_ENFORCE(stat, ...)                                    \
  {                                                                         \
    if (stat) {                                                             \
    } else {                                                                \
      char buffer[1000];                                                    \
      snprintf(buffer, sizeof(buffer), __VA_ARGS__);                        \
      throw paddle_mobile::PaddleMobileException("paddle-mobile enforce",   \
                                                 buffer, __FILE__,          \
                                                 __LINE__);                 \
    }                                                                       \
  }
#else
// Exceptions disabled: throwing expands to nothing, and ENFORCE still
// evaluates `stat` but takes no action on failure.
#define PADDLE_MOBILE_THROW_EXCEPTION(...)
#define PADDLE_MOBILE_ENFORCE(stat, ...) \
  {                                      \
    if (stat) {                          \
    } else {                             \
    }                                    \
  }
#endif

}  // namespace paddle_mobile
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include <vector>
#ifdef PADDLE_MOBILE_DEBUG
#include <cstring>
#include <iostream>
#include <sstream>
#include <string>
#endif
#ifdef ANDROID
#include <android/log.h>
#endif
namespace paddle_mobile {
#ifdef PADDLE_MOBILE_DEBUG
#ifdef ANDROID
static const char *ANDROID_LOG_TAG =
"paddle_mobile LOG built on " __DATE__ " " __TIME__;
#ifdef PADDLE_ENABLE_COLORABLE_LOG
#define PADDLE_RED "\033[1;31;40m"
#define PADDLE_GREEN "\033[1;32;40m"
#define PADDLE_YELLOW "\033[1;33;40m"
#define PADDLE_LIGHT_RED "\033[1;35;40m"
#define PADDLE_BLUE "\033[1;34;40m"
#define PADDLE_WHITE "\033[1;37;40m"
#define PADDLE_CONON "\033[0m"
#else
#define PADDLE_RED ""
#define PADDLE_GREEN ""
#define PADDLE_YELLOW ""
#define PADDLE_LIGHT_RED ""
#define PADDLE_BLUE ""
#define PADDLE_WHITE ""
#define PADDLE_CONON ""
#endif
// Android logging helpers: one macro per logcat priority (Info, Warn, Debug,
// Error, Verbose). Each macro
//   1. forwards the message to logcat via __android_log_print, and
//   2. echoes it to stderr (optionally coloured) followed by a flush.
//
// The argument is expected to be a single C string (the stderr echo has always
// printed it through "%s"). The message is now also passed to logcat through an
// explicit "%s" format instead of being used as the format string itself, so a
// '%' inside a log message can no longer be interpreted as a conversion
// specifier. Each macro is wrapped in do { ... } while (0) so it behaves as a
// single statement, e.g. inside an unbraced if/else.
#define ANDROIDLOGI(...)                                               \
  do {                                                                 \
    __android_log_print(ANDROID_LOG_INFO, ANDROID_LOG_TAG, "%s",       \
                        __VA_ARGS__);                                  \
    fprintf(stderr, PADDLE_YELLOW "%s\n" PADDLE_CONON, __VA_ARGS__);   \
    fflush(stderr);                                                    \
  } while (0)
#define ANDROIDLOGW(...)                                                \
  do {                                                                  \
    __android_log_print(ANDROID_LOG_WARN, ANDROID_LOG_TAG, "%s",        \
                        __VA_ARGS__);                                   \
    fprintf(stderr, PADDLE_LIGHT_RED "%s\n" PADDLE_CONON, __VA_ARGS__); \
    fflush(stderr);                                                     \
  } while (0)
#define ANDROIDLOGD(...)                                              \
  do {                                                                \
    __android_log_print(ANDROID_LOG_DEBUG, ANDROID_LOG_TAG, "%s",     \
                        __VA_ARGS__);                                 \
    fprintf(stderr, PADDLE_WHITE "%s\n" PADDLE_CONON, __VA_ARGS__);   \
    fflush(stderr);                                                   \
  } while (0)
#define ANDROIDLOGE(...)                                            \
  do {                                                              \
    __android_log_print(ANDROID_LOG_ERROR, ANDROID_LOG_TAG, "%s",   \
                        __VA_ARGS__);                               \
    fprintf(stderr, PADDLE_RED "%s\n" PADDLE_CONON, __VA_ARGS__);   \
    fflush(stderr);                                                 \
  } while (0)
#define ANDROIDLOGV(...)                                              \
  do {                                                                \
    __android_log_print(ANDROID_LOG_VERBOSE, ANDROID_LOG_TAG, "%s",   \
                        __VA_ARGS__);                                 \
    fprintf(stderr, PADDLE_GREEN "%s\n" PADDLE_CONON, __VA_ARGS__);   \
    fflush(stderr);                                                   \
  } while (0)
#else
// Non-Android debug builds: the Android logging macros expand to nothing.
#define ANDROIDLOGI(...)
#define ANDROIDLOGW(...)
#define ANDROIDLOGD(...)
#define ANDROIDLOGE(...)
#define ANDROIDLOGV(...)
#endif
// Log severities. The numeric order matters: the LOG/DLOG macros emit a
// message only when its level is <= log_level, and the enumerator value is
// also used as an index into the `logs` name table.
enum LogLevel {
kLOG_ERROR,
kLOG_WARNING,
kLOG_INFO,
kLOG_VERBOSE,
kLOG_DEBUG,
kLOG_DEBUG1,
kLOG_DEBUG2,
kLOG_DEBUG3,
kLOG_DEBUG4,
kNO_LOG,
};
// Current verbosity threshold: messages whose level is greater than this value
// are skipped by the LOG/DLOG/LOGF/DLOGF macros. Declared static, so every
// translation unit including this header has its own copy.
static LogLevel log_level = kLOG_DEBUG4;
// Printable name for each LogLevel, indexed by the enumerator value.
static std::vector<std::string> logs{"ERROR ", "WARNING", "INFO ", "VERBOSE",
"DEBUG ", "DEBUG1 ", "DEBUG2 ", "DEBUG3 ",
"DEBUG4 ", "NO "};
struct ToLog;
struct Print;

// Accumulates one log message in an in-memory stream. Values are appended with
// operator<<; the finished message is emitted by print(), which only ToLog
// (a friend) may call.
struct Print {
  friend struct ToLog;

  // Appends `value` to the pending message and returns this printer so that
  // insertions can be chained.
  template <typename T>
  Print &operator<<(T const &value) {
    buffer_ << value;
    return *this;
  }

 private:
  // Emits the accumulated message.
  // On Android the message goes through the ANDROIDLOG* macro matching `level`
  // (every level other than error/info/verbose/warning uses the debug macro).
  // Elsewhere error/warning/info/verbose go to std::cerr and all remaining
  // levels go to std::cout, each followed by std::endl.
  void print(LogLevel level) {
#ifdef ANDROID
    const std::string message = buffer_.str();
    switch (level) {
      case kLOG_ERROR:
        ANDROIDLOGE(message.c_str());
        break;
      case kLOG_INFO:
        ANDROIDLOGI(message.c_str());
        break;
      case kLOG_VERBOSE:
        ANDROIDLOGV(message.c_str());
        break;
      case kLOG_WARNING:
        ANDROIDLOGW(message.c_str());
        break;
      default:
        ANDROIDLOGD(message.c_str());
        break;
    }
#else
    switch (level) {
      case kLOG_ERROR:
      case kLOG_WARNING:
      case kLOG_INFO:
      case kLOG_VERBOSE:
        std::cerr << buffer_.str() << std::endl;
        break;
      default:
        std::cout << buffer_.str() << std::endl;
        break;
    }
#endif
  }

  std::ostringstream buffer_;
};
// One log statement. The constructor writes the prefix (level name, caller
// supplied info, a colon and padding); operator<< appends the payload; the
// destructor hands the finished message to Print::print, so the message is
// emitted when the temporary ToLog goes out of scope.
struct ToLog {
  explicit ToLog(LogLevel level = kLOG_DEBUG, const std::string &info = "")
      : level_(level) {
    // Levels above kLOG_DEBUG get four spaces of padding per step above it;
    // every other level gets a single space.
    unsigned padding = 1;
    if (level > kLOG_DEBUG) {
      padding = (unsigned)((level - kLOG_DEBUG) * 4);
    }
    printer_ << logs[level];
    printer_ << " ";
    printer_ << info;
    printer_ << ":";
    printer_ << std::string(padding, ' ');
  }

  // Appends `value` to the message; returns this object for chaining.
  template <typename T>
  ToLog &operator<<(T const &value) {
    printer_ << value;
    return *this;
  }

  // Emits the accumulated message at the level given to the constructor.
  ~ToLog() { printer_.print(level_); }

 private:
  LogLevel level_;
  Print printer_;
};
// LOG(level) << a << b;
// Streams a message at `level`. Nothing is evaluated or printed when `level`
// is greater than paddle_mobile::log_level. Otherwise a temporary ToLog is
// created whose prefix contains the basename of __FILE__ (text after the last
// '/') and __LINE__; the message is emitted when that temporary is destroyed
// at the end of the full expression. The if/else shape lets the macro be
// followed directly by `<< ...`.
#define LOG(level) \
if (level > paddle_mobile::log_level) { \
/* NOLINTNEXTLINE */ \
} else \
paddle_mobile::ToLog( \
level, static_cast<const std::stringstream &>( \
std::stringstream() \
<< "[file: " \
<< (strrchr(__FILE__, '/') ? (strrchr(__FILE__, '/') + 1) \
: __FILE__) \
<< "] [line: " << __LINE__ << "] ") \
.str())
// DLOG << a << b;
// Same as LOG(paddle_mobile::kLOG_DEBUG).
#define DLOG \
if (paddle_mobile::kLOG_DEBUG > paddle_mobile::log_level) { \
/* NOLINTNEXTLINE */ \
} else \
paddle_mobile::ToLog( \
paddle_mobile::kLOG_DEBUG, \
static_cast<const std::stringstream &>( \
std::stringstream() \
<< "[file: " \
<< (strrchr(__FILE__, '/') ? (strrchr(__FILE__, '/') + 1) \
: __FILE__) \
<< "] [line: " << __LINE__ << "] ") \
.str())
// LOGF(level, format, ...): printf-style logging straight to stdout, skipped
// when `level` is greater than paddle_mobile::log_level. No prefix or newline
// is added.
#define LOGF(level, format, ...) \
if (level > paddle_mobile::log_level) { \
/* NOLINTNEXTLINE */ \
} else \
printf(format, ##__VA_ARGS__)
// DLOGF(format, ...): LOGF at level kLOG_DEBUG.
#define DLOGF(format, ...) \
if (paddle_mobile::kLOG_DEBUG > paddle_mobile::log_level) { \
/* NOLINTNEXTLINE */ \
} else \
printf(format, ##__VA_ARGS__)
#else
// Builds without PADDLE_MOBILE_DEBUG: the Android logging macros expand to
// nothing.
#define ANDROIDLOGI(...)
#define ANDROIDLOGW(...)
#define ANDROIDLOGD(...)
#define ANDROIDLOGE(...)
#define ANDROIDLOGV(...)
// Log severities. Declared in non-debug builds as well so that code written
// as LOG(kLOG_xxx) still compiles; the enumerators mirror the debug-build
// definition.
enum LogLevel {
kLOG_ERROR,
kLOG_WARNING,
kLOG_INFO,
kLOG_VERBOSE,
kLOG_DEBUG,
kLOG_DEBUG1,
kLOG_DEBUG2,
kLOG_DEBUG3,
kLOG_DEBUG4,
kNO_LOG
};
struct ToLog;

// No-op printer used when logging is compiled out: every inserted value is
// discarded and the printer itself is returned so chains still compile.
struct Print {
  friend struct ToLog;

  template <typename T>
  Print &operator<<(const T &) {
    Print &self = *this;
    return self;
  }
};
struct ToLog {
explicit ToLog(LogLevel level) {}
template <typename T>
ToLog &operator<<(T const &value) {
return *this;
}
};
// Non-debug variants of the logging macros. LOG and DLOG keep the same
// `if {} else <stream expression>` shape as the debug versions so that
// `LOG(x) << ...;` still parses, but the condition is the constant `true`, so
// the else branch holding the stream expression is never executed.
#define LOG(level) \
if (true) { \
/* NOLINTNEXTLINE */ \
} else \
paddle_mobile::ToLog(level)
#define DLOG \
if (true) { \
/* NOLINTNEXTLINE */ \
} else \
paddle_mobile::ToLog(paddle_mobile::kLOG_DEBUG)
// The printf-style macros expand to nothing.
#define LOGF(level, format, ...)
#define DLOGF(format, ...)
#endif
// Streams a vector into a Print as "[ e0 e1 ... ]". Each element is followed
// by a single space, and a newline is inserted after every tenth element.
// Returns `printer` so insertions can be chained.
template <typename T>
Print &operator<<(Print &printer, const std::vector<T> &v) {
  printer << "[ ";
  // Use the container's own unsigned size type for the index: comparing an
  // int against v.size() mixes signed and unsigned operands.
  typedef typename std::vector<T>::size_type index_t;
  for (index_t i = 0; i < v.size(); ++i) {
    const auto &value = v[i];
    printer << value << " ";
    if (i % 10 == 9) {
      printer << "\n";
    }
  }
  printer << " ]";
  return printer;
}
} // namespace paddle_mobile
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include <condition_variable>
#include <functional>
#include <future>
#include <memory>
#include <mutex>
#include <queue>
#include <stdexcept>
#include <thread>
#include <vector>
namespace paddle_mobile {
// A fixed-size pool of worker threads. Tasks are pushed onto a FIFO queue by
// enqueue() and executed by whichever worker wakes up first. The destructor
// lets the workers drain the queue and then joins them.
class ThreadPool {
 public:
  // Returns the process-wide pool (3 workers), created on first call.
  static ThreadPool& getThreadPool();
  // Returns the index of the calling thread inside the process-wide pool, or
  // -1 when the caller is not one of its workers.
  static int getThreadPoolThreadId();

  // Starts the given number of worker threads.
  explicit ThreadPool(size_t);

  // Queues f(args...) for execution and returns a future for its result.
  template <class F, class... Args>
  auto enqueue(F&& f, Args&&... args)
      -> std::future<typename std::result_of<F(Args...)>::type>;

  // Signals the workers to stop once the queue is empty and joins them.
  ~ThreadPool();

  // Returns the index of the worker whose thread id equals `id`, or -1 when
  // no worker matches.
  int getTid(const std::thread::id& id) {
    // size_t index: workers.size() is unsigned, so an int index would give a
    // signed/unsigned comparison.
    for (size_t i = 0; i < workers.size(); i++) {
      if (workers[i].get_id() == id) {
        return static_cast<int>(i);
      }
    }
    return -1;
  }

 private:
  // need to keep track of threads so we can join them
  std::vector<std::thread> workers;
  // the task queue
  std::queue<std::function<void()>> tasks;
  // synchronization: queue_mutex guards `tasks` and `stop`; `condition` is
  // signalled when a task is queued or the pool is stopping
  std::mutex queue_mutex;
  std::condition_variable condition;
  bool stop;
};

// the constructor just launches some amount of workers
inline ThreadPool::ThreadPool(size_t threads) : stop(false) {
  for (size_t i = 0; i < threads; ++i)
    workers.emplace_back([this] {
      for (;;) {
        std::function<void()> task;
        {
          std::unique_lock<std::mutex> lock(this->queue_mutex);
          // Sleep until there is work to do or the pool is shutting down.
          this->condition.wait(
              lock, [this] { return this->stop || !this->tasks.empty(); });
          // Exit only once the queue has been drained.
          if (this->stop && this->tasks.empty()) return;
          task = std::move(this->tasks.front());
          this->tasks.pop();
        }
        // Run the task outside the lock so other workers can dequeue.
        task();
      }
    });
}

// add new work item to the pool
template <class F, class... Args>
auto ThreadPool::enqueue(F&& f, Args&&... args)
    -> std::future<typename std::result_of<F(Args...)>::type> {
  using return_type = typename std::result_of<F(Args...)>::type;
  // The packaged_task is held through a shared_ptr so that the copyable
  // std::function stored in the queue can own it.
  auto task = std::make_shared<std::packaged_task<return_type()>>(
      std::bind(std::forward<F>(f), std::forward<Args>(args)...));
  std::future<return_type> res = task->get_future();
  {
    std::unique_lock<std::mutex> lock(queue_mutex);
    // NOTE: enqueueing after the pool has been stopped is not rejected here.
    tasks.emplace([task]() { (*task)(); });
  }
  condition.notify_one();
  return res;
}

// the destructor joins all threads
inline ThreadPool::~ThreadPool() {
  {
    std::unique_lock<std::mutex> lock(queue_mutex);
    stop = true;
  }
  condition.notify_all();
  for (std::thread& worker : workers) worker.join();
}

// Defined in a header, so these must be `inline`: without it every translation
// unit that includes the header emits its own definition and linking fails
// with duplicate symbols.
inline ThreadPool& ThreadPool::getThreadPool() {
  static ThreadPool threadPool(3);
  return threadPool;
}

inline int ThreadPool::getThreadPoolThreadId() {
  return getThreadPool().getTid(std::this_thread::get_id());
}
} // namespace paddle_mobile
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include <functional>
#include <string>
#include <vector>
namespace paddle_mobile {
// Integer identifiers for the types known to the type_id machinery. Each
// enumerator is bound to a concrete C++ type by a REGISTER_TYPE_ID line later
// in this header. The enumerators are laid out in numbered groups; the
// comments below name the types registered for each group.
typedef enum {
// 0..: scalar types and std::string
_void = 0,
_float,
_int,
_uint16_t,
_double,
_int64_t,
_size_t,
_int16_t,
_int8_t,
_uint8_t,
_bool,
_string,
// 100..: std::vector of scalars / strings
_floats = 100,
_ints,
_int64_ts,
_size_ts,
_bools,
_strings,
// 200..: const-qualified scalars
_const_float = 200,
_const_int,
// 300..: framework objects and vectors of them
_block = 300,
_tensor,
_lod_tensor,
_blocks,
_tensors,
_lod_tensors,
// 400..: pointers to framework objects and vectors of such pointers
_p_block = 400,
_p_tensor,
_p_lod_tensor,
_p_blocks,
_p_tensors,
_p_lod_tensors,
// 500..: vector of Scope pointers, SelectedRows
_scopes = 500,
_selected_rows,
// 600..: framework::Dim<0> .. framework::Dim<9>
_dim0 = 600,
_dim1,
_dim2,
_dim3,
_dim4,
_dim5,
_dim6,
_dim7,
_dim8,
_dim9,
#ifdef PADDLE_MOBILE_CL
// Only present in PADDLE_MOBILE_CL builds; takes the value following _dim9.
_cl_image,
#endif
} kTypeId_t;
// Primary template mapping a C++ type T to its kTypeId_t. Both members are
// declared here without a definition; the REGISTER_TYPE_ID specializations
// below define them for each supported type.
template <typename T>
struct TypeIdWrapper {
inline std::string name();
inline kTypeId_t hash_code();
};
template <typename T>
struct type_id {
const kTypeId_t hash_code() const { return TypeIdWrapper<T>().hash_code(); }
const std::string name() const { return TypeIdWrapper<T>().name(); }
template <typename OtherType>
bool operator==(const type_id<OtherType> &operand) const {
return this->hash_code() == operand.hash_code();
}
};
// Generates the two mixed comparison overloads for operator `oprand`:
// (kTypeId_t, type_id<T>) and (type_id<T>, kTypeId_t). Both compare the raw
// enumerator against type_id<T>::hash_code(). The second overload evaluates
// the operands in swapped order, which is equivalent for the symmetric
// operators instantiated below (== and !=).
#define OVERIDE_TYPEID_OPERATOR(oprand) \
template <typename T> \
inline bool operator oprand(const kTypeId_t &t0, const type_id<T> &t1) { \
return t0 oprand t1.hash_code(); \
} \
template <typename T> \
inline bool operator oprand(const type_id<T> &t0, const kTypeId_t &t1) { \
return t1 oprand t0.hash_code(); \
}
OVERIDE_TYPEID_OPERATOR(==)
OVERIDE_TYPEID_OPERATOR(!=)
// Forward declarations of the framework types that are registered below, so
// this header does not need to include their definitions.
namespace framework {
class BlockDesc;
class Tensor;
class LoDTensor;
class SelectedRows;
class Scope;
#ifdef PADDLE_MOBILE_CL
class CLImage;
#endif
template <int>
struct Dim;
} // namespace framework
// Binds the C++ type `Type` to the enumerator kTypeId_t::TypeName by
// specializing TypeIdWrapper<Type>: name() returns the enumerator spelled as a
// string and hash_code() returns the enumerator itself.
// The macro expands to a complete struct definition including its closing
// semicolon, so invocations must NOT be followed by another ';'.
#define REGISTER_TYPE_ID(Type, TypeName)                         \
  template <>                                                    \
  struct TypeIdWrapper<Type> {                                   \
    inline std::string name() { return std::string(#TypeName); } \
    inline kTypeId_t hash_code() { return kTypeId_t::TypeName; } \
  };

// Scalars and std::string.
REGISTER_TYPE_ID(void, _void)
REGISTER_TYPE_ID(float, _float)
REGISTER_TYPE_ID(int, _int)
REGISTER_TYPE_ID(uint16_t, _uint16_t)
REGISTER_TYPE_ID(double, _double)
REGISTER_TYPE_ID(int64_t, _int64_t)
REGISTER_TYPE_ID(size_t, _size_t)
REGISTER_TYPE_ID(int16_t, _int16_t)
REGISTER_TYPE_ID(int8_t, _int8_t)
REGISTER_TYPE_ID(uint8_t, _uint8_t)
REGISTER_TYPE_ID(bool, _bool)
REGISTER_TYPE_ID(std::string, _string)
// Vectors of scalars / strings.
REGISTER_TYPE_ID(std::vector<float>, _floats)
REGISTER_TYPE_ID(std::vector<int>, _ints)
REGISTER_TYPE_ID(std::vector<int64_t>, _int64_ts)
REGISTER_TYPE_ID(std::vector<size_t>, _size_ts)
REGISTER_TYPE_ID(std::vector<bool>, _bools)
REGISTER_TYPE_ID(std::vector<std::string>, _strings)
// Const-qualified scalars.
REGISTER_TYPE_ID(float const, _const_float)
REGISTER_TYPE_ID(int const, _const_int)
// Framework objects, by value and by pointer.
REGISTER_TYPE_ID(framework::BlockDesc, _block)
REGISTER_TYPE_ID(framework::Tensor, _tensor)
REGISTER_TYPE_ID(framework::LoDTensor, _lod_tensor)
REGISTER_TYPE_ID(std::vector<framework::BlockDesc>, _blocks)
REGISTER_TYPE_ID(std::vector<framework::Tensor>, _tensors)
REGISTER_TYPE_ID(std::vector<framework::LoDTensor>, _lod_tensors)
REGISTER_TYPE_ID(framework::BlockDesc *, _p_block)
REGISTER_TYPE_ID(framework::Tensor *, _p_tensor)
REGISTER_TYPE_ID(framework::LoDTensor *, _p_lod_tensor)
REGISTER_TYPE_ID(std::vector<framework::BlockDesc *>, _p_blocks)
REGISTER_TYPE_ID(std::vector<framework::Tensor *>, _p_tensors)
REGISTER_TYPE_ID(std::vector<framework::LoDTensor *>, _p_lod_tensors)
REGISTER_TYPE_ID(std::vector<framework::Scope *>, _scopes)
REGISTER_TYPE_ID(framework::SelectedRows, _selected_rows)
// Dimension types.
REGISTER_TYPE_ID(framework::Dim<0>, _dim0)
REGISTER_TYPE_ID(framework::Dim<1>, _dim1)
REGISTER_TYPE_ID(framework::Dim<2>, _dim2)
REGISTER_TYPE_ID(framework::Dim<3>, _dim3)
REGISTER_TYPE_ID(framework::Dim<4>, _dim4)
REGISTER_TYPE_ID(framework::Dim<5>, _dim5)
REGISTER_TYPE_ID(framework::Dim<6>, _dim6)
REGISTER_TYPE_ID(framework::Dim<7>, _dim7)
REGISTER_TYPE_ID(framework::Dim<8>, _dim8)
REGISTER_TYPE_ID(framework::Dim<9>, _dim9)
#ifdef PADDLE_MOBILE_CL
REGISTER_TYPE_ID(framework::CLImage, _cl_image)
#endif
} // namespace paddle_mobile
namespace std {
// Makes kTypeId_t usable as a key in unordered containers: the enumerator is
// converted to int and hashed with std::hash<int>.
template <>
struct hash<paddle_mobile::kTypeId_t> {
  size_t operator()(const paddle_mobile::kTypeId_t &t) const {
    const int raw_value = static_cast<int>(t);
    std::hash<int> int_hasher;
    return int_hasher(raw_value);
  }
};
}  // namespace std
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "common/types.h"
#include <vector>
namespace paddle_mobile {
// Operator type-name strings. Each G_OP_TYPE_* pointer holds the textual name
// of one operator; the same pointers are used as the keys of
// op_input_output_key defined below in this file.
// The pointers themselves are non-const globals with external linkage.
const char *G_OP_TYPE_CONV = "conv2d";
const char *G_OP_TYPE_BATCHNORM = "batch_norm";
const char *G_OP_TYPE_INSTANCENORM = "instance_norm";
const char *G_OP_TYPE_BOX_CODER = "box_coder";
const char *G_OP_TYPE_CONCAT = "concat";
const char *G_OP_TYPE_ELEMENTWISE_ADD = "elementwise_add";
const char *G_OP_TYPE_ELEMENTWISE_SUB = "elementwise_sub";
const char *G_OP_TYPE_ELEMENTWISE_MUL = "elementwise_mul";
const char *G_OP_TYPE_FILL_CONSTANT = "fill_constant";
const char *G_OP_TYPE_FUSION_CONV_ADD_RELU = "fusion_conv_add_relu";
const char *G_OP_TYPE_FUSION_CONV_ADD_PRELU = "fusion_conv_add_prelu";
const char *G_OP_TYPE_FUSION_CONV_ADD_ADD_PRELU = "fusion_conv_add_add_prelu";
const char *G_OP_TYPE_FUSION_CONV_ADD_BN_RELU = "fusion_conv_add_bn_relu";
const char *G_OP_TYPE_FUSION_CONV_BN_ADD_RELU = "fusion_conv_bn_add_relu";
const char *G_OP_TYPE_FUSION_DWCONV_BN_RELU = "fusion_dwconv_bn_relu";
const char *G_OP_TYPE_FUSION_CONV_RELU = "fusion_conv_relu";
const char *G_OP_TYPE_FUSION_CONV_BN_RELU = "fusion_conv_bn_relu";
const char *G_OP_TYPE_FC = "fusion_fc";
const char *G_OP_TYPE_FUSION_CONV_ADD = "fusion_conv_add";
const char *G_OP_TYPE_LRN = "lrn";
const char *G_OP_TYPE_MUL = "mul";
const char *G_OP_TYPE_MULTICLASS_NMS = "multiclass_nms";
const char *G_OP_TYPE_NORM = "norm";
const char *G_OP_TYPE_POLYGON_BOX_TRANSFORM = "polygon_box_transform";
const char *G_OP_TYPE_POOL2D = "pool2d";
const char *G_OP_TYPE_PRIOR_BOX = "prior_box";
const char *G_OP_TYPE_DENSITY_PRIOR_BOX = "density_prior_box";
const char *G_OP_TYPE_RELU = "relu";
const char *G_OP_TYPE_RELU6 = "relu6";
const char *G_OP_TYPE_LEAKY_RELU = "leaky_relu";
const char *G_OP_TYPE_RESHAPE = "reshape";
const char *G_OP_TYPE_RESHAPE2 = "reshape2";
const char *G_OP_TYPE_SCALE = "scale";
const char *G_OP_TYPE_SIGMOID = "sigmoid";
const char *G_OP_TYPE_SOFTMAX = "softmax";
const char *G_OP_TYPE_TRANSPOSE = "transpose";
const char *G_OP_TYPE_TRANSPOSE2 = "transpose2";
const char *G_OP_TYPE_SPLIT = "split";
const char *G_OP_TYPE_FEED = "feed";
const char *G_OP_TYPE_FETCH = "fetch";
const char *G_OP_TYPE_DEPTHWISE_CONV = "depthwise_conv2d";
const char *G_OP_TYPE_IM2SEQUENCE = "im2sequence";
const char *G_OP_TYPE_DROPOUT = "dropout";
const char *G_OP_TYPE_FUSION_CONV_ADD_BN = "fusion_conv_add_bn";
const char *G_OP_TYPE_FUSION_POOL_BN = "fusion_pool_bn";
const char *G_OP_TYPE_FUSION_ELEMENTWISE_ADD_RELU =
"fusion_elementwise_add_relu";
const char *G_OP_TYPE_FUSION_FC_RELU = "fusion_fc_relu";
const char *G_OP_TYPE_REGION = "region";
const char *G_OP_TYPE_FUSION_CONV_BN = "fusion_conv_bn";
const char *G_OP_TYPE_CONV_TRANSPOSE = "conv2d_transpose";
const char *G_OP_TYPE_PRELU = "prelu";
const char *G_OP_TYPE_LOOKUP_TABLE = "lookup_table";
const char *G_OP_TYPE_GRU = "gru";
const char *G_OP_TYPE_GRU_UNIT = "gru_unit";
const char *G_OP_TYPE_CRF = "crf_decoding";
const char *G_OP_TYPE_BILINEAR_INTERP = "bilinear_interp";
const char *G_OP_TYPE_NEAREST_INTERP = "nearest_interp";
const char *G_OP_TYPE_FLATTEN = "flatten";
const char *G_OP_TYPE_FLATTEN2 = "flatten2";
const char *G_OP_TYPE_SHAPE = "shape";
const char *G_OP_TYPE_SUM = "sum";
const char *G_OP_TYPE_TOP_K = "top_k";
const char *G_OP_TYPE_CAST = "cast";
const char *G_OP_TYPE_LOG = "log";
const char *G_OP_TYPE_LOD_RESET = "lod_reset";
const char *G_OP_TYPE_LESS_THAN = "less_than";
const char *G_OP_TYPE_LOGICAL_AND = "logical_and";
const char *G_OP_TYPE_LOGICAL_OR = "logical_or";
const char *G_OP_TYPE_LOGICAL_NOT = "logical_not";
const char *G_OP_TYPE_LOGICAL_XOR = "logical_xor";
const char *G_OP_TYPE_WRITE_TO_ARRAY = "write_to_array";
const char *G_OP_TYPE_READ_FROM_ARRAY = "read_from_array";
const char *G_OP_TYPE_IS_EMPTY = "is_empty";
const char *G_OP_TYPE_INCREMENT = "increment";
const char *G_OP_TYPE_EXP = "exp";
const char *G_OP_TYPE_QUANTIZE = "quantize";
const char *G_OP_TYPE_DEQUANTIZE = "dequantize";
const char *G_OP_TYPE_FUSION_DEQUANT_BN = "fusion_dequant_bn";
const char *G_OP_TYPE_FUSION_DEQUANT_ADD_BN = "fusion_dequant_add_bn";
const char *G_OP_TYPE_FUSION_DEQUANT_BN_RELU = "fusion_dequant_bn_relu";
const char *G_OP_TYPE_FUSION_DEQUANT_ADD_BN_RELU = "fusion_dequant_add_bn_relu";
const char *G_OP_TYPE_FUSION_DEQUANT_ADD_BN_QUANT =
"fusion_dequant_add_bn_quant";
const char *G_OP_TYPE_FUSION_DEQUANT_ADD_BN_RELU_QUANT =
"fusion_dequant_add_bn_relu_quant";
const char *G_OP_TYPE_TANH = "tanh";
const char *G_OP_TYPE_FUSION_DECONV_RELU = "fusion_deconv_relu";
const char *G_OP_TYPE_FUSION_DECONV_ADD = "fusion_deconv_add";
const char *G_OP_TYPE_FUSION_DECONV_ADD_RELU = "fusion_deconv_add_relu";
const char *G_OP_TYPE_SEQUENCE_EXPAND = "sequence_expand";
const char *G_OP_TYPE_SEQUENCE_POOL = "sequence_pool";
const char *G_OP_TYPE_SEQUENCE_SOFTMAX = "sequence_softmax";
const char *G_OP_TYPE_SLICE = "slice";
const char *G_OP_TYPE_ANCHOR_GENERATOR = "anchor_generator";
const char *G_OP_TYPE_GENERATE_PROPOSALS = "generate_proposals";
const char *G_OP_TYPE_PSROI_POOL = "psroi_pool";
const char *G_OP_TYPE_ROIALIGN_POOL = "roialign_pool";
const char *G_OP_TYPE_ROI_PERSPECTIVE = "roi_perspective_transform";
const char *G_OP_TYPE_PAD2D = "pad2d";
const char *G_OP_TYPE_FUSION_DECONV_ADD_BN_RELU = "fusion_deconv_add_bn_relu";
const char *G_OP_TYPE_FUSION_DECONV_ADD_BN = "fusion_deconv_add_bn";
const char *G_OP_TYPE_FUSION_DECONV_BN_RELU = "fusion_deconv_bn_relu";
const char *G_OP_TYPE_ASSIGN = "assign";
const char *G_OP_TYPE_REDUCE_PROD = "reduce_prod";
const char *G_OP_TYPE_EQUAL = "equal";
const char *G_OP_TYPE_CONDITIONAL_BLOCK = "conditional_block";
const char *G_OP_TYPE_RANGE = "range";
const char *G_OP_TYPE_WHILE = "while";
const char *G_OP_TYPE_BEAM_SEARCH_DECODE = "beam_search_decode";
const char *G_OP_TYPE_FILL_CONSTAN_BATCH_SIZE_LIKE =
"fill_constant_batch_size_like";
const char *G_OP_TYPE_FUSION_INSTANCENORM_RELU = "fusion_instancenorm_relu";
const char *G_OP_TYPE_PIXEL_SHUFFLE = "pixel_shuffle";
const char *G_OP_TYPE_EXPAND = "expand";
const char *G_OP_TYPE_GRID_SAMPLER = "grid_sampler";
// Table keyed by operator type name. Each value is a pair of string lists:
// `first` holds the operator's input key names and `second` its output key
// names, as written in the initializer below.
// NOTE(review): presumably these are the variable-map keys used to look up an
// operator's inputs/outputs in the model description — confirm against the
// callers of this table.
std::unordered_map<
std::string, std::pair<std::vector<std::string>, std::vector<std::string>>>
op_input_output_key = {
{G_OP_TYPE_CONV, {{"Input"}, {"Output"}}},
{G_OP_TYPE_FUSION_DWCONV_BN_RELU, {{"Input"}, {"Out"}}},
{G_OP_TYPE_FUSION_CONV_RELU, {{"Input"}, {"Out"}}},
{G_OP_TYPE_FUSION_CONV_BN_RELU, {{"Input"}, {"Out"}}},
{G_OP_TYPE_PRELU, {{"X", "Alpha"}, {"Out"}}},
{G_OP_TYPE_FUSION_CONV_ADD, {{"Input"}, {"Out"}}},
{G_OP_TYPE_RELU, {{"X"}, {"Out"}}},
{G_OP_TYPE_RELU6, {{"X"}, {"Out"}}},
{G_OP_TYPE_LEAKY_RELU, {{"X"}, {"Out"}}},
{G_OP_TYPE_SCALE, {{"X"}, {"Out"}}},
{G_OP_TYPE_SOFTMAX, {{"X"}, {"Out"}}},
{G_OP_TYPE_SIGMOID, {{"X"}, {"Out"}}},
{G_OP_TYPE_MUL, {{"X"}, {"Out"}}},
{G_OP_TYPE_ELEMENTWISE_ADD, {{"X", "Y"}, {"Out"}}},
{G_OP_TYPE_ELEMENTWISE_SUB, {{"X", "Y"}, {"Out"}}},
{G_OP_TYPE_ELEMENTWISE_MUL, {{"X", "Y"}, {"Out"}}},
{G_OP_TYPE_POOL2D, {{"X"}, {"Out"}}},
{G_OP_TYPE_BATCHNORM, {{"X"}, {"Y"}}},
{G_OP_TYPE_INSTANCENORM, {{"X"}, {"Y"}}},
{G_OP_TYPE_FUSION_INSTANCENORM_RELU, {{"X"}, {"Out"}}},
{G_OP_TYPE_LRN, {{"X"}, {"Out"}}},
{G_OP_TYPE_CONCAT, {{"X"}, {"Out"}}},
{G_OP_TYPE_SPLIT, {{"X"}, {"Out"}}},
{G_OP_TYPE_FEED, {{"X"}, {"Out"}}},
{G_OP_TYPE_FETCH, {{"X"}, {"Out"}}},
{G_OP_TYPE_TRANSPOSE, {{"X"}, {"Out"}}},
{G_OP_TYPE_TRANSPOSE2, {{"X"}, {"Out", "XShape"}}},
{G_OP_TYPE_BOX_CODER,
{{"PriorBox", "PriorBoxVar", "TargetBox"}, {"OutputBox"}}},
{G_OP_TYPE_FUSION_CONV_ADD_BN_RELU, {{"Input"}, {"Out"}}},
{G_OP_TYPE_FUSION_CONV_BN_ADD_RELU, {{"Input"}, {"Out"}}},
{G_OP_TYPE_PRIOR_BOX, {{"Image", "Input"}, {"Boxes", "Variances"}}},
{G_OP_TYPE_DENSITY_PRIOR_BOX,
{{"Image", "Input"}, {"Boxes", "Variances"}}},
{G_OP_TYPE_MULTICLASS_NMS, {{"BBoxes", "Scores"}, {"Out"}}},
{G_OP_TYPE_POLYGON_BOX_TRANSFORM, {{"Input"}, {"Output"}}},
{G_OP_TYPE_FC, {{"X", "Y", "Z"}, {"Out"}}},
{G_OP_TYPE_RESHAPE, {{"X"}, {"Out"}}},
{G_OP_TYPE_RESHAPE2, {{"X"}, {"Out", "XShape"}}},
{G_OP_TYPE_DEPTHWISE_CONV, {{"Input"}, {"Output"}}},
{G_OP_TYPE_FILL_CONSTANT, {{}, {"Out"}}},
{G_OP_TYPE_FUSION_CONV_ADD_RELU, {{"Input"}, {"Out"}}},
{G_OP_TYPE_FUSION_CONV_ADD_PRELU, {{"Input"}, {"Out"}}},
{G_OP_TYPE_FUSION_CONV_ADD_ADD_PRELU, {{"Input"}, {"Out"}}},
{G_OP_TYPE_IM2SEQUENCE, {{"X"}, {"Out"}}},
{G_OP_TYPE_DROPOUT, {{"X"}, {"Out"}}},
{G_OP_TYPE_EXP, {{"X"}, {"Out"}}},
{G_OP_TYPE_FUSION_CONV_ADD_BN, {{"Input"}, {"Y"}}},
{G_OP_TYPE_FUSION_POOL_BN, {{"X"}, {"Y"}}},
{G_OP_TYPE_FUSION_ELEMENTWISE_ADD_RELU, {{"X", "Y"}, {"Out"}}},
{G_OP_TYPE_FUSION_FC_RELU, {{"X", "Y", "Z"}, {"Out"}}},
{G_OP_TYPE_REGION, {{"X"}, {"Out"}}},
{G_OP_TYPE_FUSION_CONV_BN, {{"Input"}, {"Y"}}},
{G_OP_TYPE_LOOKUP_TABLE, {{"W", "Ids"}, {"Out"}}},
{G_OP_TYPE_GRU,
{{"Input", "H0", "Weight", "Bias"},
{"BatchGate", "BatchResetHiddenPrev", "BatchHidden", "Hidden"}}},
{G_OP_TYPE_GRU_UNIT,
{{"Input", "HiddenPrev", "Weight", "Bias"},
{"Gate", "ResetHiddenPrev", "Hidden"}}},
{G_OP_TYPE_CRF, {{"Emission", "Transition", "Label"}, {"ViterbiPath"}}},
{G_OP_TYPE_BILINEAR_INTERP, {{"OutSize", "X"}, {"Out"}}},
{G_OP_TYPE_NEAREST_INTERP, {{"OutSize", "X"}, {"Out"}}},
{G_OP_TYPE_FLATTEN, {{"X"}, {"Out"}}},
{G_OP_TYPE_FLATTEN2, {{"X"}, {"Out"}}},
{G_OP_TYPE_SHAPE, {{"Input"}, {"Out"}}},
{G_OP_TYPE_CONV_TRANSPOSE, {{"Input"}, {"Output"}}},
{G_OP_TYPE_SUM, {{"X"}, {"Out"}}},
{G_OP_TYPE_TOP_K, {{"X"}, {"Out", "Indices"}}},
{G_OP_TYPE_CAST, {{"X"}, {"Out"}}},
{G_OP_TYPE_QUANTIZE, {{"X"}, {"Out", "OutScale"}}},
{G_OP_TYPE_DEQUANTIZE, {{"X", "Scale"}, {"Out"}}},
{G_OP_TYPE_FUSION_DEQUANT_BN, {{"X", "Scale"}, {"Out"}}},
{G_OP_TYPE_FUSION_DEQUANT_ADD_BN, {{"X", "Scale"}, {"Out"}}},
{G_OP_TYPE_FUSION_DEQUANT_BN_RELU, {{"X", "Scale"}, {"Out"}}},
{G_OP_TYPE_FUSION_DEQUANT_ADD_BN_RELU, {{"X", "Scale"}, {"Out"}}},
{G_OP_TYPE_FUSION_DEQUANT_ADD_BN_RELU_QUANT,
{{"X", "Scale"}, {"Out", "OutScale"}}},
{G_OP_TYPE_FUSION_DEQUANT_ADD_BN_QUANT,
{{"X", "Scale"}, {"Out", "OutScale"}}},
{G_OP_TYPE_TANH, {{"X"}, {"Out"}}},
{G_OP_TYPE_FUSION_DECONV_RELU, {{"Input"}, {"Out"}}},
{G_OP_TYPE_FUSION_DECONV_ADD, {{"Input"}, {"Out"}}},
{G_OP_TYPE_FUSION_DECONV_ADD_RELU, {{"Input"}, {"Out"}}},
{G_OP_TYPE_SEQUENCE_EXPAND, {{"X", "Y"}, {"Out"}}},
{G_OP_TYPE_SEQUENCE_POOL, {{"X"}, {"Out"}}},
{G_OP_TYPE_SEQUENCE_SOFTMAX, {{"X"}, {"Out"}}},
{G_OP_TYPE_NORM, {{"X"}, {"Out", "Norm"}}},
{G_OP_TYPE_LOG, {{"X"}, {"Out"}}},
{G_OP_TYPE_LOD_RESET, {{"X", "Y"}, {"Out"}}},
{G_OP_TYPE_LESS_THAN, {{"X", "Y"}, {"Out"}}},
{G_OP_TYPE_LOGICAL_AND, {{"X", "Y"}, {"Out"}}},
{G_OP_TYPE_LOGICAL_OR, {{"X", "Y"}, {"Out"}}},
{G_OP_TYPE_LOGICAL_XOR, {{"X", "Y"}, {"Out"}}},
{G_OP_TYPE_LOGICAL_NOT, {{"X"}, {"Out"}}},
{G_OP_TYPE_WRITE_TO_ARRAY, {{"X", "I"}, {"Out"}}},
{G_OP_TYPE_READ_FROM_ARRAY, {{"X", "I"}, {"Out"}}},
{G_OP_TYPE_IS_EMPTY, {{"X"}, {"Out"}}},
{G_OP_TYPE_INCREMENT, {{"X"}, {"Out"}}},
{G_OP_TYPE_SLICE, {{"Input"}, {"Out"}}},
{G_OP_TYPE_ANCHOR_GENERATOR, {{"Input"}, {"Anchors", "Variances"}}},
{G_OP_TYPE_GENERATE_PROPOSALS,
{{"Scores", "BboxDeltas", "ImInfo", "Anchors", "Variances"},
{"RpnRois", "RpnRoiProbs"}}},
{G_OP_TYPE_PSROI_POOL, {{"X", "ROIs"}, {"Out"}}},
{G_OP_TYPE_ROIALIGN_POOL, {{"X", "ROIs"}, {"Out"}}},
{G_OP_TYPE_ROI_PERSPECTIVE, {{"X", "ROIs"}, {"Out"}}},
{G_OP_TYPE_FUSION_DECONV_ADD_BN_RELU, {{"Input"}, {"Out"}}},
{G_OP_TYPE_FUSION_DECONV_ADD_BN, {{"Input"}, {"Out"}}},
{G_OP_TYPE_FUSION_DECONV_BN_RELU, {{"Input"}, {"Out"}}},
{G_OP_TYPE_REDUCE_PROD, {{"X"}, {"Out"}}},
{G_OP_TYPE_ASSIGN, {{"X"}, {"Out"}}},
{G_OP_TYPE_EQUAL, {{"X", "Y"}, {"Out"}}},
{G_OP_TYPE_RANGE, {{"Start", "End", "Step"}, {"Out"}}},
{G_OP_TYPE_CONDITIONAL_BLOCK, {{"Input", "Cond"}, {"Out", "Scope"}}},
{G_OP_TYPE_WHILE, {{"Condition", "X"}, {"Out", "StepScopes"}}},
{G_OP_TYPE_BEAM_SEARCH_DECODE,
{{"Ids", "Scores"}, {"SentenceIds", "SentenceScores"}}},
{G_OP_TYPE_FILL_CONSTAN_BATCH_SIZE_LIKE, {{"Input"}, {"Out"}}},
{G_OP_TYPE_PAD2D, {{"X"}, {"Out"}}},
{G_OP_TYPE_PIXEL_SHUFFLE, {{"X"}, {"Out"}}},
{G_OP_TYPE_EXPAND, {{"X"}, {"Out"}}},
{G_OP_TYPE_GRID_SAMPLER, {{"X", "Grid"}, {"Output"}}}};
} // namespace paddle_mobile
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include <map>
#include <string>
#include <unordered_map>
#include <utility>
#include <vector>
namespace paddle_mobile {
// Numeric precision selector.
enum class Precision : int { FP32 = 0, FP16 = 1 };
// Storage type used for FP16 values: a 16-bit integer.
typedef int16_t half;
// Maps a Precision value to its storage type `ptype`. The primary template
// yields void; the specializations give float for FP32 and half for FP16.
template <Precision p>
struct PrecisionTrait {
typedef void ptype;
};
template <>
struct PrecisionTrait<Precision::FP32> {
typedef float ptype;
};
template <>
struct PrecisionTrait<Precision::FP16> {
typedef half ptype;
};
//! Device identifiers; kINVALID (-1) marks an unknown/unsupported device.
enum DeviceTypeEnum {
kINVALID = -1,
kCPU = 0,
kFPGA = 1,
kGPU_MALI = 2,
kGPU_CL = 3
};
// Empty tag type parameterized by a DeviceTypeEnum value, with shorthand
// typedefs for the CPU, FPGA and GPU_CL tags. No typedef is provided here for
// kGPU_MALI.
template <DeviceTypeEnum T>
struct DeviceType {};
typedef DeviceType<kCPU> CPU;
typedef DeviceType<kFPGA> FPGA;
typedef DeviceType<kGPU_CL> GPU_CL;
//! Data type identifiers; PM_INVALID (-1) marks an unknown type.
enum DataType {
PM_INVALID = -1,
PM_HALF = 0,
PM_FLOAT = 1,
PM_DOUBLE = 2,
PM_INT8 = 3,
PM_INT16 = 4,
PM_INT32 = 5,
PM_INT64 = 6,
PM_UINT8 = 7,
PM_UINT16 = 8,
PM_UINT32 = 9,
PM_STRING = 10,
PM_BOOL = 11,
PM_SHAPE = 12,
PM_TENSOR = 13
};
//! Status codes. Note that success is 0xFF, not 0; the error codes are
//! numbered from 0x01 upwards.
enum PMStatus {
PMSuccess = 0xFF, /*!< No errors. */
PMNotInitialized = 0x01, /*!< Data not initialized. */
PMInvalidValue = 0x02, /*!< Incorrect variable value. */
PMMemAllocFailed = 0x03, /*!< Memory allocation error. */
PMUnKownError = 0x04, /*!< Unknown error. */
PMOutOfAuthority = 0x05, /*!< Attempt to modify data not owned by the caller. */
PMOutOfMem = 0x06, /*!< Out-of-memory error. */
PMUnImplError = 0x07, /*!< Not implemented. */
PMWrongDevice = 0x08, /*!< Incorrect device. */
PMException = 0x09 /*!< An exception was thrown. */
};
// Pre/post-processing mode selector; used as the type of
// PaddleMobileConfigInternal::pre_post_type (default NONE_PRE_POST).
// NOTE(review): UINT8_255 presumably denotes uint8 data in the 0..255 range —
// confirm against the code that consumes this value.
enum PrePostType {
NONE_PRE_POST = 0,
UINT8_255 = 1,
};
// Rounding mode selector. The enumerator names describe the intended rounding
// rule; the implementation lives outside this header.
enum RoundType {
ROUND_NEAREST_AWAY_ZERO = 0,
ROUND_NEAREST_TOWARDS_ZERO = 1,
ROUND_NEAREST_TO_EVEN = 2,
};
// Activation function selector. These are unscoped enumerators, so names such
// as LOG and RELU are visible directly in namespace paddle_mobile.
enum ActivationType {
IDENTITY = 0,
RELU = 1,
RELU6 = 2,
PRELU = 3,
LEAKY_RELU = 4,
TANH = 5,
SIGMOID = 6,
LOG = 7,
};
// Pooling mode selector. Unscoped, so MAX, AVG, SUM, FIRST and LAST are
// visible directly in namespace paddle_mobile.
enum PoolingType {
MAX = 0,
AVG = 1,
SUM = 2,
FIRST = 3,
LAST = 4,
};
// Thread-to-core placement policy for big.LITTLE style CPUs.
enum PowerMode {
PERFORMANCE_PRIORITY = 0, // let threads run on big cores if
// thread_num <= big_cores_num,
// otherwise the power mode will be
// set to AUTO and all threads are
// scheduled by system
EFFICIENCY_PRIORITY = 1, // let threads run on little cores if
// thread_num <= little_cores_num,
// otherwise the power mode will be
// set to AUTO and all threads are
// scheduled by system
PERFORMANCE_ONLY = 2, // force threads to run on big cores;
// threads beyond the number of big
// cores are ignored
EFFICIENCY_ONLY = 3, // force threads to run on little cores;
// threads beyond the number of little
// cores are ignored
AUTO = 4, // scheduled by system
};
// Memory optimization setting; used as the type of
// PaddleMobileConfigInternal::memory_optimization_level.
enum MemoryOptimizationLevel {
NoMemoryOptimization = 0,
MemoryOptimizationWithoutFeeds = 1,
FullMemoryOptimization = 2,
};
// Bundle of runtime options with their default values.
struct PaddleMobileConfigInternal {
// Defaults to false. NOTE(review): presumably defers model loading until
// prediction — confirm against the loader.
bool load_when_predict = false;
// Defaults to MemoryOptimizationWithoutFeeds.
MemoryOptimizationLevel memory_optimization_level =
MemoryOptimizationWithoutFeeds;
// Defaults to the empty string.
std::string model_obfuscate_key = "";
// Defaults to NONE_PRE_POST.
PrePostType pre_post_type = NONE_PRE_POST;
};
// ARM core identifiers. The A-series enumerators use the core number as their
// value (A53 = 53, ...); APPLE is 0 and ARM_UNKOWN is -1.
enum ARMArch {
APPLE = 0,
A53 = 53,
A55 = 55,
A57 = 57,
A72 = 72,
A73 = 73,
A75 = 75,
A76 = 76,
ARM_UNKOWN = -1
};
// Declarations of the operator type-name strings. The matching definitions are
// expected in the corresponding source file.
extern const char *G_OP_TYPE_CONV;
extern const char *G_OP_TYPE_BATCHNORM;
extern const char *G_OP_TYPE_INSTANCENORM;
extern const char *G_OP_TYPE_BOX_CODER;
extern const char *G_OP_TYPE_CONCAT;
extern const char *G_OP_TYPE_ELEMENTWISE_ADD;
extern const char *G_OP_TYPE_ELEMENTWISE_SUB;
extern const char *G_OP_TYPE_ELEMENTWISE_MUL;
extern const char *G_OP_TYPE_FUSION_CONV_ADD_RELU;
extern const char *G_OP_TYPE_FUSION_CONV_ADD_PRELU;
extern const char *G_OP_TYPE_FUSION_CONV_ADD_ADD_PRELU;
extern const char *G_OP_TYPE_FC;
extern const char *G_OP_TYPE_FUSION_CONV_ADD;
extern const char *G_OP_TYPE_FUSION_CONV_ADD_BN_RELU;
extern const char *G_OP_TYPE_FUSION_CONV_BN_ADD_RELU;
extern const char *G_OP_TYPE_FUSION_DWCONV_BN_RELU;
extern const char *G_OP_TYPE_FUSION_CONV_BN_RELU;
extern const char *G_OP_TYPE_FUSION_CONV_RELU;
extern const char *G_OP_TYPE_GRU;
extern const char *G_OP_TYPE_GRU_UNIT;
extern const char *G_OP_TYPE_CRF;
extern const char *G_OP_TYPE_BILINEAR_INTERP;
extern const char *G_OP_TYPE_NEAREST_INTERP;
extern const char *G_OP_TYPE_FLATTEN;
extern const char *G_OP_TYPE_FLATTEN2;
extern const char *G_OP_TYPE_SHAPE;
extern const char *G_OP_TYPE_LRN;
extern const char *G_OP_TYPE_MUL;
extern const char *G_OP_TYPE_MULTICLASS_NMS;
extern const char *G_OP_TYPE_NORM;
extern const char *G_OP_TYPE_POOL2D;
extern const char *G_OP_TYPE_PRIOR_BOX;
extern const char *G_OP_TYPE_RELU;
extern const char *G_OP_TYPE_RELU6;
extern const char *G_OP_TYPE_LEAKY_RELU;
extern const char *G_OP_TYPE_RESHAPE;
extern const char *G_OP_TYPE_SCALE;
extern const char *G_OP_TYPE_SIGMOID;
extern const char *G_OP_TYPE_SOFTMAX;
extern const char *G_OP_TYPE_TRANSPOSE;
extern const char *G_OP_TYPE_SPLIT;
extern const char *G_OP_TYPE_FEED;
extern const char *G_OP_TYPE_FETCH;
extern const char *G_OP_TYPE_DEPTHWISE_CONV;
extern const char *G_OP_TYPE_IM2SEQUENCE;
extern const char *G_OP_TYPE_DROPOUT;
extern const char *G_OP_TYPE_FUSION_CONV_ADD_BN;
extern const char *G_OP_TYPE_FUSION_POOL_BN;
extern const char *G_OP_TYPE_FUSION_ELEMENTWISE_ADD_RELU;
extern const char *G_OP_TYPE_FUSION_FC_RELU;
extern const char *G_OP_TYPE_REGION;
extern const char *G_OP_TYPE_FUSION_CONV_BN;
extern const char *G_OP_TYPE_CONV_TRANSPOSE;
extern const char *G_OP_TYPE_PRELU;
extern const char *G_OP_TYPE_SUM;
extern const char *G_OP_TYPE_TOP_K;
extern const char *G_OP_TYPE_CAST;
extern const char *G_OP_TYPE_LOG;
extern const char *G_OP_TYPE_LOD_RESET;
extern const char *G_OP_TYPE_LESS_THAN;
extern const char *G_OP_TYPE_LOGICAL_AND;
extern const char *G_OP_TYPE_LOGICAL_OR;
extern const char *G_OP_TYPE_LOGICAL_NOT;
extern const char *G_OP_TYPE_LOGICAL_XOR;
extern const char *G_OP_TYPE_WRITE_TO_ARRAY;
extern const char *G_OP_TYPE_READ_FROM_ARRAY;
extern const char *G_OP_TYPE_IS_EMPTY;
extern const char *G_OP_TYPE_INCREMENT;
extern const char *G_OP_TYPE_QUANTIZE;
extern const char *G_OP_TYPE_DEQUANTIZE;
extern const char *G_OP_TYPE_FUSION_DEQUANT_BN;
extern const char *G_OP_TYPE_FUSION_DEQUANT_ADD_BN;
extern const char *G_OP_TYPE_FUSION_DEQUANT_BN_RELU;
extern const char *G_OP_TYPE_FUSION_DEQUANT_ADD_BN_RELU;
extern const char *G_OP_TYPE_FUSION_DEQUANT_ADD_BN_QUANT;
extern const char *G_OP_TYPE_FUSION_DEQUANT_ADD_BN_RELU_QUANT;
extern const char *G_OP_TYPE_TANH;
extern const char *G_OP_TYPE_FUSION_DECONV_RELU;
extern const char *G_OP_TYPE_FUSION_DECONV_ADD;
extern const char *G_OP_TYPE_FUSION_DECONV_ADD_RELU;
extern const char *G_OP_TYPE_SEQUENCE_EXPAND;
extern const char *G_OP_TYPE_SEQUENCE_POOL;
extern const char *G_OP_TYPE_SEQUENCE_SOFTMAX;
extern const char *G_OP_TYPE_SLICE;
extern const char *G_OP_TYPE_ANCHOR_GENERATOR;
extern const char *G_OP_TYPE_GENERATE_PROPOSALS;
extern const char *G_OP_TYPE_PSROI_POOL;
extern const char *G_OP_TYPE_ROIALIGN_POOL;
extern const char *G_OP_TYPE_ROI_PERSPECTIVE;
extern const char *G_OP_TYPE_PAD2D;
extern const char *G_OP_TYPE_FUSION_DECONV_ADD_BN_RELU;
extern const char *G_OP_TYPE_FUSION_DECONV_ADD_BN;
extern const char *G_OP_TYPE_FUSION_DECONV_BN_RELU;
extern const char *G_OP_TYPE_FUSION_INSTANCENORM_RELU;
extern const char *G_OP_TYPE_PIXEL_SHUFFLE;
extern const char *G_OP_TYPE_EXPAND;
extern const char *G_OP_TYPE_GRID_SAMPLER;
// Declaration of the table keyed by operator type name; each value is a pair
// of string lists (first: input key names, second: output key names).
extern std::unordered_map<
std::string, std::pair<std::vector<std::string>, std::vector<std::string>>>
op_input_output_key;
// Ordered map from a string key to a list of strings.
// NOTE(review): presumably parameter name -> variable names — confirm at the
// use sites.
typedef std::map<std::string, std::vector<std::string>> VariableNameMap;
} // namespace paddle_mobile
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "common/util.h"
namespace paddle_mobile {
// Loads the entire contents of `filename` into a heap buffer.
//
// Returns a buffer allocated with new[] that holds exactly the file's bytes
// (no terminating NUL is appended); the caller owns it and must delete[] it.
//
// Failures (file cannot be opened, reported size is not positive, short read)
// are reported through PADDLE_MOBILE_ENFORCE. The FILE handle and the buffer
// are released *before* each failure is reported, so nothing leaks if the
// macro does not return. If the macro does return, nullptr is handed back
// rather than continuing with an unusable handle or size.
char *ReadFileToBuff(std::string filename) {
  FILE *file = fopen(filename.c_str(), "rb");
  if (file == nullptr) {
    PADDLE_MOBILE_ENFORCE(file != nullptr, "can't open file: %s ",
                          filename.c_str());
    return nullptr;
  }

  // Measure the file by seeking to its end.
  fseek(file, 0, SEEK_END);
  const int64_t size = ftell(file);
  if (size <= 0) {
    fclose(file);
    PADDLE_MOBILE_ENFORCE(size > 0, "file should not be empty");
    return nullptr;
  }
  rewind(file);

  char *data = new char[size];
  const size_t bytes_read = fread(data, 1, size, file);
  fclose(file);  // the handle is no longer needed whether or not the read worked

  // size is known to be positive here, so the cast to size_t is lossless.
  if (bytes_read != static_cast<size_t>(size)) {
    delete[] data;
    PADDLE_MOBILE_ENFORCE(bytes_read == static_cast<size_t>(size),
                          "read binary file bytes do not match with fseek");
    return nullptr;
  }
  return data;
}
// Returns the size in bytes of `filename`, measured by seeking to its end.
//
// An unopenable file or a non-positive size is reported through
// PADDLE_MOBILE_ENFORCE. The FILE handle is closed before the size check so it
// is not leaked if the macro does not return. If the open check returns,
// 0 is handed back instead of dereferencing a null handle.
int GetFileLength(std::string filename) {
  FILE *file = fopen(filename.c_str(), "rb");
  if (file == nullptr) {
    PADDLE_MOBILE_ENFORCE(file != nullptr, "can't open file: %s ",
                          filename.c_str());
    return 0;
  }
  fseek(file, 0, SEEK_END);
  // ftell() returns long; the narrowing to the declared int return type is
  // made explicit here.
  const int size = static_cast<int>(ftell(file));
  fclose(file);
  PADDLE_MOBILE_ENFORCE(size > 0, "file should not be empty");
  return size;
}
} // namespace paddle_mobile
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include <string>
#include "common/enforce.h"
namespace paddle_mobile {
// Reads the whole file into a buffer allocated with new[]; the caller owns the
// returned pointer and must delete[] it. The buffer is not NUL-terminated.
char *ReadFileToBuff(std::string filename);
// Returns the size of the file in bytes (determined via fseek/ftell).
int GetFileLength(std::string filename);
} // namespace paddle_mobile
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include <cstdlib>
#include <cstring>
#include <memory>
#include <string>
#include <utility>
#include "common/enforce.h"
#include "common/log.h"
#include "common/type_define.h"
namespace paddle_mobile {
// Compile-time association between an integer tag `ID` and a C++ type.
// The associated type is exposed as the nested alias `type_t`.
template <int ID, typename Type>
struct IDToType {
  using type_t = Type;
};
// Recursive helper that deletes a type-erased pointer as the alternative whose
// type id matches `type`. This primary template checks the head type `F` and
// otherwise forwards to the helper for the remaining types `Ts...`.
template <typename F, typename... Ts>
struct VariantHelper {
  inline static void Destroy(kTypeId_t type, void *raw_ptr) {
    if (!(type == type_id<F>())) {
      // Not the head alternative: let the tail of the type list handle it.
      VariantHelper<Ts...>::Destroy(type, raw_ptr);
      return;
    }
    delete reinterpret_cast<F *>(raw_ptr);
  }
};
// Terminal case of the VariantHelper recursion: a single candidate type.
// If `type` does not match `F`, nothing is deleted.
template <typename F>
struct VariantHelper<F> {
  inline static void Destroy(kTypeId_t type, void *raw_ptr) {
    if (!(type == type_id<F>())) {
      return;
    }
    delete reinterpret_cast<F *>(raw_ptr);
  }
};
// Deleter functor for the type-erased storage of a Variant<Ts...>.
// It remembers the type id of the stored value and, when invoked, asks
// VariantHelper<Ts...> to delete the pointer as that concrete type.
template <typename... Ts>
struct VariantDeleter {
  // Defaults to the id of `void`, which Variant uses as its "invalid" marker.
  kTypeId_t type_ = type_id<void>().hash_code();

  explicit VariantDeleter(kTypeId_t type) : type_(type) {}

  void operator()(void *raw_ptr) {
    VariantHelper<Ts...>::Destroy(type_, raw_ptr);
  }
};
// A tagged container holding at most one value of one of the types `Ts...`.
//
// The value lives behind a std::shared_ptr<void> whose deleter knows the
// concrete type, so copies of a Variant share the same stored object.
// Get<T>() performs no type check: the caller must request the type that was
// last Set, and must not call it on an empty Variant.
template <typename... Ts>
struct Variant {
  // Starts empty, tagged with the id of `void`.
  Variant() : type_(invalid_type()) {}

  // Copies share ownership of the stored value with the source.
  Variant(const Variant &variant)
      : type_(variant.type_), data_(variant.data_) {}

  virtual ~Variant() { data_.reset(); }

  // Replaces the content with a newly constructed T(args...).
  template <typename T, typename... Args>
  void Set(Args &&... args) {
    T *fresh = new T(std::forward<Args>(args)...);
    type_ = type_id<T>().hash_code();
    data_.reset(fresh, VariantDeleter<Ts...>(type_));
  }

  // Returns the stored object viewed as T (unchecked).
  template <typename T>
  T &Get() const {
    return *static_cast<T *>(data_.get());
  }

  // Type id recorded by the last Set (or the `void` id if never set).
  kTypeId_t TypeId() const { return type_; }

 private:
  static inline kTypeId_t invalid_type() { return type_id<void>().hash_code(); }
  using helper = VariantHelper<Ts...>;
  kTypeId_t type_ = type_id<void>().hash_code();
  std::shared_ptr<void> data_;
};
// Minimal tag type that exposes its template argument as `type_t`.
template <typename T>
struct Vistor {
  using type_t = T;
};
} // namespace paddle_mobile
/* Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#ifndef alignment_h
#define alignment_h
#include <stdio.h>
#include "llapi/zynqmp_api.h"
namespace paddle_mobile {
namespace zynqmp {
// Returns align_to_x(wc, IMAGE_ALIGNMENT).
// NOTE(review): presumably `wc` rounded up to a multiple of IMAGE_ALIGNMENT —
// confirm against align_to_x in llapi/zynqmp_api.h.
inline int align_image(int wc) {
  const int aligned = align_to_x(wc, IMAGE_ALIGNMENT);
  return aligned;
}
} // namespace zynqmp
} // namespace paddle_mobile
#endif /* alignment_h */
/* Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#ifndef Context_hpp
#define Context_hpp
#include <stdio.h>
#include "pe.hpp"
#include "pes/conv_pe.hpp"
#include "pes/depthwise_conv_pe.hpp"
#include "pes/fully_connected_pe.hpp"
#include "pes/input_pe.hpp"
#include "pes/output_pe.hpp"
#include "pes/pooling_pe.hpp"
#include "pes/softmax_pe.hpp"
namespace paddle_mobile {
namespace zynqmp {
// Owns a single, lazily created processing element (PE).
//
// The first call to pe<Ptype>() allocates a Ptype; every later call returns
// the same object cast to whatever Ptype is requested, so callers must always
// ask for the type that was created first.
// No copy operations are declared, so a copied Context would hold the same raw
// pointer and both destructors would delete it — do not copy.
class Context {
 public:
  ~Context() {
    // delete on a null pointer is a no-op, so no explicit check is needed.
    delete pe_;
  }

  // Returns the owned PE as Ptype, creating it on first use.
  template <typename Ptype>
  Ptype& pe() {
    if (!pe_) {
      pe_ = new Ptype();
    }
    return static_cast<Ptype&>(*pe_);
  }

 private:
  PE* pe_ = nullptr;
};
} // namespace zynqmp
} // namespace paddle_mobile
#endif /* Context_hpp */
/* Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "dl_engine.hpp"
/* Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include <stdio.h>
namespace paddle_mobile {
namespace zynqmp {
// Process-wide singleton. The constructor is private and only declared here
// (its definition is not in this header), so the sole way to obtain the object
// is get_instance().
class DLEngine {
 public:
  // Returns the single instance, constructed on first call as a
  // function-local static.
  static DLEngine& get_instance() {
    static DLEngine instance;
    return instance;
  }

 private:
  DLEngine();
};
} // namespace zynqmp
} // namespace paddle_mobile
此差异已折叠。
/* Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include <vector>
#include "fpga/KD/alignment.h"
namespace paddle_mobile {
namespace zynqmp {
// Tags for the supported dimension orderings. Each enumerator shares its name
// with a Layout subclass declared later in this header; the letters correspond
// to the num / channel / height / width indices those subclasses report.
enum LayoutType {
N,
NC,
NCHW,
NHWC,
NHW,
};
// Abstract description of how a tensor's dimensions are ordered.
//
// Index accessors return the position of a dimension inside the `dims`
// vector, or -1 when the layout has no such dimension (the default for
// channel/height/width). Subclasses must provide numIndex() and the two
// element-count functions.
class Layout {
 public:
  // Virtual destructor so a concrete layout can be destroyed safely through a
  // Layout pointer.
  virtual ~Layout() = default;

  virtual int numIndex() = 0;
  virtual int channelIndex() { return -1; }
  virtual int heightIndex() { return -1; }
  virtual int widthIndex() { return -1; }

  // Element count including any alignment padding the layout applies.
  virtual int alignedElementCount(const std::vector<int>& dims) = 0;
  // Plain product of the dimensions, without padding.
  virtual int elementCount(const std::vector<int>& dims) = 0;
};
struct NCHW : Layout {
int numIndex() { return 0; }
int channelIndex() { return 1; }
int heightIndex() { return 2; }
int widthIndex() { return 3; }
int alignedElementCount(const std::vector<int>& dims) {
return dims[0] * dims[2] * align_image(dims[1] * dims[3]);
}
virtual int elementCount(const std::vector<int>& dims) {
return dims[0] * dims[1] * dims[2] * dims[3];
}
};
struct NHWC : Layout {
int numIndex() { return 0; }
int heightIndex() { return 1; }
int widthIndex() { return 2; }
int channelIndex() { return 3; }
int alignedElementCount(const std::vector<int>& dims) {
return dims[0] * dims[1] * align_image(dims[2] * dims[3]);
}
virtual int elementCount(const std::vector<int>& dims) {
return dims[0] * dims[1] * dims[2] * dims[3];
}
};
struct NC : Layout {
int numIndex() { return 0; }
int channelIndex() { return 1; }
int alignedElementCount(const std::vector<int>& dims) {
return dims[0] * dims[1];
}
virtual int elementCount(const std::vector<int>& dims) {
return dims[0] * dims[1];
}
};
struct N : Layout {
int numIndex() { return 0; }
int alignedElementCount(const std::vector<int>& dims) { return dims[0]; }
virtual int elementCount(const std::vector<int>& dims) { return dims[0]; }
};
struct NHW : Layout {
int numIndex() { return 0; }
int heightIndex() { return 1; }
int widthIndex() { return 2; }
int alignedElementCount(const std::vector<int>& dims) {
// TODO(chonwhite) align it;
return dims[0] * dims[1] * dims[2];
}
virtual int elementCount(const std::vector<int>& dims) {
return dims[0] * dims[1] * dims[2];
}
};
} // namespace zynqmp
} // namespace paddle_mobile
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include <memory.h>
#include "fpga/KD/llapi/bias_scale.h"
#include "fpga/KD/llapi/zynqmp_api.h"
namespace paddle_mobile {
namespace zynqmp {
namespace bias_scale {
// Re-packs a bias/scale buffer so that every division starts on an aligned
// boundary.
//
// On entry *data_in holds two consecutive arrays of `num` floats each (the
// second array starts at offset `num`). The elements are split into divisions
// of `num_per_div_before_alignment`; each division is copied into a slot of
// align_to_x(num_per_div_before_alignment, BS_NUM_ALIGNMENT) floats in a new
// zero-filled buffer, first for the first array and then for the second.
// The old buffer is released with fpga_free and *data_in is redirected to the
// new one.
void align_element(float **data_in, int num_per_div_before_alignment, int num) {
  float *ptr_unaligned = *data_in;
  const int div_num =
      (num + num_per_div_before_alignment - 1) / num_per_div_before_alignment;
  const int num_per_div_after_alignment =
      align_to_x(num_per_div_before_alignment, BS_NUM_ALIGNMENT);
  const int num_element =
      2 * div_num * num_per_div_after_alignment;  // including bias & scale
  float *ptr_aligned =
      (float *)fpga_malloc(num_element * sizeof(float));  // NOLINT
  memset(ptr_aligned, 0, num_element * sizeof(float));

  // Every division except the last is full. The last one holds whatever is
  // left of `num`, counted in *unaligned* elements. Using the aligned size to
  // compute this remainder (as a modulo) under- or over-copies whenever the
  // per-division count is not already aligned and div_num > 1.
  const int last_div_count =
      num - (div_num - 1) * num_per_div_before_alignment;

  for (int i = 0; i < div_num; i++) {
    const int copynum =
        (i == div_num - 1) ? last_div_count : num_per_div_before_alignment;
    // First array (offset 0 in the source).
    memcpy(ptr_aligned + i * num_per_div_after_alignment,
           ptr_unaligned + num_per_div_before_alignment * i,
           copynum * sizeof(float));
    // Second array (offset `num` in the source), stored after all first-array
    // divisions in the destination.
    memcpy(ptr_aligned + (div_num + i) * num_per_div_after_alignment,
           ptr_unaligned + num_per_div_before_alignment * i + num,
           copynum * sizeof(float));
  }
  fpga_free(ptr_unaligned);
  *data_in = ptr_aligned;
}
// Interleaves the two halves of *data_in in groups of four floats.
//
// The source is read as two arrays of `num_after_alignment` floats placed back
// to back. The output alternates four floats from the first array with four
// from the second. The old buffer is freed and *data_in is replaced with the
// newly allocated one (2 * num_after_alignment floats).
void interleave(float **data_in, int num_after_alignment) {
  float *source = *data_in;
  float *mixed =
      (float *)fpga_malloc(2 * num_after_alignment * sizeof(float));  // NOLINT

  const int group_count = num_after_alignment / 4;
  for (int group = 0; group < group_count; ++group) {
    float *out = mixed + 8 * group;
    // Four floats from the first array ...
    memcpy(out, source + 4 * group, 4 * sizeof(float));
    // ... followed by the matching four from the second array.
    memcpy(out + 4, source + num_after_alignment + 4 * group,
           4 * sizeof(float));
  }

  fpga_free(source);
  *data_in = mixed;
}
// Converts a bias/scale buffer in place: aligns each division
// (align_element), interleaves the two arrays (interleave) and finally
// flushes the resulting buffer with fpga_flush.
//
// *bias_scale_array is replaced by a newly allocated buffer of
// 2 * div_num * aligned-division-size floats.
void format_bias_scale_array(float **bias_scale_array,
                             int element_num_per_division, int num) {
  align_element(bias_scale_array, element_num_per_division, num);
  const int div_num =
      (num + element_num_per_division - 1) / element_num_per_division;
  const int element_num_after_division =
      align_to_x(element_num_per_division, BS_NUM_ALIGNMENT);
  const int num_after_alignment = div_num * element_num_after_division;
  interleave(bias_scale_array, num_after_alignment);
  // interleave() produced 2 * num_after_alignment floats. Flush all of them,
  // not just the first division, so every division is covered when
  // div_num > 1.
  fpga_flush(*bias_scale_array, 2 * num_after_alignment * sizeof(float));
}
// Converts `num` float biases to 16-bit values via fp32_2_fp16.
//
// A zero-filled int16_t buffer of align_to_x(num, BIAS_NUM_ALIGNMENT) entries
// is allocated and the first `num` entries are filled with the converted
// values. *bias_array is redirected to that buffer (reinterpreted as float*,
// so callers must treat it as int16_t data) and the original float buffer is
// released with fpga_free.
// NOTE(review): unlike format_bias_scale_array, no fpga_flush is issued here —
// confirm that callers flush the buffer.
void format_bias_array(float **bias_array, int num) {
  float *ptr_unaligned = *bias_array;
  const int num_before_align = num;
  const int num_after_align = align_to_x(num_before_align, BIAS_NUM_ALIGNMENT);
  int16_t *ptr_aligned =
      (int16_t *)fpga_malloc(num_after_align * sizeof(int16_t));  // NOLINT
  memset(ptr_aligned, 0, num_after_align * sizeof(int16_t));
  for (int i = 0; i < num_before_align; i++) {
    ptr_aligned[i] = fp32_2_fp16(ptr_unaligned[i]);
  }
  *bias_array = (float *)ptr_aligned;  // NOLINT
  fpga_free(ptr_unaligned);
}
} // namespace bias_scale
} // namespace zynqmp
} // namespace paddle_mobile
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
namespace paddle_mobile {
namespace zynqmp {
namespace bias_scale {
// Each function below replaces *data_in / *array with a newly fpga_malloc'ed
// buffer and releases the previous one with fpga_free.

// Pads every division of a back-to-back bias/scale buffer (`num` floats each)
// to the BS_NUM_ALIGNMENT boundary.
void align_element(float** data_in, int num_per_div_before_alignment, int num);
// Interleaves the two halves of the buffer in groups of four floats.
void interleave(float** data_in, int num_after_alignment);
// Runs align_element, then interleave, then flushes the result.
void format_bias_scale_array(float** bias_scale_array,
int element_num_per_division, int num);
// Converts `num` float biases to 16-bit values in a BIAS_NUM_ALIGNMENT-padded
// buffer; the result is int16_t data behind a float* pointer.
void format_bias_array(float** bias_array, int num);
} // namespace bias_scale
} // namespace zynqmp
} // namespace paddle_mobile
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
// Compile-time switches for the FPGA backend. All three are defined without a
// value, so code can only test them with #ifdef / #if defined().
// NOTE(review): presumably selects the ZU5 board variant — confirm.
#define PADDLE_MOBILE_ZU5
// NOTE(review): presumably enables diagnostic printing in the FPGA code — confirm.
#define FPGA_PRINT_MODE
// NOTE(review): presumably enables profiling instrumentation — confirm.
#define PADDLE_MOBILE_PROFILE
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "fpga/KD/llapi/filter.h"
#include <memory.h>
#include <algorithm>
#include "fpga/KD/float16.hpp"
#include "fpga/KD/llapi/zynqmp_api.h"
namespace paddle_mobile {
namespace zynqmp {
namespace filter {
// Returns how many filters fit in one division for a filter of `chw`
// elements: 2048 / ceil(chw / 16) * 32, capped at 2048.
int calc_division_capacity(int chw) {
  const int kCap = 2048;
  const int blocks_of_16 = (chw + 15) / 16;
  const int capacity = kCap / blocks_of_16 * 32;
  return std::min(capacity, kCap);
}
// Number of divisions needed to hold `num` items when each division holds
// `division_capacity` items (integer ceiling division).
int calc_split_num(int num, int division_capacity) {
  const int rounded_up = num + division_capacity - 1;
  return rounded_up / division_capacity;
}
int calc_division_number(int num, int group_num, int division_capacity) {
int split_num = calc_split_num(num, division_capacity);
// PADDLE_MOBILE_ENFORCE(group_num == 1 || split_num == 1,
// "Split number or group number should be 1");
return group_num * split_num;
}
// Number of filters placed in each division.
// With several groups the filters are spread evenly (rounded up) over the
// groups; with a single group a division holds at most `division_capacity`.
int calc_num_per_div(int num, int group_num, int division_capacity) {
  if (group_num != 1) {
    return (num + group_num - 1) / group_num;
  }
  return (num > division_capacity) ? division_capacity : num;
}
// Transposes `num` filters from channel-major (C, H, W) order to (H, W, C).
//
// The source bytes are consumed sequentially; each one is written to
// n * chw + h * (width * channel) + w * channel + c in a new buffer.
// *data_in is replaced by the new buffer and the old one is fpga_free'd.
void convert_to_hwc(char **data_in, int num, int channel, int height,
                    int width) {
  char *original = *data_in;
  const int chw = channel * height * width;
  char *transposed = (char *)fpga_malloc(chw * num * sizeof(char));  // NOLINT

  const char *reader = original;
  for (int n = 0; n < num; n++) {
    const int64_t amount_per_row = width * channel;
    char *filter_out = transposed + n * chw;
    for (int c = 0; c < channel; c++) {
      for (int h = 0; h < height; h++) {
        const int64_t row_offset = h * amount_per_row;
        for (int w = 0; w < width; w++) {
          filter_out[row_offset + w * channel + c] = *reader++;
        }
      }
    }
  }

  *data_in = transposed;
  fpga_free(original);
}
// Returns the largest absolute value among the first `data_size` floats of
// `data_in`, or 0 when `data_size` is not positive.
float find_max(float *data_in, int data_size) {
  float largest = 0.0;
  for (int idx = 0; idx < data_size; ++idx) {
    const float sample = data_in[idx];
    const float magnitude = (sample > 0) ? sample : -sample;
    if (largest < magnitude) {
      largest = magnitude;
    }
  }
  return largest;
}
// Rounds `fdata` half away from zero and truncates the result to signed char.
// The 0.5 offset is added in double precision and stored back into a float
// before the final cast. Values outside the signed char range are not clamped.
signed char float_to_int8(float fdata) {
  float shifted = fdata;
  shifted += (fdata < 0.0) ? -0.5 : 0.5;
  return (signed char)shifted;
}
// Quantizes `data_size` floats to signed 8-bit values.
//
// Each element is multiplied by 127 / max and rounded with float_to_int8.
// The results go into a new byte buffer; *data_in is redirected to it
// (reinterpreted as float*, so callers must treat it as 8-bit data) and the
// original float buffer is fpga_free'd.
void quantize(float **data_in, int data_size, float max) {
  float *source = *data_in;
  const float fix_range = 127;
  const float scale = fix_range / max;
  signed char *quantized =
      (signed char *)fpga_malloc(data_size * sizeof(char));
  for (int idx = 0; idx < data_size; ++idx) {
    quantized[idx] = float_to_int8(source[idx] * scale);
  }
  *data_in = (float *)quantized;  // NOLINT
  fpga_free(source);
}
// Pads each of the `num` filters from `chw` bytes to
// align_to_x(chw, FILTER_ELEMENT_ALIGNMENT) bytes, zero-filling the tail.
// If `chw` is already aligned the buffer is left untouched; otherwise
// *data_in is replaced by a new buffer and the old one is fpga_free'd.
void align_element(char **data_in, int num, int chw) {
  const int align_chw = align_to_x(chw, FILTER_ELEMENT_ALIGNMENT);
  if (align_chw == chw) {
    return;
  }

  char *unpadded = *data_in;
  char *padded = (char *)fpga_malloc(num * align_chw * sizeof(char));  // NOLINT
  memset(padded, 0, num * align_chw);
  for (int filter = 0; filter < num; ++filter) {
    memcpy(padded + filter * align_chw, unpadded + filter * chw, chw);
  }
  *data_in = padded;
  fpga_free(unpadded);
}
// Pads every division of filters up to a multiple of FILTER_NUM_ALIGNMENT
// filters.
//
// The source holds `num` filters of aligned size (chw rounded up to
// FILTER_ELEMENT_ALIGNMENT), grouped in divisions of
// `num_per_div_before_alignment`. Each division is copied to the start of a
// zero-filled slot of the aligned division size; the last division copies
// only the filters that remain. *data_in is replaced and the old buffer is
// fpga_free'd.
void align_num(char **data_in, int num_per_div_before_alignment, int num,
               int chw) {
  const int align_chw = align_to_x(chw, FILTER_ELEMENT_ALIGNMENT);
  const int num_per_div_after_alignment =
      align_to_x(num_per_div_before_alignment, FILTER_NUM_ALIGNMENT);
  char *source = *data_in;
  const int div_num =
      (num + num_per_div_before_alignment - 1) / num_per_div_before_alignment;
  const int num_element = div_num * num_per_div_after_alignment * align_chw;
  char *padded = (char *)fpga_malloc(num_element * sizeof(char));  // NOLINT
  memset(padded, 0, num_element * sizeof(char));

  // Full divisions first; `div` ends up indexing the last division.
  int div = 0;
  while (div < div_num - 1) {
    memcpy(padded + num_per_div_after_alignment * align_chw * div,
           source + num_per_div_before_alignment * align_chw * div,
           num_per_div_before_alignment * align_chw);
    ++div;
  }
  // Last division: only the remaining filters are copied.
  memcpy(padded + num_per_div_after_alignment * align_chw * div,
         source + num_per_div_before_alignment * align_chw * div,
         (num - (div_num - 1) * num_per_div_before_alignment) * align_chw);

  *data_in = padded;
  fpga_free(source);
}
// Permutes whole filters inside every block of 32.
//
// Output filter `index` is taken from source filter
//   index / 32 * 32 + (index % 16 / 4 * 8) + (index % 16 % 4)
//     + (index / 16 % 2 * 4).
// Each filter occupies align_to_x(chw, FILTER_ELEMENT_ALIGNMENT) bytes.
// *data_in is replaced by the reordered buffer; the old one is fpga_free'd.
void reorder(char **data_in, int num_after_alignment, int chw) {
  const int chw_align = align_to_x(chw, FILTER_ELEMENT_ALIGNMENT);
  char *reordered =
      (char *)fpga_malloc(chw_align * num_after_alignment *  // NOLINT
                          sizeof(char));
  char *source = *data_in;

  for (int index = 0; index < num_after_alignment; ++index) {
    const int block_base = index / 32 * 32;
    const int within_16 = index % 16;
    const int source_index = block_base + (within_16 / 4 * 8) +
                             (within_16 % 4) + (index / 16 % 2 * 4);
    memcpy(reordered + index * chw_align, source + source_index * chw_align,
           chw_align);
  }

  *data_in = reordered;
  fpga_free(source);
}
// Interleaves each pair of consecutive filters in 16-byte chunks.
//
// For filters (i, i + 1) the output region starting at i * chw_align receives
// chunk k of filter i followed by chunk k of filter i + 1, for increasing k.
// The buffer size is printed to stdout. *data_in is replaced by the new
// buffer, the old one is fpga_free'd, and the buffer size in bytes
// (chw_align * num_after_alignment) is returned.
size_t interleave(char **data_in, int num_after_alignment, int chw) {
  const int interleave_per_num = 16;
  const int chw_align = align_to_x(chw, FILTER_ELEMENT_ALIGNMENT);
  char *mixed =
      (char *)fpga_malloc(chw_align * num_after_alignment *  // NOLINT
                          sizeof(char));
  std::cout << "interleave size:" << chw_align * num_after_alignment
            << std::endl;
  char *source = *data_in;
  const int interleave_num = chw_align * 2 / interleave_per_num;

  for (int pair = 0; pair < num_after_alignment; pair += 2) {
    char *out = mixed + pair * chw_align;
    int chunk = 0;
    for (int slot = 0; slot < interleave_num; slot += 2) {
      // Even slot: chunk from the first filter of the pair.
      memcpy(out + interleave_per_num * slot,
             source + pair * chw_align + interleave_per_num * chunk,
             interleave_per_num);
      // Odd slot: same chunk from the second filter of the pair.
      memcpy(out + interleave_per_num * (slot + 1),
             source + (pair + 1) * chw_align + interleave_per_num * chunk,
             interleave_per_num);
      ++chunk;
    }
  }

  *data_in = mixed;
  fpga_free(source);
  return chw_align * num_after_alignment;
}
// Converts a float convolution filter into the packed 8-bit form, in place.
//
// Pipeline: quantize -> convert_to_hwc -> align_element -> align_num (only
// when the aligned filter count differs from `num`) -> reorder -> interleave
// -> fpga_flush. *data_in ends up pointing at 8-bit data. Returns the size in
// bytes reported by interleave().
size_t format_filter(float **data_in, int num, int channel, int height,
                     int width, int group_num, float max) {
  const int chw = channel * height * width;
  const int data_size = chw * num;

  // Work out how the filters are split into divisions and how many filters
  // exist once every division is padded to FILTER_NUM_ALIGNMENT.
  const int division_capacity = calc_division_capacity(chw);
  const int per_div = calc_num_per_div(num, group_num, division_capacity);
  const int per_div_aligned = align_to_x(per_div, FILTER_NUM_ALIGNMENT);
  const int div_num = (num + per_div - 1) / per_div;
  const int residual = num % per_div;
  const int full_divs = (residual == 0) ? div_num : (div_num - 1);
  const int num_after_alignment =
      per_div_aligned * full_divs + align_to_x(residual, FILTER_NUM_ALIGNMENT);

  quantize(data_in, data_size, max);
  // From here on the buffer holds one byte per element.
  char **quantize_data = (char **)data_in;  // NOLINT
  convert_to_hwc(quantize_data, num, channel, height, width);
  align_element(quantize_data, num, chw);
  if (num_after_alignment != num) {
    align_num(quantize_data, per_div, num, chw);
  }
  reorder(quantize_data, num_after_alignment, chw);
  const size_t mem_size = interleave(quantize_data, num_after_alignment, chw);
  fpga_flush(*quantize_data, align_to_x(chw, FILTER_ELEMENT_ALIGNMENT) *
                                 num_after_alignment * sizeof(char));
  return mem_size;
}
// Transposes a (chw x num) byte matrix into (num x chw):
// output[n * chw + c] = input[num * c + n].
// *data_in is replaced by the new buffer and the old one is fpga_free'd.
void convert_fc_filter(char **data_in, int num, int chw) {
  char *source = *data_in;
  char *transposed = (char *)fpga_malloc(chw * num * sizeof(char));  // NOLINT
  for (int n = 0; n < num; ++n) {
    char *row = transposed + n * chw;
    for (int c = 0; c < chw; ++c) {
      row[c] = source[num * c + n];
    }
  }
  *data_in = transposed;
  fpga_free(source);
}
// Converts a float fully-connected filter into the packed 8-bit form, in
// place.
//
// Same pipeline as format_filter except that the transposition step is
// convert_fc_filter: quantize -> convert_fc_filter -> align_element ->
// align_num (only when the aligned filter count differs from `num`) ->
// reorder -> interleave -> fpga_flush.
void format_fc_filter(float **data_in, int num, int channel, int height,
                      int width, int group_num, float max) {
  const int chw = channel * height * width;
  const int data_size = chw * num;

  // Division bookkeeping, identical to format_filter.
  const int division_capacity = calc_division_capacity(chw);
  const int per_div = calc_num_per_div(num, group_num, division_capacity);
  const int per_div_aligned = align_to_x(per_div, FILTER_NUM_ALIGNMENT);
  const int div_num = (num + per_div - 1) / per_div;
  const int residual = num % per_div;
  const int full_divs = (residual == 0) ? div_num : (div_num - 1);
  const int num_after_alignment =
      per_div_aligned * full_divs + align_to_x(residual, FILTER_NUM_ALIGNMENT);

  quantize(data_in, data_size, max);
  // From here on the buffer holds one byte per element.
  char **quantize_data = (char **)data_in;  // NOLINT
  convert_fc_filter(quantize_data, num, chw);
  align_element(quantize_data, num, chw);
  if (num_after_alignment != num) {
    align_num(quantize_data, per_div, num, chw);
  }
  reorder(quantize_data, num_after_alignment, chw);
  interleave(quantize_data, num_after_alignment, chw);
  fpga_flush(*quantize_data, align_to_x(chw, FILTER_ELEMENT_ALIGNMENT) *
                                 num_after_alignment * sizeof(char));
}
// Transposes 16-bit data from (N, H, W) order to (H, W, N).
// The source is read sequentially; element (n, h, w) is written to
// h * width * num + w * num + n. *data_in is replaced by the new buffer and
// the old one is fpga_free'd.
void convert_to_hwn(int16_t **data_in, int num, int height, int width) {
  int16_t *source = *data_in;
  int16_t *transposed =
      (int16_t *)fpga_malloc(height * width * num * sizeof(int16_t));  // NOLINT

  const int16_t *reader = source;
  for (int n = 0; n < num; ++n) {
    for (int h = 0; h < height; ++h) {
      for (int w = 0; w < width; ++w) {
        transposed[h * width * num + w * num + n] = *reader++;
      }
    }
  }

  *data_in = transposed;
  fpga_free(source);
}
void align_element_n(int16_t **data_in, int num, int height, int width) {
int unalign_n = num;
int align_n = align_to_x(num, FILTER_ELEMENT_ALIGNMENT);
if (unalign_n == align_n) {
return;
} else {
int16_t *tmp = *data_in;
int num_element = height * width * align_n;
int16_t *data_tmp =
(int16_t *)fpga_malloc(num_element * sizeof(int16_t)); // NOLINT
memset(data_tmp, 0, num_element * sizeof(int16_t));
for (int h = 0; h < height; h++) {
for (int w = 0; w < width; w++) {
int offset_unalign = h * width * unalign_n + w * unalign_n;
int offset_align = h * width * align_n + w * align_n;
for (int n = 0; n < unalign_n; n++) {
data_tmp[offset_align + n] = *((*data_in) + offset_unalign + n);
}
}
}
*data_in = data_tmp;
free(tmp);
}
}
void quantize_to_fp16(float **data_in, int num, int height, int width,
float *scale_ptr) {
float *tmp = *data_in;
int size = num * height * width;
float16 *tmp_data = (float16 *)fpga_malloc(size * sizeof(float16)); // NOLINT
for (int n = 0; n < num; n++) {
float scale_val = scale_ptr[n];
for (int h = 0; h < height; h++) {
for (int w = 0; w < width; w++) {
int index = n * height * width + h * width + w;
float value = tmp[index] * scale_val;
tmp_data[index] = float_to_half(value);
}
}
}
fpga_flush(tmp_data, size * sizeof(int16_t));
*data_in = (float *)tmp_data; // NOLINT
fpga_free(tmp);
}
// Converts a float depthwise-convolution filter into the 16-bit form, in
// place: quantize_to_fp16 -> convert_to_hwn -> align_element_n -> fpga_flush.
// After the call *data_in points at 16-bit data.
void format_dwconv_filter(float **data_in, int num, int height, int width,
                          float *scale_ptr) {
  quantize_to_fp16(data_in, num, height, width, scale_ptr);

  // The buffer now holds 16-bit elements; view it accordingly.
  int16_t **quantize_data = (int16_t **)data_in;  // NOLINT
  convert_to_hwn(quantize_data, num, height, width);
  align_element_n(quantize_data, num, height, width);

  const int aligned_num = align_to_x(num, FILTER_ELEMENT_ALIGNMENT);
  fpga_flush(*quantize_data, aligned_num * height * width * sizeof(int16_t));
}
} // namespace filter
} // namespace zynqmp
} // namespace paddle_mobile
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include <cstdint>
#include <cstdlib>
#include <cwchar>
namespace paddle_mobile {
namespace zynqmp {
namespace filter {
// Filters per division for a filter of `chw` elements, capped at 2048.
int calc_division_capacity(int chw);
// Ceiling of num / division_capacity.
int calc_split_num(int num, int division_capacity);
// group_num multiplied by the split count for `num`.
int calc_division_number(int num, int group_num, int division_capacity);
// Filters per division: min(num, capacity) for one group, otherwise
// ceil(num / group_num).
int calc_num_per_div(int num, int group_num, int division_capacity);

// The helpers below that take a pointer-to-pointer replace the buffer with a
// newly fpga_malloc'ed one and release the previous buffer.

// Transposes `num` filters from (C, H, W) to (H, W, C) byte order.
void convert_to_hwc(char** data_in, int num, int channel, int height,
int width);
// Largest absolute value in the first `data_size` floats (0 if none).
float find_max(float* data_in, int data_size);
// Scales by 127 / max and rounds to signed 8-bit; the result is byte data
// behind a float* pointer.
void quantize(float** data_in, int data_size, float max);
// Zero-pads every filter from `chw` bytes to FILTER_ELEMENT_ALIGNMENT.
void align_element(char** data_in, int num, int chw);
// Zero-pads every division of filters to FILTER_NUM_ALIGNMENT filters.
void align_num(char** data_in, int num_per_div_before_alignment, int num,
int chw);
// Permutes whole filters within each block of 32.
void reorder(char** data_in, int num_after_alignment, int chw);
// Interleaves consecutive filter pairs in 16-byte chunks; returns the buffer
// size in bytes.
size_t interleave(char** data_in, int num_after_alignment, int chw);
// Full conversion pipeline for convolution filters; returns the packed size in
// bytes.
size_t format_filter(float** data_in, int num, int channel, int height,
int width, int group_num, float max);
// Transposes a (chw x num) byte matrix into (num x chw).
void convert_fc_filter(char** data_in, int num, int chw);
// Full conversion pipeline for fully-connected filters.
void format_fc_filter(float** data_in, int num, int channel, int height,
int width, int group_num, float max);
// Transposes 16-bit data from (N, H, W) to (H, W, N).
void convert_to_hwn(int16_t** data_in, int num, int height, int width);
// Zero-pads the N dimension of (H, W, N) data to FILTER_ELEMENT_ALIGNMENT.
void align_element_n(int16_t** data_in, int num, int height, int width);
// Multiplies filter n by scale_ptr[n] and converts to float16; the result is
// 16-bit data behind a float* pointer.
void quantize_to_fp16(float** data_in, int num, int height, int width,
float* scale_ptr);
// Full conversion pipeline for depthwise-convolution filters.
void format_dwconv_filter(float** data_in, int num, int height, int width,
float* scale_ptr);
} // namespace filter
} // namespace zynqmp
} // namespace paddle_mobile
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include <memory.h>
#include <algorithm>
#include "fpga/KD/llapi/image.h"
#include "fpga/KD/llapi/zynqmp_api.h"
namespace paddle_mobile {
namespace zynqmp {
namespace image {
// Transposes one float image from (C, H, W) order to (H, W, C).
// The source is read sequentially; element (c, h, w) lands at
// h * (width * channel) + w * channel + c. *data_in is replaced by the new
// buffer and the old one is fpga_free'd.
void convert_to_hwc(float **data_in, int channel, int height, int width) {
  float *source = *data_in;
  float *transposed =
      (float *)fpga_malloc(channel * height * width * sizeof(float));  // NOLINT

  const int64_t amount_per_row = width * channel;
  const float *reader = source;
  for (int c = 0; c < channel; ++c) {
    for (int h = 0; h < height; ++h) {
      float *row = transposed + h * amount_per_row;
      for (int w = 0; w < width; ++w) {
        row[w * channel + c] = *reader++;
      }
    }
  }

  *data_in = transposed;
  fpga_free(source);
}
// Pads every image row from `cw` floats to align_to_x(cw, IMAGE_ALIGNMENT)
// floats, zero-filling the tail. If `cw` is already aligned nothing happens;
// otherwise *data_in is replaced by a new buffer and the old one is
// fpga_free'd.
void align_element_conv(float **data_in, int height, int cw) {
  const int align_cw = align_to_x(cw, IMAGE_ALIGNMENT);
  if (align_cw == cw) {
    return;
  }

  float *unpadded = *data_in;
  float *padded =
      (float *)fpga_malloc(height * align_cw * sizeof(float));  // NOLINT
  memset(padded, 0, height * align_cw * sizeof(float));
  for (int row = 0; row < height; ++row) {
    memcpy((void *)(padded + row * align_cw),  // NOLINT
           (void *)(unpadded + row * cw),      // NOLINT
           cw * sizeof(float));
  }
  *data_in = padded;
  fpga_free(unpadded);
}
// Pads each row of the image (channel * width floats) to IMAGE_ALIGNMENT and
// flushes the resulting buffer. The data is not transposed here, so it is
// used in whatever order the caller supplied.
void format_image(float **data_in, int channel, int height, int width) {
  const int row_elements = channel * width;
  align_element_conv(data_in, height, row_elements);
  fpga_flush(*data_in, align_to_x(channel * width, IMAGE_ALIGNMENT) * height *
                           sizeof(float));
}
// Concatenates `image_num` 16-bit images along the channel dimension.
//
// For every pixel the channel vectors of the inputs are appended one after
// another into `image_out`. Input rows are read with a stride of
// align_to_x(channel_num[i] * width, IMAGE_ALIGNMENT); output rows are laid
// out with a stride of align_to_x(total_channels * width, IMAGE_ALIGNMENT).
// scale_out[0] becomes the maximum of 0 and every scales_in[i][0], and
// scale_out[1] its reciprocal. The output is flushed with fpga_flush.
void concat_images(int16_t **images_in, float **scales_in, void *image_out,
                   float *scale_out, int image_num, uint32_t *channel_num,
                   int height, int width) {
  int total_channel = 0;
  scale_out[0] = 0.0;
  scale_out[1] = 0.0;
  for (int img = 0; img < image_num; ++img) {
    total_channel += channel_num[img];
    scale_out[0] = std::max(*scale_out, scales_in[img][0]);
  }
  scale_out[1] = 1 / scale_out[0];

  const int out_row_stride =
      align_to_x(total_channel * width, IMAGE_ALIGNMENT);
  // Padding appended to each output row.
  const int out_row_padding = out_row_stride - total_channel * width;

  // Running count of payload elements written so far; together with the
  // per-row padding it gives the destination offset.
  int written = 0;
  for (int row = 0; row < height; ++row) {
    for (int col = 0; col < width; ++col) {
      for (int img = 0; img < image_num; ++img) {
        const int in_row_stride =
            align_to_x(channel_num[img] * width, IMAGE_ALIGNMENT);
        memcpy((int16_t *)image_out + written +  // NOLINT
                   row * out_row_padding,
               images_in[img] + col * channel_num[img] + row * in_row_stride,
               channel_num[img] * sizeof(int16_t));
        written += channel_num[img];
      }
    }
  }
  fpga_flush(image_out, height * out_row_stride * sizeof(int16_t));
}
// Splits one 16-bit image into `image_num` images along the channel
// dimension (the inverse of concat_images).
//
// For every pixel, consecutive runs of channel_nums[i] elements are copied
// from `image_in` into images_out[i]. Source rows use a stride of
// align_to_x(total_channels * width, IMAGE_ALIGNMENT); output i uses a row
// stride of align_to_x(channel_nums[i] * width, IMAGE_ALIGNMENT).
// Both scale entries of the input are copied to every scales_out[i].
// The input is invalidated before reading and each output is flushed after
// writing.
void split_image(int16_t *image_in, const float *scale_in, void **images_out,
                 float **scales_out, int image_num,
                 const uint32_t *channel_nums, int height, int width) {
  int total_channel = 0;
  for (int i = 0; i < image_num; i++) {
    scales_out[i][0] = scale_in[0];
    scales_out[i][1] = scale_in[1];
    total_channel += channel_nums[i];
  }

  const int src_row_stride =
      align_to_x(total_channel * width, IMAGE_ALIGNMENT);
  int element_num = height * src_row_stride;
  fpga_invalidate(image_in, element_num * sizeof(int16_t));

  for (int h = 0; h < height; h++) {
    for (int w = 0; w < width; w++) {
      int src_offset = h * src_row_stride + w * total_channel;
      for (int i = 0; i < image_num; i++) {
        const int des_offset =
            h * align_to_x(channel_nums[i] * width, IMAGE_ALIGNMENT) +
            w * channel_nums[i];
        // des_offset counts int16_t elements, so the void* must be converted
        // to int16_t* BEFORE the offset is added. Adding to the void* first
        // would advance by bytes (and is not standard C++).
        memcpy(static_cast<int16_t *>(images_out[i]) + des_offset,
               image_in + src_offset, channel_nums[i] * sizeof(int16_t));
        src_offset += channel_nums[i];
      }
    }
  }

  for (int i = 0; i < image_num; i++) {
    element_num = height * align_to_x(width * channel_nums[i], IMAGE_ALIGNMENT);
    fpga_flush(images_out[i], element_num * sizeof(int16_t));
  }
}
} // namespace image
} // namespace zynqmp
} // namespace paddle_mobile
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include <cstdint>
// Image layout helpers for the zynqmp FPGA backend.
namespace paddle_mobile {
namespace zynqmp {
namespace image {
// NOTE(review): presumably rewrites *data_in from CHW to HWC element order;
// the definition is not visible from this header -- confirm.
void convert_to_hwc(float** data_in, int channel, int height, int width);
// Copies `height` rows of `cw` floats into a zero-filled buffer with a padded
// row length and stores the new buffer in *data_in.
// NOTE(review): the padded length is presumably align_to_x(cw,
// IMAGE_ALIGNMENT), and the call may be a no-op when no padding is needed --
// confirm against the definition.
void align_element_conv(float** data_in, int height, int cw);
// Pads the rows of *data_in via align_element_conv(data_in, height,
// channel * width) and then flushes the padded buffer.
void format_image(float** data_in, int channel, int height, int width);
// Concatenates `image_num` int16 feature maps along the channel direction
// into `image_out`. channel_num[i] is the channel count of images_in[i].
// scale_out[0] receives the largest scales_in[i][0] and scale_out[1] its
// reciprocal. The output buffer is flushed before returning.
void concat_images(int16_t** images_in, float** scales_in, void* image_out,
float* scale_out, int image_num, uint32_t* channel_num,
int height, int width);
// Splits the int16 feature map `image_in` along the channel direction into
// `image_num` outputs, where images_out[i] receives channel_nums[i] channels.
// scale_in[0] and scale_in[1] are copied into every scales_out[i]. The input
// is cache-invalidated before reading and every output is flushed afterwards.
void split_image(int16_t* image_in, const float* scale_in, void** images_out,
float** scales_out, int image_num,
const uint32_t* channel_nums, int height, int width);
} // namespace image
} // namespace zynqmp
} // namespace paddle_mobile
此差异已折叠。
此差异已折叠。
/* Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#ifndef PE_hpp
#define PE_hpp
#include <stdio.h>
#include <iostream>
#include "pe_params.hpp"
#include "tensor_util.hpp"
namespace paddle_mobile {
namespace zynqmp {
// Polymorphic base class for processing elements. Every hook has a default
// implementation, so subclasses override only the ones they need.
class PE {
 public:
  // One-time setup hook. The base implementation does nothing and returns
  // false.
  virtual bool init() { return false; }

  // Configuration hook; a no-op in the base class.
  virtual void apply() {}

  // Execution hook. The base implementation only writes a trace line to
  // stdout and returns false.
  virtual bool dispatch() {
    std::cout << "pe dispatch \n";
    return false;
  }

  // Virtual so that derived objects can be destroyed through a PE pointer.
  virtual ~PE() = default;
};
} // namespace zynqmp
} // namespace paddle_mobile
#endif /* PE_hpp */
此差异已折叠。
/* Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include <algorithm>
#include "../pe.hpp"
#include "../pe_params.hpp"
namespace paddle_mobile {
namespace zynqmp {
class ConcatPE : public PE {
public:
bool init() {
Tensor* output = param_.output;
output->setAligned(true);
return true;
}
void apply() {}
bool dispatch() {
Tensor* output = param_.output;
Shape& output_shape = output->shape();
float16* out_data = param_.output->data<float16>();
int channel_sum = 0;
int out_channel = output_shape.channel();
float scale = 0;
for (int n = 0; n < param_.inputs.size(); n++) {
Tensor* input = param_.inputs[n];
input->invalidate();
scale = std::max(scale, input->scale()[0]);
Shape& input_shape = input->shape();
int wh = output_shape.width() * output_shape.height();
for (int j = 0; j < wh; j++) {
float16* src = input->data<float16>() + j * input_shape.channel();
memcpy(out_data + j * out_channel + channel_sum, src,
input_shape.channel() * sizeof(float16));
}
channel_sum += input_shape.channel();
}
output->scale()[0] = scale;
output->scale()[1] = 1.0f / scale;
std::cout << "conv scale::" << scale << std::endl;
output->flush();
return true;
}
ConcatParam& param() { return param_; }
private:
ConcatParam param_;
};
} // namespace zynqmp
} // namespace paddle_mobile
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
// Build-configuration switches; both are defined without a value.
// NOTE(review): presumably selects the ZU5 board variant of the FPGA
// backend -- confirm where this macro is tested.
#define PADDLE_MOBILE_ZU5
// NOTE(review): presumably enables debug printing in the FPGA code paths --
// confirm where this macro is tested.
#define FPGA_PRINT_MODE
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册