未验证 提交 310fd514 编写于 作者: T tensor-tang 提交者: GitHub

[Lite] enable cross compile and run on mobile of lite (#17541)

* add cmake

* update

* fix proto pd

* fix compile

* tmp save

* fix protobuf device version

* fix protobuf and host compile

* fix std c++11 support on android

* change array to vector to fix ndk c++_static

* fix rt and add dockerfile

* fix android compile issue with latest merge

* init arm kernels

* enable run on arm

* update format

* update format

* update format
上级 e9f33320
...@@ -19,6 +19,19 @@ set(PADDLE_BINARY_DIR ${CMAKE_CURRENT_BINARY_DIR}) ...@@ -19,6 +19,19 @@ set(PADDLE_BINARY_DIR ${CMAKE_CURRENT_BINARY_DIR})
include(system) include(system)
# Cross-compilation setup for the light-weight (mobile) framework.
# This runs ahead of the project() call that follows, which is where CMake
# detects the compilers, so the target-system variables must already be set.
if(LITE_WITH_LIGHT_WEIGHT_FRAMEWORK)
cmake_minimum_required(VERSION 3.10)
# Android is the only cross-compilation target supported so far.
set(ANDROID TRUE)
# Expected to define ANDROID_NDK, ANDROID_ARCH_ABI, ANDROID_API_LEVEL and
# ANDROID_STL_TYPE, which are consumed below.
include(cross_compiling/android)
# Expected to locate the compilers used for tools that must run on the build
# machine (HOST_C_COMPILER / HOST_CXX_COMPILER).
include(cross_compiling/host)
# Translate the project-level ANDROID_* settings into the CMAKE_* variables
# that CMake's built-in Android support reads.
set(CMAKE_SYSTEM_NAME Android)
set(CMAKE_SYSTEM_VERSION ${ANDROID_API_LEVEL})
set(CMAKE_ANDROID_ARCH_ABI ${ANDROID_ARCH_ABI})
set(CMAKE_ANDROID_NDK ${ANDROID_NDK})
set(CMAKE_ANDROID_STL_TYPE ${ANDROID_STL_TYPE})
endif()
project(paddle CXX C) project(paddle CXX C)
message(STATUS "CXX compiler: ${CMAKE_CXX_COMPILER}, version: " message(STATUS "CXX compiler: ${CMAKE_CXX_COMPILER}, version: "
"${CMAKE_CXX_COMPILER_ID} ${CMAKE_CXX_COMPILER_VERSION}") "${CMAKE_CXX_COMPILER_ID} ${CMAKE_CXX_COMPILER_VERSION}")
...@@ -41,7 +54,9 @@ if(WIN32) ...@@ -41,7 +54,9 @@ if(WIN32)
set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} ${PADDLE_LINK_FLAGS}") set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} ${PADDLE_LINK_FLAGS}")
endif(WIN32) endif(WIN32)
find_package(CUDA QUIET) if(NOT LITE_WITH_LIGHT_WEIGHT_FRAMEWORK)
find_package(CUDA QUIET)
endif()
find_package(Git REQUIRED) find_package(Git REQUIRED)
find_package(Threads REQUIRED) find_package(Threads REQUIRED)
...@@ -79,6 +94,30 @@ option(PY_VERSION "Compile PaddlePaddle with python3 support" ${PY_VER ...@@ -79,6 +94,30 @@ option(PY_VERSION "Compile PaddlePaddle with python3 support" ${PY_VER
option(WITH_FAST_MATH "Make use of fast math library, might affect the precision to some extent" ON) option(WITH_FAST_MATH "Make use of fast math library, might affect the precision to some extent" ON)
option(WITH_DGC "Use DGC(Deep Gradient Compression) or not" ON) option(WITH_DGC "Use DGC(Deep Gradient Compression) or not" ON)
# Build defaults that depend on whether we are cross-compiling for mobile.
#
# Mobile (Android / iOS): the desktop/server-only features below cannot be
# used, so they are forced OFF regardless of what the user requested, and
# size-oriented build types are chosen when none was given.
#
# Everything else: external projects are configured with
# -DCMAKE_BUILD_TYPE=${THIRD_PARTY_BUILD_TYPE}; fall back to Release so that
# value is never empty.
if(ANDROID OR IOS)
    set(WITH_GPU OFF CACHE BOOL
        "Disable GPU when cross-compiling for Android and iOS" FORCE)
    set(WITH_DSO OFF CACHE BOOL
        "Disable DSO when cross-compiling for Android and iOS" FORCE)
    set(WITH_AVX OFF CACHE BOOL
        "Disable AVX when cross-compiling for Android and iOS" FORCE)
    set(WITH_PYTHON OFF CACHE BOOL
        "Disable PYTHON when cross-compiling for Android and iOS" FORCE)
    set(WITH_RDMA OFF CACHE BOOL
        "Disable RDMA when cross-compiling for Android and iOS" FORCE)
    set(WITH_MKL OFF CACHE BOOL
        "Disable MKL when cross-compiling for Android and iOS" FORCE)

    # Only fill in build types the user did not choose explicitly.
    if(NOT CMAKE_BUILD_TYPE)
        set(CMAKE_BUILD_TYPE "Release" CACHE STRING
            "Default use Release in android" FORCE)
    endif()
    if(NOT THIRD_PARTY_BUILD_TYPE)
        set(THIRD_PARTY_BUILD_TYPE "MinSizeRel" CACHE STRING
            "Default use MinSizeRel in android" FORCE)
    endif()
elseif(NOT THIRD_PARTY_BUILD_TYPE)
    # Host/server builds: keep third-party libraries optimized by default.
    set(THIRD_PARTY_BUILD_TYPE Release)
endif()
# for lite, both server and mobile framework. # for lite, both server and mobile framework.
option(WITH_LITE "Enable lite framework" OFF) option(WITH_LITE "Enable lite framework" OFF)
option(LITE_WITH_CUDA "Enable CUDA in lite mode" OFF) option(LITE_WITH_CUDA "Enable CUDA in lite mode" OFF)
...@@ -89,8 +128,6 @@ option(LITE_WITH_LIGHT_WEIGHT_FRAMEWORK "Enable light-weight framework" OFF) ...@@ -89,8 +128,6 @@ option(LITE_WITH_LIGHT_WEIGHT_FRAMEWORK "Enable light-weight framework" OFF)
set(THIRD_PARTY_PATH "${CMAKE_BINARY_DIR}/third_party" CACHE STRING set(THIRD_PARTY_PATH "${CMAKE_BINARY_DIR}/third_party" CACHE STRING
"A path setting third party libraries download & build directories.") "A path setting third party libraries download & build directories.")
set(THIRD_PARTY_BUILD_TYPE Release)
# CMAKE_BUILD_TYPE # CMAKE_BUILD_TYPE
if(NOT CMAKE_BUILD_TYPE) if(NOT CMAKE_BUILD_TYPE)
set(CMAKE_BUILD_TYPE "RelWithDebInfo" CACHE STRING set(CMAKE_BUILD_TYPE "RelWithDebInfo" CACHE STRING
...@@ -107,7 +144,7 @@ if (WITH_LITE AND LITE_WITH_LIGHT_WEIGHT_FRAMEWORK) ...@@ -107,7 +144,7 @@ if (WITH_LITE AND LITE_WITH_LIGHT_WEIGHT_FRAMEWORK)
include(external/gflags) # download, build, install gflags include(external/gflags) # download, build, install gflags
include(external/glog) # download, build, install glog include(external/glog) # download, build, install glog
include(external/gtest) # download, build, install gtest include(external/gtest) # download, build, install gtest
include(external/zlib) # download, build, install gtest #include(external/zlib) # download, build, install gtest
include(external/protobuf) # download, build, install protobuf include(external/protobuf) # download, build, install protobuf
include(external/eigen) # download eigen3 include(external/eigen) # download eigen3
...@@ -115,7 +152,7 @@ if (WITH_LITE AND LITE_WITH_LIGHT_WEIGHT_FRAMEWORK) ...@@ -115,7 +152,7 @@ if (WITH_LITE AND LITE_WITH_LIGHT_WEIGHT_FRAMEWORK)
include(configure) # add paddle env configuration include(configure) # add paddle env configuration
add_definitions(-std=c++11) add_definitions(-std=c++11)
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++11")
add_subdirectory(paddle) add_subdirectory(paddle)
return() return()
......
# Development image for cross-compiling Paddle Lite for Android:
# Ubuntu 16.04 + host and ARM GCC toolchains + CMake 3.10 + Android NDK r17c.
FROM ubuntu:16.04

# Write an apt sources template; the <mirror>/<version> placeholders are
# filled in by the sed step below.
RUN echo '\
deb <mirror> <version> main restricted universe multiverse\n\
deb <mirror> <version>-updates main restricted universe multiverse\n\
deb <mirror> <version>-backports main restricted universe multiverse\n\
deb <mirror> <version>-security main restricted universe multiverse\n'\
> /etc/apt/sources.list
# "-i -e", not "-ie": the latter means "edit in place, keep a backup with
# suffix 'e'" and leaves a stray /etc/apt/sources.liste behind.
RUN sed -i \
    -e 's|<mirror>|http://mirrors.tuna.tsinghua.edu.cn/ubuntu/|' \
    -e 's|<version>|xenial|' \
    /etc/apt/sources.list

# update/upgrade/install/cleanup share one layer so that a cached, stale
# package index is never paired with a fresh install list, and so that the
# cleanup actually reduces the layer size.
# ca-certificates is listed explicitly because the HTTPS downloads below need
# it; do not rely on it arriving as a transitive dependency.
RUN apt-get update && apt-get upgrade -y && \
    apt-get install -y --no-install-recommends \
        ca-certificates \
        curl \
        unzip \
        git \
        make \
        cmake-curses-gui \
        python \
        python-pip \
        python-setuptools \
        clang-format-5.0 \
        graphviz \
        g++-arm-linux-gnueabi \
        gcc-arm-linux-gnueabi \
        gcc \
        g++ && \
    apt-get autoremove -y && apt-get clean

RUN ln -s clang-format-5.0 /usr/bin/clang-format

# The system interpreter is Python 2.7; pip 21 and later no longer run on it,
# so cap the upgrade. Each pip call stays in its own RUN so that the freshly
# installed pip entry point is the one picked up by the next step.
RUN pip install -i https://pypi.tuna.tsinghua.edu.cn/simple --upgrade 'pip<21'
RUN pip install -i https://pypi.tuna.tsinghua.edu.cn/simple wheel
RUN pip install -i https://pypi.tuna.tsinghua.edu.cn/simple pre-commit

# CMake 3.10 replaces the distribution's older cmake/ccmake; the originals are
# kept as *.bak. -f makes curl fail on HTTP errors instead of saving the error
# page, -L follows redirects. The tarball is removed in the same layer.
RUN cd /tmp && \
    curl -fSL -O https://mms-res.cdn.bcebos.com/cmake-3.10.3-Linux-x86_64.tar.gz && \
    tar xzf cmake-3.10.3-Linux-x86_64.tar.gz && \
    rm cmake-3.10.3-Linux-x86_64.tar.gz && \
    mv cmake-3.10.3-Linux-x86_64 /opt/cmake-3.10 && \
    mv /usr/bin/cmake /usr/bin/cmake.bak && ln -s /opt/cmake-3.10/bin/cmake /usr/bin/cmake && \
    mv /usr/bin/ccmake /usr/bin/ccmake.bak && ln -s /opt/cmake-3.10/bin/ccmake /usr/bin/ccmake

# Android NDK r17c, unpacked to /opt; the archive is deleted in the same layer
# so it does not stay in the image.
RUN cd /tmp && \
    curl -fSL -O https://dl.google.com/android/repository/android-ndk-r17c-linux-x86_64.zip && \
    cd /opt && unzip -q /tmp/android-ndk-r17c-linux-x86_64.zip && \
    rm /tmp/android-ndk-r17c-linux-x86_64.zip

# Read by the Android cross-compiling CMake module when ANDROID_NDK is not set.
ENV NDK_ROOT=/opt/android-ndk-r17c
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
# http://www.apache.org/licenses/LICENSE-2.0
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Defaults and validation for the Android cross-compilation settings
# (ANDROID_NDK, ANDROID_ARCH_ABI, ANDROID_API_LEVEL, ANDROID_STL_TYPE).
# The whole file is a no-op unless ANDROID is set by the includer.
if(NOT ANDROID)
return()
endif()
# NDK location: an explicit ANDROID_NDK wins; otherwise fall back to the
# NDK_ROOT environment variable and fail if neither is available.
if(NOT DEFINED ANDROID_NDK)
set(ANDROID_NDK $ENV{NDK_ROOT})
if(NOT ANDROID_NDK)
message(FATAL_ERROR "Must set ANDROID_NDK or env NDK_ROOT")
endif()
endif()
# Target ABI, defaulting to 64-bit ARM. Stored in the cache.
if(NOT DEFINED ANDROID_ARCH_ABI)
set(ANDROID_ARCH_ABI "arm64-v8a" CACHE STRING "Choose android platform")
endif()
# Android API level, defaulting to 22. Unlike the ABI and STL settings this
# default is a plain (non-cache) variable.
if(NOT DEFINED ANDROID_API_LEVEL)
set(ANDROID_API_LEVEL "22")
endif()
# C++ runtime, defaulting to the static libc++. Stored in the cache.
if(NOT DEFINED ANDROID_STL_TYPE)
set(ANDROID_STL_TYPE "c++_static" CACHE STRING "stl type")
endif()
# Accepted ABI values. The STRINGS property attaches this list to the cache
# entry as its set of selectable values; the IN_LIST check below enforces it.
set(ANDROID_ARCH_ABI_LIST "arm64-v8a" "armeabi-v7a" "armeabi-v6" "armeabi"
"mips" "mips64" "x86" "x86_64")
set_property(CACHE ANDROID_ARCH_ABI PROPERTY STRINGS ${ANDROID_ARCH_ABI_LIST})
if (NOT ANDROID_ARCH_ABI IN_LIST ANDROID_ARCH_ABI_LIST)
message(FATAL_ERROR "ANDROID_ARCH_ABI must be in one of ${ANDROID_ARCH_ABI_LIST}")
endif()
# NOTE(review): this only logs a message; no NEON-related flag or variable is
# set in this file.
if(ANDROID_ARCH_ABI STREQUAL "armeabi-v7a")
message(STATUS "NEON is enabled on arm-v7a")
endif()
# Accepted STL values, validated the same way as the ABI.
# NOTE(review): "LITS" in the variable name looks like a typo for "LIST"; the
# name is kept because other files may reference it.
set(ANDROID_STL_TYPE_LITS "gnustl_static" "c++_static")
set_property(CACHE ANDROID_STL_TYPE PROPERTY STRINGS ${ANDROID_STL_TYPE_LITS})
if (NOT ANDROID_STL_TYPE IN_LIST ANDROID_STL_TYPE_LITS)
message(FATAL_ERROR "ANDROID_STL_TYPE must be in one of ${ANDROID_STL_TYPE_LITS}")
endif()
# NOTE(review): presumably requests position-independent executables; the
# consumer of ANDROID_PIE is not visible in this file.
set(ANDROID_PIE TRUE)
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Locate the C and C++ compilers for the *host* (build) machine. They are
# needed for tools that must run during a cross-compiling build.
#
# Results: HOST_C_COMPILER and HOST_CXX_COMPILER. The CC / CXX environment
# variables take precedence; otherwise gcc / g++ are searched for. Configuration
# aborts if either compiler cannot be found on disk.

# Left unquoted on purpose: when CC/CXX is not exported this expands to
# set(<var>) with no value, which leaves the variable undefined so that the
# find_program() fallback below performs its search.
set(HOST_C_COMPILER $ENV{CC})
set(HOST_CXX_COMPILER $ENV{CXX})

# PATHS (not PATH) is the find_program keyword for extra search directories;
# with the misspelling the directories are parsed as additional program names.
if(NOT HOST_C_COMPILER)
    find_program(HOST_C_COMPILER NAMES gcc PATHS
        /usr/bin
        /usr/local/bin)
endif()
if(NOT HOST_CXX_COMPILER)
    find_program(HOST_CXX_COMPILER NAMES g++ PATHS
        /usr/bin
        /usr/local/bin)
endif()

# EXISTS needs a single path argument, so the expansions are quoted to stay
# well-formed when the value contains spaces or semicolons.
if(NOT HOST_C_COMPILER OR NOT EXISTS "${HOST_C_COMPILER}")
    message(FATAL_ERROR "Cannot find host C compiler. export CC=/path/to/cc")
endif()
if(NOT HOST_CXX_COMPILER OR NOT EXISTS "${HOST_CXX_COMPILER}")
    message(FATAL_ERROR "Cannot find host CXX compiler. export CXX=/path/to/c++")
endif()

message(STATUS "Found host C compiler: " ${HOST_C_COMPILER})
message(STATUS "Found host CXX compiler: " ${HOST_CXX_COMPILER})
...@@ -25,6 +25,24 @@ ENDIF(WIN32) ...@@ -25,6 +25,24 @@ ENDIF(WIN32)
INCLUDE_DIRECTORIES(${GFLAGS_INCLUDE_DIR}) INCLUDE_DIRECTORIES(${GFLAGS_INCLUDE_DIR})
# Arguments forwarded to the gflags external build so that it is configured
# with the same compilers and flags as this project.
set(OPTIONAL_ARGS
    "-DCMAKE_CXX_COMPILER=${CMAKE_CXX_COMPILER}"
    "-DCMAKE_C_COMPILER=${CMAKE_C_COMPILER}"
    "-DCMAKE_CXX_FLAGS=${CMAKE_CXX_FLAGS}"
    "-DCMAKE_CXX_FLAGS_RELEASE=${CMAKE_CXX_FLAGS_RELEASE}"
    "-DCMAKE_CXX_FLAGS_DEBUG=${CMAKE_CXX_FLAGS_DEBUG}"
    "-DCMAKE_C_FLAGS=${CMAKE_C_FLAGS}"
    "-DCMAKE_C_FLAGS_DEBUG=${CMAKE_C_FLAGS_DEBUG}"
    "-DCMAKE_C_FLAGS_RELEASE=${CMAKE_C_FLAGS_RELEASE}")

# When targeting Android, also forward the cross-compilation settings so the
# external build targets the same system, ABI, NDK and STL.
if(ANDROID)
    list(APPEND OPTIONAL_ARGS
        "-DCMAKE_SYSTEM_NAME=${CMAKE_SYSTEM_NAME}"
        "-DCMAKE_SYSTEM_VERSION=${CMAKE_SYSTEM_VERSION}"
        "-DCMAKE_ANDROID_ARCH_ABI=${CMAKE_ANDROID_ARCH_ABI}"
        "-DCMAKE_ANDROID_NDK=${CMAKE_ANDROID_NDK}"
        "-DCMAKE_ANDROID_STL_TYPE=${CMAKE_ANDROID_STL_TYPE}")
endif()
ExternalProject_Add( ExternalProject_Add(
extern_gflags extern_gflags
${EXTERNAL_PROJECT_LOG_ARGS} ${EXTERNAL_PROJECT_LOG_ARGS}
...@@ -32,19 +50,12 @@ ExternalProject_Add( ...@@ -32,19 +50,12 @@ ExternalProject_Add(
GIT_TAG 77592648e3f3be87d6c7123eb81cbad75f9aef5a GIT_TAG 77592648e3f3be87d6c7123eb81cbad75f9aef5a
PREFIX ${GFLAGS_SOURCES_DIR} PREFIX ${GFLAGS_SOURCES_DIR}
UPDATE_COMMAND "" UPDATE_COMMAND ""
CMAKE_ARGS -DCMAKE_CXX_COMPILER=${CMAKE_CXX_COMPILER} CMAKE_ARGS -DBUILD_STATIC_LIBS=ON
-DCMAKE_C_COMPILER=${CMAKE_C_COMPILER}
-DCMAKE_CXX_FLAGS=${CMAKE_CXX_FLAGS}
-DCMAKE_CXX_FLAGS_RELEASE=${CMAKE_CXX_FLAGS_RELEASE}
-DCMAKE_CXX_FLAGS_DEBUG=${CMAKE_CXX_FLAGS_DEBUG}
-DCMAKE_C_FLAGS=${CMAKE_C_FLAGS}
-DCMAKE_C_FLAGS_DEBUG=${CMAKE_C_FLAGS_DEBUG}
-DCMAKE_C_FLAGS_RELEASE=${CMAKE_C_FLAGS_RELEASE}
-DBUILD_STATIC_LIBS=ON
-DCMAKE_INSTALL_PREFIX=${GFLAGS_INSTALL_DIR} -DCMAKE_INSTALL_PREFIX=${GFLAGS_INSTALL_DIR}
-DCMAKE_POSITION_INDEPENDENT_CODE=ON -DCMAKE_POSITION_INDEPENDENT_CODE=ON
-DBUILD_TESTING=OFF -DBUILD_TESTING=OFF
-DCMAKE_BUILD_TYPE=${THIRD_PARTY_BUILD_TYPE} -DCMAKE_BUILD_TYPE=${THIRD_PARTY_BUILD_TYPE}
${OPTIONAL_ARGS}
${EXTERNAL_OPTIONAL_ARGS} ${EXTERNAL_OPTIONAL_ARGS}
CMAKE_CACHE_ARGS -DCMAKE_INSTALL_PREFIX:PATH=${GFLAGS_INSTALL_DIR} CMAKE_CACHE_ARGS -DCMAKE_INSTALL_PREFIX:PATH=${GFLAGS_INSTALL_DIR}
-DCMAKE_POSITION_INDEPENDENT_CODE:BOOL=ON -DCMAKE_POSITION_INDEPENDENT_CODE:BOOL=ON
......
...@@ -31,6 +31,24 @@ INCLUDE_DIRECTORIES(${GLOG_INCLUDE_DIR}) ...@@ -31,6 +31,24 @@ INCLUDE_DIRECTORIES(${GLOG_INCLUDE_DIR})
SET(GLOG_REPOSITORY "https://github.com/google/glog.git") SET(GLOG_REPOSITORY "https://github.com/google/glog.git")
SET(GLOG_TAG "v0.3.5") SET(GLOG_TAG "v0.3.5")
# Arguments forwarded to the glog external build so that it is configured
# with the same compilers and flags as this project.
#
# glog was previously configured with GLOG_CMAKE_CXX_FLAGS rather than the
# plain CMAKE_CXX_FLAGS; keep doing so when that variable is available so any
# glog-specific additions are not lost.
# NOTE(review): GLOG_CMAKE_CXX_FLAGS is expected to be set earlier in this
# file; its definition is not visible here, hence the fallback.
if(DEFINED GLOG_CMAKE_CXX_FLAGS)
    set(glog_cxx_flags "${GLOG_CMAKE_CXX_FLAGS}")
else()
    set(glog_cxx_flags "${CMAKE_CXX_FLAGS}")
endif()

set(OPTIONAL_ARGS
    "-DCMAKE_CXX_COMPILER=${CMAKE_CXX_COMPILER}"
    "-DCMAKE_C_COMPILER=${CMAKE_C_COMPILER}"
    "-DCMAKE_CXX_FLAGS=${glog_cxx_flags}"
    "-DCMAKE_CXX_FLAGS_RELEASE=${CMAKE_CXX_FLAGS_RELEASE}"
    "-DCMAKE_CXX_FLAGS_DEBUG=${CMAKE_CXX_FLAGS_DEBUG}"
    "-DCMAKE_C_FLAGS=${CMAKE_C_FLAGS}"
    "-DCMAKE_C_FLAGS_DEBUG=${CMAKE_C_FLAGS_DEBUG}"
    "-DCMAKE_C_FLAGS_RELEASE=${CMAKE_C_FLAGS_RELEASE}")

# When targeting Android, also forward the cross-compilation settings so the
# external build targets the same system, ABI, NDK and STL.
if(ANDROID)
    list(APPEND OPTIONAL_ARGS
        "-DCMAKE_SYSTEM_NAME=${CMAKE_SYSTEM_NAME}"
        "-DCMAKE_SYSTEM_VERSION=${CMAKE_SYSTEM_VERSION}"
        "-DCMAKE_ANDROID_ARCH_ABI=${CMAKE_ANDROID_ARCH_ABI}"
        "-DCMAKE_ANDROID_NDK=${CMAKE_ANDROID_NDK}"
        "-DCMAKE_ANDROID_STL_TYPE=${CMAKE_ANDROID_STL_TYPE}")
endif()
ExternalProject_Add( ExternalProject_Add(
extern_glog extern_glog
${EXTERNAL_PROJECT_LOG_ARGS} ${EXTERNAL_PROJECT_LOG_ARGS}
...@@ -39,14 +57,7 @@ ExternalProject_Add( ...@@ -39,14 +57,7 @@ ExternalProject_Add(
GIT_TAG ${GLOG_TAG} GIT_TAG ${GLOG_TAG}
PREFIX ${GLOG_SOURCES_DIR} PREFIX ${GLOG_SOURCES_DIR}
UPDATE_COMMAND "" UPDATE_COMMAND ""
CMAKE_ARGS -DCMAKE_CXX_COMPILER=${CMAKE_CXX_COMPILER} CMAKE_ARGS ${OPTIONAL_ARGS}
-DCMAKE_C_COMPILER=${CMAKE_C_COMPILER}
-DCMAKE_CXX_FLAGS=${GLOG_CMAKE_CXX_FLAGS}
-DCMAKE_CXX_FLAGS_RELEASE=${CMAKE_CXX_FLAGS_RELEASE}
-DCMAKE_CXX_FLAGS_DEBUG=${CMAKE_CXX_FLAGS_DEBUG}
-DCMAKE_C_FLAGS=${CMAKE_C_FLAGS}
-DCMAKE_C_FLAGS_DEBUG=${CMAKE_C_FLAGS_DEBUG}
-DCMAKE_C_FLAGS_RELEASE=${CMAKE_C_FLAGS_RELEASE}
-DCMAKE_INSTALL_PREFIX=${GLOG_INSTALL_DIR} -DCMAKE_INSTALL_PREFIX=${GLOG_INSTALL_DIR}
-DCMAKE_INSTALL_LIBDIR=${GLOG_INSTALL_DIR}/lib -DCMAKE_INSTALL_LIBDIR=${GLOG_INSTALL_DIR}/lib
-DCMAKE_POSITION_INDEPENDENT_CODE=ON -DCMAKE_POSITION_INDEPENDENT_CODE=ON
......
...@@ -142,7 +142,6 @@ IF (WIN32) ...@@ -142,7 +142,6 @@ IF (WIN32)
ENDIF(WIN32) ENDIF(WIN32)
if (NOT "${PROTOBUF_ROOT}" STREQUAL "") if (NOT "${PROTOBUF_ROOT}" STREQUAL "")
find_path(PROTOBUF_INCLUDE_DIR google/protobuf/message.h PATHS ${PROTOBUF_ROOT}/include NO_DEFAULT_PATH) find_path(PROTOBUF_INCLUDE_DIR google/protobuf/message.h PATHS ${PROTOBUF_ROOT}/include NO_DEFAULT_PATH)
find_library(PROTOBUF_LIBRARY protobuf libprotobuf.lib PATHS ${PROTOBUF_ROOT}/lib NO_DEFAULT_PATH) find_library(PROTOBUF_LIBRARY protobuf libprotobuf.lib PATHS ${PROTOBUF_ROOT}/lib NO_DEFAULT_PATH)
find_library(PROTOBUF_LITE_LIBRARY protobuf-lite libprotobuf-lite.lib PATHS ${PROTOBUF_ROOT}/lib NO_DEFAULT_PATH) find_library(PROTOBUF_LITE_LIBRARY protobuf-lite libprotobuf-lite.lib PATHS ${PROTOBUF_ROOT}/lib NO_DEFAULT_PATH)
...@@ -178,12 +177,28 @@ FUNCTION(build_protobuf TARGET_NAME BUILD_FOR_HOST) ...@@ -178,12 +177,28 @@ FUNCTION(build_protobuf TARGET_NAME BUILD_FOR_HOST)
"${PROTOBUF_INSTALL_DIR}/bin/protoc${CMAKE_EXECUTABLE_SUFFIX}" "${PROTOBUF_INSTALL_DIR}/bin/protoc${CMAKE_EXECUTABLE_SUFFIX}"
PARENT_SCOPE) PARENT_SCOPE)
SET(PROTOBUF_REPO "https://github.com/protocolbuffers/protobuf.git")
SET(PROTOBUF_TAG "9f75c5aa851cd877fb0d93ccc31b8567a6706546")
SET(OPTIONAL_CACHE_ARGS "") SET(OPTIONAL_CACHE_ARGS "")
SET(OPTIONAL_ARGS "") SET(OPTIONAL_ARGS "")
IF(BUILD_FOR_HOST) IF(BUILD_FOR_HOST)
SET(OPTIONAL_ARGS "-Dprotobuf_WITH_ZLIB=OFF")
ELSE()
SET(OPTIONAL_ARGS SET(OPTIONAL_ARGS
"-DCMAKE_C_COMPILER=${HOST_C_COMPILER}"
"-DCMAKE_CXX_COMPILER=${HOST_CXX_COMPILER}"
"-Dprotobuf_WITH_ZLIB=OFF"
"-DZLIB_ROOT:FILEPATH=${ZLIB_ROOT}")
SET(OPTIONAL_CACHE_ARGS "-DZLIB_ROOT:STRING=${ZLIB_ROOT}")
ELSE()
# protobuf have compile issue when use android stl c++_static
SET(PROTOBUF_REPO "https://github.com/tensor-tang/protobuf.git")
SET(PROTOBUF_TAG "mobile")
SET(OPTIONAL_ARGS "-Dprotobuf_WITH_ZLIB=OFF"
"-DCMAKE_SYSTEM_NAME=${CMAKE_SYSTEM_NAME}"
"-DCMAKE_SYSTEM_VERSION=${CMAKE_SYSTEM_VERSION}"
"-DCMAKE_ANDROID_ARCH_ABI=${CMAKE_ANDROID_ARCH_ABI}"
"-DCMAKE_ANDROID_NDK=${CMAKE_ANDROID_NDK}"
"-DCMAKE_ANDROID_STL_TYPE=${CMAKE_ANDROID_STL_TYPE}"
"-DCMAKE_CXX_COMPILER=${CMAKE_CXX_COMPILER}" "-DCMAKE_CXX_COMPILER=${CMAKE_CXX_COMPILER}"
"-DCMAKE_C_COMPILER=${CMAKE_C_COMPILER}" "-DCMAKE_C_COMPILER=${CMAKE_C_COMPILER}"
"-DCMAKE_C_FLAGS=${CMAKE_C_FLAGS}" "-DCMAKE_C_FLAGS=${CMAKE_C_FLAGS}"
...@@ -191,25 +206,18 @@ FUNCTION(build_protobuf TARGET_NAME BUILD_FOR_HOST) ...@@ -191,25 +206,18 @@ FUNCTION(build_protobuf TARGET_NAME BUILD_FOR_HOST)
"-DCMAKE_C_FLAGS_RELEASE=${CMAKE_C_FLAGS_RELEASE}" "-DCMAKE_C_FLAGS_RELEASE=${CMAKE_C_FLAGS_RELEASE}"
"-DCMAKE_CXX_FLAGS=${CMAKE_CXX_FLAGS}" "-DCMAKE_CXX_FLAGS=${CMAKE_CXX_FLAGS}"
"-DCMAKE_CXX_FLAGS_RELEASE=${CMAKE_CXX_FLAGS_RELEASE}" "-DCMAKE_CXX_FLAGS_RELEASE=${CMAKE_CXX_FLAGS_RELEASE}"
"-DCMAKE_CXX_FLAGS_DEBUG=${CMAKE_CXX_FLAGS_DEBUG}" "-DCMAKE_CXX_FLAGS_DEBUG=${CMAKE_CXX_FLAGS_DEBUG}")
"-Dprotobuf_WITH_ZLIB=ON"
"-DZLIB_ROOT:FILEPATH=${ZLIB_ROOT}"
${EXTERNAL_OPTIONAL_ARGS})
SET(OPTIONAL_CACHE_ARGS "-DZLIB_ROOT:STRING=${ZLIB_ROOT}")
ENDIF() ENDIF()
IF(WIN32) IF(WIN32)
SET(OPTIONAL_ARGS ${OPTIONAL_ARGS} "-DCMAKE_GENERATOR_PLATFORM=x64") SET(OPTIONAL_ARGS ${OPTIONAL_ARGS} "-DCMAKE_GENERATOR_PLATFORM=x64")
ENDIF() ENDIF()
SET(PROTOBUF_REPO "https://github.com/protocolbuffers/protobuf.git")
SET(PROTOBUF_TAG "9f75c5aa851cd877fb0d93ccc31b8567a6706546")
ExternalProject_Add( ExternalProject_Add(
${TARGET_NAME} ${TARGET_NAME}
${EXTERNAL_PROJECT_LOG_ARGS} ${EXTERNAL_PROJECT_LOG_ARGS}
PREFIX ${PROTOBUF_SOURCES_DIR} PREFIX ${PROTOBUF_SOURCES_DIR}
UPDATE_COMMAND "" UPDATE_COMMAND ""
DEPENDS zlib #DEPENDS zlib
GIT_REPOSITORY ${PROTOBUF_REPO} GIT_REPOSITORY ${PROTOBUF_REPO}
GIT_TAG ${PROTOBUF_TAG} GIT_TAG ${PROTOBUF_TAG}
CONFIGURE_COMMAND CONFIGURE_COMMAND
...@@ -233,6 +241,13 @@ ENDFUNCTION() ...@@ -233,6 +241,13 @@ ENDFUNCTION()
SET(PROTOBUF_VERSION 3.1.0) SET(PROTOBUF_VERSION 3.1.0)
# Light-weight framework builds additionally build protobuf with
# BUILD_FOR_HOST set (target "protobuf_host") and use the protoc from that
# build as the project's protobuf compiler.
if(LITE_WITH_LIGHT_WEIGHT_FRAMEWORK)
    build_protobuf(protobuf_host TRUE)
    list(APPEND external_project_dependencies protobuf_host)
    set(PROTOBUF_PROTOC_EXECUTABLE ${protobuf_host_PROTOC_EXECUTABLE}
        CACHE FILEPATH "protobuf executable." FORCE)
endif()
IF(NOT PROTOBUF_FOUND) IF(NOT PROTOBUF_FOUND)
build_protobuf(extern_protobuf FALSE) build_protobuf(extern_protobuf FALSE)
...@@ -245,7 +260,12 @@ IF(NOT PROTOBUF_FOUND) ...@@ -245,7 +260,12 @@ IF(NOT PROTOBUF_FOUND)
SET(PROTOBUF_PROTOC_LIBRARY ${extern_protobuf_PROTOC_LIBRARY} SET(PROTOBUF_PROTOC_LIBRARY ${extern_protobuf_PROTOC_LIBRARY}
CACHE FILEPATH "protoc library." FORCE) CACHE FILEPATH "protoc library." FORCE)
IF(LITE_WITH_LIGHT_WEIGHT_FRAMEWORK)
PROMPT_PROTOBUF_LIB(protobuf_host extern_protobuf)
ELSE()
SET(PROTOBUF_PROTOC_EXECUTABLE ${extern_protobuf_PROTOC_EXECUTABLE} SET(PROTOBUF_PROTOC_EXECUTABLE ${extern_protobuf_PROTOC_EXECUTABLE}
CACHE FILEPATH "protobuf executable." FORCE) CACHE FILEPATH "protobuf executable." FORCE)
PROMPT_PROTOBUF_LIB(extern_protobuf) PROMPT_PROTOBUF_LIB(extern_protobuf)
ENDIF()
ENDIF(NOT PROTOBUF_FOUND) ENDIF(NOT PROTOBUF_FOUND)
...@@ -93,7 +93,10 @@ include_directories(${CMAKE_CURRENT_BINARY_DIR}) ...@@ -93,7 +93,10 @@ include_directories(${CMAKE_CURRENT_BINARY_DIR})
if(NOT APPLE) if(NOT APPLE)
find_package(Threads REQUIRED) find_package(Threads REQUIRED)
link_libraries(${CMAKE_THREAD_LIBS_INIT}) link_libraries(${CMAKE_THREAD_LIBS_INIT})
set(CMAKE_CXX_LINK_EXECUTABLE "${CMAKE_CXX_LINK_EXECUTABLE} -pthread -ldl -lrt") set(CMAKE_CXX_LINK_EXECUTABLE "${CMAKE_CXX_LINK_EXECUTABLE} -pthread -ldl")
if (NOT ANDROID)
set(CMAKE_CXX_LINK_EXECUTABLE "${CMAKE_CXX_LINK_EXECUTABLE} -lrt")
endif()
endif(NOT APPLE) endif(NOT APPLE)
set_property(GLOBAL PROPERTY FLUID_MODULES "") set_property(GLOBAL PROPERTY FLUID_MODULES "")
......
...@@ -23,10 +23,10 @@ ...@@ -23,10 +23,10 @@
#include <string> #include <string>
#include <unordered_map> #include <unordered_map>
#include <vector> #include <vector>
#include "paddle/fluid/lite/utils/logging.h" // #include "paddle/fluid/lite/utils/logging.h"
#ifndef LITE_WITH_LIGHT_WEIGHT_FRAMEWORK // #ifndef LITE_WITH_LIGHT_WEIGHT_FRAMEWORK
#include <glog/logging.h> #include <glog/logging.h>
#endif // #endif
namespace paddle { namespace paddle {
namespace inference { namespace inference {
......
...@@ -39,3 +39,4 @@ add_subdirectory(kernels) ...@@ -39,3 +39,4 @@ add_subdirectory(kernels)
add_subdirectory(model_parser) add_subdirectory(model_parser)
add_subdirectory(utils) add_subdirectory(utils)
add_subdirectory(api) add_subdirectory(api)
set(cxx_api_lite_deps scope_lite optimizer_lite target_wrapper_host optimizer_lite model_parser_lite) set(cxx_api_lite_deps scope_lite optimizer_lite target_wrapper_host model_parser_lite)
if(LITE_WITH_CUDA) if(LITE_WITH_CUDA)
set(cxx_api_lite_deps ${cxx_api_lite_deps} kernels_cuda) set(cxx_api_lite_deps ${cxx_api_lite_deps} kernels_cuda)
cc_library(cxx_api_lite_cuda SRCS cxx_api.cc DEPS ${cxx_api_lite_deps} target_wrapper_cuda) cc_library(cxx_api_lite_cuda SRCS cxx_api.cc DEPS ${cxx_api_lite_deps} target_wrapper_cuda)
...@@ -17,7 +17,7 @@ endif() ...@@ -17,7 +17,7 @@ endif()
cc_library(light_api_lite SRCS light_api.cc DEPS ${light_api_deps} ${ops_lite} ${host_kernels}) cc_library(light_api_lite SRCS light_api.cc DEPS ${light_api_deps} ${ops_lite} ${host_kernels})
message(STATUS "get ops ${ops_lite}") message(STATUS "get ops ${ops_lite}")
message(STATUS "get kernels ${host_kernels}") message(STATUS "get kernels ${host_kernels} ${arm_kernels}")
include(ExternalProject) include(ExternalProject)
set(LITE_URL "http://paddle-inference-dist.bj.bcebos.com" CACHE STRING "inference download url") set(LITE_URL "http://paddle-inference-dist.bj.bcebos.com" CACHE STRING "inference download url")
...@@ -36,4 +36,11 @@ endif(WITH_TESTING) ...@@ -36,4 +36,11 @@ endif(WITH_TESTING)
lite_cc_test(test_light_api SRCS light_api_test.cc DEPS light_api_lite ARGS --optimized_model=${LITE_MODEL_DIR}/lite_naive_model_opt SERIAL) lite_cc_test(test_light_api SRCS light_api_test.cc DEPS light_api_lite ARGS --optimized_model=${LITE_MODEL_DIR}/lite_naive_model_opt SERIAL)
cc_binary(cxx_api_lite_bin SRCS cxx_api_bin.cc DEPS cxx_api_lite model_parser_lite target_wrapper_host ${ops_lite} ${host_kernels}) cc_binary(cxx_api_lite_bin SRCS cxx_api_bin.cc
DEPS
cxx_api_lite
model_parser_lite
target_wrapper_host
mir_passes
${ops_lite} ${host_kernels} ${arm_kernels})
...@@ -13,28 +13,36 @@ ...@@ -13,28 +13,36 @@
// limitations under the License. // limitations under the License.
#include "paddle/fluid/lite/api/cxx_api.h" #include "paddle/fluid/lite/api/cxx_api.h"
#ifndef LITE_WITH_LIGHT_WEIGHT_FRAMEWORK
#include "paddle/fluid/lite/core/mir/passes.h" #include "paddle/fluid/lite/core/mir/passes.h"
#endif
#include "paddle/fluid/lite/core/op_registry.h" #include "paddle/fluid/lite/core/op_registry.h"
namespace paddle { namespace paddle {
namespace lite { namespace lite {
void Run(const char* model_dir) { void Run(const char* model_dir) {
lite::Executor predictor; lite::ExecutorLite predictor;
#ifndef LITE_WITH_CUDA // #ifndef LITE_WITH_CUDA
std::vector<Place> valid_places({Place{TARGET(kHost), PRECISION(kFloat)}}); // std::vector<Place> valid_places({Place{TARGET(kHost),
#else // PRECISION(kFloat)}});
std::vector<Place> valid_places({ // #elif defined(LITE_WITH_LIGHT_WEIGHT_FRAMEWORK)
Place{TARGET(kHost), PRECISION(kFloat), DATALAYOUT(kNCHW)}, // #else
Place{TARGET(kCUDA), PRECISION(kFloat), DATALAYOUT(kNCHW)}, // std::vector<Place> valid_places({
Place{TARGET(kCUDA), PRECISION(kAny), DATALAYOUT(kNCHW)}, // Place{TARGET(kHost), PRECISION(kFloat), DATALAYOUT(kNCHW)},
Place{TARGET(kHost), PRECISION(kAny), DATALAYOUT(kNCHW)}, // Place{TARGET(kCUDA), PRECISION(kFloat), DATALAYOUT(kNCHW)},
Place{TARGET(kCUDA), PRECISION(kAny), DATALAYOUT(kAny)}, // Place{TARGET(kCUDA), PRECISION(kAny), DATALAYOUT(kNCHW)},
Place{TARGET(kHost), PRECISION(kAny), DATALAYOUT(kAny)}, // Place{TARGET(kHost), PRECISION(kAny), DATALAYOUT(kNCHW)},
}); // Place{TARGET(kCUDA), PRECISION(kAny), DATALAYOUT(kAny)},
#endif // Place{TARGET(kHost), PRECISION(kAny), DATALAYOUT(kAny)},
// });
// #endif
std::vector<Place> valid_places({Place{TARGET(kARM), PRECISION(kFloat)}});
predictor.Build(model_dir, Place{TARGET(kCUDA), PRECISION(kFloat)}, predictor.Build(model_dir, Place{TARGET(kARM), PRECISION(kFloat)},
valid_places); valid_places);
auto* input_tensor = predictor.GetInput(0); auto* input_tensor = predictor.GetInput(0);
...@@ -71,12 +79,12 @@ USE_LITE_OP(fc); ...@@ -71,12 +79,12 @@ USE_LITE_OP(fc);
USE_LITE_OP(scale); USE_LITE_OP(scale);
USE_LITE_OP(feed); USE_LITE_OP(feed);
USE_LITE_OP(fetch); USE_LITE_OP(fetch);
USE_LITE_OP(io_copy); // USE_LITE_OP(io_copy);
USE_LITE_KERNEL(fc, kHost, kFloat, kNCHW, def); USE_LITE_KERNEL(fc, kARM, kFloat, kNCHW, def);
USE_LITE_KERNEL(mul, kHost, kFloat, kNCHW, def); USE_LITE_KERNEL(mul, kARM, kFloat, kNCHW, def);
USE_LITE_KERNEL(scale, kHost, kFloat, kNCHW, def); USE_LITE_KERNEL(scale, kARM, kFloat, kNCHW, def);
USE_LITE_KERNEL(feed, kHost, kAny, kAny, def); USE_LITE_KERNEL(feed, kARM, kAny, kAny, def);
USE_LITE_KERNEL(fetch, kHost, kAny, kAny, def); USE_LITE_KERNEL(fetch, kARM, kAny, kAny, def);
#ifdef LITE_WITH_CUDA #ifdef LITE_WITH_CUDA
USE_LITE_KERNEL(mul, kCUDA, kFloat, kNCHW, def); USE_LITE_KERNEL(mul, kCUDA, kFloat, kNCHW, def);
......
...@@ -16,7 +16,7 @@ proto_library(framework_proto_lite SRCS framework.proto) ...@@ -16,7 +16,7 @@ proto_library(framework_proto_lite SRCS framework.proto)
cc_library(kernel_lite SRCS kernel.cc DEPS type_system target_wrapper_lite) cc_library(kernel_lite SRCS kernel.cc DEPS type_system target_wrapper_lite)
cc_library(variable_lite SRCS variable.cc) cc_library(variable_lite SRCS variable.cc)
cc_library(op_registry_lite SRCS op_registry.cc) cc_library(op_registry_lite SRCS op_registry.cc DEPS framework_proto_lite)
cc_library(scope_lite SRCS scope.cc) cc_library(scope_lite SRCS scope.cc)
cc_library(context_lite SRCS context.cc DEPS any_lite) cc_library(context_lite SRCS context.cc DEPS any_lite)
cc_library(op_lite SRCS op_lite.cc DEPS scope_lite op_registry_lite compatible_pb_lite) cc_library(op_lite SRCS op_lite.cc DEPS scope_lite op_registry_lite compatible_pb_lite)
...@@ -46,3 +46,4 @@ lite_cc_test(test_tensor_lite SRCS lite_tensor_test.cc DEPS lite_tensor) ...@@ -46,3 +46,4 @@ lite_cc_test(test_tensor_lite SRCS lite_tensor_test.cc DEPS lite_tensor)
lite_cc_test(test_type_system SRCS type_system_test.cc DEPS type_system utils_lite) lite_cc_test(test_type_system SRCS type_system_test.cc DEPS type_system utils_lite)
#lite_cc_test(test_optimizer_lite SRCS optimizer_test.cc DEPS mir_pass_manager program_fake_utils mir_passes optimizer_lite fc_op_lite) #lite_cc_test(test_optimizer_lite SRCS optimizer_test.cc DEPS mir_pass_manager program_fake_utils mir_passes optimizer_lite fc_op_lite)
lite_cc_test(test_types_lite SRCS types_test.cc DEPS types_lite) lite_cc_test(test_types_lite SRCS types_test.cc DEPS types_lite)
cc_library(mir_node SRCS node.cc) cc_library(mir_node SRCS node.cc DEPS framework_proto_lite)
cc_library(mir_ssa_graph SRCS ssa_graph.cc DEPS mir_node) cc_library(mir_ssa_graph SRCS ssa_graph.cc DEPS mir_node)
cc_library(mir_pass SRCS pass.cc DEPS mir_ssa_graph) cc_library(mir_pass SRCS pass.cc DEPS mir_ssa_graph)
cc_library(mir_pass_manager SRCS pass_manager.cc DEPS mir_pass mir_ssa_graph mir_passes) cc_library(mir_pass_manager SRCS pass_manager.cc DEPS mir_pass mir_ssa_graph mir_passes)
...@@ -48,3 +48,4 @@ if (LITE_WITH_CUDA) ...@@ -48,3 +48,4 @@ if (LITE_WITH_CUDA)
endif() endif()
cc_test(test_variable_place_infrence_pass SRCS variable_place_inference_pass_test.cc DEPS cc_test(test_variable_place_infrence_pass SRCS variable_place_inference_pass_test.cc DEPS
${test_variable_place_infrence_pass_DEPS}) ${test_variable_place_infrence_pass_DEPS})
...@@ -15,7 +15,6 @@ ...@@ -15,7 +15,6 @@
#pragma once #pragma once
#include <string> #include <string>
#include "paddle/fluid/lite/core/mir/pass.h"
#include "paddle/fluid/lite/core/mir/pass_manager.h" #include "paddle/fluid/lite/core/mir/pass_manager.h"
namespace paddle { namespace paddle {
...@@ -32,6 +31,10 @@ class PassRegistry { ...@@ -32,6 +31,10 @@ class PassRegistry {
bool Touch() const { return true; } bool Touch() const { return true; }
}; };
} // namespace mir
} // namespace lite
} // namespace paddle
#define REGISTER_MIR_PASS(name__, class__) \ #define REGISTER_MIR_PASS(name__, class__) \
paddle::lite::mir::PassRegistry mir_pass_registry##name__(#name__, \ paddle::lite::mir::PassRegistry mir_pass_registry##name__(#name__, \
new class__); \ new class__); \
...@@ -43,7 +46,3 @@ class PassRegistry { ...@@ -43,7 +46,3 @@ class PassRegistry {
extern bool mir_pass_registry##name__##_fake(); \ extern bool mir_pass_registry##name__##_fake(); \
static bool mir_pass_usage##name__ __attribute__((unused)) = \ static bool mir_pass_usage##name__ __attribute__((unused)) = \
mir_pass_registry##name__##_fake(); mir_pass_registry##name__##_fake();
} // namespace mir
} // namespace lite
} // namespace paddle
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import numpy import numpy
import sys, os import sys, os
import numpy as np import numpy as np
...@@ -26,8 +40,6 @@ data_1 = np.array(numpy.random.random([100, 100]), dtype='float32') ...@@ -26,8 +40,6 @@ data_1 = np.array(numpy.random.random([100, 100]), dtype='float32')
#fluid.default_main_program().desc. #fluid.default_main_program().desc.
#prog = fluid.compiler.CompiledProgram(fluid.default_main_program()) #prog = fluid.compiler.CompiledProgram(fluid.default_main_program())
prog = fluid.default_main_program() prog = fluid.default_main_program()
...@@ -36,11 +48,9 @@ prog = fluid.default_main_program() ...@@ -36,11 +48,9 @@ prog = fluid.default_main_program()
with open('main_program.pb', 'wb') as f: with open('main_program.pb', 'wb') as f:
f.write(prog.desc.serialize_to_string()) f.write(prog.desc.serialize_to_string())
#outs = exe.run(program=prog, feed={'a':data_1, }, fetch_list=[cost]) #outs = exe.run(program=prog, feed={'a':data_1, }, fetch_list=[cost])
sys.exit(0) sys.exit(0)
fluid.io.save_inference_model("./model2", [a.name], [a1], exe) fluid.io.save_inference_model("./model2", [a.name], [a1], exe)
print(numpy.array(outs)) print(numpy.array(outs))
...@@ -71,7 +71,7 @@ bool OpLite::Run() { ...@@ -71,7 +71,7 @@ bool OpLite::Run() {
bool OpLite::Attach(const OpDesc &opdesc, lite::Scope *scope) { bool OpLite::Attach(const OpDesc &opdesc, lite::Scope *scope) {
// valid_places_.clear(); // valid_places_.clear();
CHECK(scope != nullptr); CHECK(scope != nullptr);
//CHECK(!op_info_.get()); // CHECK(!op_info_.get());
scope_ = scope; scope_ = scope;
op_info_.reset(new OpInfo); // Force clean the out-of-date infomation. op_info_.reset(new OpInfo); // Force clean the out-of-date infomation.
op_info_->Build(opdesc.ReadonlyProto()); op_info_->Build(opdesc.ReadonlyProto());
......
...@@ -131,7 +131,6 @@ class OpLite : public Registry { ...@@ -131,7 +131,6 @@ class OpLite : public Registry {
return var->GetMutable<T>(); return var->GetMutable<T>();
} }
protected: protected:
lite::Scope *scope_{}; lite::Scope *scope_{};
std::unique_ptr<KernelBase> kernel_; std::unique_ptr<KernelBase> kernel_;
......
...@@ -59,6 +59,9 @@ std::list<std::unique_ptr<KernelBase>> KernelRegistry::Create( ...@@ -59,6 +59,9 @@ std::list<std::unique_ptr<KernelBase>> KernelRegistry::Create(
case TARGET(kCUDA): { case TARGET(kCUDA): {
CREATE_KERNEL(kCUDA); CREATE_KERNEL(kCUDA);
} break; } break;
case TARGET(kARM): {
CREATE_KERNEL(kARM);
} break;
default: default:
CHECK(false) << "not supported kernel target " << TargetToStr(target); CHECK(false) << "not supported kernel target " << TargetToStr(target);
} }
...@@ -67,7 +70,10 @@ std::list<std::unique_ptr<KernelBase>> KernelRegistry::Create( ...@@ -67,7 +70,10 @@ std::list<std::unique_ptr<KernelBase>> KernelRegistry::Create(
return std::list<std::unique_ptr<KernelBase>>(); return std::list<std::unique_ptr<KernelBase>>();
} }
KernelRegistry::KernelRegistry() { KernelRegistry::KernelRegistry()
: registries_(static_cast<int>(TARGET(NUM)) *
static_cast<int>(PRECISION(NUM)) *
static_cast<int>(DATALAYOUT(NUM))) {
#define INIT_FOR(target__, precision__, layout__) \ #define INIT_FOR(target__, precision__, layout__) \
registries_[KernelRegistry::GetKernelOffset<TARGET(target__), \ registries_[KernelRegistry::GetKernelOffset<TARGET(target__), \
PRECISION(precision__), \ PRECISION(precision__), \
...@@ -79,10 +85,15 @@ KernelRegistry::KernelRegistry() { ...@@ -79,10 +85,15 @@ KernelRegistry::KernelRegistry() {
// Currently, just register 2 kernel targets. // Currently, just register 2 kernel targets.
INIT_FOR(kCUDA, kFloat, kNCHW); INIT_FOR(kCUDA, kFloat, kNCHW);
INIT_FOR(kCUDA, kAny, kNCHW); INIT_FOR(kCUDA, kAny, kNCHW);
INIT_FOR(kCUDA, kAny, kAny);
INIT_FOR(kHost, kFloat, kNCHW); INIT_FOR(kHost, kFloat, kNCHW);
INIT_FOR(kHost, kAny, kNCHW); INIT_FOR(kHost, kAny, kNCHW);
INIT_FOR(kHost, kAny, kAny); INIT_FOR(kHost, kAny, kAny);
INIT_FOR(kCUDA, kAny, kAny);
INIT_FOR(kARM, kFloat, kNCHW);
INIT_FOR(kARM, kAny, kNCHW);
INIT_FOR(kARM, kAny, kAny);
#undef INIT_FOR #undef INIT_FOR
} }
......
...@@ -19,6 +19,7 @@ ...@@ -19,6 +19,7 @@
#include <string> #include <string>
#include <unordered_map> #include <unordered_map>
#include <utility> #include <utility>
#include <vector>
#include "paddle/fluid/lite/core/kernel.h" #include "paddle/fluid/lite/core/kernel.h"
#include "paddle/fluid/lite/core/op_lite.h" #include "paddle/fluid/lite/core/op_lite.h"
#include "paddle/fluid/lite/core/target_wrapper.h" #include "paddle/fluid/lite/core/target_wrapper.h"
...@@ -75,7 +76,11 @@ class KernelRegistry final { ...@@ -75,7 +76,11 @@ class KernelRegistry final {
KernelRegistryForTarget<TARGET(kHost), PRECISION(kAny), KernelRegistryForTarget<TARGET(kHost), PRECISION(kAny),
DATALAYOUT(kAny)> *, // DATALAYOUT(kAny)> *, //
KernelRegistryForTarget<TARGET(kCUDA), PRECISION(kAny), KernelRegistryForTarget<TARGET(kCUDA), PRECISION(kAny),
DATALAYOUT(kAny)> * // DATALAYOUT(kAny)> *, //
KernelRegistryForTarget<TARGET(kARM), PRECISION(kAny),
DATALAYOUT(kAny)> *, //
KernelRegistryForTarget<TARGET(kARM), PRECISION(kFloat),
DATALAYOUT(kNCHW)> * //
>; >;
KernelRegistry(); KernelRegistry();
...@@ -92,8 +97,9 @@ class KernelRegistry final { ...@@ -92,8 +97,9 @@ class KernelRegistry final {
using kernel_registor_t = using kernel_registor_t =
KernelRegistryForTarget<Target, Precision, Layout>; KernelRegistryForTarget<Target, Precision, Layout>;
auto &varient = registries_[GetKernelOffset<Target, Precision, Layout>()]; auto &varient = registries_[GetKernelOffset<Target, Precision, Layout>()];
varient.template get<kernel_registor_t *>()->Register(name, auto *reg = varient.template get<kernel_registor_t *>();
std::move(creator)); CHECK(reg) << "Can not be empty of " << name;
reg->Register(name, std::move(creator));
} }
template <TargetType Target, PrecisionType Precision = PRECISION(kFloat), template <TargetType Target, PrecisionType Precision = PRECISION(kFloat),
...@@ -125,23 +131,20 @@ class KernelRegistry final { ...@@ -125,23 +131,20 @@ class KernelRegistry final {
std::string DebugString() const { std::string DebugString() const {
std::stringstream ss; std::stringstream ss;
ss << "KernelCreator<host, float>:" << std::endl; ss << "KernelCreator<host, float>:" << std::endl;
ss << registries_[GetKernelOffset<TARGET(kHost), PRECISION(kFloat), constexpr TargetType tgt = TARGET(kHost);
DATALAYOUT(kAny)>()] constexpr PrecisionType dt = PRECISION(kFloat);
.get<KernelRegistryForTarget<TARGET(kHost), PRECISION(kFloat), constexpr DataLayoutType lt = DATALAYOUT(kNCHW);
DATALAYOUT(kNCHW)> *>() constexpr DataLayoutType kany = DATALAYOUT(kAny);
->DebugString(); using kernel_registor_t = KernelRegistryForTarget<tgt, dt, lt>;
ss << std::endl; auto *reg = registries_[GetKernelOffset<tgt, dt, kany>()]
.template get<kernel_registor_t *>();
ss << reg->DebugString() << std::endl;
return ss.str(); return ss.str();
} }
private: private:
mutable std::array<any_kernel_registor_t, mutable std::vector<any_kernel_registor_t> registries_;
static_cast<int>(TARGET(NUM)) *
static_cast<int>(PRECISION(NUM)) *
static_cast<int>(DATALAYOUT(NUM))>
registries_;
}; };
template <TargetType target, PrecisionType precision, DataLayoutType layout, template <TargetType target, PrecisionType precision, DataLayoutType layout,
......
...@@ -46,6 +46,7 @@ class Optimizer { ...@@ -46,6 +46,7 @@ class Optimizer {
SpecifyKernelPickTactic(kernel_pick_factor); SpecifyKernelPickTactic(kernel_pick_factor);
InitTargetTypeTransformPass(); InitTargetTypeTransformPass();
#ifndef LITE_WITH_LIGHT_WEIGHT_FRAMEWORK
if (passes.empty()) { if (passes.empty()) {
RunPasses(std::vector<std::string>{{ RunPasses(std::vector<std::string>{{
"static_kernel_pick_pass", // "static_kernel_pick_pass", //
...@@ -62,6 +63,7 @@ class Optimizer { ...@@ -62,6 +63,7 @@ class Optimizer {
} else { } else {
RunPasses(passes); RunPasses(passes);
} }
#endif
exec_scope_ = program.exec_scope; exec_scope_ = program.exec_scope;
} }
......
...@@ -30,6 +30,7 @@ enum class TargetType : int { ...@@ -30,6 +30,7 @@ enum class TargetType : int {
kHost, kHost,
kX86, kX86,
kCUDA, kCUDA,
kARM,
kAny, // any target kAny, // any target
NUM, // number of fields. NUM, // number of fields.
}; };
......
...@@ -4,3 +4,4 @@ endif() ...@@ -4,3 +4,4 @@ endif()
nv_library(target_wrapper_cuda SRCS target_wrapper.cc) nv_library(target_wrapper_cuda SRCS target_wrapper.cc)
nv_library(cuda_blas_lite SRCS blas.cc) nv_library(cuda_blas_lite SRCS blas.cc)
cc_library(target_wrapper_host SRCS target_wrapper.cc DEPS target_wrapper_lite) cc_library(target_wrapper_host SRCS target_wrapper.cc DEPS target_wrapper_lite)
...@@ -4,3 +4,4 @@ add_subdirectory(host) ...@@ -4,3 +4,4 @@ add_subdirectory(host)
add_subdirectory(arm) add_subdirectory(arm)
add_subdirectory(cuda) add_subdirectory(cuda)
add_subdirectory(x86) add_subdirectory(x86)
# Everything below is skipped unless the light-weight framework is enabled.
if(NOT LITE_WITH_LIGHT_WEIGHT_FRAMEWORK)
  return()
endif()

message(STATUS "compile with lite ARM kernels")

# One library per ARM kernel, declared in the same order as the exported list.
cc_library(feed_compute_arm
  SRCS feed_compute.cc
  DEPS ${lite_kernel_deps})
cc_library(fetch_compute_arm
  SRCS fetch_compute.cc
  DEPS ${lite_kernel_deps})
cc_library(fc_compute_arm
  SRCS fc_compute.cc
  DEPS ${lite_kernel_deps} eigen3)
cc_library(relu_compute_arm
  SRCS relu_compute.cc
  DEPS ${lite_kernel_deps})
cc_library(mul_compute_arm
  SRCS mul_compute.cc
  DEPS ${lite_kernel_deps} eigen3)
cc_library(scale_compute_arm
  SRCS scale_compute.cc
  DEPS ${lite_kernel_deps} eigen3)

# lite_cc_test(test_fc_compute_arm SRCS fc_compute_test.cc DEPS ${lite_kernel_deps} fc_compute_arm)

# Collect the kernel targets, then publish the list through the cache so that
# other directories can read it.
set(arm_kernels
  feed_compute_arm
  fetch_compute_arm
  fc_compute_arm
  relu_compute_arm
  mul_compute_arm
  scale_compute_arm
)
set(arm_kernels "${arm_kernels}" CACHE INTERNAL "arm kernels")
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/fluid/lite/kernels/arm/fc_compute.h"
#include <Eigen/Core>
#include "paddle/fluid/lite/core/op_registry.h"
#include "paddle/fluid/lite/core/type_system.h"
namespace paddle {
namespace lite {
namespace kernels {
namespace arm {
// NOTE should use pure std C++ implementation.
void FcCompute::Run() {
auto& param = this->Param<operators::FcParam>();
CHECK_GE(param.input->dims().size(), 2UL);
CHECK_EQ(param.output->dims().size(), 2UL);
fc_compute_eigen(
param.input->data<float>(), // x
param.input->dims().Slice(0, param.in_num_col_dims).production(),
param.input->dims()
.Slice(param.in_num_col_dims, param.input->dims().size())
.production(),
param.w->data<float>(), // w
param.w->dims()[1], // w_w
param.w->dims()[0], // w_h
param.bias->data<float>(), // b
param.output->mutable_data<float>());
}
// Reports the device target this kernel is written for.
TargetType FcCompute::target() const {
  return TARGET(kARM);
}

// Reports the numeric precision this kernel computes in.
PrecisionType FcCompute::precision() const {
  return PRECISION(kFloat);
}
} // namespace arm
} // namespace kernels
} // namespace lite
} // namespace paddle
// Registers FcCompute under the name `fc` with the (kARM, kFloat, kNCHW, def)
// tuple, and binds its three inputs (Input, Bias, W) and single output (Out)
// to tensor types built from TARGET(kARM).
REGISTER_LITE_KERNEL(fc, kARM, kFloat, kNCHW,
paddle::lite::kernels::arm::FcCompute, def)
.BindInput("Input", {LiteType::GetTensorTy(TARGET(kARM))})
.BindInput("Bias", {LiteType::GetTensorTy(TARGET(kARM))})
.BindInput("W", {LiteType::GetTensorTy(TARGET(kARM))})
.BindOutput("Out", {LiteType::GetTensorTy(TARGET(kARM))})
.Finalize();
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <Eigen/Core>
#include "paddle/fluid/lite/core/kernel.h"
#include "paddle/fluid/lite/operators/fc_op.h"
namespace paddle {
namespace lite {
namespace kernels {
namespace arm {
// Fully-connected kernel declared for TARGET(kARM) with PRECISION(kFloat).
// The member functions are defined out of line.
class FcCompute : public KernelLite<TARGET(kARM), PRECISION(kFloat)> {
 public:
  // Parameter struct this kernel reads in Run().
  using param_t = operators::FcParam;

  virtual ~FcCompute() = default;

  // Kernel identity.
  TargetType target() const override;
  PrecisionType precision() const override;

  // Executes the kernel on the currently attached parameters.
  void Run() override;
};
template <typename T>
void fc_compute_eigen(const T* x, int x_w, int x_h, //
const T* w, int w_w, int w_h, //
const T* b, //
T* out) {
using matrix_t =
Eigen::Matrix<T, Eigen::Dynamic, Eigen::Dynamic, Eigen::RowMajor>;
Eigen::Map<const matrix_t> X(x, x_h, x_w);
Eigen::Map<const matrix_t> W(w, w_h, w_w);
Eigen::Map<matrix_t> Out(out, x_h, w_h);
Out = X * W.transpose();
if (b) {
Eigen::Map<const Eigen::Matrix<T, Eigen::Dynamic, 1>> B(b, w_h);
Out = Out.array().rowwise() + B.transpose().array();
}
}
// Returns the inner product of the first `dim` elements of x and y.
// A non-positive `dim` yields a value-initialized T.
template <typename T>
__attribute__((optimize("unroll-loops")))  //
T dot(const T* x, const T* y, int dim) {
  T acc{};
  int idx = 0;
  while (idx < dim) {
    acc += x[idx] * y[idx];
    ++idx;
  }
  return acc;
}
// Reference fully-connected implementation: out = x * w^T (+ b).
//
//   x   : x_h rows by x_w columns, row-major
//   w   : w_h rows by w_w columns, row-major (w_w must equal x_w)
//   b   : w_h elements, or nullptr to skip the bias (matches fc_compute_eigen)
//   out : x_h rows by w_h columns, row-major
template <typename T>
void fc_compute_naive(const T* x, int x_w, int x_h,  //
                      const T* w, int w_w, int w_h,  //
                      const T* b,                    //
                      T* out) {
  CHECK_EQ(x_w, w_w);
  // out shape: (x_h, w_h). Every element is assigned below, so the buffer
  // does not need to be zeroed first.
  for (int r = 0; r < x_h; r++) {
    for (int c = 0; c < w_h; c++) {
      T acc = dot(&x[r * x_w], &w[c * w_w], w_w);
      if (b) {
        acc += b[c];
      }
      out[r * w_h + c] = acc;
    }
  }
}
} // namespace arm
} // namespace kernels
} // namespace lite
} // namespace paddle
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/fluid/lite/kernels/arm/fc_compute.h"
#include <gtest/gtest.h>
#include <vector>
#include "paddle/fluid/lite/core/op_registry.h"
namespace paddle {
namespace lite {
namespace kernels {
namespace arm {
// Cross-checks fc_compute_naive against fc_compute_eigen on a small
// (batch_size x 3) input, (4 x 3) weight and 4-element bias.
TEST(fc_compute_naive, test) {
  lite::Tensor x, w, b, out, out1;
  const int batch_size = 2;
  x.Resize({batch_size, 3});
  w.Resize({4, 3});
  b.Resize({1, 4});
  out.Resize({batch_size, 4});
  out1.Resize({batch_size, 4});

  auto x_data = x.mutable_data<float>();
  auto w_data = w.mutable_data<float>();
  auto b_data = b.mutable_data<float>();
  auto out_data = out.mutable_data<float>();
  auto out_data1 = out1.mutable_data<float>();

  for (int i = 0; i < product(x.dims()); i++) x_data[i] = i;
  for (int i = 0; i < product(w.dims()); i++) w_data[i] = i;
  for (int i = 0; i < product(b.dims()); i++) b_data[i] = i;

  fc_compute_naive(x_data, 3, batch_size,  //
                   w_data, 3, 4,           //
                   b_data, out_data);
  fc_compute_eigen(x_data, 3, batch_size,  //
                   w_data, 3, 4,           //
                   b_data, out_data1);

  // Compare every output element, not just the first one.
  for (int i = 0; i < product(out.dims()); i++) {
    EXPECT_NEAR(out_data[i], out_data1[i], 1e-6);
  }
}
// A default-constructed FcCompute must report float precision and ARM target.
TEST(fc_arm, init) {
  FcCompute kernel;
  ASSERT_EQ(kernel.precision(), PRECISION(kFloat));
  ASSERT_EQ(kernel.target(), TARGET(kARM));
}
// Smoke test of the Eigen expression used by the FC kernel:
// (10 x 20) input times the transpose of a (20 x 20) weight gives (10 x 20).
TEST(fc_arm, algorithm) {
  using matrix_t = Eigen::Matrix<float, Eigen::Dynamic, Eigen::Dynamic>;
  using matrix_map_t = Eigen::Map<matrix_t>;

  // dim 10, 20
  std::vector<float> input(10 * 20);
  std::vector<float> w(20 * 20);
  std::vector<float> output(10 * 20);

  Eigen::Map<const matrix_t> input_mat(input.data(), 10, 20);
  Eigen::Map<const matrix_t> weight_mat(w.data(), 20, 20);
  matrix_map_t output_mat(output.data(), 10, 20);

  // The left operand's column count must equal the right operand's row count:
  // (10 x 20) * (20 x 20). The reverse order, (20 x 20) * (10 x 20), is not a
  // valid matrix product.
  output_mat = input_mat * weight_mat.transpose();
}
// Drives FcCompute::Run end to end on a (1 x 10 x 20) input flattened at
// in_num_col_dims = 2, a (20 x 20) weight and a (10 x 20) output, then logs
// the input and output values.
TEST(fc_arm, compute) {
  FcCompute fc;
  operators::FcParam param;

  lite::Tensor x;
  lite::Tensor w;
  lite::Tensor bias;
  lite::Tensor output;

  x.Resize(DDim(std::vector<int64_t>({1, 10, 20})));
  w.Resize(DDim(std::vector<int64_t>({20, 20})));
  // The kernel reads w.dims()[0] (= 20) bias values, one per output column,
  // so the bias must hold 20 elements rather than 10.
  bias.Resize(DDim(std::vector<int64_t>({1, 20})));
  output.Resize(DDim(std::vector<int64_t>({10, 20})));

  auto* x_data = x.mutable_data<float>();
  auto* w_data = w.mutable_data<float>();
  auto* bias_data = bias.mutable_data<float>();
  auto* output_data = output.mutable_data<float>();

  for (int i = 0; i < 10 * 20; i++) x_data[i] = i;
  for (int i = 0; i < 20 * 20; i++) w_data[i] = i;
  for (int i = 0; i < 20; i++) bias_data[i] = i;
  for (int i = 0; i < 10 * 20; i++) output_data[i] = 0;

  param.in_num_col_dims = 2;
  param.input = &x;
  param.w = &w;
  param.bias = &bias;
  param.output = &output;
  param.in_mat_dims = x.dims();

  fc.SetParam(param);
  fc.Run();

  LOG(INFO) << "x";
  for (int i = 0; i < 10 * 20; i++) LOG(INFO) << x_data[i];

  LOG(INFO) << "output:";
  for (int i = 0; i < 10 * 20; i++) LOG(INFO) << output.data<float>()[i];
}
// Checks that the global kernel registry can create an "fc" kernel for
// TARGET(kARM) / PRECISION(kFloat).
// NOTE(review): ASSERT_TRUE(fc) needs the return type of Create<> to be usable
// as a boolean -- confirm against the registry declaration.
TEST(fc, retrive_op) {
auto fc =
KernelRegistry::Global().Create<TARGET(kARM), PRECISION(kFloat)>("fc");
ASSERT_TRUE(fc);
}
} // namespace arm
} // namespace kernels
} // namespace lite
} // namespace paddle
USE_LITE_KERNEL(fc, kARM, kFloat, kNCHW, def);
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/fluid/lite/core/op_registry.h"
#include "paddle/fluid/lite/core/type_system.h"
namespace paddle {
namespace lite {
namespace kernels {
namespace arm {
// Feed kernel for the ARM target: makes the output tensor share storage with
// the feed-list entry selected by `param.col`.
class FeedCompute
    : public KernelLite<TARGET(kARM), PRECISION(kAny), DATALAYOUT(kAny)> {
 public:
  using param_t = operators::FeedParam;

  void Run() override {
    auto &param = Param<operators::FeedParam>();
    LOG(INFO) << "feed_list.size: " << param.feed_list->size();
    LOG(INFO) << "col " << param.col;

    // Select the entry addressed by `col` (the fetch kernel indexes its list
    // the same way) instead of always taking the first one. A negative col
    // wraps to a huge value in the cast and is rejected by the same check.
    CHECK(static_cast<size_t>(param.col) < param.feed_list->size())
        << "feed col " << param.col << " is out of range";
    const lite::Tensor &feed_item = (*param.feed_list)[param.col];

    param.out->ShareDataWith(feed_item);
    LOG(INFO) << "FEED input " << feed_item << " col " << param.col;
    LOG(INFO) << "FEED output " << *param.out;
  }
};
} // namespace arm
} // namespace kernels
} // namespace lite
} // namespace paddle
// Registers FeedCompute under the name `feed` with the (kARM, kAny, kAny, def)
// tuple; input "X" and output "Out" are bound to tensor types built from
// TARGET(kARM).
REGISTER_LITE_KERNEL(feed, kARM, kAny, kAny,
paddle::lite::kernels::arm::FeedCompute, def)
.BindInput("X", {LiteType::GetTensorTy(TARGET(kARM))})
.BindOutput("Out", {LiteType::GetTensorTy(TARGET(kARM))})
.Finalize();
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/fluid/lite/core/op_registry.h"
#include "paddle/fluid/lite/core/type_system.h"
namespace paddle {
namespace lite {
namespace kernels {
namespace arm {
// Fetch kernel for the ARM target: stores the input tensor into slot `col` of
// the fetch list by sharing its data, growing the list if needed.
class FetchCompute
    : public KernelLite<TARGET(kARM), PRECISION(kAny), DATALAYOUT(kAny)> {
 public:
  // Run() reads operators::FetchParam, so the alias names that same type.
  using param_t = operators::FetchParam;

  void Run() override {
    auto& param = Param<operators::FetchParam>();
    auto* fetch_list = param.fetch_list;

    // Make sure slot `col` exists before writing to it.
    if (fetch_list->size() <= static_cast<size_t>(param.col)) {
      fetch_list->resize(param.col + 1);
    }

    auto& dst = fetch_list->at(param.col);
    dst.ShareDataWith(*param.input);
  }
};
} // namespace arm
} // namespace kernels
} // namespace lite
} // namespace paddle
// Registers FetchCompute under the name `fetch` with the (kARM, kAny, kAny,
// def) tuple. Both "X" and "Out" are bound to tensor types built from
// TARGET(kARM), PRECISION(kAny), DATALAYOUT(kAny) and a trailing -1.
// NOTE(review): the meaning of the trailing -1 argument is not visible here;
// confirm against LiteType::GetTensorTy.
REGISTER_LITE_KERNEL(fetch, kARM, kAny, kAny,
paddle::lite::kernels::arm::FetchCompute, def)
.BindInput("X", {LiteType::GetTensorTy(TARGET(kARM), PRECISION(kAny),
DATALAYOUT(kAny), -1)})
.BindOutput("Out", {LiteType::GetTensorTy(TARGET(kARM), PRECISION(kAny),
DATALAYOUT(kAny), -1)})
.Finalize();
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include <Eigen/Core>
#include "paddle/fluid/lite/core/kernel.h"
#include "paddle/fluid/lite/core/op_registry.h"
#include "paddle/fluid/lite/core/types.h"
namespace paddle {
namespace lite {
namespace kernels {
namespace arm {
template <typename T>
void mul_compute_eigen(const T* x, int x_h, int x_w, const T* y, int y_h,
int y_w, T* out) {
using matrix_t =
Eigen::Matrix<T, Eigen::Dynamic, Eigen::Dynamic, Eigen::RowMajor>;
Eigen::Map<const matrix_t> X(x, x_h, x_w);
Eigen::Map<const matrix_t> Y(y, y_h, y_w);
Eigen::Map<matrix_t> Out(out, x_h, y_w);
Out = X * Y;
}
// Matrix-multiply kernel declared for TARGET(kARM) with PRECISION(kFloat).
class MulCompute : public KernelLite<TARGET(kARM), PRECISION(kFloat)> {
 public:
  using param_t = operators::MulParam;

  // Flattens x and y to 2-D at their respective `*_num_col_dims` split points
  // and multiplies them into the output tensor.
  void Run() override {
    auto& param = Param<operators::MulParam>();

    auto&& x_dims = param.x->dims();
    auto&& y_dims = param.y->dims();

    // Leading dims collapse into the first extent, trailing dims into the
    // second.
    const int x_rows =
        static_cast<int>(x_dims.Slice(0, param.x_num_col_dims).production());
    const int x_cols = static_cast<int>(
        x_dims.Slice(param.x_num_col_dims, x_dims.size()).production());
    const int y_rows =
        static_cast<int>(y_dims.Slice(0, param.y_num_col_dims).production());
    const int y_cols = static_cast<int>(
        y_dims.Slice(param.y_num_col_dims, y_dims.size()).production());

    core::dim2 x_shape({x_rows, x_cols});
    core::dim2 y_shape({y_rows, y_cols});

    mul_compute_eigen(param.x->data<float>(), x_shape.x, x_shape.y,  //
                      param.y->data<float>(), y_shape.x, y_shape.y,  //
                      param.output->mutable_data<float>());

    LOG(INFO) << "MUL x " << *param.x;
    LOG(INFO) << "MUL W " << *param.y;
    LOG(INFO) << "MUL out " << *param.output;
  }

  virtual ~MulCompute() = default;
};
} // namespace arm
} // namespace kernels
} // namespace lite
} // namespace paddle
// Registers MulCompute under the name `mul` with the (kARM, kFloat, kNCHW,
// def) tuple; inputs "X" and "Y" and output "Out" are bound to tensor types
// built from TARGET(kARM).
REGISTER_LITE_KERNEL(mul, kARM, kFloat, kNCHW,
paddle::lite::kernels::arm::MulCompute, def)
.BindInput("X", {LiteType::GetTensorTy(TARGET(kARM))})
.BindInput("Y", {LiteType::GetTensorTy(TARGET(kARM))})
.BindOutput("Out", {LiteType::GetTensorTy(TARGET(kARM))})
.Finalize();
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/fluid/lite/kernels/arm/relu_compute.h"
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <algorithm>
#include "paddle/fluid/lite/core/kernel.h"
#include "paddle/fluid/lite/core/op_registry.h"
namespace paddle {
namespace lite {
namespace kernels {
namespace arm {
// ReLU kernel declared for TARGET(kARM) with PRECISION(kFloat).
class ReluCompute : public KernelLite<TARGET(kARM), PRECISION(kFloat)> {
 public:
  // Writes max(0, input[i]) into output[i] for every element of the input.
  void Run() override {
    auto& param = Param<operators::ReluParam>();
    auto count = param.input->dims().production();
    const float* src = param.input->data<float>();
    float* dst = param.output->mutable_data<float>();

    int idx = 0;
    while (idx < count) {
      dst[idx] = std::max(0.f, src[idx]);
      ++idx;
    }
  }

  TargetType target() const override {
    return TARGET(kARM);
  }

  PrecisionType precision() const override {
    return PRECISION(kFloat);
  }
};
} // namespace arm
} // namespace kernels
} // namespace lite
} // namespace paddle
// Registers ReluCompute under the name `relu` with the (kARM, kFloat, kNCHW,
// def) tuple.
// NOTE(review): unlike the other ARM kernel registrations, no BindInput /
// BindOutput calls are made here -- confirm whether that is intentional.
REGISTER_LITE_KERNEL(relu, kARM, kFloat, kNCHW,
paddle::lite::kernels::arm::ReluCompute, def)
.Finalize();
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include <Eigen/Core>
#include "paddle/fluid/lite/core/kernel.h"
#include "paddle/fluid/lite/core/op_registry.h"
#include "paddle/fluid/lite/core/types.h"
namespace paddle {
namespace lite {
namespace kernels {
namespace arm {
// Writes out[i] = x[i] * scale + offset for i in [0, size).
// The offset is `bias * scale` when `bias_before` is true (i.e. the bias is
// applied before scaling) and plain `bias` otherwise.
template <typename T>
void scale_compute(const T* x, T* out, int size, float scale, float bias,
                   bool bias_before) {
  const float offset = bias_before ? bias * scale : bias;
  int idx = 0;
  while (idx < size) {
    out[idx] = x[idx] * scale + offset;
    ++idx;
  }
}
// Scale kernel declared for TARGET(kARM) with PRECISION(kFloat).
class ScaleCompute : public KernelLite<TARGET(kARM), PRECISION(kFloat)> {
 public:
  // Run() reads operators::ScaleParam, so the alias names that same type.
  using param_t = operators::ScaleParam;

  // Applies the scale/bias transform to every element of x.
  void Run() override {
    auto& param = Param<operators::ScaleParam>();
    // scale_compute's last argument means "bias is added BEFORE scaling",
    // which is the opposite of `bias_after_scale`, hence the negation:
    //   bias_after_scale == true  -> out = scale * x + bias
    //   bias_after_scale == false -> out = scale * (x + bias)
    scale_compute(param.x->data<float>(), param.output->mutable_data<float>(),
                  param.x->dims().production(), param.scale, param.bias,
                  !param.bias_after_scale);
  }

  virtual ~ScaleCompute() = default;
};
} // namespace arm
} // namespace kernels
} // namespace lite
} // namespace paddle
// Registers ScaleCompute under the name `scale` with the (kARM, kFloat, kNCHW,
// def) tuple; input "X" and output "Out" are bound to tensor types built from
// TARGET(kARM).
REGISTER_LITE_KERNEL(scale, kARM, kFloat, kNCHW,
paddle::lite::kernels::arm::ScaleCompute, def)
.BindInput("X", {LiteType::GetTensorTy(TARGET(kARM))})
.BindOutput("Out", {LiteType::GetTensorTy(TARGET(kARM))})
.Finalize();
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include "paddle/fluid/lite/core/op_registry.h"
// ARM kernels referenced by translation units that include this header.
// Each entry uses the same (op, target, precision, layout, alias) tuple as the
// corresponding REGISTER_LITE_KERNEL call.
// NOTE(review): a `relu` kernel is registered for (kARM, kFloat, kNCHW, def)
// but is not listed here -- confirm whether the omission is intentional.
USE_LITE_KERNEL(fc, kARM, kFloat, kNCHW, def);
USE_LITE_KERNEL(mul, kARM, kFloat, kNCHW, def);
USE_LITE_KERNEL(scale, kARM, kFloat, kNCHW, def);
USE_LITE_KERNEL(feed, kARM, kAny, kAny, def);
USE_LITE_KERNEL(fetch, kARM, kAny, kAny, def);
...@@ -8,3 +8,4 @@ nv_library(mul_compute_cuda SRCS mul_compute.cc DEPS ${tensor_lite}) ...@@ -8,3 +8,4 @@ nv_library(mul_compute_cuda SRCS mul_compute.cc DEPS ${tensor_lite})
cc_library(io_copy_compute_cuda SRCS io_copy_compute.cc DEPS ${tensor_lite}) cc_library(io_copy_compute_cuda SRCS io_copy_compute.cc DEPS ${tensor_lite})
nv_library(kernels_cuda DEPS mul_compute_cuda io_copy_compute_cuda cuda_blas_lite) nv_library(kernels_cuda DEPS mul_compute_cuda io_copy_compute_cuda cuda_blas_lite)
...@@ -17,3 +17,4 @@ set(host_kernels ...@@ -17,3 +17,4 @@ set(host_kernels
) )
set(host_kernels "${host_kernels}" CACHE INTERNAL "host kernels") set(host_kernels "${host_kernels}" CACHE INTERNAL "host kernels")
...@@ -4,3 +4,4 @@ endif() ...@@ -4,3 +4,4 @@ endif()
cc_library(activation_compute SRCS activation_compute.cc DEPS ${lite_kernel_deps} activation_op) cc_library(activation_compute SRCS activation_compute.cc DEPS ${lite_kernel_deps} activation_op)
cc_library(elementwise_compute SRCS elementwise_compute.cc DEPS ${lite_kernel_deps} elementwise_op) cc_library(elementwise_compute SRCS elementwise_compute.cc DEPS ${lite_kernel_deps} elementwise_op)
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/fluid/framework/eigen.h" #include "paddle/fluid/framework/eigen.h"
#include "paddle/fluid/framework/operator.h" #include "paddle/fluid/framework/operator.h"
#include "paddle/fluid/lite/core/kernel.h" #include "paddle/fluid/lite/core/kernel.h"
......
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/fluid/framework/eigen.h" #include "paddle/fluid/framework/eigen.h"
#include "paddle/fluid/framework/operator.h" #include "paddle/fluid/framework/operator.h"
#include "paddle/fluid/lite/core/kernel.h" #include "paddle/fluid/lite/core/kernel.h"
......
...@@ -9,7 +9,7 @@ endif(WITH_TESTING) ...@@ -9,7 +9,7 @@ endif(WITH_TESTING)
if(LITE_WITH_LIGHT_WEIGHT_FRAMEWORK) if(LITE_WITH_LIGHT_WEIGHT_FRAMEWORK)
cc_library(compatible_pb_lite SRCS compatible_pb.cc DEPS op_desc_lite var_desc_lite) cc_library(compatible_pb_lite SRCS compatible_pb.cc DEPS op_desc_lite framework_proto_lite var_desc_lite)
else() else()
cc_library(compatible_pb_lite SRCS compatible_pb.cc DEPS framework_proto_lite proto_desc) cc_library(compatible_pb_lite SRCS compatible_pb.cc DEPS framework_proto_lite proto_desc)
endif(LITE_WITH_LIGHT_WEIGHT_FRAMEWORK) endif(LITE_WITH_LIGHT_WEIGHT_FRAMEWORK)
...@@ -24,3 +24,4 @@ endif() ...@@ -24,3 +24,4 @@ endif()
cc_library(model_parser_lite SRCS model_parser.cc DEPS ${model_parser_deps}) cc_library(model_parser_lite SRCS model_parser.cc DEPS ${model_parser_deps})
add_subdirectory(pb) add_subdirectory(pb)
cc_library(var_desc_lite SRCS var_desc.cc DEPS framework_proto_lite) cc_library(var_desc_lite SRCS var_desc.cc DEPS framework_proto_lite)
cc_library(op_desc_lite SRCS op_desc.cc DEPS framework_proto_lite) cc_library(op_desc_lite SRCS op_desc.cc DEPS framework_proto_lite)
...@@ -22,3 +22,4 @@ set(ops_lite ...@@ -22,3 +22,4 @@ set(ops_lite
PARENT_SCOPE) PARENT_SCOPE)
lite_cc_test(test_fc_op_lite SRCS fc_op_test.cc DEPS fc_op_lite fc_compute_host) lite_cc_test(test_fc_op_lite SRCS fc_op_test.cc DEPS fc_op_lite fc_compute_host)
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/fluid/lite/core/op_lite.h" #include "paddle/fluid/lite/core/op_lite.h"
#include "paddle/fluid/lite/core/op_registry.h" #include "paddle/fluid/lite/core/op_registry.h"
......
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/fluid/lite/core/op_lite.h" #include "paddle/fluid/lite/core/op_lite.h"
#include "paddle/fluid/lite/core/op_registry.h" #include "paddle/fluid/lite/core/op_registry.h"
......
if(LITE_WITH_LIGHT_WEIGHT_FRAMEWORK) # if(LITE_WITH_LIGHT_WEIGHT_FRAMEWORK)
set(utils_DEPS) # set(utils_DEPS)
lite_cc_test(test_logging_lite SRCS logging_test.cc) # lite_cc_test(test_logging_lite SRCS logging_test.cc)
else() # else()
set(utils_DEPS glog) # endif()
endif()
set(utils_DEPS glog)
lite_cc_test(test_varient SRCS varient_test.cc DEPS utils_lite) lite_cc_test(test_varient SRCS varient_test.cc DEPS utils_lite)
cc_library(any_lite SRCS any.cc) cc_library(any_lite SRCS any.cc)
cc_library(utils_lite SRCS cp_logging.cc DEPS ${utils_DEPS} any_lite) cc_library(utils_lite SRCS cp_logging.cc DEPS ${utils_DEPS} any_lite)
...@@ -14,6 +14,7 @@ ...@@ -14,6 +14,7 @@
#pragma once #pragma once
#include "paddle/fluid/lite/utils/any.h"
#include "paddle/fluid/lite/utils/check.h" #include "paddle/fluid/lite/utils/check.h"
#include "paddle/fluid/lite/utils/cp_logging.h" #include "paddle/fluid/lite/utils/cp_logging.h"
#include "paddle/fluid/lite/utils/factory.h" #include "paddle/fluid/lite/utils/factory.h"
...@@ -21,4 +22,3 @@ ...@@ -21,4 +22,3 @@
#include "paddle/fluid/lite/utils/io.h" #include "paddle/fluid/lite/utils/io.h"
#include "paddle/fluid/lite/utils/macros.h" #include "paddle/fluid/lite/utils/macros.h"
#include "paddle/fluid/lite/utils/varient.h" #include "paddle/fluid/lite/utils/varient.h"
#include "paddle/fluid/lite/utils/any.h"
...@@ -13,8 +13,8 @@ ...@@ -13,8 +13,8 @@
// limitations under the License. // limitations under the License.
#pragma once #pragma once
#ifdef LITE_WITH_LIGHT_WEIGHT_FRAMEWORK // #ifdef LITE_WITH_LIGHT_WEIGHT_FRAMEWORK
#include "paddle/fluid/lite/utils/logging.h" // #include "paddle/fluid/lite/utils/logging.h"
#else // LITE_WITH_LIGHT_WEIGHT_FRAMEWORK // #else // LITE_WITH_LIGHT_WEIGHT_FRAMEWORK
#include <glog/logging.h> #include <glog/logging.h>
#endif // LITE_WITH_LIGHT_WEIGHT_FRAMEWORK // #endif // LITE_WITH_LIGHT_WEIGHT_FRAMEWORK
...@@ -3,3 +3,4 @@ if (NOT LITE_WITH_X86) ...@@ -3,3 +3,4 @@ if (NOT LITE_WITH_X86)
endif() endif()
cc_library(target_wrapper_x86 SRCS target_wrapper.cc) cc_library(target_wrapper_x86 SRCS target_wrapper.cc)
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册