diff --git a/CMakeLists.txt b/CMakeLists.txt index 312bdb7f1ae11576abf6f5ec222bae72bcd67bb5..5ac1f7d7698b84977990ff253ffcc73d5c8144ea 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -22,6 +22,7 @@ include(system) if(LITE_WITH_LIGHT_WEIGHT_FRAMEWORK) cmake_minimum_required(VERSION 3.10) # TODO(TJ): make as function check_default + # check os if(NOT DEFINED ARM_TARGET_OS) set(ARM_TARGET_OS "android" CACHE STRING "Choose ARM Target OS") endif() @@ -31,19 +32,27 @@ if(LITE_WITH_LIGHT_WEIGHT_FRAMEWORK) message(FATAL_ERROR "ARM_TARGET_OS must be in one of ${ARM_TARGET_OS_LIST}") endif() + # check arch abi if(NOT DEFINED ARM_TARGET_ARCH_ABI) - set(ARM_TARGET_ARCH_ABI "arm64-v8a" CACHE STRING "Choose ARM Target ARCH ABI") + set(ARM_TARGET_ARCH_ABI "armv8" CACHE STRING "Choose ARM Target ARCH ABI") endif() - set(ARM_TARGET_ARCH_ABI_LIST "arm64-v8a" "armeabi-v7a" "armeabi-v7a-softfp" "armeabi-v7a-hf") + set(ARM_TARGET_ARCH_ABI_LIST "armv8" "armv7" "armv7hf" "arm64-v8a" "armeabi-v7a") set_property(CACHE ARM_TARGET_ARCH_ABI PROPERTY STRINGS ${ARM_TARGET_ARCH_ABI_LIST}) if (NOT ARM_TARGET_ARCH_ABI IN_LIST ARM_TARGET_ARCH_ABI_LIST) message(FATAL_ERROR "ARM_TARGET_ARCH_ABI must be in one of ${ARM_TARGET_ARCH_ABI_LIST}") endif() - if(NOT DEFINED TARGET_ARCH_ABI) - set(ARCH_ABI "arm64-v8a" CACHE STRING "Choose android platform") + # check arch abi + if(NOT DEFINED ARM_TARGET_LANG) + set(ARM_TARGET_LANG "clang" CACHE STRING "Choose ARM Target Language") endif() - + set(ARM_TARGET_LANG_LIST "gcc" "clang") + set_property(CACHE ARM_TARGET_LANG PROPERTY STRINGS ${ARM_TARGET_LANG_LIST}) + if (NOT ARM_TARGET_LANG IN_LIST ARM_TARGET_LANG_LIST) + message(FATAL_ERROR "ARM_TARGET_LANG must be in one of ${ARM_TARGET_LANG_LIST}") + endif() + + message(STATUS "Lite ARM Compile ${ARM_TARGET_OS} with ${ARM_TARGET_ARCH_ABI} ${ARM_TARGET_LANG}") include(cross_compiling/host) include(cross_compiling/armlinux) include(cross_compiling/android) @@ -159,6 +168,9 @@ include_directories("${PADDLE_SOURCE_DIR}") # for mobile if (WITH_LITE AND LITE_WITH_LIGHT_WEIGHT_FRAMEWORK) message(STATUS "Building the mobile framework") + if (ANDROID) + include(cross_compiling/findar) + endif() # include the necessary thirdparty dependencies include(external/gflags) # download, build, install gflags include(external/glog) # download, build, install glog @@ -171,8 +183,20 @@ if (WITH_LITE AND LITE_WITH_LIGHT_WEIGHT_FRAMEWORK) include(generic) # simplify cmake module include(configure) # add paddle env configuration - add_definitions(-std=c++11) set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++11") + find_package(OpenMP REQUIRED) + if(OPENMP_FOUND OR OpenMP_CXX_FOUND) + add_definitions(-DARM_WITH_OMP) + set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${OpenMP_C_FLAGS}") + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${OpenMP_CXX_FLAGS}") + message(STATUS "Found OpenMP ${OpenMP_VERSION} ${OpenMP_CXX_VERSION}") + message(STATUS " |-- OpenMP C flags: ${OpenMP_C_FLAGS}") + message(STATUS " |-- OpenMP CXX flags: ${OpenMP_CXX_FLAGS}") + message(STATUS " |-- OpenMP OpenMP_CXX_LIB_NAMES: ${OpenMP_CXX_LIB_NAMES}") + message(STATUS " `-- OpenMP OpenMP_CXX_LIBRARIES: ${OpenMP_CXX_LIBRARIES}") + else() + message(FATAL_ERROR "Could not found openmp !") + endif() add_subdirectory(paddle) return() diff --git a/cmake/cross_compiling/android.cmake b/cmake/cross_compiling/android.cmake index e57f32aae7c1d59696ce0b49e3add0ff4c51da0e..c3bdbe202f731596d59e2f464f4d1d0aae4bede2 100644 --- a/cmake/cross_compiling/android.cmake +++ b/cmake/cross_compiling/android.cmake @@ -26,28 +26,34 
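The OpenMP block above only wires the compiler flags and the `ARM_WITH_OMP` define into the build. Below is a minimal sketch (not part of this patch) of how a translation unit typically consumes that define, mirroring the `#ifdef ARM_WITH_OMP` guards this patch adds to `context.cc` further down; the thread count of 4 is an arbitrary example value:

```cpp
#include <cstdio>
#include <vector>

#ifdef ARM_WITH_OMP
#include <omp.h>  // only available when the CMake check above found OpenMP
#endif

int main() {
  std::vector<float> a(1024, 1.f), b(1024, 2.f), c(1024, 0.f);

#ifdef ARM_WITH_OMP
  // Same pattern as ARMContext::BindDev(): pick the thread count at runtime.
  omp_set_num_threads(4);
#endif

#pragma omp parallel for  // silently ignored when built without -fopenmp
  for (int i = 0; i < static_cast<int>(a.size()); ++i) {
    c[i] = a[i] + b[i];
  }

  std::printf("c[0] = %f\n", c[0]);
  return 0;
}
```

Guarding both the include and the runtime calls keeps the same sources building when OpenMP is absent, which is why the ARM kernels in this patch rely on `#pragma omp` rather than unconditional calls into the OpenMP runtime.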
@@ if(NOT DEFINED ANDROID_NDK) endif() endif() - if(NOT DEFINED ANDROID_API_LEVEL) set(ANDROID_API_LEVEL "22") endif() if(NOT DEFINED ANDROID_STL_TYPE) - set(ANDROID_STL_TYPE "c++_static" CACHE STRING "stl type") + set(ANDROID_STL_TYPE "c++_static" CACHE STRING "stl type") # can also use shared endif() -# TODO(TJ): enable me -if(ARM_TARGET_ARCH_ABI STREQUAL "armeabi-v7a-hf") - message(FATAL_ERROR "Not supported building android armeabi-v7a-hf yet") +if(ARM_TARGET_ARCH_ABI STREQUAL "armv7hf") + message(FATAL_ERROR "ANDROID does not support hardfp on v7 use armv7 instead.") endif() set(ANDROID_ARCH_ABI ${ARM_TARGET_ARCH_ABI} CACHE STRING "Choose Android Arch ABI") +if(ARM_TARGET_ARCH_ABI STREQUAL "armv8") + set(ANDROID_ARCH_ABI "arm64-v8a") +endif() + +if(ARM_TARGET_ARCH_ABI STREQUAL "armv7") + set(ANDROID_ARCH_ABI "armeabi-v7a") +endif() + if(ANDROID_ARCH_ABI STREQUAL "armeabi-v7a-softfp") set(ANDROID_ARCH_ABI "armeabi-v7a") endif() set(ANDROID_ARCH_ABI_LIST "arm64-v8a" "armeabi-v7a" "armeabi-v6" "armeabi" - "mips" "mips64" "x86" "x86_64" "armeabi-v7a-hf") + "mips" "mips64" "x86" "x86_64") set_property(CACHE ANDROID_ARCH_ABI PROPERTY STRINGS ${ANDROID_ARCH_ABI_LIST}) if(NOT ANDROID_ARCH_ABI IN_LIST ANDROID_ARCH_ABI_LIST) message(FATAL_ERROR "ANDROID_ARCH_ABI must be in one of ${ANDROID_ARCH_ABI_LIST}") @@ -59,21 +65,37 @@ if(ANDROID_ARCH_ABI STREQUAL "armeabi-v7a") message(STATUS "NEON is enabled on arm-v7a with softfp") endif() -if(ANDROID_ARCH_ABI STREQUAL "armeabi-v7a-hf") - set(ANDROID_ARCH_ABI "armeabi-v7a") - set(CMAKE_CXX_FLAGS "-std=c++11 -march=armv7-a -mfloat-abi=hard -mfpu=neon-vfpv4 ${CMAKE_CXX_FLAGS}" ) - set(CMAKE_C_FLAGS "-march=armv7-a -mfloat-abi=hard -mfpu=neon-vfpv4 ${CMAKE_C_FLAGS}" ) - message(STATUS "NEON is enabled on arm-v7a with hard float") -endif() - set(ANDROID_STL_TYPE_LITS "gnustl_static" "c++_static") set_property(CACHE ANDROID_STL_TYPE PROPERTY STRINGS ${ANDROID_STL_TYPE_LITS}) if (NOT ANDROID_STL_TYPE IN_LIST ANDROID_STL_TYPE_LITS) message(FATAL_ERROR "ANDROID_STL_TYPE must be in one of ${ANDROID_STL_TYPE_LITS}") endif() +if(ARM_TARGET_LANG STREQUAL "gcc") + # gcc do not need set lang + set(ARM_TARGET_LANG "") +endif() + set(CMAKE_SYSTEM_NAME Android) set(CMAKE_SYSTEM_VERSION ${ANDROID_API_LEVEL}) set(CMAKE_ANDROID_ARCH_ABI ${ANDROID_ARCH_ABI}) set(CMAKE_ANDROID_NDK ${ANDROID_NDK}) +set(CMAKE_ANDROID_NDK_TOOLCHAIN_VERSION ${ARM_TARGET_LANG}) set(CMAKE_ANDROID_STL_TYPE ${ANDROID_STL_TYPE}) + +if (ARM_TARGET_LANG STREQUAL "clang") + if(ARM_TARGET_ARCH_ABI STREQUAL "armv8") + set(triple aarch64-v8a-linux-android) + elseif(ARM_TARGET_ARCH_ABI STREQUAL "armv7") + set(triple arm-v7a-linux-android) + else() + message(FATAL_ERROR "Clang do not support this ${ARM_TARGET_ARCH_ABI}, use armv8 or armv7") + endif() + + set(CMAKE_C_COMPILER clang) + set(CMAKE_C_COMPILER_TARGET ${triple}) + set(CMAKE_CXX_COMPILER clang++) + set(CMAKE_CXX_COMPILER_TARGET ${triple}) + + message(STATUS "CMAKE_CXX_COMPILER_TARGET: ${CMAKE_CXX_COMPILER_TARGET}") +endif() diff --git a/cmake/cross_compiling/armlinux.cmake b/cmake/cross_compiling/armlinux.cmake index 1d752075cca2d48d19016999a60c45d9882b1f73..f0fd26804e19f9dba8f515251c625c0e68933512 100644 --- a/cmake/cross_compiling/armlinux.cmake +++ b/cmake/cross_compiling/armlinux.cmake @@ -20,7 +20,15 @@ set(ARMLINUX TRUE) add_definitions(-DLITE_WITH_LINUX) set(CMAKE_SYSTEM_NAME Linux) -if(ARM_TARGET_ARCH_ABI STREQUAL "arm64-v8a") +set(ARMLINUX_ARCH_ABI ${ARM_TARGET_ARCH_ABI} CACHE STRING "Choose Android Arch ABI") + 
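Since `android.cmake` above now maps `armv8`/`armv7` onto the NDK ABIs and picks a clang target triple, a quick way to confirm what a given configuration actually produces is to cross-compile a trivial probe. This is an illustrative sketch, not part of the patch; `__aarch64__`, `__arm__`, `__ARM_NEON` and `__ANDROID__` are standard predefined macros of the GCC/Clang Android toolchains:

```cpp
#include <cstdio>

int main() {
#if defined(__aarch64__)
  std::printf("target: armv8 (arm64-v8a)\n");
#elif defined(__arm__)
  std::printf("target: armv7 (armeabi-v7a)\n");
#else
  std::printf("target: not ARM\n");
#endif

#if defined(__ARM_NEON) || defined(__ARM_NEON__)
  std::printf("NEON: available\n");
#endif

#if defined(__ANDROID__)
  std::printf("OS: Android\n");
#endif
  return 0;
}
```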
+set(ARMLINUX_ARCH_ABI_LIST "armv8" "armv7" "armv7hf") +set_property(CACHE ARMLINUX_ARCH_ABI PROPERTY STRINGS ${ARMLINUX_ARCH_ABI_LIST}) +if(NOT ARMLINUX_ARCH_ABI IN_LIST ARMLINUX_ARCH_ABI_LIST) + message(FATAL_ERROR "ARMLINUX_ARCH_ABI(${ARMLINUX_ARCH_ABI}) must be in one of ${ARMLINUX_ARCH_ABI_LIST}") +endif() + +if(ARMLINUX_ARCH_ABI STREQUAL "armv8") set(CMAKE_SYSTEM_PROCESSOR aarch64) set(CMAKE_C_COMPILER "aarch64-linux-gnu-gcc") set(CMAKE_CXX_COMPILER "aarch64-linux-gnu-g++") @@ -30,13 +38,12 @@ if(ARM_TARGET_ARCH_ABI STREQUAL "arm64-v8a") message(STATUS "NEON is enabled on arm64-v8a") endif() -if(ARM_TARGET_ARCH_ABI STREQUAL "armeabi-v7a" - OR ARM_TARGET_ARCH_ABI STREQUAL "armeabi-v7a-hf") +if(ARMLINUX_ARCH_ABI STREQUAL "armv7" OR ARMLINUX_ARCH_ABI STREQUAL "armv7hf") message(FATAL_ERROR "Not supported building arm linux arm-v7 yet") endif() # TODO(TJ): make sure v7 works -if(ARM_TARGET_ARCH_ABI STREQUAL "armeabi-v7a") +if(ARMLINUX_ARCH_ABI STREQUAL "armv7") set(CMAKE_SYSTEM_PROCESSOR arm) set(CMAKE_C_COMPILER "arm-linux-gnueabi-gcc") set(CMAKE_CXX_COMPILER "arm-linux-gnueabi-g++") @@ -46,7 +53,7 @@ if(ARM_TARGET_ARCH_ABI STREQUAL "armeabi-v7a") message(STATUS "NEON is enabled on arm-v7a with softfp") endif() -if(ARM_TARGET_ARCH_ABI STREQUAL "armeabi-v7a-hf") +if(ARMLINUX_ARCH_ABI STREQUAL "armv7hf") set(CMAKE_SYSTEM_PROCESSOR arm) set(CMAKE_C_COMPILER "arm-linux-gnueabihf-gcc") set(CMAKE_CXX_COMPILER "arm-linux-gnueabihf-g++") diff --git a/cmake/cross_compiling/findar.cmake b/cmake/cross_compiling/findar.cmake new file mode 100644 index 0000000000000000000000000000000000000000..bcb0dc70fd811a5041244dedb4a4bcf5b540dc3a --- /dev/null +++ b/cmake/cross_compiling/findar.cmake @@ -0,0 +1,33 @@ +# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +if(NOT ARM_TARGET_LANG STREQUAL "clang") + # only clang need find ar tool + return() +endif() + +if(NOT EXISTS "${CMAKE_CXX_COMPILER}") + message(ERROR "Can not find CMAKE_CXX_COMPILER ${CMAKE_CXX_COMPILER}") +endif() + +get_filename_component(AR_PATH ${CMAKE_CXX_COMPILER} PATH) + +find_file(AR_TOOL NAMES llvm-ar PATHS ${AR_PATH}) + +if(NOT AR_TOOL) + message(ERROR "Failed to find AR_TOOL in ${AR_PATH}") +else() + set(CMAKE_AR ${AR_TOOL}) + message(STATUS "Found CMAKE_AR : " ${CMAKE_AR}) +endif() diff --git a/cmake/external/gflags.cmake b/cmake/external/gflags.cmake index 42ce7c644f3e8ee51bb5fbce4391b9423ee22cf8..256e1bbebf0bd4fe0ce6f685a7901888c18aab1d 100644 --- a/cmake/external/gflags.cmake +++ b/cmake/external/gflags.cmake @@ -40,7 +40,8 @@ if(ANDROID) "-DCMAKE_SYSTEM_VERSION=${CMAKE_SYSTEM_VERSION}" "-DCMAKE_ANDROID_ARCH_ABI=${CMAKE_ANDROID_ARCH_ABI}" "-DCMAKE_ANDROID_NDK=${CMAKE_ANDROID_NDK}" - "-DCMAKE_ANDROID_STL_TYPE=${CMAKE_ANDROID_STL_TYPE}") + "-DCMAKE_ANDROID_STL_TYPE=${CMAKE_ANDROID_STL_TYPE}" + "-DCMAKE_ANDROID_NDK_TOOLCHAIN_VERSION=${CMAKE_ANDROID_NDK_TOOLCHAIN_VERSION}" ) endif() ExternalProject_Add( diff --git a/cmake/external/glog.cmake b/cmake/external/glog.cmake index 9ac9b8326431addb503acc10d3188a5f8f4e48a5..80abc2350caddb07aa6a326ac89affc58cb17399 100644 --- a/cmake/external/glog.cmake +++ b/cmake/external/glog.cmake @@ -46,7 +46,8 @@ if(ANDROID) "-DCMAKE_SYSTEM_VERSION=${CMAKE_SYSTEM_VERSION}" "-DCMAKE_ANDROID_ARCH_ABI=${CMAKE_ANDROID_ARCH_ABI}" "-DCMAKE_ANDROID_NDK=${CMAKE_ANDROID_NDK}" - "-DCMAKE_ANDROID_STL_TYPE=${CMAKE_ANDROID_STL_TYPE}") + "-DCMAKE_ANDROID_STL_TYPE=${CMAKE_ANDROID_STL_TYPE}" + "-DCMAKE_ANDROID_NDK_TOOLCHAIN_VERSION=${CMAKE_ANDROID_NDK_TOOLCHAIN_VERSION}") endif() ExternalProject_Add( diff --git a/cmake/external/gtest.cmake b/cmake/external/gtest.cmake index de44719803fc4f130d536c2354fa492a57e3e69a..57fd6812879970c07a26f3657983998fb3f9760a 100644 --- a/cmake/external/gtest.cmake +++ b/cmake/external/gtest.cmake @@ -58,7 +58,9 @@ IF(WITH_TESTING OR (WITH_DISTRIBUTE AND NOT WITH_GRPC)) "-DCMAKE_SYSTEM_VERSION=${CMAKE_SYSTEM_VERSION}" "-DCMAKE_ANDROID_ARCH_ABI=${CMAKE_ANDROID_ARCH_ABI}" "-DCMAKE_ANDROID_NDK=${CMAKE_ANDROID_NDK}" - "-DCMAKE_ANDROID_STL_TYPE=${CMAKE_ANDROID_STL_TYPE}") + "-DCMAKE_ANDROID_STL_TYPE=${CMAKE_ANDROID_STL_TYPE}" + "-DCMAKE_ANDROID_NDK_TOOLCHAIN_VERSION=${CMAKE_ANDROID_NDK_TOOLCHAIN_VERSION}" + ) endif() ExternalProject_Add( diff --git a/cmake/external/protobuf.cmake b/cmake/external/protobuf.cmake index 41cd1ebaf33a6ec7c61ee8c965eaa0bccbb618b8..6d2136223d39fed1bdacacea9ba363859b6b1c77 100644 --- a/cmake/external/protobuf.cmake +++ b/cmake/external/protobuf.cmake @@ -199,6 +199,7 @@ FUNCTION(build_protobuf TARGET_NAME BUILD_FOR_HOST) "-DCMAKE_ANDROID_ARCH_ABI=${CMAKE_ANDROID_ARCH_ABI}" "-DCMAKE_ANDROID_NDK=${CMAKE_ANDROID_NDK}" "-DCMAKE_ANDROID_STL_TYPE=${CMAKE_ANDROID_STL_TYPE}" + "-DCMAKE_ANDROID_NDK_TOOLCHAIN_VERSION=${CMAKE_ANDROID_NDK_TOOLCHAIN_VERSION}" "-DCMAKE_CXX_COMPILER=${CMAKE_CXX_COMPILER}" "-DCMAKE_C_COMPILER=${CMAKE_C_COMPILER}" "-DCMAKE_C_FLAGS=${CMAKE_C_FLAGS}" diff --git a/paddle/fluid/inference/analysis/passes/CMakeLists.txt b/paddle/fluid/inference/analysis/passes/CMakeLists.txt index a8d0c69a54ab39781613d26474098450398d4c1b..7b1bbbb585ad67e378cfbf0a88c7c10fef41621e 100644 --- a/paddle/fluid/inference/analysis/passes/CMakeLists.txt +++ b/paddle/fluid/inference/analysis/passes/CMakeLists.txt @@ -5,7 +5,7 @@ cc_library(ir_params_sync_among_devices_pass SRCS ir_params_sync_among_devices_p 
cc_library(ir_graph_to_program_pass SRCS ir_graph_to_program_pass.cc DEPS analysis_pass graph_to_program_pass) cc_library(adjust_cudnn_workspace_size_pass SRCS adjust_cudnn_workspace_size_pass.cc DEPS analysis_pass graph_to_program_pass) -cc_library(analysis_passes SRCS passes.cc DEPS +cc_library(analysis_passes SRCS use_passes.cc DEPS ir_graph_build_pass ir_analysis_pass ir_params_sync_among_devices_pass diff --git a/paddle/fluid/inference/analysis/passes/passes.cc b/paddle/fluid/inference/analysis/passes/use_passes.cc similarity index 100% rename from paddle/fluid/inference/analysis/passes/passes.cc rename to paddle/fluid/inference/analysis/passes/use_passes.cc index a55904ed536bad31c82888ede2db3178f3fd5e47..76043a53b75768bd85298ecb8dd911c68671673c 100644 --- a/paddle/fluid/inference/analysis/passes/passes.cc +++ b/paddle/fluid/inference/analysis/passes/use_passes.cc @@ -12,13 +12,13 @@ // See the License for the specific language governing permissions and // limitations under the License. -#include "paddle/fluid/inference/analysis/passes/passes.h" #include "paddle/fluid/inference/analysis/passes/adjust_cudnn_workspace_size_pass.h" #include "paddle/fluid/inference/analysis/passes/ir_analysis_pass.h" #include "paddle/fluid/inference/analysis/passes/ir_graph_build_pass.h" #include "paddle/fluid/inference/analysis/passes/ir_graph_to_program_pass.h" #include "paddle/fluid/inference/analysis/passes/ir_params_sync_among_devices_pass.h" #include "paddle/fluid/inference/analysis/passes/memory_optimize_pass.h" +#include "paddle/fluid/inference/analysis/passes/passes.h" namespace paddle { namespace inference { diff --git a/paddle/fluid/lite/CMakeLists.txt b/paddle/fluid/lite/CMakeLists.txt index fefc73c75478839c19e3040a4a95378934ad53d8..7b6dd0703d410ad228a11e60dda7ceea9f5a7983 100644 --- a/paddle/fluid/lite/CMakeLists.txt +++ b/paddle/fluid/lite/CMakeLists.txt @@ -10,6 +10,9 @@ message(STATUS "LITE_WITH_ARM:\t${LITE_WITH_ARM}") message(STATUS "LITE_WITH_PROFILE:\t${LITE_WITH_PROFILE}") set(LITE_MODEL_DIR "${THIRD_PARTY_PATH}/install") + +set(LITE_ON_MOBILE ${LITE_WITH_LIGHT_WEIGHT_FRAMEWORK}) + set(LITE_URL "http://paddle-inference-dist.bj.bcebos.com" CACHE STRING "inference download url") function(lite_download_and_uncompress INSTALL_DIR URL FILENAME) @@ -182,3 +185,11 @@ add_subdirectory(model_parser) add_subdirectory(utils) add_subdirectory(api) add_subdirectory(gen_code) + + +if (WITH_TESTING) + lite_download_and_uncompress(${LITE_MODEL_DIR} ${LITE_URL} "lite_naive_model.tar.gz") + if(LITE_WITH_LIGHT_WEIGHT_FRAMEWORK) + lite_download_and_uncompress(${LITE_MODEL_DIR} ${LITE_URL} "mobilenet_v2_relu.tar.gz") + endif() +endif() diff --git a/paddle/fluid/lite/api/CMakeLists.txt b/paddle/fluid/lite/api/CMakeLists.txt index a81d1c9db8d83540d227705d8cd46b2dd5405705..52961d0cc49187fa79e55942a1abaceed9dc2d19 100644 --- a/paddle/fluid/lite/api/CMakeLists.txt +++ b/paddle/fluid/lite/api/CMakeLists.txt @@ -1,20 +1,29 @@ -set(cxx_api_lite_deps scope_lite optimizer_lite target_wrapper_host model_parser_lite) +set(cxx_api_lite_deps + scope_lite optimizer_lite target_wrapper_host model_parser_lite program_lite) if(LITE_WITH_CUDA) set(cxx_api_lite_deps ${cxx_api_lite_deps} kernels_cuda) cc_library(cxx_api_lite_cuda SRCS cxx_api.cc DEPS ${cxx_api_lite_deps} target_wrapper_cuda) nv_test(test_cxx_api_lite_cuda SRCS cxx_api_test.cc DEPS cxx_api_lite_cuda) endif() -cc_library(cxx_api_lite SRCS cxx_api.cc DEPS ${cxx_api_lite_deps} ${ops_lite} program_lite) +lite_cc_library(lite_api_test_helper SRCS 
lite_api_test_helper.cc + DEPS scope_lite optimizer_lite target_wrapper_host model_parser_lite program_lite + ${ops_lite} ${host_kernels} + CUDA_DEPS kernels_cuda + X86_DEPS ${x86_kernels} + ) +lite_cc_library(cxx_api_lite SRCS cxx_api.cc DEPS lite_api_test_helper) set(light_api_deps - scope_lite target_wrapper_host model_parser_lite) + scope_lite target_wrapper_host model_parser_lite program_lite) if(LITE_WITH_CUDA) set(light_api_deps ${light_api_deps} target_wrapper_cuda) endif() -#cc_library(light_api_lite SRCS light_api.cc DEPS ${light_api_deps} ${ops_lite} ${host_kernels}) +lite_cc_library(light_api_lite SRCS light_api.cc + DEPS ${light_api_deps} ${ops_lite} ${host_kernels} + ) message(STATUS "get ops ${ops_lite}") message(STATUS "get Host kernels ${host_kernels}") @@ -24,24 +33,41 @@ include(ExternalProject) set(LITE_DEMO_INSTALL_DIR "${THIRD_PARTY_PATH}/inference_demo" CACHE STRING "A path setting inference demo download directories.") -if((NOT LITE_WITH_LIGHT_WEIGHT_FRAMEWORK) AND WITH_TESTING) +if(WITH_TESTING) + set(eval_model_dir "") + set(test_cxx_api_deps cxx_api_lite mir_passes ${ops_lite} ${host_kernels} ${x86_kernels}) + + if(LITE_WITH_LIGHT_WEIGHT_FRAMEWORK) + set(eval_model_dir ${LITE_MODEL_DIR}/mobilenet_v2_relu) + set(test_cxx_api_deps ${test_cxx_api_deps} ${arm_kernels}) + endif() lite_cc_test(test_cxx_api_lite SRCS cxx_api_test.cc - DEPS cxx_api_lite mir_passes - ${ops_lite} ${host_kernels} ${x86_kernels} + DEPS ${test_cxx_api_deps} ARGS --model_dir=${LITE_MODEL_DIR}/lite_naive_model - --optimized_model=${LITE_MODEL_DIR}/lite_naive_model_opt SERIAL) + --optimized_model=${LITE_MODEL_DIR}/lite_naive_model_opt + --eval_model_dir=eval_model_dir SERIAL) - lite_download_and_uncompress(${LITE_MODEL_DIR} ${LITE_URL} "lite_naive_model.tar.gz") add_dependencies(test_cxx_api_lite extern_lite_download_lite_naive_model_tar_gz) + if(LITE_WITH_LIGHT_WEIGHT_FRAMEWORK) + add_dependencies(test_cxx_api_lite extern_lite_download_mobilenet_v2_relu_tar_gz) + endif() endif() -if(NOT LITE_WITH_LIGHT_WEIGHT_FRAMEWORK AND WITH_TESTING) - add_dependencies(test_cxx_api_lite extern_lite_download_lite_naive_model_tar_gz) -endif() +# These tests needs CLI arguments, and is not supported in ARM CI. +# TODO(Superjomn) support latter. +if(NOT LITE_ON_MOBILE) + lite_cc_test(test_light_api SRCS light_api_test.cc + DEPS light_api_lite mir_passes + X86_DEPS ${x86_kernels} + ARGS --optimized_model=${LITE_MODEL_DIR}/lite_naive_model_opt + SERIAL) -# if(NOT LITE_WITH_LIGHT_WEIGHT_FRAMEWORK) -# lite_cc_test(test_light_api SRCS light_api_test.cc DEPS light_api_lite ARGS --optimized_model=${LITE_MODEL_DIR}/lite_naive_model_opt SERIAL) -# endif() + lite_cc_test(test_apis_lite SRCS apis_test.cc + DEPS cxx_api_lite light_api_lite ${ops_lite} mir_passes + X86_DEPS ${x86_kernels} + ARGS --model_dir=${LITE_MODEL_DIR}/lite_naive_model + --optimized_model=${LITE_MODEL_DIR}/lite_naive_model_opt SERIAL) +endif() lite_cc_binary(cxx_api_lite_bin SRCS cxx_api_bin.cc DEPS @@ -51,4 +77,3 @@ lite_cc_binary(cxx_api_lite_bin SRCS cxx_api_bin.cc mir_passes ${ops_lite} ${host_kernels} ARM_DEPS ${arm_kernels}) - diff --git a/paddle/fluid/lite/api/apis_test.cc b/paddle/fluid/lite/api/apis_test.cc new file mode 100644 index 0000000000000000000000000000000000000000..4d99f238dd6b6af6597b2a5f0b41ac7d4580da79 --- /dev/null +++ b/paddle/fluid/lite/api/apis_test.cc @@ -0,0 +1,95 @@ +// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. 
+// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +/* + * We test multiple apis here. + */ +#include +#include +#include +#include "paddle/fluid/lite/api/cxx_api.h" +#include "paddle/fluid/lite/api/light_api.h" +#include "paddle/fluid/lite/core/mir/pass_registry.h" +#include "paddle/fluid/lite/core/mir/use_passes.h" +#include "paddle/fluid/lite/kernels/use_kernels.h" +#include "paddle/fluid/lite/operators/use_ops.h" + +DEFINE_string(model_dir, "", ""); +DEFINE_string(optimized_model, "", ""); + +namespace paddle { +namespace lite { + +void SetConstInput(lite::Tensor* x) { + x->Resize(DDim(std::vector({100, 100}))); + auto* data = x->mutable_data(); + for (int i = 0; i < 100 * 100; i++) { + data[i] = i; + } +} + +bool CompareTensors(const std::string& name, const ExecutorLite& cxx_api, + const LightPredictor& light_api) { + const auto* a = cxx_api.GetTensor(name); + const auto* b = light_api.GetTensor(name); + return TensorCompareWith(*a, *b); +} + +#ifndef LITE_WITH_LIGHT_WEIGHT_FRAMEWORK +TEST(CXXApi_LightApi, save_and_load_model) { + lite::ExecutorLite cxx_api; + lite::LightPredictor light_api; + + // CXXAPi + { + std::vector valid_places({Place{TARGET(kHost), PRECISION(kFloat)}, + Place{TARGET(kX86), PRECISION(kFloat)}}); + cxx_api.Build(FLAGS_model_dir, Place{TARGET(kCUDA), PRECISION(kFloat)}, + valid_places); + + auto* x = cxx_api.GetInput(0); + SetConstInput(x); + + cxx_api.Run(); + + LOG(INFO) << "Save optimized model to " << FLAGS_optimized_model; + cxx_api.SaveModel(FLAGS_optimized_model); + } + + // LightApi + { + light_api.Build(FLAGS_optimized_model); + + auto* x = light_api.GetInput(0); + SetConstInput(x); + + light_api.Run(); + } + + const auto* cxx_out = cxx_api.GetOutput(0); + const auto* light_out = light_api.GetOutput(0); + ASSERT_TRUE(TensorCompareWith(*cxx_out, *light_out)); + + std::vector tensors_with_order({ + "a", "fc_0.w_0", "fc_0.tmp_0", "scale_0.tmp_0", + }); + + for (const auto& tensor_name : tensors_with_order) { + ASSERT_TRUE(CompareTensors(tensor_name, cxx_api, light_api)); + } +} +#endif // LITE_WITH_LIGHT_WEIGHT_FRAMEWORK + +} // namespace lite +} // namespace paddle diff --git a/paddle/fluid/lite/api/cxx_api.h b/paddle/fluid/lite/api/cxx_api.h index 13679413958713dc2fb5e499c50e8dc94c0dbbde..ba2d784b942c04c169a19d4747352d9048fd6ff2 100644 --- a/paddle/fluid/lite/api/cxx_api.h +++ b/paddle/fluid/lite/api/cxx_api.h @@ -78,6 +78,11 @@ class ExecutorLite { return &fetch_list.at(offset); } + const lite::Tensor* GetTensor(const std::string& name) const { + auto* var = program_->exec_scope()->FindVar(name); + return &var->Get(); + } + void Run() { program_->Run(); } const framework::proto::ProgramDesc& program_desc() const { diff --git a/paddle/fluid/lite/api/cxx_api_bin.cc b/paddle/fluid/lite/api/cxx_api_bin.cc index 6e78d2012b2e8857286e9a42e38dbbaacb4f3935..0c0bf3e28570dfc7bece50526aa8a4d72df02ebd 100644 --- a/paddle/fluid/lite/api/cxx_api_bin.cc +++ b/paddle/fluid/lite/api/cxx_api_bin.cc @@ -14,8 +14,9 @@ #include "paddle/fluid/lite/api/cxx_api.h" 
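`apis_test.cc` above compares the CXX and Light API predictors tensor-by-tensor through the new `GetTensor()` accessor and `TensorCompareWith()`. When a boolean match is not enough, a small helper in the same spirit can report how far two tensors diverge; `MaxAbsDiff` below is a hypothetical name, not part of the patch, and is built only on APIs that appear in it (`data<float>()`, `data_size()`):

```cpp
#include <algorithm>
#include <cmath>
#include <cstddef>

#include "paddle/fluid/lite/core/compatible_tensor.h"

namespace paddle {
namespace lite {

// Hypothetical helper: assumes both tensors hold float data of the same size.
inline float MaxAbsDiff(const lite::Tensor& a, const lite::Tensor& b) {
  const float* pa = a.data<float>();
  const float* pb = b.data<float>();
  const size_t n = std::min(a.data_size(), b.data_size());
  float max_diff = 0.f;
  for (size_t i = 0; i < n; ++i) {
    max_diff = std::max(max_diff, std::fabs(pa[i] - pb[i]));
  }
  return max_diff;
}

}  // namespace lite
}  // namespace paddle
```

Such a helper would slot in next to `CompareTensors()` when debugging why one of the listed tensors (`"fc_0.tmp_0"`, `"scale_0.tmp_0"`, ...) stops matching between the two runtimes.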
#include -#include "paddle/fluid/lite/core/mir/passes.h" +#include "paddle/fluid/lite/core/mir/use_passes.h" #include "paddle/fluid/lite/core/op_registry.h" + namespace paddle { namespace lite { @@ -66,8 +67,8 @@ void Run(const char* model_dir, int repeat) { } // namespace paddle int main(int argc, char** argv) { - CHECK_EQ(argc, 2) << "usage: ./cmd "; - paddle::lite::Run(argv[1], 1); + CHECK_EQ(argc, 3) << "usage: ./cmd "; + paddle::lite::Run(argv[1], std::stoi(argv[2])); return 0; } diff --git a/paddle/fluid/lite/api/cxx_api_test.cc b/paddle/fluid/lite/api/cxx_api_test.cc index 430bd9b58f80e593e1c85bb6d6113df6962a58e5..1b337c06a981447fd8b8f87905ce5d3d10c56d8c 100644 --- a/paddle/fluid/lite/api/cxx_api_test.cc +++ b/paddle/fluid/lite/api/cxx_api_test.cc @@ -16,59 +16,34 @@ #include #include #include -#include "paddle/fluid/lite/core/mir/passes.h" +#include "paddle/fluid/lite/api/lite_api_test_helper.h" +#include "paddle/fluid/lite/core/compatible_tensor.h" +#include "paddle/fluid/lite/core/mir/use_passes.h" #include "paddle/fluid/lite/core/op_registry.h" - -DEFINE_string(model_dir, "", ""); -DEFINE_string(optimized_model, "", ""); +#include "paddle/fluid/lite/kernels/use_kernels.h" +#include "paddle/fluid/lite/operators/use_ops.h" // For training. DEFINE_string(startup_program_path, "", ""); DEFINE_string(main_program_path, "", ""); +// for eval +DEFINE_string(eval_model_dir, "", ""); + namespace paddle { namespace lite { +#ifndef LITE_WITH_LIGHT_WEIGHT_FRAMEWORK TEST(CXXApi, test) { - lite::ExecutorLite predictor; -#ifndef LITE_WITH_CUDA - std::vector valid_places({Place{TARGET(kHost), PRECISION(kFloat)}, - Place{TARGET(kX86), PRECISION(kFloat)}}); -#else - std::vector valid_places({ - Place{TARGET(kHost), PRECISION(kFloat), DATALAYOUT(kNCHW)}, - Place{TARGET(kCUDA), PRECISION(kFloat), DATALAYOUT(kNCHW)}, - Place{TARGET(kCUDA), PRECISION(kAny), DATALAYOUT(kNCHW)}, - Place{TARGET(kHost), PRECISION(kAny), DATALAYOUT(kNCHW)}, - Place{TARGET(kCUDA), PRECISION(kAny), DATALAYOUT(kAny)}, - Place{TARGET(kHost), PRECISION(kAny), DATALAYOUT(kAny)}, - }); -#endif - - predictor.Build(FLAGS_model_dir, - Place{TARGET(kX86), PRECISION(kFloat)}, // origin cuda - valid_places); - - auto* input_tensor = predictor.GetInput(0); - input_tensor->Resize(DDim(std::vector({100, 100}))); - auto* data = input_tensor->mutable_data(); - for (int i = 0; i < 100 * 100; i++) { - data[i] = i; - } - - // LOG(INFO) << "input " << *input_tensor; - - predictor.Run(); - - auto* out = predictor.GetOutput(0); + const lite::Tensor* out = RunHvyModel(); LOG(INFO) << out << " memory size " << out->data_size(); - LOG(INFO) << "out " << out->data()[0]; - LOG(INFO) << "out " << out->data()[1]; + for (int i = 0; i < 10; i++) { + LOG(INFO) << "out " << out->data()[i]; + } LOG(INFO) << "dims " << out->dims(); // LOG(INFO) << "out " << *out; } -#ifndef LITE_WITH_LIGHT_WEIGHT_FRAMEWORK TEST(CXXApi, save_model) { lite::ExecutorLite predictor; std::vector valid_places({Place{TARGET(kHost), PRECISION(kFloat)}, @@ -79,9 +54,7 @@ TEST(CXXApi, save_model) { LOG(INFO) << "Save optimized model to " << FLAGS_optimized_model; predictor.SaveModel(FLAGS_optimized_model); } -#endif // LITE_WITH_LIGHT_WEIGHT_FRAMEWORK -#ifndef LITE_WITH_LIGHT_WEIGHT_FRAMEWORK /*TEST(CXXTrainer, train) { Place prefer_place({TARGET(kHost), PRECISION(kFloat), DATALAYOUT(kNCHW)}); std::vector valid_places({prefer_place}); @@ -115,46 +88,37 @@ TEST(CXXApi, save_model) { }*/ #endif // LITE_WITH_LIGHT_WEIGHT_FRAMEWORK -} // namespace lite -} // namespace paddle +#ifdef 
LITE_WITH_ARM +TEST(CXXApi, eval) { + DeviceInfo::Init(); + lite::ExecutorLite predictor; + std::vector valid_places({Place{TARGET(kHost), PRECISION(kFloat)}, + Place{TARGET(kARM), PRECISION(kFloat)}}); -USE_LITE_OP(mul); -USE_LITE_OP(fc); -USE_LITE_OP(relu); -USE_LITE_OP(scale); -USE_LITE_OP(feed); -USE_LITE_OP(fetch); -USE_LITE_OP(io_copy); -USE_LITE_OP(elementwise_add) -USE_LITE_OP(elementwise_sub) -USE_LITE_OP(square) -USE_LITE_OP(softmax) -USE_LITE_OP(dropout) -USE_LITE_OP(concat) -USE_LITE_OP(conv2d) -USE_LITE_OP(depthwise_conv2d) -USE_LITE_OP(pool2d) -USE_LITE_KERNEL(feed, kHost, kAny, kAny, def); -USE_LITE_KERNEL(fetch, kHost, kAny, kAny, def); - -#ifdef LITE_WITH_X86 -USE_LITE_KERNEL(relu, kX86, kFloat, kNCHW, def); -USE_LITE_KERNEL(mul, kX86, kFloat, kNCHW, def); -USE_LITE_KERNEL(fc, kX86, kFloat, kNCHW, def); -USE_LITE_KERNEL(scale, kX86, kFloat, kNCHW, def); -USE_LITE_KERNEL(square, kX86, kFloat, kNCHW, def); -USE_LITE_KERNEL(elementwise_sub, kX86, kFloat, kNCHW, def); -USE_LITE_KERNEL(elementwise_add, kX86, kFloat, kNCHW, def); -USE_LITE_KERNEL(softmax, kX86, kFloat, kNCHW, def); -USE_LITE_KERNEL(dropout, kX86, kFloat, kNCHW, def); -USE_LITE_KERNEL(concat, kX86, kFloat, kNCHW, def); -USE_LITE_KERNEL(conv2d, kX86, kFloat, kNCHW, def); -USE_LITE_KERNEL(depthwise_conv2d, kX86, kFloat, kNCHW, def); -USE_LITE_KERNEL(pool2d, kX86, kFloat, kNCHW, def); -#endif + predictor.Build(FLAGS_eval_model_dir, Place{TARGET(kARM), PRECISION(kFloat)}, + valid_places); + + auto* input_tensor = predictor.GetInput(0); + input_tensor->Resize(DDim(std::vector({1, 3, 224, 224}))); + auto* data = input_tensor->mutable_data(); + for (int i = 0; i < input_tensor->dims().production(); i++) { + data[i] = 1; + } -#ifdef LITE_WITH_CUDA -USE_LITE_KERNEL(mul, kCUDA, kFloat, kNCHW, def); -USE_LITE_KERNEL(io_copy, kCUDA, kAny, kAny, host_to_device); -USE_LITE_KERNEL(io_copy, kCUDA, kAny, kAny, device_to_host); + predictor.Run(); + + auto* out = predictor.GetOutput(0); + std::vector results({0.00097802, 0.00099822, 0.00103093, 0.00100121, + 0.00098268, 0.00104065, 0.00099962, 0.00095181, + 0.00099694, 0.00099406}); + for (int i = 0; i < results.size(); ++i) { + EXPECT_NEAR(out->data()[i], results[i], 1e-5); + } + ASSERT_EQ(out->dims().size(), 2); + ASSERT_EQ(out->dims()[0], 1); + ASSERT_EQ(out->dims()[1], 1000); +} #endif + +} // namespace lite +} // namespace paddle diff --git a/paddle/fluid/lite/api/light_api.h b/paddle/fluid/lite/api/light_api.h index 474e5da78bd2cd201b17f9a223bd1a177861a532..5085909385c94e2e81b2cfa14167e8ce886060a3 100644 --- a/paddle/fluid/lite/api/light_api.h +++ b/paddle/fluid/lite/api/light_api.h @@ -22,6 +22,7 @@ #include #include #include +#include "paddle/fluid/lite/core/compatible_tensor.h" #include "paddle/fluid/lite/core/context.h" #include "paddle/fluid/lite/core/program.h" #include "paddle/fluid/lite/core/types.h" @@ -62,6 +63,11 @@ class LightPredictor { return &fetch_list.at(offset); } + const lite::Tensor* GetTensor(const std::string& name) const { + auto* var = program_->exec_scope()->FindVar(name); + return &var->Get(); + } + private: void BuildRuntimeProgram(const framework::proto::ProgramDesc& prog) { std::vector insts; @@ -72,9 +78,8 @@ class LightPredictor { // Create the kernels of the target places, and filter out the specific // kernel with the target alias. 
- for (auto& op : program.ops_) { - lite::pb::OpDesc desc(op->op_info()->desc()); - auto kernel_type = desc.GetAttr(kKernelTypeAttr).get(); + for (auto& op : program.ops()) { + auto kernel_type = op->op_info()->GetAttr(kKernelTypeAttr); std::string op_type, alias; Place place; KernelBase::ParseKernelType(kernel_type, &op_type, &alias, &place); @@ -89,8 +94,8 @@ class LightPredictor { insts.emplace_back(op, std::move(*it)); } program_.reset(new RuntimeProgram(std::move(insts))); - CHECK(program.exec_scope_); - program_->set_exec_scope(program.exec_scope_); + CHECK(program.exec_scope()); + program_->set_exec_scope(program.exec_scope()); } private: diff --git a/paddle/fluid/lite/api/light_api_test.cc b/paddle/fluid/lite/api/light_api_test.cc index b1e6741e09ebd075ef646730f9b5354baefca84f..faf53b8177a4d11fb33017599ecdb9dc650fbc43 100644 --- a/paddle/fluid/lite/api/light_api_test.cc +++ b/paddle/fluid/lite/api/light_api_test.cc @@ -15,6 +15,9 @@ #include "paddle/fluid/lite/api/light_api.h" #include #include +#include "paddle/fluid/lite/core/mir/use_passes.h" +#include "paddle/fluid/lite/kernels/use_kernels.h" +#include "paddle/fluid/lite/operators/use_ops.h" DEFINE_string(optimized_model, "", ""); @@ -33,29 +36,14 @@ TEST(LightAPI, load) { } predictor.Run(); + + const auto* output = predictor.GetOutput(0); + const float* raw_output = output->data(); + + for (int i = 0; i < 10; i++) { + LOG(INFO) << "out " << raw_output[i]; + } } } // namespace lite } // namespace paddle - -USE_LITE_OP(mul); -USE_LITE_OP(fc); -USE_LITE_OP(scale); -USE_LITE_OP(feed); -USE_LITE_OP(fetch); -USE_LITE_OP(io_copy); - -USE_LITE_KERNEL(feed, kHost, kAny, kAny, def); -USE_LITE_KERNEL(fetch, kHost, kAny, kAny, def); - -#ifdef LITE_WITH_X86 -USE_LITE_KERNEL(relu, kX86, kFloat, kNCHW, def); -USE_LITE_KERNEL(mul, kX86, kFloat, kNCHW, def); -USE_LITE_KERNEL(fc, kX86, kFloat, kNCHW, def); -USE_LITE_KERNEL(scale, kX86, kFloat, kNCHW, def); -USE_LITE_KERNEL(square, kX86, kFloat, kNCHW, def); -USE_LITE_KERNEL(elementwise_sub, kX86, kFloat, kNCHW, def); -USE_LITE_KERNEL(elementwise_add, kX86, kFloat, kNCHW, def); -USE_LITE_KERNEL(softmax, kX86, kFloat, kNCHW, def); -USE_LITE_KERNEL(dropout, kX86, kFloat, kNCHW, def); -#endif diff --git a/paddle/fluid/lite/api/lite_api_test_helper.cc b/paddle/fluid/lite/api/lite_api_test_helper.cc new file mode 100644 index 0000000000000000000000000000000000000000..490a64bb512bdf31359b6204399b1e1767bb4f17 --- /dev/null +++ b/paddle/fluid/lite/api/lite_api_test_helper.cc @@ -0,0 +1,59 @@ +// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include "paddle/fluid/lite/api/lite_api_test_helper.h" + +DEFINE_string(model_dir, "", ""); +DEFINE_string(optimized_model, "", ""); + +namespace paddle { +namespace lite { + +const lite::Tensor* RunHvyModel() { + lite::ExecutorLite predictor; +#ifndef LITE_WITH_CUDA + std::vector valid_places({Place{TARGET(kHost), PRECISION(kFloat)}, + Place{TARGET(kX86), PRECISION(kFloat)}}); +#else + std::vector valid_places({ + Place{TARGET(kHost), PRECISION(kFloat), DATALAYOUT(kNCHW)}, + Place{TARGET(kCUDA), PRECISION(kFloat), DATALAYOUT(kNCHW)}, + Place{TARGET(kCUDA), PRECISION(kAny), DATALAYOUT(kNCHW)}, + Place{TARGET(kHost), PRECISION(kAny), DATALAYOUT(kNCHW)}, + Place{TARGET(kCUDA), PRECISION(kAny), DATALAYOUT(kAny)}, + Place{TARGET(kHost), PRECISION(kAny), DATALAYOUT(kAny)}, + }); +#endif + + predictor.Build(FLAGS_model_dir, + Place{TARGET(kX86), PRECISION(kFloat)}, // origin cuda + valid_places); + + auto* input_tensor = predictor.GetInput(0); + input_tensor->Resize(DDim(std::vector({100, 100}))); + auto* data = input_tensor->mutable_data(); + for (int i = 0; i < 100 * 100; i++) { + data[i] = i; + } + + // LOG(INFO) << "input " << *input_tensor; + + predictor.Run(); + + const auto* out = predictor.GetOutput(0); + return out; +} + +} // namespace lite +} // namespace paddle diff --git a/paddle/fluid/lite/api/lite_api_test_helper.h b/paddle/fluid/lite/api/lite_api_test_helper.h new file mode 100644 index 0000000000000000000000000000000000000000..840de932f0146b7241ba030b02742e34e2c1b9b8 --- /dev/null +++ b/paddle/fluid/lite/api/lite_api_test_helper.h @@ -0,0 +1,31 @@ +// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once + +#include +#include "paddle/fluid/lite/api/cxx_api.h" +#include "paddle/fluid/lite/core/compatible_tensor.h" +#include "paddle/fluid/lite/core/op_registry.h" + +DECLARE_string(model_dir); +DECLARE_string(optimized_model); + +namespace paddle { +namespace lite { + +const lite::Tensor* RunHvyModel(); + +} // namespace lite +} // namespace paddle diff --git a/paddle/fluid/lite/arm/math/CMakeLists.txt b/paddle/fluid/lite/arm/math/CMakeLists.txt index a20b5fa842f37ac7b462b81f77dc7b6340db4bd3..883e7bc4609b09dcea485eb85607fe7e8f2136cf 100644 --- a/paddle/fluid/lite/arm/math/CMakeLists.txt +++ b/paddle/fluid/lite/arm/math/CMakeLists.txt @@ -14,6 +14,7 @@ cc_library(math_arm SRCS scale.cc pooling.cc elementwise.cc + concat.cc sgemv.cc type_trans.cpp conv_impl.cc diff --git a/paddle/fluid/lite/arm/math/concat.cc b/paddle/fluid/lite/arm/math/concat.cc new file mode 100644 index 0000000000000000000000000000000000000000..fd375ab0e7f7700b31013fa55d73ddb732fd2e97 --- /dev/null +++ b/paddle/fluid/lite/arm/math/concat.cc @@ -0,0 +1,59 @@ +// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "paddle/fluid/lite/arm/math/concat.h" +#include +#include +#include +#include "paddle/fluid/lite/arm/math/funcs.h" + +namespace paddle { +namespace lite { +namespace arm { +namespace math { + +void concat_func(const std::vector &input, const int axis, + lite::Tensor *output) { + size_t num = input.size(); + int rows = 1; + auto dim_0 = input[0]->dims(); + for (int i = 0; i < axis; ++i) { + rows *= dim_0[i]; + } + int out_rows = rows, out_cols = 0; + + std::vector input_cols(input.size()); + for (int i = 0; i < num; ++i) { + int t_cols = input[i]->numel() / rows; + out_cols += t_cols; + input_cols[i] = t_cols; + } + + // computation + for (int k = 0; k < out_rows; ++k) { + float *dst_ptr = output->mutable_data() + k * out_cols; + int col_idx = 0; + for (int j = 0; j < num; ++j) { + int col_len = input_cols[j]; + const float *src_prt = input[j]->data() + k * col_len; + std::memcpy(dst_ptr + col_idx, src_prt, sizeof(float) * col_len); + col_idx += col_len; + } + } +} + +} // namespace math +} // namespace arm +} // namespace lite +} // namespace paddle diff --git a/paddle/fluid/lite/arm/math/concat.h b/paddle/fluid/lite/arm/math/concat.h new file mode 100644 index 0000000000000000000000000000000000000000..bc67523a494559011e79b9d8c687b8521b5b669b --- /dev/null +++ b/paddle/fluid/lite/arm/math/concat.h @@ -0,0 +1,34 @@ +// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
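`concat_func` above collapses every dimension before `axis` into a row count and everything from `axis` onwards into a per-input column count, then copies the column blocks of each input back to back for every row. Below is a standalone sketch of the same bookkeeping on plain `std::vector<float>` (no `lite::Tensor`), added only to make the index arithmetic easy to follow:

```cpp
#include <cstring>
#include <functional>
#include <numeric>
#include <vector>

// inputs[i] holds the flattened data of tensor i; dims0 is the shape of the
// first input, which (as in concat_func) is used to derive the row count.
std::vector<float> ConcatAlongAxis(
    const std::vector<std::vector<float>>& inputs,
    const std::vector<int>& dims0, int axis) {
  const int rows = std::accumulate(dims0.begin(), dims0.begin() + axis, 1,
                                   std::multiplies<int>());
  std::vector<int> cols(inputs.size());
  int out_cols = 0;
  for (size_t i = 0; i < inputs.size(); ++i) {
    cols[i] = static_cast<int>(inputs[i].size()) / rows;
    out_cols += cols[i];
  }

  std::vector<float> out(static_cast<size_t>(rows) * out_cols);
  for (int r = 0; r < rows; ++r) {
    float* dst = out.data() + static_cast<size_t>(r) * out_cols;
    int col_idx = 0;
    for (size_t j = 0; j < inputs.size(); ++j) {
      const float* src = inputs[j].data() + static_cast<size_t>(r) * cols[j];
      std::memcpy(dst + col_idx, src, sizeof(float) * cols[j]);
      col_idx += cols[j];
    }
  }
  return out;
}
```

For example, concatenating shapes `{2, 3}` and `{2, 5}` along `axis = 1` gives `rows = 2`, per-input columns of 3 and 5, and an output of shape `{2, 8}`.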
+ +#pragma once + +#include +#include +#include +#include "paddle/fluid/lite/operators/op_params.h" +#include "paddle/fluid/lite/utils/cp_logging.h" + +namespace paddle { +namespace lite { +namespace arm { +namespace math { + +void concat_func(const std::vector &input, const int axis, + lite::Tensor *output); + +} // namespace math +} // namespace arm +} // namespace lite +} // namespace paddle diff --git a/paddle/fluid/lite/arm/math/elementwise.cc b/paddle/fluid/lite/arm/math/elementwise.cc index 2a74e7ee4ec4be51b420b1fa2d2a1be7c3f148fb..7c1ea8d3a70451dd790a9eea516b74f58ec91d5e 100644 --- a/paddle/fluid/lite/arm/math/elementwise.cc +++ b/paddle/fluid/lite/arm/math/elementwise.cc @@ -65,9 +65,61 @@ void elementwise_add(const float* dinx, const float* diny, float* dout, } template <> -void elementwise_add_axis(const float* dinx, const float* diny, - float* dout, int batch, int channels, - int num) { +void elementwise_add_relu(const float* dinx, const float* diny, + float* dout, int num) { + int cnt = num >> 4; + int remain = num % 16; + float32x4_t vzero = vdupq_n_f32(0.f); +#pragma omp parallel for + for (int i = 0; i < cnt; i++) { + const float* dinx_ptr = dinx + (i << 4); + const float* diny_ptr = diny + (i << 4); + float* dout_ptr = dout + (i << 4); + + float32x4_t dinx0 = vld1q_f32(dinx_ptr); + float32x4_t dinx1 = vld1q_f32(dinx_ptr + 4); + float32x4_t dinx2 = vld1q_f32(dinx_ptr + 8); + float32x4_t dinx3 = vld1q_f32(dinx_ptr + 12); + + float32x4_t diny0 = vld1q_f32(diny_ptr); + float32x4_t diny1 = vld1q_f32(diny_ptr + 4); + float32x4_t diny2 = vld1q_f32(diny_ptr + 8); + float32x4_t diny3 = vld1q_f32(diny_ptr + 12); + + dinx0 = vaddq_f32(dinx0, diny0); + dinx1 = vaddq_f32(dinx1, diny1); + dinx2 = vaddq_f32(dinx2, diny2); + dinx3 = vaddq_f32(dinx3, diny3); + + // relu + dinx0 = vmaxq_f32(dinx0, vzero); + dinx1 = vmaxq_f32(dinx1, vzero); + dinx2 = vmaxq_f32(dinx2, vzero); + dinx3 = vmaxq_f32(dinx3, vzero); + + vst1q_f32(dout_ptr, dinx0); + vst1q_f32(dout_ptr + 4, dinx1); + vst1q_f32(dout_ptr + 8, dinx2); + vst1q_f32(dout_ptr + 12, dinx3); + } + if (remain > 0) { + const float* dinx_ptr = dinx + (cnt << 4); + const float* diny_ptr = diny + (cnt << 4); + float* dout_ptr = dout + (cnt << 4); + for (int i = 0; i < remain; i++) { + float tmp = *dinx_ptr + *diny_ptr; + *dout_ptr = tmp > 0.f ? 
tmp : 0.f; + dout_ptr++; + dinx_ptr++; + diny_ptr++; + } + } +} + +template <> +void elementwise_add_broadcast(const float* dinx, const float* diny, + float* dout, int batch, int channels, + int num) { #pragma omp parallel for collapse(2) for (int i = 0; i < batch; ++i) { for (int j = 0; j < channels; ++j) { @@ -127,6 +179,82 @@ void elementwise_add_axis(const float* dinx, const float* diny, } } +template <> +void elementwise_add_relu_broadcast(const float* dinx, const float* diny, + float* dout, int batch, int channels, + int num) { + float32x4_t vzero = vdupq_n_f32(0.f); +#pragma omp parallel for collapse(2) + for (int i = 0; i < batch; ++i) { + for (int j = 0; j < channels; ++j) { + int offset = (i * channels + j) * num; + const float* din_ptr = dinx + offset; + const float diny_data = diny[j]; + float* dout_ptr = dout + offset; + + int cnt = num >> 4; + int remain = num % 16; + float32x4_t rb = vdupq_n_f32(diny_data); + for (int k = 0; k < cnt; ++k) { + float32x4_t din0 = vld1q_f32(din_ptr); + float32x4_t din1 = vld1q_f32(din_ptr + 4); + float32x4_t din2 = vld1q_f32(din_ptr + 8); + float32x4_t din3 = vld1q_f32(din_ptr + 12); + + din0 = vaddq_f32(din0, rb); + din1 = vaddq_f32(din1, rb); + din2 = vaddq_f32(din2, rb); + din3 = vaddq_f32(din3, rb); + + // relu + din0 = vmaxq_f32(din0, vzero); + din1 = vmaxq_f32(din1, vzero); + din2 = vmaxq_f32(din2, vzero); + din3 = vmaxq_f32(din3, vzero); + + vst1q_f32(dout_ptr, din0); + vst1q_f32(dout_ptr + 4, din1); + vst1q_f32(dout_ptr + 8, din2); + vst1q_f32(dout_ptr + 12, din3); + din_ptr += 16; + dout_ptr += 16; + } + if (remain >= 8) { + float32x4_t din0 = vld1q_f32(din_ptr); + float32x4_t din1 = vld1q_f32(din_ptr + 4); + din0 = vaddq_f32(din0, rb); + din1 = vaddq_f32(din1, rb); + // relu + din0 = vmaxq_f32(din0, vzero); + din1 = vmaxq_f32(din1, vzero); + vst1q_f32(dout_ptr, din0); + vst1q_f32(dout_ptr + 4, din1); + din_ptr += 8; + dout_ptr += 8; + remain -= 8; + } + if (remain >= 4) { + float32x4_t din0 = vld1q_f32(din_ptr); + din0 = vaddq_f32(din0, rb); + // relu + din0 = vmaxq_f32(din0, vzero); + vst1q_f32(dout_ptr, din0); + din_ptr += 4; + dout_ptr += 4; + remain -= 4; + } + if (remain > 0) { + for (int p = 0; p < remain; p++) { + float tmp = *din_ptr + diny_data; + *dout_ptr = tmp > 0.f ? 
tmp : 0.f; + dout_ptr++; + din_ptr++; + } + } + } + } +} + } // namespace math } // namespace arm } // namespace lite diff --git a/paddle/fluid/lite/arm/math/elementwise.h b/paddle/fluid/lite/arm/math/elementwise.h index ca8f87895fcea80f9a1a178a0bf43b34c44182bb..9300d73753d695819af6ec7066fd95020457bd29 100644 --- a/paddle/fluid/lite/arm/math/elementwise.h +++ b/paddle/fluid/lite/arm/math/elementwise.h @@ -23,8 +23,15 @@ template void elementwise_add(const T* dinx, const T* diny, T* dout, int num); template -void elementwise_add_axis(const T* dinx, const T* diny, T* dout, int batch, - int channels, int num); +void elementwise_add_relu(const T* dinx, const T* diny, T* dout, int num); + +template +void elementwise_add_broadcast(const T* dinx, const T* diny, T* dout, int batch, + int channels, int num); + +template +void elementwise_add_relu_broadcast(const T* dinx, const T* diny, T* dout, + int batch, int channels, int num); } // namespace math } // namespace arm diff --git a/paddle/fluid/lite/core/CMakeLists.txt b/paddle/fluid/lite/core/CMakeLists.txt index 89101aa03272d98ac08d7830830de6acb9adf271..665d7555e3757188f8a7b76496fa85cb20192670 100644 --- a/paddle/fluid/lite/core/CMakeLists.txt +++ b/paddle/fluid/lite/core/CMakeLists.txt @@ -1,5 +1,5 @@ if (WITH_TESTING) - cc_library(lite_gtest_main SRCS lite_gtest_main.cc DEPS gtest) + cc_library(lite_gtest_main SRCS lite_gtest_main.cc DEPS gtest gflags) endif() lite_cc_library(target_wrapper_lite SRCS target_wrapper.cc DEPS target_wrapper_host diff --git a/paddle/fluid/lite/core/context.cc b/paddle/fluid/lite/core/context.cc index cd7006f4724ccaa7d8733caff2ed4ef8c5d01f2f..89ec7278c1aaf8e372c45f24a32525df4f223418 100644 --- a/paddle/fluid/lite/core/context.cc +++ b/paddle/fluid/lite/core/context.cc @@ -28,6 +28,10 @@ #endif // TARGET_OS_IPHONE #endif // __APPLE__ +#ifdef ARM_WITH_OMP +#include +#endif + namespace paddle { namespace lite { @@ -84,7 +88,7 @@ ARMContext& Context::operator=(const ARMContext& ctx) { } void Context::BindDev() { -#ifdef USE_OPENMP +#ifdef ARM_WITH_OMP int num_threads = active_ids_.size(); omp_set_num_threads(num_threads); #ifdef LITE_WITH_LINUX @@ -98,12 +102,12 @@ void Context::BindDev() { } for (int i = 0; i < num_threads; i++) { if (ssarets[i] != 0) { - LOGE("set cpu affinity failed, cpuID: %d\n", active_ids_[i]); + LOG(ERROR) << "set cpu affinity failed, cpuID: " << active_ids_[i]; return; } } #endif // LITE_WITH_LINUX -#else // USE_OPENMP +#else // ARM_WITH_OMP #ifdef LITE_WITH_LINUX std::vector cpuid1; cpuid1.push_back(active_ids_[0]); @@ -113,7 +117,7 @@ void Context::BindDev() { return; } #endif // LITE_WITH_LINUX -#endif // USE_OPENMP +#endif // ARM_WITH_OMP } void Context::SetRunMode(PowerMode mode, int threads) { @@ -123,7 +127,7 @@ void Context::SetRunMode(PowerMode mode, int threads) { if (threads > big_core_size + small_core_size) { threads = big_core_size + small_core_size; } -#ifdef USE_OPENMP +#ifdef ARM_WITH_OMP count_++; int shift_num = (count_ / 10) % big_core_size; switch (mode) { @@ -146,8 +150,8 @@ void Context::SetRunMode(PowerMode mode, int threads) { if (big_core_size > 0) { mode_ = LITE_POWER_HIGH; if (threads > big_core_size) { - LOGE("threads: %d, exceed the big cores size: %d\n", threads, - big_core_size); + LOG(ERROR) << "threads: " << threads + << ", exceed the big cores size: " << big_core_size; active_ids_ = dev.big_core_ids_; } else { for (int i = 0; i < threads; ++i) { @@ -156,7 +160,7 @@ void Context::SetRunMode(PowerMode mode, int threads) { } } else { mode_ = LITE_POWER_LOW; - 
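The NEON kernels above (`elementwise_add_relu` and `elementwise_add_relu_broadcast`) process 16 floats per main-loop iteration with `vaddq_f32` followed by `vmaxq_f32` against zero for the fused ReLU, plus scalar tail loops. A scalar reference of what both paths compute, not part of the patch but useful as documentation and as a check against the vectorized code:

```cpp
#include <algorithm>

// Element-wise add fused with ReLU: out[i] = max(x[i] + y[i], 0).
void elementwise_add_relu_ref(const float* x, const float* y, float* out,
                              int num) {
  for (int i = 0; i < num; ++i) {
    out[i] = std::max(x[i] + y[i], 0.f);
  }
}

// Broadcast form: y holds `channels` scalars, each added to a contiguous run
// of `num` elements of x, matching elementwise_add_relu_broadcast above.
void elementwise_add_relu_broadcast_ref(const float* x, const float* y,
                                        float* out, int batch, int channels,
                                        int num) {
  for (int i = 0; i < batch; ++i) {
    for (int j = 0; j < channels; ++j) {
      const int offset = (i * channels + j) * num;
      for (int k = 0; k < num; ++k) {
        out[offset + k] = std::max(x[offset + k] + y[j], 0.f);
      }
    }
  }
}
```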
LOGE("HIGH POWER MODE is not support, switch to little cores\n"); + LOG(ERROR) << "HIGH POWER MODE is not support, switch to little cores"; if (threads > small_core_size) { active_ids_ = dev.little_core_ids_; } else { @@ -174,8 +178,8 @@ void Context::SetRunMode(PowerMode mode, int threads) { if (small_core_size > 0) { mode_ = LITE_POWER_LOW; if (threads > small_core_size) { - LOGW("threads: %d, exceed the little cores size: %d\n", threads, - small_core_size); + LOG(WARNING) << "threads: " << threads + << ", exceed the little cores size: " << small_core_size; active_ids_ = dev.little_core_ids_; } else { for (int i = 0; i < threads; ++i) { @@ -184,7 +188,7 @@ void Context::SetRunMode(PowerMode mode, int threads) { } } else { mode_ = LITE_POWER_HIGH; - LOGW("LOW POWER MODE is not support, switch to big cores\n"); + LOG(WARNING) << "LOW POWER MODE is not support, switch to big cores"; if (threads > big_core_size) { active_ids_ = dev.big_core_ids_; } else { @@ -211,8 +215,8 @@ void Context::SetRunMode(PowerMode mode, int threads) { if (big_core_size > 0) { mode_ = LITE_POWER_RAND_HIGH; if (threads > big_core_size) { - LOGW("threads: %d, exceed the big cores size: %d\n", threads, - big_core_size); + LOG(WARNING) << "threads: " << threads + << ", exceed the big cores size: " << big_core_size; active_ids_ = dev.big_core_ids_; } else { for (int i = 0; i < threads; ++i) { @@ -222,7 +226,8 @@ void Context::SetRunMode(PowerMode mode, int threads) { } } else { mode_ = LITE_POWER_LOW; - LOGW("HIGH POWER MODE is not support, switch to little cores\n"); + LOG(WARNING) + << "HIGH POWER MODE is not support, switch to little cores"; if (threads > small_core_size) { active_ids_ = dev.little_core_ids_; } else { @@ -240,8 +245,8 @@ void Context::SetRunMode(PowerMode mode, int threads) { if (small_core_size > 0) { mode_ = LITE_POWER_RAND_LOW; if (threads > small_core_size) { - LOGW("threads: %d, exceed the little cores size: %d\n", threads, - small_core_size); + LOG(WARNING) << "threads: " << threads + << ", exceed the little cores size: " << small_core_size; active_ids_ = dev.little_core_ids_; } else { for (int i = 0; i < threads; ++i) { @@ -251,7 +256,7 @@ void Context::SetRunMode(PowerMode mode, int threads) { } } else { mode_ = LITE_POWER_HIGH; - LOGW("LOW POWER MODE is not support, switch to big cores\n"); + LOG(WARNING) << "LOW POWER MODE is not support, switch to big cores"; if (threads > big_core_size) { active_ids_ = dev.big_core_ids_; } else { diff --git a/paddle/fluid/lite/core/hvy_tensor.h b/paddle/fluid/lite/core/hvy_tensor.h index 748e80c2559718d278a08e3c568532e177c835eb..6dbef9bc86a5e207ea2be1baea2dc96bbc6c0309 100644 --- a/paddle/fluid/lite/core/hvy_tensor.h +++ b/paddle/fluid/lite/core/hvy_tensor.h @@ -86,6 +86,7 @@ class TensorHvy : public TensorBase { template T* mutable_data() { + memory_size_ = framework::product(data_.dims()) * sizeof(T); return data_.mutable_data(data_.dims(), platform::CPUPlace()); } template @@ -128,8 +129,11 @@ class TensorHvy : public TensorBase { const framework::LoDTensor& raw_tensor() const { return data_; } framework::LoDTensor& raw_tensor() { return data_; } + size_t memory_size() const { return memory_size_; } + private: framework::LoDTensor data_; + size_t memory_size_{}; }; } // namespace lite diff --git a/paddle/fluid/lite/core/lite_tensor.h b/paddle/fluid/lite/core/lite_tensor.h index 6cccdc0dd03527434ac1ac49f3e3fb8a78b26c34..9860265bbb342e91cfd8031eef6eb1062c98920f 100644 --- a/paddle/fluid/lite/core/lite_tensor.h +++ b/paddle/fluid/lite/core/lite_tensor.h 
@@ -90,6 +90,8 @@ class TensorLite : public TensorBase { void *mutable_data(size_t memory_size); void *mutable_data(TargetType target, size_t memory_size); + const void *raw_data() const { return buffer_->data(); } + size_t memory_size() const { return memory_size_; } bool IsInitialized() const { return buffer_->data(); } diff --git a/paddle/fluid/lite/core/mir/CMakeLists.txt b/paddle/fluid/lite/core/mir/CMakeLists.txt index 412c23324cf2a2ca5b04cf21fecd8a380af0d393..6a1ffaf12bdc94ff4ae32a8fb088c41237399319 100644 --- a/paddle/fluid/lite/core/mir/CMakeLists.txt +++ b/paddle/fluid/lite/core/mir/CMakeLists.txt @@ -7,7 +7,8 @@ cc_library(mir_pass_registry SRCS pass_registry.cc DEPS mir_pass_manager) add_subdirectory(fusion) cc_library(mir_passes SRCS fc_fuse_pass.cc - conv_elementwise_add_relu_fuse_pass.cc + conv_elementwise_add_activation_fuse_pass.cc + elementwise_add_activation_fuse_pass.cc conv_bn_fuse_pass.cc quant_dequant_fuse_pass.cc static_kernel_pick_pass.cc @@ -83,7 +84,11 @@ lite_download_and_uncompress(${LITE_MODEL_DIR} ${LITE_URL} "lite_fc_model.tar.gz add_dependencies(test_lite_fc_fuse extern_lite_download_lite_fc_model_tar_gz) -lite_cc_test(test_lite_conv_elementwise_add_relu_fuse - SRCS conv_elementwise_add_relu_fuse_pass_test.cc +lite_cc_test(test_lite_conv_elementwise_add_activation_fuse + SRCS conv_elementwise_add_activation_fuse_pass_test.cc + DEPS cxx_api_lite mir_passes + ${ops_lite} ${host_kernels} ${x86_kernels}) +lite_cc_test(test_lite_elementwise_add_activation_fuse + SRCS elementwise_add_activation_fuse_pass_test.cc DEPS cxx_api_lite mir_passes ${ops_lite} ${host_kernels} ${x86_kernels}) diff --git a/paddle/fluid/lite/core/mir/conv_elementwise_add_relu_fuse_pass.cc b/paddle/fluid/lite/core/mir/conv_elementwise_add_activation_fuse_pass.cc similarity index 66% rename from paddle/fluid/lite/core/mir/conv_elementwise_add_relu_fuse_pass.cc rename to paddle/fluid/lite/core/mir/conv_elementwise_add_activation_fuse_pass.cc index 3110c7aa6d408d2520d982ec76a77baea7babdbc..27f6413c47b514d3203c5879d7ee7b9697d8cf5a 100644 --- a/paddle/fluid/lite/core/mir/conv_elementwise_add_relu_fuse_pass.cc +++ b/paddle/fluid/lite/core/mir/conv_elementwise_add_activation_fuse_pass.cc @@ -12,22 +12,23 @@ // See the License for the specific language governing permissions and // limitations under the License. 
-#include "paddle/fluid/lite/core/mir/conv_elementwise_add_relu_fuse_pass.h" +#include "paddle/fluid/lite/core/mir/conv_elementwise_add_activation_fuse_pass.h" #include #include -#include "paddle/fluid/lite/core/mir/fusion/conv_elementwise_add_relu_fuser.h" +#include "paddle/fluid/lite/core/mir/fusion/conv_elementwise_add_activation_fuser.h" #include "paddle/fluid/lite/core/mir/pass_registry.h" namespace paddle { namespace lite { namespace mir { -void ConvElementwiseAddReLUFusePass::Apply( +void ConvElementwiseAddActivationFusePass::Apply( const std::unique_ptr& graph) { - fusion::ConvElementwiseAddReLUFuser fuser("conv2d"); + fusion::ConvElementwiseAddActivationFuser fuser("conv2d", "relu"); fuser(graph.get()); - fusion::ConvElementwiseAddReLUFuser depthwise_fuser("depthwise_conv2d"); + fusion::ConvElementwiseAddActivationFuser depthwise_fuser("depthwise_conv2d", + "relu"); depthwise_fuser(graph.get()); } @@ -35,5 +36,5 @@ void ConvElementwiseAddReLUFusePass::Apply( } // namespace lite } // namespace paddle -REGISTER_MIR_PASS(lite_conv_elementwise_add_act_fuse_pass, - paddle::lite::mir::ConvElementwiseAddReLUFusePass); +REGISTER_MIR_PASS(lite_conv_elementwise_add_activation_fuse_pass, + paddle::lite::mir::ConvElementwiseAddActivationFusePass); diff --git a/paddle/fluid/lite/core/mir/conv_elementwise_add_activation_fuse_pass.h b/paddle/fluid/lite/core/mir/conv_elementwise_add_activation_fuse_pass.h new file mode 100644 index 0000000000000000000000000000000000000000..a5a619f4d0d06da52661282e68f6a3c34c987bc9 --- /dev/null +++ b/paddle/fluid/lite/core/mir/conv_elementwise_add_activation_fuse_pass.h @@ -0,0 +1,32 @@ +// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once + +#include +#include +#include "paddle/fluid/lite/core/mir/pass.h" + +namespace paddle { +namespace lite { +namespace mir { + +class ConvElementwiseAddActivationFusePass : public ProgramPass { + public: + void Apply(const std::unique_ptr& graph) override; +}; + +} // namespace mir +} // namespace lite +} // namespace paddle diff --git a/paddle/fluid/lite/core/mir/conv_elementwise_add_relu_fuse_pass_test.cc b/paddle/fluid/lite/core/mir/conv_elementwise_add_activation_fuse_pass_test.cc similarity index 94% rename from paddle/fluid/lite/core/mir/conv_elementwise_add_relu_fuse_pass_test.cc rename to paddle/fluid/lite/core/mir/conv_elementwise_add_activation_fuse_pass_test.cc index 30991313ad3ed9ef39c3fb8183f4cfc43c9c49b9..a67e577505f3ee1e099a5a3be3801116210c197d 100644 --- a/paddle/fluid/lite/core/mir/conv_elementwise_add_relu_fuse_pass_test.cc +++ b/paddle/fluid/lite/core/mir/conv_elementwise_add_activation_fuse_pass_test.cc @@ -12,7 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. 
-#include "paddle/fluid/lite/core/mir/conv_elementwise_add_relu_fuse_pass.h" +#include "paddle/fluid/lite/core/mir/conv_elementwise_add_activation_fuse_pass.h" #include #include #include @@ -20,7 +20,7 @@ #include "paddle/fluid/lite/api/cxx_api.h" #include "paddle/fluid/lite/core/compatible_tensor.h" #include "paddle/fluid/lite/core/mir/graph_visualize_pass.h" -#include "paddle/fluid/lite/core/mir/passes.h" +#include "paddle/fluid/lite/core/mir/use_passes.h" #include "paddle/fluid/lite/core/op_registry.h" #include "paddle/fluid/lite/core/program.h" @@ -135,11 +135,11 @@ TEST(conv_elementwise_add_relu_fuse_pass, fuse_test_op) { auto graph = BuildGraph(&program_desc, scope, places); Visualize(graph.get()); const int num_nodes = graph->nodes().size(); - auto* fuser = new ConvElementwiseAddReLUFusePass; + auto* fuser = new ConvElementwiseAddActivationFusePass; fuser->Apply(graph); Visualize(graph.get()); - ASSERT_EQ(graph->nodes().size(), num_nodes - 5UL * 2 /*nodes removed */ + - 1UL * 2 /* fused fc node*/); + ASSERT_EQ(graph->nodes().size(), + num_nodes - 5UL * 2 /*nodes removed */ + 1UL * 2 /* fused nodes*/); } } // namespace fusion diff --git a/paddle/fluid/lite/core/mir/elementwise_add_activation_fuse_pass.cc b/paddle/fluid/lite/core/mir/elementwise_add_activation_fuse_pass.cc new file mode 100644 index 0000000000000000000000000000000000000000..9ce455dcdafb0d2e8f040bc3244495b2968eebd0 --- /dev/null +++ b/paddle/fluid/lite/core/mir/elementwise_add_activation_fuse_pass.cc @@ -0,0 +1,36 @@ +// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include "paddle/fluid/lite/core/mir/elementwise_add_activation_fuse_pass.h" +#include +#include +#include "paddle/fluid/lite/core/mir/fusion/elementwise_add_activation_fuser.h" +#include "paddle/fluid/lite/core/mir/pass_registry.h" + +namespace paddle { +namespace lite { +namespace mir { + +void ElementwiseAddActivationFusePass::Apply( + const std::unique_ptr& graph) { + fusion::ElementwiseAddActivationFuser fuser("relu"); + fuser(graph.get()); +} + +} // namespace mir +} // namespace lite +} // namespace paddle + +REGISTER_MIR_PASS(lite_elementwise_add_activation_fuse_pass, + paddle::lite::mir::ElementwiseAddActivationFusePass); diff --git a/paddle/fluid/lite/core/mir/conv_elementwise_add_relu_fuse_pass.h b/paddle/fluid/lite/core/mir/elementwise_add_activation_fuse_pass.h similarity index 93% rename from paddle/fluid/lite/core/mir/conv_elementwise_add_relu_fuse_pass.h rename to paddle/fluid/lite/core/mir/elementwise_add_activation_fuse_pass.h index 4276f1ffc8c258b0b4266abd950fa1ccf541c4a7..213c3f68f6008bfc9c522b3896a678a137e92201 100644 --- a/paddle/fluid/lite/core/mir/conv_elementwise_add_relu_fuse_pass.h +++ b/paddle/fluid/lite/core/mir/elementwise_add_activation_fuse_pass.h @@ -22,7 +22,7 @@ namespace paddle { namespace lite { namespace mir { -class ConvElementwiseAddReLUFusePass : public ProgramPass { +class ElementwiseAddActivationFusePass : public ProgramPass { public: void Apply(const std::unique_ptr& graph) override; }; diff --git a/paddle/fluid/lite/core/mir/elementwise_add_activation_fuse_pass_test.cc b/paddle/fluid/lite/core/mir/elementwise_add_activation_fuse_pass_test.cc new file mode 100644 index 0000000000000000000000000000000000000000..7f64eead9ea82457f504be9955f42ededa3650f4 --- /dev/null +++ b/paddle/fluid/lite/core/mir/elementwise_add_activation_fuse_pass_test.cc @@ -0,0 +1,117 @@ +// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include "paddle/fluid/lite/core/mir/elementwise_add_activation_fuse_pass.h" +#include +#include +#include +#include "paddle/fluid/framework/program_desc.h" +#include "paddle/fluid/lite/api/cxx_api.h" +#include "paddle/fluid/lite/core/compatible_tensor.h" +#include "paddle/fluid/lite/core/mir/graph_visualize_pass.h" +#include "paddle/fluid/lite/core/mir/use_passes.h" +#include "paddle/fluid/lite/core/op_registry.h" +#include "paddle/fluid/lite/core/program.h" + +namespace paddle { +namespace lite { +namespace mir { +namespace fusion { + +std::unique_ptr BuildGraph(framework::ProgramDesc* program_desc, + const std::shared_ptr& scope, + const std::vector& valid_places) { + auto* main_block = program_desc->MutableBlock(0); + + auto* add_1 = main_block->AppendOp(); + auto* add_2 = main_block->AppendOp(); + auto* relu_1 = main_block->AppendOp(); + auto* relu_2 = main_block->AppendOp(); + + main_block->Var("x_1"); + main_block->Var("y_1"); + main_block->Var("add_out_1"); + main_block->Var("relu_out_1"); + main_block->Var("y_2"); + main_block->Var("add_out_2"); + main_block->Var("out"); + + scope->Var("x_1")->GetMutable(); + scope->Var("y_1")->GetMutable(); + scope->Var("add_out_1")->GetMutable(); + scope->Var("relu_out_1")->GetMutable(); + scope->Var("y_2")->GetMutable(); + scope->Var("add_out_2")->GetMutable(); + scope->Var("out")->GetMutable(); + + add_1->SetType("elementwise_add"); + add_1->SetInput("X", {"x_1"}); + add_1->SetInput("Y", {"y_1"}); + add_1->SetOutput("Out", {"add_out_1"}); + add_1->SetAttr("axis", 1); + + relu_1->SetType("relu"); + relu_1->SetInput("X", {"add_out_1"}); + relu_1->SetOutput("Out", {"relu_out_1"}); + + add_2->SetType("elementwise_add"); + add_2->SetInput("X", {"relu_out_1"}); + add_2->SetInput("Y", {"y_2"}); + add_2->SetOutput("Out", {"add_out_2"}); + add_2->SetAttr("axis", 1); + + relu_2->SetType("relu"); + relu_2->SetInput("X", {"add_out_2"}); + relu_2->SetOutput("Out", {"out"}); + + program_desc->Flush(); + + lite::Program program(*program_desc->Proto(), scope, valid_places); + auto graph = std::unique_ptr(new SSAGraph()); + graph->Build(program, valid_places); + + return graph; +} + +TEST(elementwise_add_activation_fuse_pass, graph_test) { + framework::ProgramDesc program_desc; + std::vector places{{TARGET(kHost), PRECISION(kFloat)}}; + auto scope = std::make_shared(); + auto graph = BuildGraph(&program_desc, scope, places); + ASSERT_EQ(graph->nodes().size(), + 7UL /*vars*/ + 4UL /*ops*/ + 1UL /* SSAGraph tmp node*/); +} + +TEST(elementwise_add_activation_fuse_pass, fuse_test_op) { + framework::ProgramDesc program_desc; + std::vector places{{TARGET(kHost), PRECISION(kFloat)}}; + auto scope = std::make_shared(); + auto graph = BuildGraph(&program_desc, scope, places); + Visualize(graph.get()); + const int num_nodes = graph->nodes().size(); + auto* fuser = new ElementwiseAddActivationFusePass; + fuser->Apply(graph); + Visualize(graph.get()); + ASSERT_EQ(graph->nodes().size(), + num_nodes - 3UL * 2 /*nodes removed */ + 1UL * 2 /* fused nodes*/); +} + +} // namespace fusion +} // namespace mir +} // namespace lite +} // namespace paddle + +USE_LITE_OP(elementwise_add); +USE_LITE_OP(fusion_elementwise_add_activation); +USE_LITE_OP(relu); diff --git a/paddle/fluid/lite/core/mir/fc_fuse_pass_test.cc b/paddle/fluid/lite/core/mir/fc_fuse_pass_test.cc index 35efedb57971d19551ee144e47f87bcfd4d73ce4..e2f7dd1a87d2ef576d175857ae880c5828b61a79 100644 --- a/paddle/fluid/lite/core/mir/fc_fuse_pass_test.cc +++ b/paddle/fluid/lite/core/mir/fc_fuse_pass_test.cc @@ -17,7 +17,7 
@@ #include #include #include "paddle/fluid/lite/api/cxx_api.h" -#include "paddle/fluid/lite/core/mir/passes.h" +#include "paddle/fluid/lite/core/mir/use_passes.h" #include "paddle/fluid/lite/core/op_registry.h" DEFINE_string(model_dir, "", ""); diff --git a/paddle/fluid/lite/core/mir/fusion/CMakeLists.txt b/paddle/fluid/lite/core/mir/fusion/CMakeLists.txt index 2bf9296eb0ea37d999bdcb7fd55fd1b93439f668..321a2ab48d3248cf36706631af74febc40e54686 100644 --- a/paddle/fluid/lite/core/mir/fusion/CMakeLists.txt +++ b/paddle/fluid/lite/core/mir/fusion/CMakeLists.txt @@ -1,12 +1,15 @@ cc_library(fuse_fc SRCS fc_fuser.cc DEPS pattern_matcher_high_api) -cc_library(fuse_conv_elementwise_add_relu - SRCS conv_elementwise_add_relu_fuser.cc +cc_library(fuse_conv_elementwise_add_activation + SRCS conv_elementwise_add_activation_fuser.cc DEPS pattern_matcher_high_api) cc_library(fuse_conv_bn SRCS conv_bn_fuser.cc DEPS pattern_matcher_high_api) +cc_library(fuse_elementwise_add_activation + SRCS elementwise_add_activation_fuser.cc + DEPS pattern_matcher_high_api) cc_library(fuse_quant_dequant SRCS quant_dequant_op_fuser.cc @@ -14,9 +17,10 @@ cc_library(fuse_quant_dequant set(mir_fusers fuse_fc - fuse_conv_elementwise_add_relu + fuse_conv_elementwise_add_activation fuse_conv_bn fuse_quant_dequant + fuse_elementwise_add_activation CACHE INTERNAL "fusers") if (LITE_WITH_LIGHT_WEIGHT_FRAMEWORK) diff --git a/paddle/fluid/lite/core/mir/fusion/conv_bn_fuser.cc b/paddle/fluid/lite/core/mir/fusion/conv_bn_fuser.cc index b9d858a990d59c9006e0cfbab9b0afda95350528..d29f078513e2113db12c67be4d694a6dc8de99f9 100644 --- a/paddle/fluid/lite/core/mir/fusion/conv_bn_fuser.cc +++ b/paddle/fluid/lite/core/mir/fusion/conv_bn_fuser.cc @@ -84,7 +84,7 @@ void ConvBNFuser::InsertNewNode(SSAGraph* graph, const key2nodes_t& matched) { ->GetMutable(); size_t bias_size = bn_scale_t->data_size(); auto bn_scale_d = bn_scale_t->mutable_data(); - CHECK(bias_size == conv_weight_dims[0]) + CHECK_EQ(bias_size, static_cast(conv_weight_dims[0])) << "The BN bias's size should be equal to the size of the first " << "dim size of the conv weights"; diff --git a/paddle/fluid/lite/core/mir/fusion/conv_elementwise_add_relu_fuser.cc b/paddle/fluid/lite/core/mir/fusion/conv_elementwise_add_activation_fuser.cc similarity index 86% rename from paddle/fluid/lite/core/mir/fusion/conv_elementwise_add_relu_fuser.cc rename to paddle/fluid/lite/core/mir/fusion/conv_elementwise_add_activation_fuser.cc index 889586a3bc6bc980a19082046f189b25422b1ed2..4cf1dc8948dde31a54476783222396470c3ab9c6 100644 --- a/paddle/fluid/lite/core/mir/fusion/conv_elementwise_add_relu_fuser.cc +++ b/paddle/fluid/lite/core/mir/fusion/conv_elementwise_add_activation_fuser.cc @@ -12,7 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. -#include "paddle/fluid/lite/core/mir/fusion/conv_elementwise_add_relu_fuser.h" +#include "paddle/fluid/lite/core/mir/fusion/conv_elementwise_add_activation_fuser.h" #include #include @@ -21,7 +21,7 @@ namespace lite { namespace mir { namespace fusion { -void ConvElementwiseAddReLUFuser::BuildPattern() { +void ConvElementwiseAddActivationFuser::BuildPattern() { // create input nodes. 
auto* input = VarNode("input")->assert_is_op_input(conv_type_, "Input")->AsInput(); @@ -36,7 +36,8 @@ void ConvElementwiseAddReLUFuser::BuildPattern() { auto* add = OpNode("add", "elementwise_add") ->assert_is_op("elementwise_add") ->AsIntermediate(); - auto* relu = OpNode("relu", "relu")->assert_is_op("relu")->AsIntermediate(); + auto* act = + OpNode("act", act_type_)->assert_is_op(act_type_)->AsIntermediate(); // create intermediate nodes auto* conv2d_out = VarNode("conv2d_out") @@ -45,22 +46,23 @@ void ConvElementwiseAddReLUFuser::BuildPattern() { ->AsIntermediate(); auto* add_out = VarNode("add_out") ->assert_is_op_output("elementwise_add", "Out") - ->assert_is_op_input("relu", "X") + ->assert_is_op_input(act_type_, "X") ->AsIntermediate(); // create output node - auto* out = VarNode("output")->assert_is_op_output("relu", "Out")->AsOutput(); + auto* out = + VarNode("output")->assert_is_op_output(act_type_, "Out")->AsOutput(); // create topology. std::vector conv2d_inputs{filter, input}; std::vector add_inputs{conv2d_out, bias}; conv2d_inputs >> *conv2d >> *conv2d_out; add_inputs >> *add >> *add_out; - *add_out >> *relu >> *out; + *add_out >> *act >> *out; } -void ConvElementwiseAddReLUFuser::InsertNewNode(SSAGraph* graph, - const key2nodes_t& matched) { +void ConvElementwiseAddActivationFuser::InsertNewNode( + SSAGraph* graph, const key2nodes_t& matched) { auto op_desc = GenOpDesc(matched); auto conv_op = LiteOpRegistry::Global().Create(conv_type_); auto conv_old = matched.at("conv2d")->stmt()->op; @@ -76,7 +78,8 @@ void ConvElementwiseAddReLUFuser::InsertNewNode(SSAGraph* graph, IR_NODE_LINK_TO(new_op_node, matched.at("output")); } -cpp::OpDesc ConvElementwiseAddReLUFuser::GenOpDesc(const key2nodes_t& matched) { +cpp::OpDesc ConvElementwiseAddActivationFuser::GenOpDesc( + const key2nodes_t& matched) { auto* desc = matched.at("conv2d")->stmt()->op_info(); cpp::OpDesc op_desc = *desc; @@ -97,6 +100,7 @@ cpp::OpDesc ConvElementwiseAddReLUFuser::GenOpDesc(const key2nodes_t& matched) { op_desc.SetAttr("paddings", desc->GetAttr>("paddings")); op_desc.SetAttr("groups", desc->GetAttr("groups")); op_desc.SetAttr("dilations", desc->GetAttr>("dilations")); + // TODO(sangoly): support other activation types op_desc.SetAttr("fuse_relu", true); return op_desc; } diff --git a/paddle/fluid/lite/core/mir/fusion/conv_elementwise_add_activation_fuser.h b/paddle/fluid/lite/core/mir/fusion/conv_elementwise_add_activation_fuser.h new file mode 100644 index 0000000000000000000000000000000000000000..14a33613fdffce8c2d9d4044a11b5de4b5652da3 --- /dev/null +++ b/paddle/fluid/lite/core/mir/fusion/conv_elementwise_add_activation_fuser.h @@ -0,0 +1,47 @@ +// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
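BuildPattern() above wires the subgraph with an overloaded >> that reads left to right: add_inputs >> *add >> *add_out, then *add_out >> *act >> *out. A toy re-creation of that edge-building idiom; the types below are not the Paddle-Lite pattern-matcher API (the real helper also accepts a vector of source nodes on the left):

```cpp
// Toy types only; shows why "a >> b >> c" reads as the two edges a->b, b->c.
#include <iostream>
#include <string>
#include <utility>
#include <vector>

struct ToyNode {
  std::string name;
};

std::vector<std::pair<std::string, std::string>> edges;

// Record an edge and return the sink so the chain can continue.
ToyNode& operator>>(ToyNode& from, ToyNode& to) {
  edges.emplace_back(from.name, to.name);
  return to;
}

int main() {
  ToyNode add_out{"add_out"}, act{"relu"}, out{"output"};
  add_out >> act >> out;  // records add_out -> relu, then relu -> output
  for (const auto& e : edges) std::cout << e.first << " -> " << e.second << "\n";
  return 0;
}
```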
+ +#pragma once + +#include +#include +#include "paddle/fluid/lite/core/mir/pattern_matcher_high_api.h" + +namespace paddle { +namespace lite { +namespace mir { +namespace fusion { + +class ConvElementwiseAddActivationFuser : public FuseBase { + public: + explicit ConvElementwiseAddActivationFuser(const std::string& conv_type, + const std::string& act_type) { + CHECK(act_type == "relu") << "Only relu activation be supported now"; + conv_type_ = conv_type; + act_type_ = act_type; + } + + void BuildPattern() override; + void InsertNewNode(SSAGraph* graph, const key2nodes_t& matched) override; + + private: + cpp::OpDesc GenOpDesc(const key2nodes_t& matched) override; + std::string conv_type_; + std::string act_type_; +}; + +} // namespace fusion +} // namespace mir +} // namespace lite +} // namespace paddle diff --git a/paddle/fluid/lite/core/mir/fusion/elementwise_add_activation_fuser.cc b/paddle/fluid/lite/core/mir/fusion/elementwise_add_activation_fuser.cc new file mode 100644 index 0000000000000000000000000000000000000000..83b916eea3e47947083d4a41406d2ebd6918dfd2 --- /dev/null +++ b/paddle/fluid/lite/core/mir/fusion/elementwise_add_activation_fuser.cc @@ -0,0 +1,87 @@ +// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "paddle/fluid/lite/core/mir/fusion/elementwise_add_activation_fuser.h" +#include +#include + +namespace paddle { +namespace lite { +namespace mir { +namespace fusion { + +void ElementwiseAddActivationFuser::BuildPattern() { + // create input nodes. + auto* x = VarNode("x")->assert_is_op_input("elementwise_add", "X")->AsInput(); + auto* y = VarNode("y")->assert_is_op_input("elementwise_add", "Y")->AsInput(); + + // create op nodes + auto* add = OpNode("add", "elementwise_add") + ->assert_is_op("elementwise_add") + ->AsIntermediate(); + auto* act = + OpNode("act", act_type_)->assert_is_op(act_type_)->AsIntermediate(); + + // create intermediate nodes + auto* add_out = VarNode("add_out") + ->assert_is_op_output("elementwise_add", "Out") + ->assert_is_op_input(act_type_, "X") + ->AsIntermediate(); + + // create output node + auto* out = + VarNode("output")->assert_is_op_output(act_type_, "Out")->AsOutput(); + + // create topology. 
+ std::vector add_inputs{x, y}; + add_inputs >> *add >> *add_out; + *add_out >> *act >> *out; +} + +void ElementwiseAddActivationFuser::InsertNewNode(SSAGraph* graph, + const key2nodes_t& matched) { + auto op_desc = GenOpDesc(matched); + auto op = + LiteOpRegistry::Global().Create("fusion_elementwise_add_activation"); + auto old_op = matched.at("add")->stmt()->op; + auto* scope = old_op->scope(); + auto& valid_places = old_op->valid_places(); + op->Attach(op_desc, scope); + + auto* new_op_node = graph->GraphCreateInstructNode(op, valid_places); + + IR_NODE_LINK_TO(matched.at("x"), new_op_node); + IR_NODE_LINK_TO(matched.at("y"), new_op_node); + IR_NODE_LINK_TO(new_op_node, matched.at("output")); +} + +cpp::OpDesc ElementwiseAddActivationFuser::GenOpDesc( + const key2nodes_t& matched) { + auto* desc = matched.at("add")->stmt()->op_info(); + + cpp::OpDesc op_desc; + op_desc.SetType("fusion_elementwise_add_activation"); + op_desc.SetInput("X", {matched.at("x")->arg()->name}); + op_desc.SetInput("Y", {matched.at("y")->arg()->name}); + op_desc.SetOutput("Out", {matched.at("output")->arg()->name}); + + op_desc.SetAttr("axis", desc->GetAttr("axis")); + op_desc.SetAttr("act_type", act_type_); + return op_desc; +} + +} // namespace fusion +} // namespace mir +} // namespace lite +} // namespace paddle diff --git a/paddle/fluid/lite/core/mir/fusion/conv_elementwise_add_relu_fuser.h b/paddle/fluid/lite/core/mir/fusion/elementwise_add_activation_fuser.h similarity index 85% rename from paddle/fluid/lite/core/mir/fusion/conv_elementwise_add_relu_fuser.h rename to paddle/fluid/lite/core/mir/fusion/elementwise_add_activation_fuser.h index 3e21368234f36a5afafb08958930943599955090..bcd7b4cbcda84538f01cc4e418ce201500edbb26 100644 --- a/paddle/fluid/lite/core/mir/fusion/conv_elementwise_add_relu_fuser.h +++ b/paddle/fluid/lite/core/mir/fusion/elementwise_add_activation_fuser.h @@ -23,16 +23,16 @@ namespace lite { namespace mir { namespace fusion { -class ConvElementwiseAddReLUFuser : public FuseBase { +class ElementwiseAddActivationFuser : public FuseBase { public: - explicit ConvElementwiseAddReLUFuser(const std::string& conv_type) - : conv_type_(conv_type) {} + explicit ElementwiseAddActivationFuser(const std::string& act_type) + : act_type_(act_type) {} void BuildPattern() override; void InsertNewNode(SSAGraph* graph, const key2nodes_t& matched) override; private: cpp::OpDesc GenOpDesc(const key2nodes_t& matched) override; - std::string conv_type_; + std::string act_type_; }; } // namespace fusion diff --git a/paddle/fluid/lite/core/mir/generate_program_pass.cc b/paddle/fluid/lite/core/mir/generate_program_pass.cc index e74c71b778b4faa53d82beac66dba46d7f3668a5..75ff159015d6a090b0b0b926328e30ac4ec087a9 100644 --- a/paddle/fluid/lite/core/mir/generate_program_pass.cc +++ b/paddle/fluid/lite/core/mir/generate_program_pass.cc @@ -24,7 +24,7 @@ namespace lite { namespace mir { void GenerateProgramPass::Apply(const std::unique_ptr& graph) { - LOG(INFO) << "final program \n" << Visualize(graph.get()); + VLOG(4) << "final program \n" << Visualize(graph.get()); for (auto& item : graph->StmtTopologicalOrder()) { if (item->IsStmt()) { auto& stmt = item->AsStmt(); diff --git a/paddle/fluid/lite/core/mir/ssa_graph.cc b/paddle/fluid/lite/core/mir/ssa_graph.cc index b44cb0fa808962cde4a1d4c4cc0a640854c66851..7df9e2da42fc0fd3313a571b5e6429835e57695a 100644 --- a/paddle/fluid/lite/core/mir/ssa_graph.cc +++ b/paddle/fluid/lite/core/mir/ssa_graph.cc @@ -24,8 +24,10 @@ namespace lite { namespace mir { bool 
SSAGraph::CheckBidirectionalConnection() { - LOG(INFO) << "node count " << node_storage_.size(); + VLOG(4) << "node count " << node_storage_.size(); for (auto &node : node_storage_) { + if (node.IsStmt()) VLOG(4) << node.AsStmt().op_info()->Type(); + if (node.IsArg()) VLOG(4) << node.AsArg().name << " " << node.AsArg().id; for (auto *in : node.inlinks) { CHECK(in->outlinks.end() != std::find(in->outlinks.begin(), in->outlinks.end(), &node)); @@ -121,6 +123,7 @@ void SSAGraph::Build(const Program &program, std::unordered_map arg_update_node_map_; for (auto &op : program.ops()) { + VLOG(3) << op->op_info()->Type(); auto *op_node = GraphCreateInstructNode(op, valid_places); for (const std::string &name : op->op_info()->input_names()) { mir::Node *arg_node = nullptr; diff --git a/paddle/fluid/lite/core/mir/ssa_graph_test.cc b/paddle/fluid/lite/core/mir/ssa_graph_test.cc index 520fcf6e7502660aa4dcc3886f6a7af0b70abe58..f1a014e018368f55ad903053c68be93f16d2a8e9 100644 --- a/paddle/fluid/lite/core/mir/ssa_graph_test.cc +++ b/paddle/fluid/lite/core/mir/ssa_graph_test.cc @@ -17,7 +17,7 @@ #include #include "paddle/fluid/framework/program_desc.h" #include "paddle/fluid/lite/core/mir/graph_visualize_pass.h" -#include "paddle/fluid/lite/core/mir/passes.h" +#include "paddle/fluid/lite/core/mir/use_passes.h" #include "paddle/fluid/lite/core/op_registry.h" #include "paddle/fluid/lite/core/program_fake_utils.h" diff --git a/paddle/fluid/lite/core/mir/passes.h b/paddle/fluid/lite/core/mir/use_passes.h similarity index 83% rename from paddle/fluid/lite/core/mir/passes.h rename to paddle/fluid/lite/core/mir/use_passes.h index c3226819698ecf5644981796579c0fad99439c08..cb4ddc4f655cd8e1c46380b9f9b45f9ab5fa379b 100644 --- a/paddle/fluid/lite/core/mir/passes.h +++ b/paddle/fluid/lite/core/mir/use_passes.h @@ -15,14 +15,6 @@ #pragma once #include "paddle/fluid/lite/core/mir/pass_registry.h" -namespace paddle { -namespace lite { -namespace mir {} // namespace mir -} // namespace lite -} // namespace paddle - -#ifndef LITE_WITH_LIGHT_WEIGHT_FRAMEWORK -#endif USE_MIR_PASS(demo); USE_MIR_PASS(static_kernel_pick_pass); USE_MIR_PASS(variable_place_inference_pass); @@ -34,5 +26,6 @@ USE_MIR_PASS(runtime_context_assign_pass); USE_MIR_PASS(lite_conv_bn_fuse_pass); USE_MIR_PASS(graph_visualze); USE_MIR_PASS(lite_fc_fuse_pass); -USE_MIR_PASS(lite_conv_elementwise_add_act_fuse_pass); +USE_MIR_PASS(lite_conv_elementwise_add_activation_fuse_pass); +USE_MIR_PASS(lite_elementwise_add_activation_fuse_pass); USE_MIR_PASS(lite_quant_dequant_fuse_pass); diff --git a/paddle/fluid/lite/core/mir/variable_place_inference_pass_test.cc b/paddle/fluid/lite/core/mir/variable_place_inference_pass_test.cc index d6b8561c378cb2c18c159d6432cb09ac0a08ca0c..60fb873670029160c5895372f07b38834b0c9cb5 100644 --- a/paddle/fluid/lite/core/mir/variable_place_inference_pass_test.cc +++ b/paddle/fluid/lite/core/mir/variable_place_inference_pass_test.cc @@ -13,7 +13,7 @@ // limitations under the License. 
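The CheckBidirectionalConnection() hunk above walks every node and verifies that forward and backward adjacency lists agree before the graph is used. A standalone sketch of that invariant on a toy node type (not mir::Node), checking both directions as the name suggests:

```cpp
// Invariant: if A lists B as an outlink, B must list A as an inlink, and
// vice versa. Toy graph types for illustration only.
#include <algorithm>
#include <cassert>
#include <vector>

struct ToyNode {
  std::vector<ToyNode*> inlinks;
  std::vector<ToyNode*> outlinks;
};

bool Bidirectional(const std::vector<ToyNode>& nodes) {
  for (const auto& n : nodes) {
    for (const ToyNode* in : n.inlinks)
      if (std::find(in->outlinks.begin(), in->outlinks.end(), &n) ==
          in->outlinks.end())
        return false;
    for (const ToyNode* out : n.outlinks)
      if (std::find(out->inlinks.begin(), out->inlinks.end(), &n) ==
          out->inlinks.end())
        return false;
  }
  return true;
}

int main() {
  std::vector<ToyNode> g(2);
  g[0].outlinks.push_back(&g[1]);  // forward pointer only: inconsistent
  assert(!Bidirectional(g));
  g[1].inlinks.push_back(&g[0]);   // add the matching back-pointer
  assert(Bidirectional(g));
  return 0;
}
```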
#include -#include "paddle/fluid/lite/core/mir/passes.h" +#include "paddle/fluid/lite/core/mir/use_passes.h" #include "paddle/fluid/lite/core/optimizer.h" #include "paddle/fluid/lite/core/program_fake_utils.h" #include "paddle/fluid/lite/kernels/cuda/use_kernels.h" diff --git a/paddle/fluid/lite/core/op_lite.h b/paddle/fluid/lite/core/op_lite.h index 41aa3bb0f6da1d3002ef3d8d6274244c19687fdb..38cce73d29133b947b49a7e13e4c44f6a37f2455 100644 --- a/paddle/fluid/lite/core/op_lite.h +++ b/paddle/fluid/lite/core/op_lite.h @@ -54,9 +54,7 @@ class OpLite : public Registry { OpLite() = default; explicit OpLite(const std::string &type) : op_type_(type) {} explicit OpLite(const std::vector &valid_places) - : valid_places_(valid_places) { - LOG(INFO) << "valid places " << valid_places.size(); - } + : valid_places_(valid_places) {} void SetValidPlaces(const std::vector &places) { VLOG(3) << "valid places " << valid_places_.size(); diff --git a/paddle/fluid/lite/core/optimizer.h b/paddle/fluid/lite/core/optimizer.h index 3424024f14bd1909421782cbc80abab495260c7f..bbe7f0a70a63a5a6d4b2e7fd1a397722e17a1bd1 100644 --- a/paddle/fluid/lite/core/optimizer.h +++ b/paddle/fluid/lite/core/optimizer.h @@ -50,7 +50,10 @@ class Optimizer { RunPasses(std::vector{{ "lite_quant_dequant_fuse_pass", // "lite_conv_bn_fuse_pass", // - "lite_conv_elementwise_add_act_fuse_pass", // + "lite_conv_elementwise_add_activation_fuse_pass", // +#ifdef LITE_WITH_LIGHT_WEIGHT_FRAMEWORK + "lite_elementwise_add_activation_fuse_pass", // +#endif "lite_fc_fuse_pass", // "static_kernel_pick_pass", // "variable_place_inference_pass", // @@ -60,8 +63,6 @@ class Optimizer { "argument_type_display_pass", // "io_copy_kernel_pick_pass", // "variable_place_inference_pass", // -#ifndef LITE_WITH_LIGHT_WEIGHT_FRAMEWORK -#endif "runtime_context_assign_pass", // }}); } else { diff --git a/paddle/fluid/lite/core/optimizer_test.cc b/paddle/fluid/lite/core/optimizer_test.cc index ae543dc1b19768a9147af1c3114b46c546318eb2..4d66f769811737d568f7942779744af751cca2af 100644 --- a/paddle/fluid/lite/core/optimizer_test.cc +++ b/paddle/fluid/lite/core/optimizer_test.cc @@ -18,8 +18,8 @@ #include #include "paddle/fluid/lite/core/mir/generate_program_pass.h" #include "paddle/fluid/lite/core/mir/pass_manager.h" -#include "paddle/fluid/lite/core/mir/passes.h" #include "paddle/fluid/lite/core/mir/static_kernel_pick_pass.h" +#include "paddle/fluid/lite/core/mir/use_passes.h" #include "paddle/fluid/lite/core/program_fake_utils.h" namespace paddle { diff --git a/paddle/fluid/lite/core/profile/basic_profiler.cc b/paddle/fluid/lite/core/profile/basic_profiler.cc index 86d5cd39ea99a3b1433a0eadc4ffc06b00a221c7..75b1a48d3adea9be3e9f15da2b0f1001dd3c414f 100644 --- a/paddle/fluid/lite/core/profile/basic_profiler.cc +++ b/paddle/fluid/lite/core/profile/basic_profiler.cc @@ -19,7 +19,7 @@ namespace lite { namespace profile { const int BasicTimer::data_w = 10; -const int BasicTimer::name_w = 10; +const int BasicTimer::name_w = 15; } // namespace profile } // namespace lite diff --git a/paddle/fluid/lite/core/tensor.h b/paddle/fluid/lite/core/tensor.h index 27677e23a27366d052001a6828f12d1cfcc5decb..1d61f72063b8f6e40975e10ae6907c8264d4c117 100644 --- a/paddle/fluid/lite/core/tensor.h +++ b/paddle/fluid/lite/core/tensor.h @@ -91,6 +91,18 @@ class DDimBase { return os; } + friend bool operator==(const DDimBase &a, const DDimBase &b) { + if (a.size() != b.size()) return false; + for (size_t i = 0; i < a.size(); i++) { + if (a[i] != b[i]) return false; + } + return true; + } + + friend 
bool operator!=(const DDimBase &a, const DDimBase &b) { + return !(a == b); + } + private: DDimT *self() { return static_cast(this); } const DDimT *const_self() const { return static_cast(this); } @@ -154,6 +166,7 @@ class TensorBase { const void *raw_data() const { return const_self()->data(); } size_t data_size() const { return const_self()->dims().production(); } + size_t memory_size() const { return const_self()->memory_size(); } void ShareDataWith(const TensorBase &other) { self()->ShareDataWith(other); } void CopyDataFrom(const TensorBase &other) { self()->CopyDataFrom(other); } @@ -175,5 +188,13 @@ class TensorBase { } }; +template +bool TensorCompareWith(const TensorT &a, const TensorT &b) { + if (a.dims() != b.dims()) return false; + LOG(INFO) << "data_size: " << a.data_size(); + if (memcmp(a.raw_data(), b.raw_data(), a.data_size()) != 0) return false; + return true; +} + } // namespace lite } // namespace paddle diff --git a/paddle/fluid/lite/kernels/arm/CMakeLists.txt b/paddle/fluid/lite/kernels/arm/CMakeLists.txt index 7540d7e012df27c94de6c6398686310c4d59afad..95c8b95ec16aef37c6642df98c2b011b1d3a15a8 100644 --- a/paddle/fluid/lite/kernels/arm/CMakeLists.txt +++ b/paddle/fluid/lite/kernels/arm/CMakeLists.txt @@ -11,10 +11,12 @@ cc_library(scale_compute_arm SRCS scale_compute.cc DEPS ${lite_kernel_deps} math cc_library(softmax_compute_arm SRCS softmax_compute.cc DEPS ${lite_kernel_deps} math_arm) cc_library(conv_compute_arm SRCS conv_compute.cc DEPS ${lite_kernel_deps} math_arm) cc_library(batch_norm_compute_arm SRCS batch_norm_compute.cc DEPS ${lite_kernel_deps} math_arm) -cc_library(elementwise_add_compute_arm SRCS elementwise_add_compute.cc DEPS ${lite_kernel_deps} math_arm) +cc_library(elementwise_compute_arm SRCS elementwise_compute.cc DEPS ${lite_kernel_deps} math_arm) cc_library(pool_compute_arm SRCS pool_compute.cc DEPS ${lite_kernel_deps} math_arm) cc_library(split_compute_arm SRCS split_compute.cc DEPS ${lite_kernel_deps} math_arm) +cc_library(concat_compute_arm SRCS concat_compute.cc DEPS ${lite_kernel_deps} math_arm) cc_library(dropout_compute_arm SRCS dropout_compute.cc DEPS ${lite_kernel_deps} math_arm) +cc_library(transpose_compute_arm SRCS transpose_compute.cc DEPS ${lite_kernel_deps} math_arm) lite_cc_test(test_fc_compute_arm SRCS fc_compute_test.cc DEPS fc_compute_arm math_arm) lite_cc_test(test_activation_compute_arm SRCS activation_compute_test.cc DEPS activation_compute_arm) @@ -22,11 +24,13 @@ lite_cc_test(test_scale_compute_arm SRCS scale_compute_test.cc DEPS scale_comput lite_cc_test(test_softmax_compute_arm SRCS softmax_compute_test.cc DEPS softmax_compute_arm) lite_cc_test(test_conv_compute_arm SRCS conv_compute_test.cc DEPS conv_compute_arm) lite_cc_test(test_batch_norm_compute_arm SRCS batch_norm_compute_test.cc DEPS batch_norm_compute_arm) -lite_cc_test(test_elementwise_add_compute_arm SRCS elementwise_add_compute_test.cc DEPS elementwise_add_compute_arm) +lite_cc_test(test_elementwise_compute_arm SRCS elementwise_compute_test.cc DEPS elementwise_compute_arm) lite_cc_test(test_pool_compute_arm SRCS pool_compute_test.cc DEPS pool_compute_arm) lite_cc_test(test_mul_compute_arm SRCS mul_compute_test.cc DEPS mul_compute_arm) lite_cc_test(test_split_compute_arm SRCS split_compute_test.cc DEPS split_compute_arm) +lite_cc_test(test_concat_compute_arm SRCS concat_compute_test.cc DEPS concat_compute_arm) lite_cc_test(test_dropout_compute_arm SRCS dropout_compute_test.cc DEPS dropout_compute_arm) +lite_cc_test(test_transpose_compute_arm SRCS 
transpose_compute_test.cc DEPS transpose_compute_arm) set(arm_kernels fc_compute_arm @@ -36,10 +40,12 @@ set(arm_kernels softmax_compute_arm conv_compute_arm batch_norm_compute_arm - elementwise_add_compute_arm + elementwise_compute_arm pool_compute_arm split_compute_arm + concat_compute_arm dropout_compute_arm + transpose_compute_arm ) set(arm_kernels "${arm_kernels}" CACHE INTERNAL "arm kernels") diff --git a/paddle/fluid/lite/kernels/arm/concat_compute.cc b/paddle/fluid/lite/kernels/arm/concat_compute.cc new file mode 100644 index 0000000000000000000000000000000000000000..70adb8fc33ec0ab9c925f77748536f3372632b55 --- /dev/null +++ b/paddle/fluid/lite/kernels/arm/concat_compute.cc @@ -0,0 +1,87 @@ +// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "paddle/fluid/lite/kernels/arm/concat_compute.h" +#include +#include +#include "paddle/fluid/lite/arm/math/funcs.h" +#include "paddle/fluid/lite/core/compatible_tensor.h" +#include "paddle/fluid/lite/core/op_registry.h" +#include "paddle/fluid/lite/core/type_system.h" + +namespace paddle { +namespace lite { +namespace kernels { +namespace arm { + +std::vector stride_numel(const DDim& ddim) { + std::vector strides(ddim.size()); + strides[ddim.size() - 1] = ddim[ddim.size() - 1]; + for (int i = ddim.size() - 2; i >= 0; --i) { + strides[i] = strides[i + 1] * ddim[i]; + } + return strides; +} + +void ConcatCompute::Run() { + auto& param = Param(); + std::vector inputs = param.x; + auto* out = param.output; + int axis = param.axis; + out->mutable_data(); + + /// Sometimes direct copies will be faster, this maybe need deeply analysis. + if (axis == 0 && inputs.size() < 10) { + size_t output_offset = 0; + for (auto* in : inputs) { + auto in_stride = stride_numel(in->dims()); + auto out_stride = stride_numel(out->dims()); + void* dst = out->mutable_data() + output_offset; + const void* src = in->data(); +#if 0 + LOG(INFO) << "out_stride.size():" << out_stride.size(); + LOG(INFO) << "out_stride[0]" << out_stride[0]; + for (int i=0; i < out_stride.size(); ++i) { + LOG(INFO) << "out_stride[" << i << "]:" << out_stride[i]; + } + LOG(INFO) << "in_stride.size():" << in_stride.size(); + for (int i=0; i < in_stride.size(); ++i) { + LOG(INFO) << "in_stride[" << i << "]:" << in_stride[i]; + } +#endif + // src and dst tensor should have the same dims size. 
+ CHECK(in_stride.size() == out_stride.size()); + std::memcpy(dst, src, sizeof(float) * in_stride[0]); + output_offset += in_stride[0]; + } + } else { + std::vector inputs_concat(inputs.size()); + for (int j = 0; j < inputs.size(); ++j) { + inputs_concat[j] = inputs[j]; + } + lite::arm::math::concat_func(inputs_concat, axis, out); + } + return; +} + +} // namespace arm +} // namespace kernels +} // namespace lite +} // namespace paddle + +REGISTER_LITE_KERNEL(concat, kARM, kFloat, kNCHW, + paddle::lite::kernels::arm::ConcatCompute, def) + .BindInput("X", {LiteType::GetTensorTy(TARGET(kARM))}) + .BindOutput("Out", {LiteType::GetTensorTy(TARGET(kARM))}) + .Finalize(); diff --git a/paddle/fluid/lite/kernels/arm/concat_compute.h b/paddle/fluid/lite/kernels/arm/concat_compute.h new file mode 100644 index 0000000000000000000000000000000000000000..2e1ca89841fdcfef869143a9ac3833842dda527e --- /dev/null +++ b/paddle/fluid/lite/kernels/arm/concat_compute.h @@ -0,0 +1,37 @@ +// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once +#include +#include "paddle/fluid/lite/core/kernel.h" +#include "paddle/fluid/lite/operators/concat_op.h" + +namespace paddle { +namespace lite { +namespace kernels { +namespace arm { + +class ConcatCompute : public KernelLite { + public: + using param_t = operators::ConcatParam; + + void Run() override; + + virtual ~ConcatCompute() = default; +}; + +} // namespace arm +} // namespace kernels +} // namespace lite +} // namespace paddle diff --git a/paddle/fluid/lite/kernels/arm/concat_compute_test.cc b/paddle/fluid/lite/kernels/arm/concat_compute_test.cc new file mode 100644 index 0000000000000000000000000000000000000000..664f4ed116735ceb2d24be2ead887f7680f29230 --- /dev/null +++ b/paddle/fluid/lite/kernels/arm/concat_compute_test.cc @@ -0,0 +1,235 @@ +// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
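In ConcatCompute::Run() above, stride_numel()[0] is the total element count of a tensor, so the axis-0 fast path reduces to copying each input, in order, into the output buffer. A standalone sketch of the same arithmetic with std::vector standing in for lite::Tensor and hard-coded toy shapes:

```cpp
// Same recurrence as stride_numel() in the kernel: strides[i] is the number
// of elements covered by dims i..last, so strides[0] is the whole tensor.
#include <cassert>
#include <cstddef>
#include <cstdint>
#include <cstring>
#include <vector>

std::vector<int64_t> stride_numel(const std::vector<int64_t>& dims) {
  std::vector<int64_t> strides(dims.size());
  strides.back() = dims.back();
  for (int i = static_cast<int>(dims.size()) - 2; i >= 0; --i)
    strides[i] = strides[i + 1] * dims[i];
  return strides;
}

int main() {
  // Inputs shaped {2, 3} and {1, 3}; concat along axis 0 yields {3, 3}.
  std::vector<float> a = {0, 1, 2, 3, 4, 5};
  std::vector<float> b = {6, 7, 8};
  std::vector<float> out(a.size() + b.size());

  std::size_t offset = 0;
  const int64_t a_total = stride_numel({2, 3})[0];  // 6 elements
  std::memcpy(out.data() + offset, a.data(), sizeof(float) * a_total);
  offset += a_total;
  const int64_t b_total = stride_numel({1, 3})[0];  // 3 elements
  std::memcpy(out.data() + offset, b.data(), sizeof(float) * b_total);
  offset += b_total;

  for (std::size_t i = 0; i < out.size(); ++i)
    assert(out[i] == static_cast<float>(i));
  return 0;
}
```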
+ +#include "paddle/fluid/lite/kernels/arm/concat_compute.h" +#include +#include +#include +#include +#include "paddle/fluid/lite/arm/math/funcs.h" +#include "paddle/fluid/lite/core/lite_tensor.h" +#include "paddle/fluid/lite/core/op_registry.h" + +namespace paddle { +namespace lite { +namespace kernels { +namespace arm { + +bool infer_shape(const operators::ConcatParam& param) { + std::vector input_dims; + for (auto p : param.x) { + input_dims.push_back(p->dims()); + } + size_t axis = static_cast(param.axis); + const size_t n = input_dims.size(); + CHECK_GT_OR_FALSE(n, 0); + auto& out_dims = input_dims[0]; + size_t in_zero_dims_size = out_dims.size(); + for (size_t i = 1; i < n; i++) { + for (size_t j = 0; j < in_zero_dims_size; j++) { + if (j == axis) { + out_dims[axis] += input_dims[i][j]; + } else { + CHECK_EQ_OR_FALSE(out_dims[j], input_dims[i][j]); + } + } + } + if (out_dims[axis] < 0) { + out_dims[axis] = -1; + } + // Set output dims + param.output->Resize(lite::DDim(out_dims)); + return true; +} + +void concat_compute_ref(const operators::ConcatParam& param) { + std::vector input = param.x; + int axis = param.axis; + infer_shape(param); + + lite::Tensor* output = param.output; + int num = input.size(); + int rows = 1; + auto dim_0 = input[0]->dims(); + for (int i = 0; i < axis; ++i) { + rows *= dim_0[i]; + } + int out_rows = rows, out_cols = 0; + + std::vector input_cols(input.size()); + for (int i = 0; i < num; ++i) { + int input_i_numel = input[i]->dims().size() == 0 ? 0 : 1; + for (int didx = 0; didx < input[i]->dims().size(); ++didx) { + input_i_numel *= input[i]->dims()[didx]; + } + int t_cols = input_i_numel / rows; + out_cols += t_cols; + input_cols[i] = t_cols; + } + + // computation + auto output_data = output->mutable_data(); + int col_idx = 0; + for (int j = 0; j < num; ++j) { + int col_len = input_cols[j]; + auto input_data = input[j]->data(); + for (int k = 0; k < out_rows; ++k) { + memcpy(output_data + k * out_cols + col_idx, input_data + k * col_len, + sizeof(float) * col_len); + } + col_idx += col_len; + } +} + +TEST(concat_arm, init) { + ConcatCompute concat; + ASSERT_EQ(concat.precision(), PRECISION(kFloat)); + ASSERT_EQ(concat.target(), TARGET(kARM)); +} + +TEST(concat_arm, compute_input_single) { + ConcatCompute concat; + operators::ConcatParam param; + + LOG(INFO) << "test concat start"; + lite::Tensor output; + lite::Tensor output_ref; + lite::Tensor tensorA; + DDimLite ddimA({10, 4, 3, 2}); + tensorA.Resize(ddimA); + + for (int i = 0; i < ddimA.data()[0] * ddimA.data()[1] * ddimA.data()[2] * + ddimA.data()[3]; + i++) { + tensorA.mutable_data()[i] = i; + } + + param.x.push_back(&tensorA); + for (int cur_axis : {0, 1}) { + param.output = &output; + param.axis = cur_axis; + CHECK(infer_shape(param)); + concat.SetParam(param); + LOG(INFO) << "test concat start cur_axis:" << cur_axis; + + concat.Run(); + LOG(INFO) << "concat.Run end"; + param.output = &output_ref; + LOG(INFO) << "concat_compute_ref start"; + concat_compute_ref(param); + LOG(INFO) << "concat_compute_ref end"; + + auto* output_data = output.data(); + auto* output_ref_data = output_ref.data(); + for (int i = 0; i < (ddimA.data()[0]) * ddimA.data()[1] * ddimA.data()[2] * + ddimA.data()[3]; + i++) { + // LOG(INFO) << "output[" << i << "]:" << output_data[i] << " + // output_ref_data[" << i << "]:" << output_ref_data[i]; + EXPECT_NEAR(output_data[i], output_ref_data[i], 1e-5); + } + } +} + +TEST(concat_arm, compute_input_multi) { + ConcatCompute concat; + operators::ConcatParam param; + + LOG(INFO) << 
"test concat start"; + // init param + // x: tensorA, tensorB, tensorC, tensorD + // axis: 0 + lite::Tensor output; + lite::Tensor output_ref; + lite::Tensor tensorA; + lite::Tensor tensorB; + lite::Tensor tensorC; + lite::Tensor tensorD; + + DDimLite ddimA({10, 4, 3, 2}); + DDimLite ddimB({20, 4, 3, 2}); + DDimLite ddimC({30, 4, 3, 2}); + DDimLite ddimD({40, 4, 3, 2}); + + tensorA.Resize(ddimA); + tensorB.Resize(ddimB); + tensorC.Resize(ddimC); + tensorD.Resize(ddimD); + + for (int i = 0; i < ddimA.data()[0] * ddimA.data()[1] * ddimA.data()[2] * + ddimA.data()[3]; + i++) { + tensorA.mutable_data()[i] = i; + } + for (int i = 0; i < ddimB.data()[0] * ddimB.data()[1] * ddimB.data()[2] * + ddimB.data()[3]; + i++) { + tensorB.mutable_data()[i] = i + 1; + } + for (int i = 0; i < ddimC.data()[0] * ddimC.data()[1] * ddimC.data()[2] * + ddimC.data()[3]; + i++) { + tensorC.mutable_data()[i] = i + 2; + } + for (int i = 0; i < ddimD.data()[0] * ddimD.data()[1] * ddimD.data()[2] * + ddimD.data()[3]; + i++) { + tensorD.mutable_data()[i] = i + 3; + } + + param.x.push_back(&tensorA); + param.x.push_back(&tensorB); + param.x.push_back(&tensorC); + param.x.push_back(&tensorD); + for (int cur_axis : {0}) { + param.output = &output; + param.axis = cur_axis; + CHECK(infer_shape(param)); + concat.SetParam(param); + LOG(INFO) << "test concat start cur_axis:" << cur_axis; + + concat.Run(); + LOG(INFO) << "concat.Run end"; + param.output = &output_ref; + LOG(INFO) << "concat_compute_ref start"; + concat_compute_ref(param); + LOG(INFO) << "concat_compute_ref end"; + + auto* output_data = output.data(); + auto* output_ref_data = output_ref.data(); + int elem_num = (ddimA.data()[0] + ddimB.data()[0] + ddimC.data()[0] + + ddimD.data()[0]) * + ddimA.data()[1] * ddimA.data()[2] * ddimA.data()[3]; + for (int i = 0; i < elem_num; i++) { + // LOG(INFO) << "output[" << i << "]:" << output_data[i] << " + // output_ref_data[" << i << "]:" << output_ref_data[i]; + EXPECT_NEAR(output_data[i], output_ref_data[i], 1e-5); + } + } +} + +TEST(concat, retrive_op) { + auto concat = + KernelRegistry::Global().Create( + "concat"); + ASSERT_FALSE(concat.empty()); + ASSERT_TRUE(concat.front()); +} + +} // namespace arm +} // namespace kernels +} // namespace lite +} // namespace paddle + +USE_LITE_KERNEL(concat, kARM, kFloat, kNCHW, def); diff --git a/paddle/fluid/lite/kernels/arm/conv_compute.cc b/paddle/fluid/lite/kernels/arm/conv_compute.cc index 5e9ddb6271684120c8cab68e6e10bade3a3ab015..a7cd385be9873837307fc89d8ac5a1a2ed7171a9 100644 --- a/paddle/fluid/lite/kernels/arm/conv_compute.cc +++ b/paddle/fluid/lite/kernels/arm/conv_compute.cc @@ -28,6 +28,8 @@ void ConvCompute::PrepareForRun() { auto o_dims = param.output->dims(); auto& ctx = this->ctx_->template As(); + // TODO(xxx): make api and expose it + ctx.SetRunMode(LITE_POWER_HIGH, 4); int win = x_dims[3]; // nchw int hin = x_dims[2]; diff --git a/paddle/fluid/lite/kernels/arm/elementwise_add_compute.cc b/paddle/fluid/lite/kernels/arm/elementwise_add_compute.cc deleted file mode 100644 index e9d9f4927b7ee18b3e18efa69a00dcb1c813bf3b..0000000000000000000000000000000000000000 --- a/paddle/fluid/lite/kernels/arm/elementwise_add_compute.cc +++ /dev/null @@ -1,65 +0,0 @@ -// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. 
-// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#include "paddle/fluid/lite/kernels/arm/elementwise_add_compute.h" -#include "paddle/fluid/lite/arm/math/funcs.h" - -namespace paddle { -namespace lite { -namespace kernels { -namespace arm { - -void ElementwiseAddCompute::Run() { - auto& param = Param(); - const float* x_data = param.X->data(); - const float* y_data = param.Y->data(); - float* out_data = param.Out->mutable_data(); - int axis = param.axis; - auto x_dims = param.X->dims(); - auto y_dims = param.Y->dims(); - if (axis < 0) { - axis = x_dims.size() - y_dims.size(); - } - if (x_dims.size() == y_dims.size()) { - lite::arm::math::elementwise_add(x_data, y_data, out_data, - x_dims.production()); - } else { - int batch = 1; - int channels = 1; - int num = 1; - for (int i = 0; i < axis; ++i) { - batch *= x_dims[i]; - } - for (int i = 0; i < y_dims.size(); ++i) { - channels *= y_dims[i]; - } - for (int i = y_dims.size() + axis; i < x_dims.size(); ++i) { - num *= x_dims[i]; - } - lite::arm::math::elementwise_add_axis(x_data, y_data, out_data, batch, - channels, num); - } -} - -} // namespace arm -} // namespace kernels -} // namespace lite -} // namespace paddle - -REGISTER_LITE_KERNEL(elementwise_add, kARM, kFloat, kNCHW, - paddle::lite::kernels::arm::ElementwiseAddCompute, def) - .BindInput("X", {LiteType::GetTensorTy(TARGET(kARM))}) - .BindInput("Y", {LiteType::GetTensorTy(TARGET(kARM))}) - .BindOutput("Out", {LiteType::GetTensorTy(TARGET(kARM))}) - .Finalize(); diff --git a/paddle/fluid/lite/kernels/arm/elementwise_add_compute_test.cc b/paddle/fluid/lite/kernels/arm/elementwise_add_compute_test.cc deleted file mode 100644 index 20b998dc6cfa8a9606fcf0f716470366fdd60338..0000000000000000000000000000000000000000 --- a/paddle/fluid/lite/kernels/arm/elementwise_add_compute_test.cc +++ /dev/null @@ -1,143 +0,0 @@ -// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -#include "paddle/fluid/lite/kernels/arm/elementwise_add_compute.h" -#include -#include -#include "paddle/fluid/lite/core/op_registry.h" - -namespace paddle { -namespace lite { -namespace kernels { -namespace arm { - -TEST(elementwise_add_arm, retrive_op) { - auto elementwise_add = - KernelRegistry::Global().Create( - "elementwise_add"); - ASSERT_FALSE(elementwise_add.empty()); - ASSERT_TRUE(elementwise_add.front()); -} - -TEST(elementwise_add_arm, init) { - ElementwiseAddCompute elementwise_add; - ASSERT_EQ(elementwise_add.precision(), PRECISION(kFloat)); - ASSERT_EQ(elementwise_add.target(), TARGET(kARM)); -} - -template -void elementwise_add_compute_ref(const operators::ElementwiseParam& param) { - const dtype* x_data = param.X->data(); - const dtype* y_data = param.Y->data(); - dtype* out_data = param.Out->mutable_data(); - auto x_dims = param.X->dims(); - auto y_dims = param.Y->dims(); - int axis = param.axis; - if (axis < 0) { - axis = x_dims.size() - y_dims.size(); - } - int batch = 1; - int channels = 1; - int num = 1; - for (int i = 0; i < axis; ++i) { - batch *= x_dims[i]; - } - for (int i = 0; i < y_dims.size(); ++i) { - channels *= y_dims[i]; - } - for (int i = y_dims.size() + axis; i < x_dims.size(); ++i) { - num *= x_dims[i]; - } - for (int i = 0; i < batch; ++i) { - for (int j = 0; j < channels; ++j) { - int offset = (i * channels + j) * num; - const dtype* din_ptr = x_data + offset; - const dtype diny_data = y_data[j]; - dtype* dout_ptr = out_data + offset; - for (int k = 0; k < num; ++k) { - *dout_ptr = *din_ptr + diny_data; - dout_ptr++; - din_ptr++; - } - } - } -} - -TEST(elementwise_add, compute) { - ElementwiseAddCompute elementwise_add; - operators::ElementwiseParam param; - lite::Tensor x, y, output, output_ref; - - for (auto n : {1, 3, 4, 11}) { - for (auto c : {1, 3, 4, 11}) { - for (auto h : {1, 3, 4, 11}) { - for (auto w : {1, 3, 4, 11}) { - for (auto axis : {-1, 0, 1, 2, 3}) { - for (auto yd : - {std::vector({n}), std::vector({c}), - std::vector({h}), std::vector({w}), - std::vector({n, c}), std::vector({c, h}), - std::vector({h, w}), std::vector({n, c, h}), - std::vector({c, h, w}), - std::vector({n, c, h, w})}) { - auto x_dim = DDim(std::vector({n, c, h, w})); - auto y_dim = DDim(yd); - int axis_t = axis < 0 ? 
x_dim.size() - y_dim.size() : axis; - - if (axis_t + y_dim.size() > 4) continue; - bool flag = false; - for (int i = 0; i < y_dim.size(); i++) { - if (x_dim[i + axis_t] != y_dim[i]) flag = true; - } - if (flag) continue; - - x.Resize(x_dim); - y.Resize(y_dim); - output.Resize(x_dim); - output_ref.Resize(x_dim); - auto* x_data = x.mutable_data(); - auto* y_data = y.mutable_data(); - auto* output_data = output.mutable_data(); - auto* output_ref_data = output_ref.mutable_data(); - for (int i = 0; i < x_dim.production(); i++) { - x_data[i] = i; - } - for (int i = 0; i < y_dim.production(); i++) { - y_data[i] = i; - } - param.X = &x; - param.Y = &y; - param.axis = axis; - param.Out = &output; - elementwise_add.SetParam(param); - elementwise_add.Run(); - param.Out = &output_ref; - elementwise_add_compute_ref(param); - for (int i = 0; i < output.dims().production(); i++) { - EXPECT_NEAR(output_data[i], output_ref_data[i], 1e-5); - } - } - } - } - } - } - } -} - -} // namespace arm -} // namespace kernels -} // namespace lite -} // namespace paddle - -USE_LITE_KERNEL(elementwise_add, kARM, kFloat, kNCHW, def); diff --git a/paddle/fluid/lite/kernels/arm/elementwise_compute.cc b/paddle/fluid/lite/kernels/arm/elementwise_compute.cc new file mode 100644 index 0000000000000000000000000000000000000000..c3b9b41cde1e70ecef580f72cfbb6c558258631d --- /dev/null +++ b/paddle/fluid/lite/kernels/arm/elementwise_compute.cc @@ -0,0 +1,111 @@ +// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
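The replacement kernel that follows (elementwise_compute.cc) chooses between a flat add and a broadcast add by collapsing x's shape into (pre, n, post) segments around y's shape; output element (i, j, k) is then x[(i * n + j) * post + k] + y[j]. A standalone sketch of that decomposition with concrete shapes (the real is_broadcast() additionally reports "no broadcast" when the two ranks match):

```cpp
// Collapse x's dims into pre (before axis), n (y's extent), post (after y).
#include <cassert>
#include <cstddef>
#include <vector>

void pre_n_post(const std::vector<int>& x_dims, const std::vector<int>& y_dims,
                int axis, int* pre, int* n, int* post) {
  if (axis < 0) axis = static_cast<int>(x_dims.size() - y_dims.size());
  *pre = *n = *post = 1;
  for (int i = 0; i < axis; ++i) *pre *= x_dims[i];
  for (std::size_t i = 0; i < y_dims.size(); ++i) *n *= y_dims[i];
  for (std::size_t i = axis + y_dims.size(); i < x_dims.size(); ++i)
    *post *= x_dims[i];
}

int main() {
  int pre, n, post;
  // x is NCHW {2, 3, 4, 5}; y {3, 4} broadcasts starting at axis 1.
  pre_n_post({2, 3, 4, 5}, {3, 4}, 1, &pre, &n, &post);
  assert(pre == 2 && n == 12 && post == 5);
  // y covering all of x (axis 0): the whole tensor collapses into n.
  pre_n_post({2, 3, 4, 5}, {2, 3, 4, 5}, 0, &pre, &n, &post);
  assert(pre == 1 && n == 120 && post == 1);
  return 0;
}
```

The same decomposition is what the fused fusion_elementwise_add_activation kernel reuses before applying relu in the same loop.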
+ +#include "paddle/fluid/lite/kernels/arm/elementwise_compute.h" +#include +#include "paddle/fluid/lite/arm/math/funcs.h" + +namespace paddle { +namespace lite { +namespace kernels { +namespace arm { + +inline bool is_broadcast(const DDim& x_dims, const DDim& y_dims, int axis, + int* pre, int* n, int* post) { + if (axis < 0) { + axis = x_dims.size() - y_dims.size(); + } + if (x_dims.size() == y_dims.size()) { + return false; + } + *pre = 1; + *n = 1; + *post = 1; + for (int i = 0; i < axis; ++i) { + (*pre) *= x_dims[i]; + } + for (int i = 0; i < y_dims.size(); ++i) { + CHECK_EQ(x_dims[i + axis], y_dims[i]) << "Broadcast dimension mismatch."; + (*n) *= y_dims[i]; + } + for (int i = axis + y_dims.size(); i < x_dims.size(); ++i) { + (*post) *= x_dims[i]; + } + return true; +} + +void ElementwiseAddCompute::Run() { + auto& param = Param(); + const float* x_data = param.X->data(); + const float* y_data = param.Y->data(); + float* out_data = param.Out->mutable_data(); + int axis = param.axis; + auto x_dims = param.X->dims(); + auto y_dims = param.Y->dims(); + int pre, n, post; + if (is_broadcast(x_dims, y_dims, axis, &pre, &n, &post)) { + lite::arm::math::elementwise_add_broadcast(x_data, y_data, out_data, pre, n, + post); + } else { + lite::arm::math::elementwise_add(x_data, y_data, out_data, + x_dims.production()); + } +} + +void ElementwiseAddActivationCompute::Run() { + auto& param = Param(); + const float* x_data = param.X->data(); + const float* y_data = param.Y->data(); + float* out_data = param.Out->mutable_data(); + int axis = param.axis; + std::string act_type = param.act_type; + auto x_dims = param.X->dims(); + auto y_dims = param.Y->dims(); + int pre, n, post; + if (is_broadcast(x_dims, y_dims, axis, &pre, &n, &post)) { + if (act_type == "relu") { + lite::arm::math::elementwise_add_relu_broadcast(x_data, y_data, out_data, + pre, n, post); + } else { + LOG(FATAL) << "unsupported Activation type: " << act_type; + } + } else { + if (act_type == "relu") { + lite::arm::math::elementwise_add_relu(x_data, y_data, out_data, + x_dims.production()); + } else { + LOG(FATAL) << "unsupported Activation type: " << act_type; + } + } +} + +} // namespace arm +} // namespace kernels +} // namespace lite +} // namespace paddle + +REGISTER_LITE_KERNEL(elementwise_add, kARM, kFloat, kNCHW, + paddle::lite::kernels::arm::ElementwiseAddCompute, def) + .BindInput("X", {LiteType::GetTensorTy(TARGET(kARM))}) + .BindInput("Y", {LiteType::GetTensorTy(TARGET(kARM))}) + .BindOutput("Out", {LiteType::GetTensorTy(TARGET(kARM))}) + .Finalize(); + +REGISTER_LITE_KERNEL( + fusion_elementwise_add_activation, kARM, kFloat, kNCHW, + paddle::lite::kernels::arm::ElementwiseAddActivationCompute, def) + .BindInput("X", {LiteType::GetTensorTy(TARGET(kARM))}) + .BindInput("Y", {LiteType::GetTensorTy(TARGET(kARM))}) + .BindOutput("Out", {LiteType::GetTensorTy(TARGET(kARM))}) + .Finalize(); diff --git a/paddle/fluid/lite/kernels/arm/elementwise_add_compute.h b/paddle/fluid/lite/kernels/arm/elementwise_compute.h similarity index 85% rename from paddle/fluid/lite/kernels/arm/elementwise_add_compute.h rename to paddle/fluid/lite/kernels/arm/elementwise_compute.h index 9939509d0be25eadccdb563e802c98291dea751b..bb80c61221eea2acaad397895d3fbad880e9dce3 100644 --- a/paddle/fluid/lite/kernels/arm/elementwise_add_compute.h +++ b/paddle/fluid/lite/kernels/arm/elementwise_compute.h @@ -30,6 +30,14 @@ class ElementwiseAddCompute virtual ~ElementwiseAddCompute() = default; }; +class ElementwiseAddActivationCompute + : public KernelLite { + 
public: + void Run() override; + + virtual ~ElementwiseAddActivationCompute() = default; +}; + } // namespace arm } // namespace kernels } // namespace lite diff --git a/paddle/fluid/lite/kernels/arm/elementwise_compute_test.cc b/paddle/fluid/lite/kernels/arm/elementwise_compute_test.cc new file mode 100644 index 0000000000000000000000000000000000000000..4e242c8cc583ecb418ad0c1ebd9dcbde0003b9e7 --- /dev/null +++ b/paddle/fluid/lite/kernels/arm/elementwise_compute_test.cc @@ -0,0 +1,263 @@ +// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "paddle/fluid/lite/kernels/arm/elementwise_compute.h" +#include +#include +#include +#include "paddle/fluid/lite/core/op_registry.h" + +namespace paddle { +namespace lite { +namespace kernels { +namespace arm { + +TEST(elementwise_add_arm, retrive_op) { + auto elementwise_add = + KernelRegistry::Global().Create( + "elementwise_add"); + ASSERT_FALSE(elementwise_add.empty()); + ASSERT_TRUE(elementwise_add.front()); +} + +TEST(elementwise_add_arm, init) { + ElementwiseAddCompute elementwise_add; + ASSERT_EQ(elementwise_add.precision(), PRECISION(kFloat)); + ASSERT_EQ(elementwise_add.target(), TARGET(kARM)); +} + +template +void elementwise_compute_ref(const operators::ElementwiseParam& param, + const std::string elt_type, + const std::string act_type) { + const dtype* x_data = param.X->data(); + const dtype* y_data = param.Y->data(); + dtype* out_data = param.Out->mutable_data(); + auto x_dims = param.X->dims(); + auto y_dims = param.Y->dims(); + int axis = param.axis; + if (axis < 0) { + axis = x_dims.size() - y_dims.size(); + } + int batch = 1; + int channels = 1; + int num = 1; + for (int i = 0; i < axis; ++i) { + batch *= x_dims[i]; + } + for (int i = 0; i < y_dims.size(); ++i) { + channels *= y_dims[i]; + } + for (int i = y_dims.size() + axis; i < x_dims.size(); ++i) { + num *= x_dims[i]; + } + // do elementwise add/sub/max... + if (elt_type == "add") { + for (int i = 0; i < batch; ++i) { + for (int j = 0; j < channels; ++j) { + int offset = (i * channels + j) * num; + const dtype* din_ptr = x_data + offset; + const dtype diny_data = y_data[j]; + dtype* dout_ptr = out_data + offset; + for (int k = 0; k < num; ++k) { + *dout_ptr = *din_ptr + diny_data; + dout_ptr++; + din_ptr++; + } + } + } + } else if (elt_type == "sub") { + for (int i = 0; i < batch; ++i) { + for (int j = 0; j < channels; ++j) { + int offset = (i * channels + j) * num; + const dtype* din_ptr = x_data + offset; + const dtype diny_data = y_data[j]; + dtype* dout_ptr = out_data + offset; + for (int k = 0; k < num; ++k) { + *dout_ptr = *din_ptr - diny_data; + dout_ptr++; + din_ptr++; + } + } + } + } else { + LOG(FATAL) << "unsupported Elementwise type: " << elt_type; + } + // do activation relu/sigmod... 
+ if (act_type.size() > 0) { + if (act_type == "relu") { + for (int i = 0; i < batch; ++i) { + for (int j = 0; j < channels; ++j) { + dtype* dout_ptr = out_data + (i * channels + j) * num; + for (int k = 0; k < num; ++k) { + *dout_ptr = *dout_ptr > 0.0f ? *dout_ptr : 0.0f; + dout_ptr++; + } + } + } + } else { + LOG(FATAL) << "unsupported Activation type: " << elt_type; + } + } +} + +TEST(elementwise_add, compute) { + ElementwiseAddCompute elementwise_add; + operators::ElementwiseParam param; + lite::Tensor x, y, output, output_ref; + + for (auto n : {1, 3, 4, 11}) { + for (auto c : {1, 3, 4, 11}) { + for (auto h : {1, 3, 4, 11}) { + for (auto w : {1, 3, 4, 11}) { + for (auto axis : {-1, 0, 1, 2, 3}) { + for (auto yd : + {std::vector({n}), std::vector({c}), + std::vector({h}), std::vector({w}), + std::vector({n, c}), std::vector({c, h}), + std::vector({h, w}), std::vector({n, c, h}), + std::vector({c, h, w}), + std::vector({n, c, h, w})}) { + auto x_dim = DDim(std::vector({n, c, h, w})); + auto y_dim = DDim(yd); + int axis_t = axis < 0 ? x_dim.size() - y_dim.size() : axis; + + if (axis_t + y_dim.size() > 4) continue; + bool flag = false; + for (int i = 0; i < y_dim.size(); i++) { + if (x_dim[i + axis_t] != y_dim[i]) flag = true; + } + if (flag) continue; + + x.Resize(x_dim); + y.Resize(y_dim); + output.Resize(x_dim); + output_ref.Resize(x_dim); + auto* x_data = x.mutable_data(); + auto* y_data = y.mutable_data(); + auto* output_data = output.mutable_data(); + auto* output_ref_data = output_ref.mutable_data(); + for (int i = 0; i < x_dim.production(); i++) { + x_data[i] = i; + } + for (int i = 0; i < y_dim.production(); i++) { + y_data[i] = i; + } + param.X = &x; + param.Y = &y; + param.axis = axis; + param.Out = &output; + elementwise_add.SetParam(param); + elementwise_add.Run(); + param.Out = &output_ref; + elementwise_compute_ref(param, "add", ""); + for (int i = 0; i < output.dims().production(); i++) { + EXPECT_NEAR(output_data[i], output_ref_data[i], 1e-5); + } + } + } + } + } + } + } +} + +TEST(fusion_elementwise_add_activation_arm, retrive_op) { + auto fusion_elementwise_add_activation = + KernelRegistry::Global().Create( + "fusion_elementwise_add_activation"); + ASSERT_FALSE(fusion_elementwise_add_activation.empty()); + ASSERT_TRUE(fusion_elementwise_add_activation.front()); +} + +TEST(fusion_elementwise_add_activation_arm, init) { + ElementwiseAddActivationCompute fusion_elementwise_add_activation; + ASSERT_EQ(fusion_elementwise_add_activation.precision(), PRECISION(kFloat)); + ASSERT_EQ(fusion_elementwise_add_activation.target(), TARGET(kARM)); +} + +TEST(fusion_elementwise_add_activation_arm, compute) { + ElementwiseAddActivationCompute fusion_elementwise_add_activation; + operators::FusionElementwiseActivationParam param; + lite::Tensor x, y, output, output_ref; + + for (auto act_type : {"relu"}) { + for (auto n : {1, 3, 4, 11}) { + for (auto c : {1, 3, 4, 11}) { + for (auto h : {1, 3, 4, 11}) { + for (auto w : {1, 3, 4, 11}) { + for (auto axis : {-1, 0, 1, 2, 3}) { + for (auto yd : + {std::vector({n}), std::vector({c}), + std::vector({h}), std::vector({w}), + std::vector({n, c}), std::vector({c, h}), + std::vector({h, w}), + std::vector({n, c, h}), + std::vector({c, h, w}), + std::vector({n, c, h, w})}) { + auto x_dim = DDim(std::vector({n, c, h, w})); + auto y_dim = DDim(yd); + int axis_t = axis < 0 ? 
x_dim.size() - y_dim.size() : axis; + + if (axis_t + y_dim.size() > 4) continue; + bool flag = false; + for (int i = 0; i < y_dim.size(); i++) { + if (x_dim[i + axis_t] != y_dim[i]) flag = true; + } + if (flag) continue; + + x.Resize(x_dim); + y.Resize(y_dim); + output.Resize(x_dim); + output_ref.Resize(x_dim); + auto* x_data = x.mutable_data(); + auto* y_data = y.mutable_data(); + auto* output_data = output.mutable_data(); + auto* output_ref_data = output_ref.mutable_data(); + for (int i = 0; i < x_dim.production(); i++) { + float sign = i % 3 == 0 ? -1.0f : 1.0f; + x_data[i] = i * sign; + } + for (int i = 0; i < y_dim.production(); i++) { + float sign = i % 2 == 0 ? 0.5f : -0.5f; + y_data[i] = i * sign; + } + param.X = &x; + param.Y = &y; + param.axis = axis; + param.Out = &output; + param.act_type = act_type; + fusion_elementwise_add_activation.SetParam(param); + fusion_elementwise_add_activation.Run(); + param.Out = &output_ref; + elementwise_compute_ref(param, "add", act_type); + for (int i = 0; i < output.dims().production(); i++) { + EXPECT_NEAR(output_data[i], output_ref_data[i], 1e-5); + } + } + } + } + } + } + } + } +} + +} // namespace arm +} // namespace kernels +} // namespace lite +} // namespace paddle + +USE_LITE_KERNEL(elementwise_add, kARM, kFloat, kNCHW, def); +USE_LITE_KERNEL(fusion_elementwise_add_activation, kARM, kFloat, kNCHW, def); diff --git a/paddle/fluid/lite/kernels/arm/fc_compute.cc b/paddle/fluid/lite/kernels/arm/fc_compute.cc index e31c36d91dbb6cb38fd963510f779df754ec3434..c7a9269b5f9af40e89a8e58e1363c1b131f81ac4 100644 --- a/paddle/fluid/lite/kernels/arm/fc_compute.cc +++ b/paddle/fluid/lite/kernels/arm/fc_compute.cc @@ -27,6 +27,9 @@ void FcCompute::PrepareForRun() { auto x_dims = param.input->dims(); auto w_dims = param.w->dims(); + auto& ctx = this->ctx_->template As(); + ctx.SetRunMode(LITE_POWER_HIGH, 4); + CHECK_GE(x_dims.size(), 2UL); CHECK_EQ(w_dims.size(), 2UL); CHECK_EQ(param.output->dims().size(), 2UL); diff --git a/paddle/fluid/lite/kernels/arm/mul_compute.cc b/paddle/fluid/lite/kernels/arm/mul_compute.cc index 269e4842252c2a88f33c8faf6666d139e36e49f3..a176086a4cae61e2dc4ab2dec035c25a6df4b512 100644 --- a/paddle/fluid/lite/kernels/arm/mul_compute.cc +++ b/paddle/fluid/lite/kernels/arm/mul_compute.cc @@ -23,7 +23,8 @@ namespace kernels { namespace arm { void MulCompute::PrepareForRun() { - // TODO(TJ): transpose x or y if necessary + auto& ctx = this->ctx_->template As(); + ctx.SetRunMode(LITE_POWER_HIGH, 4); } void MulCompute::Run() { diff --git a/paddle/fluid/lite/kernels/arm/pool_compute.cc b/paddle/fluid/lite/kernels/arm/pool_compute.cc index 168b0e50c98bcf8eab324b627478a7790e665b82..ea3d47a268588f7d593f0c3ac58f3421d9456fa8 100644 --- a/paddle/fluid/lite/kernels/arm/pool_compute.cc +++ b/paddle/fluid/lite/kernels/arm/pool_compute.cc @@ -24,6 +24,11 @@ namespace lite { namespace kernels { namespace arm { +void PoolCompute::PrepareForRun() { + auto& ctx = this->ctx_->template As(); + ctx.SetRunMode(LITE_POWER_HIGH, 4); +} + void PoolCompute::Run() { auto& param = Param(); auto& in_dims = param.x->dims(); diff --git a/paddle/fluid/lite/kernels/arm/pool_compute.h b/paddle/fluid/lite/kernels/arm/pool_compute.h index 76dedbc3132405cd70d74e233619572f97dc07e0..3a8b0f99c5b8292ec845f00383c4751079db2c77 100644 --- a/paddle/fluid/lite/kernels/arm/pool_compute.h +++ b/paddle/fluid/lite/kernels/arm/pool_compute.h @@ -26,6 +26,7 @@ class PoolCompute : public KernelLite { public: using param_t = operators::PoolParam; + void PrepareForRun() override; void 
Run() override; TargetType target() const override; diff --git a/paddle/fluid/lite/kernels/arm/transpose_compute.cc b/paddle/fluid/lite/kernels/arm/transpose_compute.cc new file mode 100644 index 0000000000000000000000000000000000000000..368716c368083ea877fe5dd8a0054a9763a4829e --- /dev/null +++ b/paddle/fluid/lite/kernels/arm/transpose_compute.cc @@ -0,0 +1,173 @@ +// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "paddle/fluid/lite/kernels/arm/transpose_compute.h" +#include +#include +#include "paddle/fluid/lite/arm/math/funcs.h" +#include "paddle/fluid/lite/core/compatible_tensor.h" +#include "paddle/fluid/lite/core/op_registry.h" +#include "paddle/fluid/lite/core/type_system.h" + +namespace paddle { +namespace lite { +namespace kernels { +namespace arm { + +bool IsShuffleChannel(const std::vector &axis) { + bool is_shuffle_channel = true; + if (axis.size() > 2 && axis[0] == 0 && axis[1] == 2 && axis[2] == 1) { + for (int i = 3; i < axis.size(); ++i) { + if (axis[i] != i) { + is_shuffle_channel = false; + break; + } + } + } else { + return false; + } + return is_shuffle_channel; +} + +template +void ShuffleChannelCompute(const std::vector &axis, + const lite::Tensor *input, lite::Tensor *output) { + const Dtype *input_ptr = input->data(); + Dtype *output_ptr = output->mutable_data(); + // input and output's shape dimension must >= 2 && <= 6. + const DDim &in_dim = input->dims(); + const DDim &out_dim = output->dims(); + size_t offset = 1; + for (int i = 3; i < axis.size(); ++i) { + offset *= in_dim[i]; + } + +#pragma omp parallel for collapse(3) + for (int batch = 0; batch < out_dim[0]; ++batch) { + for (int c1 = 0; c1 < out_dim[1]; ++c1) { + for (int c2 = 0; c2 < out_dim[2]; ++c2) { + size_t out_offset = + ((batch * out_dim[1] + c1) * out_dim[2] + c2) * offset; + size_t in_offset = ((batch * in_dim[1] + c2) * in_dim[2] + c1) * offset; + memcpy(output_ptr + out_offset, input_ptr + in_offset, + offset * sizeof(Dtype)); + } + } + } +} + +template +void TransposeCompute_(const std::vector &axis, const lite::Tensor *input, + lite::Tensor *output) { + // const Dtype *input_ptr = input->data(); + const Dtype *input_ptr = input->data(); + Dtype *output_ptr = output->mutable_data(); + + // input and output's shape dimension must >= 2 && <= 6. + const DDim &in_dim = input->dims(); + const DDim &out_dim = output->dims(); + + // precompute inverted output dim and strides + size_t rout_dim[6], strides[6]; + int permute = axis.size(); // permute must >=2 && <= 6. 
+  for (int i = 0; i < permute; ++i) {
+    int k = permute - 1 - i;
+    strides[k] = 1;
+    for (int j = axis[i] + 1; j < permute; ++j) {
+      strides[k] *= in_dim[j];
+    }
+    rout_dim[k] = out_dim[i];
+  }
+
+  // unroll the first 2 dimensions
+  int reamin_dim = 1;
+  for (int i = 2; i < out_dim.size(); ++i) {
+    reamin_dim *= out_dim[i];
+  }
+
+#pragma omp parallel for collapse(2)
+  for (int batch = 0; batch < out_dim[0]; ++batch) {
+    for (int j = 0; j < out_dim[1]; ++j) {
+      size_t offset = batch * strides[permute - 1] + j * strides[permute - 2];
+      Dtype *out_ptr = output_ptr + (batch * out_dim[1] + j) * reamin_dim;
+      int indics[4] = {0, 0, 0, 0};
+      for (int k = 0; k < reamin_dim; ++k) {
+        out_ptr[k] = input_ptr[offset];
+        indics[0] += 1;
+        offset += strides[0];
+        for (int p = 0; p < permute - 3; ++p) {
+          if (indics[p] == rout_dim[p]) {
+            indics[p + 1] += 1;
+            indics[p] = 0;
+            offset += strides[p + 1];
+            offset -= rout_dim[p] * strides[p];
+          } else {
+            break;
+          }
+        }
+      }
+    }
+  }
+}
+
+// Transpose
+void TransposeCompute::Run() {
+  auto &param = Param<operators::TransposeParam>();
+  auto *input = param.x;
+  auto *output = param.output;
+  const std::vector<int> axis = param.axis;
+
+  bool shuffle_channel = IsShuffleChannel(axis);
+  if (shuffle_channel) {
+    ShuffleChannelCompute<float>(axis, input, output);
+  } else {
+    TransposeCompute_<float>(axis, input, output);
+  }
+  return;
+}
+
+// Transpose2
+void Transpose2Compute::Run() {
+  auto &param = Param<operators::TransposeParam>();
+  auto *input = param.x;
+  auto *output = param.output;
+  const std::vector<int> axis = param.axis;
+
+  bool shuffle_channel = IsShuffleChannel(axis);
+  if (shuffle_channel) {
+    ShuffleChannelCompute<float>(axis, input, output);
+  } else {
+    TransposeCompute_<float>(axis, input, output);
+  }
+  return;
+}
+
+}  // namespace arm
+}  // namespace kernels
+}  // namespace lite
+}  // namespace paddle
+
+// Transpose
+REGISTER_LITE_KERNEL(transpose, kARM, kFloat, kNCHW,
+                     paddle::lite::kernels::arm::TransposeCompute, def)
+    .BindInput("X", {LiteType::GetTensorTy(TARGET(kARM))})
+    .BindOutput("Out", {LiteType::GetTensorTy(TARGET(kARM))})
+    .Finalize();
+
+// Transpose2
+REGISTER_LITE_KERNEL(transpose2, kARM, kFloat, kNCHW,
+                     paddle::lite::kernels::arm::Transpose2Compute, def)
+    .BindInput("X", {LiteType::GetTensorTy(TARGET(kARM))})
+    .BindOutput("Out", {LiteType::GetTensorTy(TARGET(kARM))})
+    .Finalize();
diff --git a/paddle/fluid/lite/kernels/arm/transpose_compute.h b/paddle/fluid/lite/kernels/arm/transpose_compute.h
new file mode 100644
index 0000000000000000000000000000000000000000..d8ebb761ec47f33c9ff4d5addae48bb4f75e5921
--- /dev/null
+++ b/paddle/fluid/lite/kernels/arm/transpose_compute.h
@@ -0,0 +1,48 @@
+// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+ +#pragma once +#include +#include "paddle/fluid/lite/core/kernel.h" +#include "paddle/fluid/lite/operators/transpose_op.h" + +namespace paddle { +namespace lite { +namespace kernels { +namespace arm { + +// Transpose +class TransposeCompute : public KernelLite { + public: + using param_t = operators::TransposeParam; + + void Run() override; + + virtual ~TransposeCompute() = default; +}; + +// Transpose2 +class Transpose2Compute : public KernelLite { + public: + using param_t = operators::TransposeParam; + + void Run() override; + + virtual ~Transpose2Compute() = default; +}; + +} // namespace arm +} // namespace kernels +} // namespace lite +} // namespace paddle diff --git a/paddle/fluid/lite/kernels/arm/transpose_compute_test.cc b/paddle/fluid/lite/kernels/arm/transpose_compute_test.cc new file mode 100644 index 0000000000000000000000000000000000000000..1315556e3dd47cda95024d4adda9dbc4e56aa35f --- /dev/null +++ b/paddle/fluid/lite/kernels/arm/transpose_compute_test.cc @@ -0,0 +1,205 @@ +// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "paddle/fluid/lite/kernels/arm/transpose_compute.h" +#include +#include +#include +#include +#include "paddle/fluid/lite/arm/math/funcs.h" +#include "paddle/fluid/lite/core/lite_tensor.h" +#include "paddle/fluid/lite/core/op_registry.h" + +namespace paddle { +namespace lite { +namespace kernels { +namespace arm { + +#define IN(n, c, h, w) \ + input_data[w + h * input_w + c * input_h * input_w + \ + n * input_c * input_h * input_w] +#define OUT(n, c, h, w) \ + output_data[w + h * output_w + c * output_h * output_w + \ + n * output_c * output_h * output_w] +void transpose_compute_ref(const operators::TransposeParam& param) { + const lite::Tensor* input = param.x; + lite::Tensor* output = param.output; + std::vector axis = param.axis; + + auto* input_data = input->data(); + auto* output_data = output->mutable_data(); + + int input_n = input->dims()[0]; + int input_c = input->dims()[1]; + int input_h = input->dims()[2]; + int input_w = input->dims()[3]; + int output_n = output->dims()[0]; + int output_c = output->dims()[1]; + int output_h = output->dims()[2]; + int output_w = output->dims()[3]; + + for (int n = 0; n < input_n; ++n) { + for (int c = 0; c < input_c; ++c) { + for (int h = 0; h < input_h; ++h) { + for (int w = 0; w < input_w; ++w) { + OUT(n, h, w, c) = IN(n, c, h, w); + } + } + } + } +} + +// Transpose +TEST(transpose_arm, init) { + TransposeCompute transpose; + ASSERT_EQ(transpose.precision(), PRECISION(kFloat)); + ASSERT_EQ(transpose.target(), TARGET(kARM)); +} + +TEST(transpose_arm, compute_shape_nchw) { + TransposeCompute transpose; + operators::TransposeParam param; + + std::vector axis{0, 2, 3, 1}; + param.axis = axis; + + lite::Tensor input; + lite::Tensor output; + lite::Tensor output_ref; + + const std::vector input_shape{1, 24, 2, 2}; + const std::vector output_shape{1, 2, 2, 24}; + + DDimLite ddimInput(input_shape); + DDimLite ddimOutput(output_shape); 
+ + input.Resize(ddimInput); + output.Resize(ddimOutput); + output_ref.Resize(ddimOutput); + + for (int i = 0; + i < input_shape[0] * input_shape[1] * input_shape[2] * input_shape[3]; + i += 4) { + input.mutable_data()[i] = i; + input.mutable_data()[i + 1] = i + 1; + input.mutable_data()[i + 2] = i + 2; + input.mutable_data()[i + 3] = i + 3; + } + for (int i = 0; + i < input_shape[0] * input_shape[1] * input_shape[2] * input_shape[3]; + i += 4) { + } + param.x = &input; + param.output = &output; + + // run transpose_compute + transpose.SetParam(param); + transpose.Run(); + + // run transpose_compute_ref + param.output = &output_ref; + transpose_compute_ref(param); + + auto* output_data = output.data(); + auto* output_ref_data = output_ref.data(); + for (int i = 0; + i < input_shape[0] * input_shape[1] * input_shape[2] * input_shape[3]; + i += 4) { + EXPECT_NEAR(output_data[i], output_ref_data[i], 1e-5); + } +} + +TEST(transpose, retrive_op) { + auto transpose = + KernelRegistry::Global().Create( + "transpose"); + ASSERT_FALSE(transpose.empty()); + ASSERT_TRUE(transpose.front()); +} + +// Transpose2 +TEST(transpose2_arm, init) { + Transpose2Compute transpose2; + ASSERT_EQ(transpose2.precision(), PRECISION(kFloat)); + ASSERT_EQ(transpose2.target(), TARGET(kARM)); +} + +TEST(transpose2_arm, compute_shape_nchw) { + Transpose2Compute transpose2; + operators::TransposeParam param; + + std::vector axis{0, 2, 3, 1}; + param.axis = axis; + + lite::Tensor input; + lite::Tensor output; + lite::Tensor output_ref; + + const std::vector input_shape{1, 24, 2, 2}; + const std::vector output_shape{1, 2, 2, 24}; + + DDimLite ddimInput(input_shape); + DDimLite ddimOutput(output_shape); + + input.Resize(ddimInput); + output.Resize(ddimOutput); + output_ref.Resize(ddimOutput); + + for (int i = 0; + i < input_shape[0] * input_shape[1] * input_shape[2] * input_shape[3]; + i += 4) { + input.mutable_data()[i] = i; + input.mutable_data()[i + 1] = i + 1; + input.mutable_data()[i + 2] = i + 2; + input.mutable_data()[i + 3] = i + 3; + } + for (int i = 0; + i < input_shape[0] * input_shape[1] * input_shape[2] * input_shape[3]; + i += 4) { + } + param.x = &input; + param.output = &output; + + // run transpose_compute + transpose2.SetParam(param); + transpose2.Run(); + + // run transpose_compute_ref + param.output = &output_ref; + transpose_compute_ref(param); + + auto* output_data = output.data(); + auto* output_ref_data = output_ref.data(); + for (int i = 0; + i < input_shape[0] * input_shape[1] * input_shape[2] * input_shape[3]; + i += 4) { + EXPECT_NEAR(output_data[i], output_ref_data[i], 1e-5); + } +} + +TEST(transpose2, retrive_op) { + auto transpose2 = + KernelRegistry::Global().Create( + "transpose2"); + ASSERT_FALSE(transpose2.empty()); + ASSERT_TRUE(transpose2.front()); +} + +} // namespace arm +} // namespace kernels +} // namespace lite +} // namespace paddle + +USE_LITE_KERNEL(transpose, kARM, kFloat, kNCHW, def); +USE_LITE_KERNEL(transpose2, kARM, kFloat, kNCHW, def); diff --git a/paddle/fluid/lite/kernels/arm/use_kernels.h b/paddle/fluid/lite/kernels/arm/use_kernels.h index 1f93a81aa94f09f8330aa385840adec559d7161d..1a6583f3f570e688080b1bb1a96217c25ca4bcc9 100644 --- a/paddle/fluid/lite/kernels/arm/use_kernels.h +++ b/paddle/fluid/lite/kernels/arm/use_kernels.h @@ -19,6 +19,7 @@ USE_LITE_KERNEL(fc, kARM, kFloat, kNCHW, def); USE_LITE_KERNEL(mul, kARM, kFloat, kNCHW, def); USE_LITE_KERNEL(scale, kARM, kFloat, kNCHW, def); USE_LITE_KERNEL(softmax, kARM, kFloat, kNCHW, def); +USE_LITE_KERNEL(concat, kARM, 
kFloat, kNCHW, def);
 USE_LITE_KERNEL(pool, kARM, kFloat, kNCHW, def);
 USE_LITE_KERNEL(feed, kARM, kAny, kAny, def);
 USE_LITE_KERNEL(fetch, kARM, kAny, kAny, def);
diff --git a/paddle/fluid/lite/kernels/use_kernels.h b/paddle/fluid/lite/kernels/use_kernels.h
new file mode 100644
index 0000000000000000000000000000000000000000..2c06092e3856467c031abaf36c63bd61aef65bae
--- /dev/null
+++ b/paddle/fluid/lite/kernels/use_kernels.h
@@ -0,0 +1,56 @@
+// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#pragma once
+/*
+ * ATTENTION this header file can only include in .cc file.
+ */
+
+USE_LITE_KERNEL(feed, kHost, kAny, kAny, def);
+USE_LITE_KERNEL(fetch, kHost, kAny, kAny, def);
+
+#ifdef LITE_WITH_X86
+USE_LITE_KERNEL(relu, kX86, kFloat, kNCHW, def);
+USE_LITE_KERNEL(mul, kX86, kFloat, kNCHW, def);
+USE_LITE_KERNEL(fc, kX86, kFloat, kNCHW, def);
+USE_LITE_KERNEL(scale, kX86, kFloat, kNCHW, def);
+USE_LITE_KERNEL(square, kX86, kFloat, kNCHW, def);
+USE_LITE_KERNEL(elementwise_sub, kX86, kFloat, kNCHW, def);
+USE_LITE_KERNEL(elementwise_add, kX86, kFloat, kNCHW, def);
+USE_LITE_KERNEL(softmax, kX86, kFloat, kNCHW, def);
+USE_LITE_KERNEL(dropout, kX86, kFloat, kNCHW, def);
+USE_LITE_KERNEL(concat, kX86, kFloat, kNCHW, def);
+USE_LITE_KERNEL(conv2d, kX86, kFloat, kNCHW, def);
+USE_LITE_KERNEL(depthwise_conv2d, kX86, kFloat, kNCHW, def);
+USE_LITE_KERNEL(pool2d, kX86, kFloat, kNCHW, def);
+#endif
+
+#ifdef LITE_WITH_ARM
+USE_LITE_KERNEL(fc, kARM, kFloat, kNCHW, def);
+USE_LITE_KERNEL(mul, kARM, kFloat, kNCHW, def);
+USE_LITE_KERNEL(scale, kARM, kFloat, kNCHW, def);
+USE_LITE_KERNEL(conv2d, kARM, kFloat, kNCHW, def);
+USE_LITE_KERNEL(batch_norm, kARM, kFloat, kNCHW, def);
+USE_LITE_KERNEL(relu, kARM, kFloat, kNCHW, def);
+USE_LITE_KERNEL(depthwise_conv2d, kARM, kFloat, kNCHW, def);
+USE_LITE_KERNEL(pool2d, kARM, kFloat, kNCHW, def);
+USE_LITE_KERNEL(elementwise_add, kARM, kFloat, kNCHW, def);
+USE_LITE_KERNEL(softmax, kARM, kFloat, kNCHW, def);
+#endif
+
+#ifdef LITE_WITH_CUDA
+USE_LITE_KERNEL(mul, kCUDA, kFloat, kNCHW, def);
+USE_LITE_KERNEL(io_copy, kCUDA, kAny, kAny, host_to_device);
+USE_LITE_KERNEL(io_copy, kCUDA, kAny, kAny, device_to_host);
+#endif
diff --git a/paddle/fluid/lite/kernels/x86/CMakeLists.txt b/paddle/fluid/lite/kernels/x86/CMakeLists.txt
index c2845fb9b21b2e4d0bb7ff378676d4531212db52..35c61376153e64690f40836812079a20c6c4dc49 100644
--- a/paddle/fluid/lite/kernels/x86/CMakeLists.txt
+++ b/paddle/fluid/lite/kernels/x86/CMakeLists.txt
@@ -18,6 +18,18 @@ cc_library(concat_compute_x86 SRCS concat_compute.cc DEPS ${lite_kernel_deps} )
 cc_library(conv_compute_x86 SRCS conv_compute.cc DEPS ${lite_kernel_deps} blas im2col vol2col)
 cc_library(pool_compute_x86 SRCS pool_compute.cc DEPS ${lite_kernel_deps} pooling)
+lite_cc_test(test_fc_compute_x86 SRCS
pool_compute_test.cc DEPS pool_compute_x86) +lite_cc_test(test_concat_compute_x86 SRCS concat_compute_test.cc DEPS concat_compute_x86) +lite_cc_test(test_softmax_compute_x86 SRCS softmax_compute_test.cc DEPS softmax_compute_x86) +lite_cc_test(test_elementwise_compute_x86 SRCS elementwise_compute_test.cc DEPS elementwise_compute_x86) +lite_cc_test(test_relu_compute_x86 SRCS relu_compute_test.cc DEPS relu_compute_x86) +lite_cc_test(test_mul_compute_x86 SRCS mul_compute_test.cc DEPS mul_compute_x86 operator) +lite_cc_test(test_scale_compute_x86 SRCS scale_compute_test.cc DEPS scale_compute_x86) +lite_cc_test(test_dropout_compute_x86 SRCS dropout_compute_test.cc DEPS dropout_compute_x86) + + set(x86_kernels activation_compute_x86 elementwise_compute_x86 diff --git a/paddle/fluid/lite/kernels/x86/concat_compute.cc b/paddle/fluid/lite/kernels/x86/concat_compute.cc index 23ae8ca505559cb1fc45b5976f6203a86128ddf0..4e1872951d74335a3bad97597a0104fe54f52d25 100644 --- a/paddle/fluid/lite/kernels/x86/concat_compute.cc +++ b/paddle/fluid/lite/kernels/x86/concat_compute.cc @@ -12,88 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. -#include -#include "paddle/fluid/lite/core/kernel.h" -#include "paddle/fluid/lite/core/op_registry.h" -#include "paddle/fluid/lite/core/types.h" -#include "paddle/fluid/operators/strided_memcpy.h" - -namespace paddle { -namespace lite { -namespace kernels { -namespace x86 { - -template -class ConcatCompute : public KernelLite { - public: - using param_t = operators::ConcatParam; - - void Run() override { - auto& param = *param_.get_mutable(); - int64_t axis = static_cast(param.axis); - auto out = param.output; - - if (axis == 0 && param.x.size() < 10) { - size_t output_offset = 0; - for (auto* in : param.x) { - if (!in || in->dims().production() == 0UL) { - continue; - } - auto in_stride = framework::stride_numel(in->dims().data()); - auto out_stride = framework::stride_numel(out->dims().data()); - paddle::operators::StridedNumelCopyWithAxis( - platform::CPUDeviceContext(), axis, - out->mutable_data() + output_offset, out_stride, in->data(), - in_stride, in_stride[axis]); - - output_offset += in_stride[axis]; - } - } else { - std::vector inputs; - for (size_t j = 0; j < param.x.size(); ++j) { - if (param.x[j] && param.x[j]->dims().production() > 0) { - inputs.push_back(*param.x[j]); - } else { - continue; - } - } - - int num = inputs.size(); - int rows = 1; - auto dim_0 = inputs[0].dims(); - for (int i = 0; i < axis; ++i) { - rows *= dim_0[i]; - } - int out_rows = rows, out_cols = 0; - - std::vector input_cols(inputs.size()); - for (int i = 0; i < num; ++i) { - int t_cols = inputs[i].dims().production() / rows; - out_cols += t_cols; - input_cols[i] = t_cols; - } - // computation - auto output_data = param.output->template mutable_data(); - int col_idx = 0; - for (int j = 0; j < num; ++j) { - int col_len = input_cols[j]; - auto input_data = inputs[j].data(); - for (int k = 0; k < out_rows; ++k) { - std::memcpy(output_data + k * out_cols + col_idx, - input_data + k * col_len, sizeof(T) * col_len); - } - col_idx += col_len; - } - } - } - - virtual ~ConcatCompute() = default; -}; - -} // namespace x86 -} // namespace kernels -} // namespace lite -} // namespace paddle +#include "paddle/fluid/lite/kernels/x86/concat_compute.h" REGISTER_LITE_KERNEL(concat, kX86, kFloat, kNCHW, paddle::lite::kernels::x86::ConcatCompute, def) diff --git a/paddle/fluid/lite/kernels/x86/concat_compute.h 
b/paddle/fluid/lite/kernels/x86/concat_compute.h new file mode 100644 index 0000000000000000000000000000000000000000..67c2f40f2c197ca3fb1c09fca4a9145a27c4a6fd --- /dev/null +++ b/paddle/fluid/lite/kernels/x86/concat_compute.h @@ -0,0 +1,98 @@ +// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +#pragma once + +#include +#include +#include "paddle/fluid/lite/core/kernel.h" +#include "paddle/fluid/lite/core/op_registry.h" +#include "paddle/fluid/lite/core/types.h" +#include "paddle/fluid/operators/strided_memcpy.h" + +namespace paddle { +namespace lite { +namespace kernels { +namespace x86 { + +template +class ConcatCompute : public KernelLite { + public: + using param_t = operators::ConcatParam; + + void Run() override { + auto& param = *param_.get_mutable(); + int64_t axis = static_cast(param.axis); + auto out = param.output; + + if (axis == 0 && param.x.size() < 10) { + size_t output_offset = 0; + for (auto* in : param.x) { + if (!in || in->dims().production() == 0UL) { + continue; + } + auto in_stride = framework::stride_numel(in->dims().data()); + auto out_stride = framework::stride_numel(out->dims().data()); + paddle::operators::StridedNumelCopyWithAxis( + platform::CPUDeviceContext(), axis, + out->mutable_data() + output_offset, out_stride, in->data(), + in_stride, in_stride[axis]); + + output_offset += in_stride[axis]; + } + } else { + std::vector inputs; + for (size_t j = 0; j < param.x.size(); ++j) { + if (param.x[j] && param.x[j]->dims().production() > 0) { + inputs.push_back(*param.x[j]); + } else { + continue; + } + } + + int num = inputs.size(); + int rows = 1; + auto dim_0 = inputs[0].dims(); + for (int i = 0; i < axis; ++i) { + rows *= dim_0[i]; + } + int out_rows = rows, out_cols = 0; + + std::vector input_cols(inputs.size()); + for (int i = 0; i < num; ++i) { + int t_cols = inputs[i].dims().production() / rows; + out_cols += t_cols; + input_cols[i] = t_cols; + } + // computation + auto output_data = param.output->template mutable_data(); + int col_idx = 0; + for (int j = 0; j < num; ++j) { + int col_len = input_cols[j]; + auto input_data = inputs[j].data(); + for (int k = 0; k < out_rows; ++k) { + std::memcpy(output_data + k * out_cols + col_idx, + input_data + k * col_len, sizeof(T) * col_len); + } + col_idx += col_len; + } + } + } + + virtual ~ConcatCompute() = default; +}; + +} // namespace x86 +} // namespace kernels +} // namespace lite +} // namespace paddle diff --git a/paddle/fluid/lite/kernels/x86/concat_compute_test.cc b/paddle/fluid/lite/kernels/x86/concat_compute_test.cc new file mode 100644 index 0000000000000000000000000000000000000000..aa50dae9eb9e2bd2aef980cce6546972f5cdf89e --- /dev/null +++ b/paddle/fluid/lite/kernels/x86/concat_compute_test.cc @@ -0,0 +1,83 @@ +// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "paddle/fluid/lite/kernels/x86/concat_compute.h" +#include +#include +#include +#include "paddle/fluid/lite/core/op_registry.h" + +namespace paddle { +namespace lite { +namespace kernels { +namespace x86 { + +TEST(concat_x86, retrive_op) { + auto concat = + KernelRegistry::Global().Create( + "concat"); + ASSERT_FALSE(concat.empty()); + ASSERT_TRUE(concat.front()); +} + +TEST(concat_x86, init) { + ConcatCompute concat; + ASSERT_EQ(concat.precision(), PRECISION(kFloat)); + ASSERT_EQ(concat.target(), TARGET(kX86)); +} + +TEST(concat_x86, run_test) { + lite::Tensor x1, x2, out; + constexpr int batch_size = 1; + std::vector x1_shape{batch_size, 1, 3, 3}; + x1.Resize(lite::DDim(x1_shape)); + std::vector x2_shape{batch_size, 1, 3, 3}; + x2.Resize(lite::DDim(x2_shape)); + + std::vector x = {&x1, &x2}; + + std::vector out_shape{batch_size, 2, 3, 3}; + out.Resize(lite::DDim(out_shape)); + + auto x1_data = x1.mutable_data(); + auto x2_data = x2.mutable_data(); + auto out_data = out.mutable_data(); + + for (int64_t i = 0; i < x1.dims().production(); i++) { + x1_data[i] = 1; + x2_data[i] = 2; + } + + ConcatCompute concat; + operators::ConcatParam param; + param.x = x; + param.output = &out; + param.axis = 1; + + concat.SetParam(param); + concat.Run(); + + std::cout << "output: "; + for (int i = 0; i < out.dims().production(); i++) { + std::cout << out_data[i] << " "; + } + std::cout << std::endl; +} + +} // namespace x86 +} // namespace kernels +} // namespace lite +} // namespace paddle + +USE_LITE_KERNEL(concat, kX86, kFloat, kNCHW, def); diff --git a/paddle/fluid/lite/kernels/x86/conv_compute.cc b/paddle/fluid/lite/kernels/x86/conv_compute.cc index b29161c1c60a3b628a97c2ad015ee3dcb1c601aa..7b674a038de00327443ee68196ee6a83e7923cea 100644 --- a/paddle/fluid/lite/kernels/x86/conv_compute.cc +++ b/paddle/fluid/lite/kernels/x86/conv_compute.cc @@ -12,144 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. 
-#include -#include -#include -#include "paddle/fluid/framework/eigen.h" -#include "paddle/fluid/lite/core/kernel.h" -#include "paddle/fluid/lite/core/op_registry.h" -#include "paddle/fluid/lite/core/types.h" -#include "paddle/fluid/lite/operators/conv_op.h" -#include "paddle/fluid/operators/math/blas.h" -#include "paddle/fluid/operators/math/depthwise_conv.h" -#include "paddle/fluid/operators/math/im2col.h" -#include "paddle/fluid/operators/math/vol2col.h" - -namespace paddle { -namespace lite { -namespace kernels { -namespace x86 { - -inline bool IsExpand(const std::vector& filter_dim, - const std::vector& strides, - const std::vector& paddings, - const std::vector& dilations) { - bool filter_1 = true, strides_1 = true, padding_0 = true, dilation_1 = true; - for (size_t j = 0; j < strides.size(); ++j) { - filter_1 = filter_1 && (static_cast(filter_dim[j + 2]) == 1); - strides_1 = strides_1 && (strides[j] == 1); - padding_0 = padding_0 && (paddings[j] == 0); - dilation_1 = dilation_1 && (dilations[j] == 1); - } - return !(filter_1 && strides_1 && padding_0 && dilation_1); -} - -template -class Conv2dCompute : public KernelLite { - public: - using param_t = operators::ConvParam; - void Run() override { - auto& param = *param_.get_mutable(); - lite::Tensor filter = *param.filter; - param.output->template mutable_data(); - - const int batch_size = static_cast(param.x->dims()[0]); - - std::vector filter_shape_vec(filter.dims().Vectorize()); - std::vector output_shape_vec(param.output->dims().Vectorize()); - - size_t data_dim = filter_shape_vec.size() - 2; - std::vector col_shape_vec(1 + 2 * data_dim); - col_shape_vec[0] = param.x->dims()[1] / param.groups; - for (size_t j = 0; j < data_dim; ++j) { - col_shape_vec[j + 1] = filter_shape_vec[j + 2]; - col_shape_vec[j + 1 + data_dim] = output_shape_vec[j + 2]; - } - lite::DDim col_shape(col_shape_vec); - lite::DDim col_matrix_shape = col_shape.Flattern2D(data_dim + 1); - bool is_expand = IsExpand(filter_shape_vec, param.strides, param.paddings, - param.dilations); - - lite::Tensor col; - lite::Tensor col_matrix; - if (is_expand) { - col.Resize(col_shape); - col.mutable_data(); - col_matrix.ShareDataWith(col); - col_matrix.Resize(col_matrix_shape); - } - lite::DDim input_shape = param.x->dims().Slice(1, param.x->dims().size()); - - lite::DDim filter_matrix_shape(std::vector{ - filter.dims()[0], filter.dims().production() / filter.dims()[0]}); - filter.Resize(filter_matrix_shape); - - lite::DDim output_matrix_shape(std::vector{ - param.output->dims()[1], - param.output->dims().production() / - (param.output->dims()[0] * param.output->dims()[1])}); - - int in_step = static_cast(param.x->dims()[1]) / param.groups; - int out_step = static_cast(param.output->dims()[1]) / param.groups; - - paddle::operators::math::Vol2ColFunctor - vol2col; - paddle::operators::math::Im2ColFunctor< - paddle::operators::math::ColFormat::kCFO, platform::CPUDeviceContext, T> - im2col; - auto blas = paddle::operators::math::GetBlas( - platform::CPUDeviceContext()); - for (int i = 0; i < batch_size; i++) { - lite::Tensor in_batch; - in_batch.ShareDataWith( - param.x->raw_tensor().Slice(i, i + 1).Resize(input_shape.data())); - lite::Tensor out_batch; - out_batch.ShareDataWith(param.output->raw_tensor().Slice(i, i + 1).Resize( - output_matrix_shape.data())); - - for (int g = 0; g < param.groups; g++) { - lite::Tensor in_slice; - in_slice.ShareDataWith( - in_batch.raw_tensor().Slice(g * in_step, (g + 1) * in_step)); - - if (!is_expand) { - col.ShareDataWith(in_slice); - 
col_matrix.ShareDataWith(col); - col_matrix.Resize(col_matrix_shape); - } else if (data_dim == 2U) { - // im2col - im2col(platform::CPUDeviceContext(), in_slice.raw_tensor(), - param.dilations, param.strides, - std::vector{param.paddings[0], param.paddings[1], - param.paddings[0], param.paddings[1]}, - &(col.raw_tensor())); - } else if (data_dim == 3U) { - // vol2col - vol2col(platform::CPUDeviceContext(), in_slice.raw_tensor(), - param.dilations, param.strides, param.paddings, - &(col.raw_tensor())); - } - - // gemm - lite::Tensor out_slice; - out_slice.ShareDataWith( - out_batch.raw_tensor().Slice(g * out_step, (g + 1) * out_step)); - lite::Tensor filter_slice; - filter_slice.ShareDataWith( - filter.raw_tensor().Slice(g * out_step, (g + 1) * out_step)); - blas.MatMul(filter_slice.raw_tensor(), false, col_matrix.raw_tensor(), - false, T(1.0), &(out_slice.raw_tensor()), T(0.0)); - } - } - } - - virtual ~Conv2dCompute() = default; -}; - -} // namespace x86 -} // namespace kernels -} // namespace lite -} // namespace paddle +#include "paddle/fluid/lite/kernels/x86/conv_compute.h" REGISTER_LITE_KERNEL(conv2d, kX86, kFloat, kNCHW, paddle::lite::kernels::x86::Conv2dCompute, def) diff --git a/paddle/fluid/lite/kernels/x86/conv_compute.h b/paddle/fluid/lite/kernels/x86/conv_compute.h new file mode 100644 index 0000000000000000000000000000000000000000..4b3087792921ac689db3906160663e75ef0c7ed0 --- /dev/null +++ b/paddle/fluid/lite/kernels/x86/conv_compute.h @@ -0,0 +1,153 @@ +// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+#pragma once + +#include +#include +#include +#include "paddle/fluid/framework/eigen.h" +#include "paddle/fluid/lite/core/kernel.h" +#include "paddle/fluid/lite/core/op_registry.h" +#include "paddle/fluid/lite/core/types.h" +#include "paddle/fluid/lite/operators/conv_op.h" +#include "paddle/fluid/operators/math/blas.h" +#include "paddle/fluid/operators/math/depthwise_conv.h" +#include "paddle/fluid/operators/math/im2col.h" +#include "paddle/fluid/operators/math/vol2col.h" + +namespace paddle { +namespace lite { +namespace kernels { +namespace x86 { + +inline bool IsExpand(const std::vector& filter_dim, + const std::vector& strides, + const std::vector& paddings, + const std::vector& dilations) { + bool filter_1 = true, strides_1 = true, padding_0 = true, dilation_1 = true; + for (size_t j = 0; j < strides.size(); ++j) { + filter_1 = filter_1 && (static_cast(filter_dim[j + 2]) == 1); + strides_1 = strides_1 && (strides[j] == 1); + padding_0 = padding_0 && (paddings[j] == 0); + dilation_1 = dilation_1 && (dilations[j] == 1); + } + return !(filter_1 && strides_1 && padding_0 && dilation_1); +} + +template +class Conv2dCompute : public KernelLite { + public: + using param_t = operators::ConvParam; + void Run() override { + auto& param = *param_.get_mutable(); + lite::Tensor filter = *param.filter; + param.output->template mutable_data(); + + const int batch_size = static_cast(param.x->dims()[0]); + + std::vector filter_shape_vec(filter.dims().Vectorize()); + std::vector output_shape_vec(param.output->dims().Vectorize()); + + size_t data_dim = filter_shape_vec.size() - 2; + std::vector col_shape_vec(1 + 2 * data_dim); + col_shape_vec[0] = param.x->dims()[1] / param.groups; + for (size_t j = 0; j < data_dim; ++j) { + col_shape_vec[j + 1] = filter_shape_vec[j + 2]; + col_shape_vec[j + 1 + data_dim] = output_shape_vec[j + 2]; + } + lite::DDim col_shape(col_shape_vec); + lite::DDim col_matrix_shape = col_shape.Flattern2D(data_dim + 1); + bool is_expand = IsExpand(filter_shape_vec, param.strides, param.paddings, + param.dilations); + + lite::Tensor col; + lite::Tensor col_matrix; + if (is_expand) { + col.Resize(col_shape); + col.mutable_data(); + col_matrix.ShareDataWith(col); + col_matrix.Resize(col_matrix_shape); + } + lite::DDim input_shape = param.x->dims().Slice(1, param.x->dims().size()); + + lite::DDim filter_matrix_shape(std::vector{ + filter.dims()[0], filter.dims().production() / filter.dims()[0]}); + filter.Resize(filter_matrix_shape); + + lite::DDim output_matrix_shape(std::vector{ + param.output->dims()[1], + param.output->dims().production() / + (param.output->dims()[0] * param.output->dims()[1])}); + + int in_step = static_cast(param.x->dims()[1]) / param.groups; + int out_step = static_cast(param.output->dims()[1]) / param.groups; + + paddle::operators::math::Vol2ColFunctor + vol2col; + paddle::operators::math::Im2ColFunctor< + paddle::operators::math::ColFormat::kCFO, platform::CPUDeviceContext, T> + im2col; + auto blas = paddle::operators::math::GetBlas( + platform::CPUDeviceContext()); + for (int i = 0; i < batch_size; i++) { + lite::Tensor in_batch; + in_batch.ShareDataWith( + param.x->raw_tensor().Slice(i, i + 1).Resize(input_shape.data())); + lite::Tensor out_batch; + out_batch.ShareDataWith(param.output->raw_tensor().Slice(i, i + 1).Resize( + output_matrix_shape.data())); + + for (int g = 0; g < param.groups; g++) { + lite::Tensor in_slice; + in_slice.ShareDataWith( + in_batch.raw_tensor().Slice(g * in_step, (g + 1) * in_step)); + + if (!is_expand) { + 
col.ShareDataWith(in_slice); + col_matrix.ShareDataWith(col); + col_matrix.Resize(col_matrix_shape); + } else if (data_dim == 2U) { + // im2col + im2col(platform::CPUDeviceContext(), in_slice.raw_tensor(), + param.dilations, param.strides, + std::vector{param.paddings[0], param.paddings[1], + param.paddings[0], param.paddings[1]}, + &(col.raw_tensor())); + } else if (data_dim == 3U) { + // vol2col + vol2col(platform::CPUDeviceContext(), in_slice.raw_tensor(), + param.dilations, param.strides, param.paddings, + &(col.raw_tensor())); + } + + // gemm + lite::Tensor out_slice; + out_slice.ShareDataWith( + out_batch.raw_tensor().Slice(g * out_step, (g + 1) * out_step)); + lite::Tensor filter_slice; + filter_slice.ShareDataWith( + filter.raw_tensor().Slice(g * out_step, (g + 1) * out_step)); + blas.MatMul(filter_slice.raw_tensor(), false, col_matrix.raw_tensor(), + false, T(1.0), &(out_slice.raw_tensor()), T(0.0)); + } + } + } + + virtual ~Conv2dCompute() = default; +}; + +} // namespace x86 +} // namespace kernels +} // namespace lite +} // namespace paddle diff --git a/paddle/fluid/lite/kernels/x86/conv_compute_test.cc b/paddle/fluid/lite/kernels/x86/conv_compute_test.cc new file mode 100644 index 0000000000000000000000000000000000000000..be57153b4b55a1b68cbb0663d4b6dd0a15de5224 --- /dev/null +++ b/paddle/fluid/lite/kernels/x86/conv_compute_test.cc @@ -0,0 +1,92 @@ +// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include "paddle/fluid/lite/kernels/x86/conv_compute.h" +#include +#include +#include "paddle/fluid/lite/core/op_registry.h" + +namespace paddle { +namespace lite { +namespace kernels { +namespace x86 { + +TEST(conv_x86, retrive_op) { + auto conv2d = + KernelRegistry::Global().Create( + "conv2d"); + ASSERT_FALSE(conv2d.empty()); + ASSERT_TRUE(conv2d.front()); +} + +TEST(conv2d_x86, init) { + Conv2dCompute conv2d; + ASSERT_EQ(conv2d.precision(), PRECISION(kFloat)); + ASSERT_EQ(conv2d.target(), TARGET(kX86)); +} + +TEST(conv2d_x86, run_test) { + lite::Tensor x, filter, b, out; + constexpr int batch_size = 1; + std::vector x_shape{batch_size, 3, 3, 3}; + x.Resize(lite::DDim(x_shape)); + std::vector filter_shape{1, 3, 3, 3}; + filter.Resize(lite::DDim(filter_shape)); + std::vector b_shape{1, 3, 1, 1}; + b.Resize(lite::DDim(b_shape)); + std::vector out_shape{batch_size, 1, 1, 1}; + out.Resize(lite::DDim(out_shape)); + + auto x_data = x.mutable_data(); + auto filter_data = filter.mutable_data(); + auto b_data = b.mutable_data(); + auto out_data = out.mutable_data(); + + for (int64_t i = 0; i < x.dims().production(); i++) { + x_data[i] = 1; + } + for (int64_t i = 0; i < filter.dims().production(); i++) { + filter_data[i] = 1; + } + for (int64_t i = 0; i < b.dims().production(); i++) { + b_data[i] = 0; + } + + Conv2dCompute conv2d; + operators::ConvParam param; + + param.x = &x; + param.filter = &filter; + param.bias = &b; + param.output = &out; + param.strides = {1, 1}; + param.paddings = {0, 0}; + param.groups = 1; + param.dilations = {1, 1}; + + conv2d.SetParam(param); + conv2d.Run(); + + LOG(INFO) << "output: "; + for (int i = 0; i < out.dims().production(); i++) { + LOG(INFO) << out_data[i] << " "; + } +} + +} // namespace x86 +} // namespace kernels +} // namespace lite +} // namespace paddle + +USE_LITE_KERNEL(conv2d, kX86, kFloat, kNCHW, def); diff --git a/paddle/fluid/lite/kernels/x86/dropout_compute.cc b/paddle/fluid/lite/kernels/x86/dropout_compute.cc index d762ec2a06f8b4e0b2842e58625534dc92ca96a1..6b68e1da310996903643d6dc12abfc5a02864e74 100644 --- a/paddle/fluid/lite/kernels/x86/dropout_compute.cc +++ b/paddle/fluid/lite/kernels/x86/dropout_compute.cc @@ -12,72 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. -#include -#include -#include "paddle/fluid/framework/eigen.h" -#include "paddle/fluid/framework/operator.h" -#include "paddle/fluid/lite/core/kernel.h" -#include "paddle/fluid/lite/core/op_registry.h" - -namespace paddle { -namespace lite { -namespace kernels { -namespace x86 { - -template -using EigenMatrix = framework::EigenMatrix; - -template -class DropoutCompute : public KernelLite { - public: - using param_t = operators::DropoutParam; - void Run() override { - auto& param = *param_.get_mutable(); - const auto* x_data = param.x->data(); - auto* out_data = param.output->template mutable_data(); - if (!param.is_test) { - auto* mask_data = param.mask->template mutable_data(); - std::random_device rnd; - std::minstd_rand engine; - int seed = param.fix_seed ? 
param.seed : rnd(); - engine.seed(seed); - std::uniform_real_distribution dist(0, 1); - - size_t size = framework::product(param.mask->dims().data()); - for (size_t i = 0; i < size; ++i) { - if (dist(engine) < param.dropout_prob) { - mask_data[i] = 0; - out_data[i] = 0; - } else { - if (param.dropout_implementation == "upscale_in_train") { - mask_data[i] = 1.0f / static_cast(1.0f - param.dropout_prob); - out_data[i] = x_data[i] / static_cast(1.0f - param.dropout_prob); - } else { - mask_data[i] = 1; - out_data[i] = x_data[i]; - } - } - } - } else { - auto X = EigenMatrix::Reshape(param.x->raw_tensor(), 1); - auto Y = EigenMatrix::Reshape(param.output->raw_tensor(), 1); - auto& place = *platform::CPUDeviceContext().eigen_device(); - if (param.dropout_implementation == "upscale_in_train") { - Y.device(place) = X; - } else { - Y.device(place) = X * static_cast(1.0f - param.dropout_prob); - } - } - } - - virtual ~DropoutCompute() = default; -}; - -} // namespace x86 -} // namespace kernels -} // namespace lite -} // namespace paddle +#include "paddle/fluid/lite/kernels/x86/dropout_compute.h" REGISTER_LITE_KERNEL(dropout, kX86, kFloat, kNCHW, paddle::lite::kernels::x86::DropoutCompute, def) diff --git a/paddle/fluid/lite/kernels/x86/dropout_compute.h b/paddle/fluid/lite/kernels/x86/dropout_compute.h new file mode 100644 index 0000000000000000000000000000000000000000..ee8b51619a54594b390751c6d2c7a0c4f9931483 --- /dev/null +++ b/paddle/fluid/lite/kernels/x86/dropout_compute.h @@ -0,0 +1,81 @@ +// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +#pragma once + +#include +#include +#include "paddle/fluid/framework/eigen.h" +#include "paddle/fluid/framework/operator.h" +#include "paddle/fluid/lite/core/kernel.h" +#include "paddle/fluid/lite/core/op_registry.h" + +namespace paddle { +namespace lite { +namespace kernels { +namespace x86 { + +template +using EigenMatrix = framework::EigenMatrix; + +template +class DropoutCompute : public KernelLite { + public: + using param_t = operators::DropoutParam; + void Run() override { + auto& param = *param_.get_mutable(); + const auto* x_data = param.x->data(); + auto* out_data = param.output->template mutable_data(); + if (!param.is_test) { + auto* mask_data = param.mask->template mutable_data(); + std::random_device rnd; + std::minstd_rand engine; + int seed = param.fix_seed ? 
param.seed : rnd(); + engine.seed(seed); + std::uniform_real_distribution dist(0, 1); + + size_t size = framework::product(param.mask->dims().data()); + for (size_t i = 0; i < size; ++i) { + if (dist(engine) < param.dropout_prob) { + mask_data[i] = 0; + out_data[i] = 0; + } else { + if (param.dropout_implementation == "upscale_in_train") { + mask_data[i] = 1.0f / static_cast(1.0f - param.dropout_prob); + out_data[i] = x_data[i] / static_cast(1.0f - param.dropout_prob); + } else { + mask_data[i] = 1; + out_data[i] = x_data[i]; + } + } + } + } else { + auto X = EigenMatrix::Reshape(param.x->raw_tensor(), 1); + auto Y = EigenMatrix::Reshape(param.output->raw_tensor(), 1); + auto& place = *platform::CPUDeviceContext().eigen_device(); + if (param.dropout_implementation == "upscale_in_train") { + Y.device(place) = X; + } else { + Y.device(place) = X * static_cast(1.0f - param.dropout_prob); + } + } + } + + virtual ~DropoutCompute() = default; +}; + +} // namespace x86 +} // namespace kernels +} // namespace lite +} // namespace paddle diff --git a/paddle/fluid/lite/kernels/x86/dropout_compute_test.cc b/paddle/fluid/lite/kernels/x86/dropout_compute_test.cc new file mode 100644 index 0000000000000000000000000000000000000000..522877857c7adc47a258e24fc330f457520f8f79 --- /dev/null +++ b/paddle/fluid/lite/kernels/x86/dropout_compute_test.cc @@ -0,0 +1,78 @@ +// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include "paddle/fluid/lite/kernels/x86/dropout_compute.h" +#include +#include +#include +#include "paddle/fluid/lite/core/op_registry.h" + +namespace paddle { +namespace lite { +namespace kernels { +namespace x86 { + +TEST(dropout_x86, retrive_op) { + auto dropout = + KernelRegistry::Global().Create( + "dropout"); + ASSERT_FALSE(dropout.empty()); + ASSERT_TRUE(dropout.front()); +} + +TEST(dropout_x86, init) { + DropoutCompute dropout; + ASSERT_EQ(dropout.precision(), PRECISION(kFloat)); + ASSERT_EQ(dropout.target(), TARGET(kX86)); +} + +TEST(dropout_x86, run_test) { + lite::Tensor x, y, out; + constexpr int batch_size = 1; + std::vector x_shape{batch_size, 3, 2, 2}; + x.Resize(lite::DDim(x_shape)); + std::vector out_shape{batch_size, 3, 2, 2}; + out.Resize(lite::DDim(out_shape)); + + auto x_data = x.mutable_data(); + auto out_data = out.mutable_data(); + + for (int64_t i = 0; i < x.dims().production(); i++) { + x_data[i] = static_cast(i); + } + // DropoutCompute dropout; + DropoutCompute dropout; + operators::DropoutParam param; + + param.x = &x; + param.dropout_prob = 0.25; + param.is_test = true; + param.fix_seed = true; + param.output = &out; + + dropout.SetParam(param); + dropout.Run(); + + LOG(INFO) << "output: "; + for (int i = 0; i < out.dims().production(); i++) { + LOG(INFO) << out_data[i]; + } +} + +} // namespace x86 +} // namespace kernels +} // namespace lite +} // namespace paddle + +USE_LITE_KERNEL(dropout, kX86, kFloat, kNCHW, def); diff --git a/paddle/fluid/lite/kernels/x86/elementwise_compute.cc b/paddle/fluid/lite/kernels/x86/elementwise_compute.cc index 8e2ea92d6de24eb5ef58b5ebbdded90b99c1b6b8..5024e49866ff8dd51cc8963af905066f6dfff8a7 100644 --- a/paddle/fluid/lite/kernels/x86/elementwise_compute.cc +++ b/paddle/fluid/lite/kernels/x86/elementwise_compute.cc @@ -12,113 +12,8 @@ // See the License for the specific language governing permissions and // limitations under the License. 
-#include "paddle/fluid/framework/eigen.h" -#include "paddle/fluid/framework/operator.h" -#include "paddle/fluid/lite/core/kernel.h" -#include "paddle/fluid/lite/core/op_registry.h" -#include "paddle/fluid/operators/activation_op.h" -#include "paddle/fluid/operators/elementwise/elementwise_op.h" -#include "paddle/fluid/operators/elementwise/elementwise_op_function.h" +#include "paddle/fluid/lite/kernels/x86/elementwise_compute.h" -namespace paddle { -namespace lite { -namespace kernels { -namespace x86 { - -template -struct SubFunctor { - inline HOSTDEVICE T operator()(T a, T b) const { return a - b; } -}; - -template -struct AddFunctor { - inline HOSTDEVICE T operator()(T a, T b) const { return a + b; } -}; - -template -class ElementwiseSubCompute - : public KernelLite { - public: - using param_t = operators::ElementwiseParam; - - void Run() override { - auto& param = *param_.get_mutable(); - auto& context = ctx_->As(); - CHECK(context.x86_device_context()); - - param.Out->template mutable_data(); - paddle::operators::ElementwiseComputeEx, - platform::CPUDeviceContext, T>( - *context.x86_execution_context(), ¶m.X->raw_tensor(), - ¶m.Y->raw_tensor(), param.axis, SubFunctor(), - ¶m.Out->raw_tensor()); - } - - virtual ~ElementwiseSubCompute() = default; -}; - -template -struct SubGradDX { - T operator()(T x, T y, T out, T dout) const { return dout; } -}; - -template -struct SubGradDY { - T operator()(T x, T y, T out, T dout) const { return -dout; } -}; - -template -class ElementwiseSubGradCompute - : public KernelLite { - public: - using param_t = operators::ElementwiseGradParam; - void Run() override { - auto& param = *param_.get_mutable(); - auto& context = ctx_->As(); - CHECK(context.x86_device_context()); - - param.X_grad->template mutable_data(); - param.Y_grad->template mutable_data(); - // skip out, x, y - auto dout = param.Out_grad->raw_tensor(); - auto dx = param.X_grad->raw_tensor(); - auto dy = param.Y_grad->raw_tensor(); - auto& skip = dout; - paddle::operators::ElemwiseExplicitGradCompute< - platform::CPUDeviceContext, T, SubGradDX, SubGradDY>( - *context.x86_execution_context(), skip, skip, skip, dout, param.axis, - &dx, &dy, SubGradDX(), SubGradDY()); - } - - virtual ~ElementwiseSubGradCompute() = default; -}; - -template -class ElementwiseAddCompute - : public KernelLite { - public: - using param_t = operators::ElementwiseParam; - void Run() override { - auto& param = *param_.get_mutable(); - auto& context = ctx_->As(); - CHECK(context.x86_device_context()); - param.Out->template mutable_data(); - paddle::operators::ElementwiseComputeEx, - platform::CPUDeviceContext, T>( - *context.x86_execution_context(), ¶m.X->raw_tensor(), - ¶m.Y->raw_tensor(), param.axis, AddFunctor(), - ¶m.Out->raw_tensor()); - } - - virtual ~ElementwiseAddCompute() = default; -}; - -} // namespace x86 -} // namespace kernels -} // namespace lite -} // namespace paddle - -// float REGISTER_LITE_KERNEL(elementwise_sub, kX86, kFloat, kNCHW, paddle::lite::kernels::x86::ElementwiseSubCompute, def) diff --git a/paddle/fluid/lite/kernels/x86/elementwise_compute.h b/paddle/fluid/lite/kernels/x86/elementwise_compute.h new file mode 100644 index 0000000000000000000000000000000000000000..5e46bf8d4525de30b7308d54b30bf9d71b9f2921 --- /dev/null +++ b/paddle/fluid/lite/kernels/x86/elementwise_compute.h @@ -0,0 +1,120 @@ +// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. 
+// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +#pragma once + +#include "paddle/fluid/framework/eigen.h" +#include "paddle/fluid/framework/operator.h" +#include "paddle/fluid/lite/core/kernel.h" +#include "paddle/fluid/lite/core/op_registry.h" +#include "paddle/fluid/operators/activation_op.h" +#include "paddle/fluid/operators/elementwise/elementwise_op.h" +#include "paddle/fluid/operators/elementwise/elementwise_op_function.h" + +namespace paddle { +namespace lite { +namespace kernels { +namespace x86 { + +template +struct SubFunctor { + inline HOSTDEVICE T operator()(T a, T b) const { return a - b; } +}; + +template +struct AddFunctor { + inline HOSTDEVICE T operator()(T a, T b) const { return a + b; } +}; + +template +class ElementwiseSubCompute + : public KernelLite { + public: + using param_t = operators::ElementwiseParam; + + void Run() override { + auto& param = *param_.get_mutable(); + auto& context = ctx_->As(); + CHECK(context.x86_device_context()); + + param.Out->template mutable_data(); + paddle::operators::ElementwiseComputeEx, + platform::CPUDeviceContext, T>( + *context.x86_execution_context(), ¶m.X->raw_tensor(), + ¶m.Y->raw_tensor(), param.axis, SubFunctor(), + ¶m.Out->raw_tensor()); + } + + virtual ~ElementwiseSubCompute() = default; +}; + +template +struct SubGradDX { + T operator()(T x, T y, T out, T dout) const { return dout; } +}; + +template +struct SubGradDY { + T operator()(T x, T y, T out, T dout) const { return -dout; } +}; + +template +class ElementwiseSubGradCompute + : public KernelLite { + public: + using param_t = operators::ElementwiseGradParam; + void Run() override { + auto& param = *param_.get_mutable(); + auto& context = ctx_->As(); + CHECK(context.x86_device_context()); + + param.X_grad->template mutable_data(); + param.Y_grad->template mutable_data(); + // skip out, x, y + auto dout = param.Out_grad->raw_tensor(); + auto dx = param.X_grad->raw_tensor(); + auto dy = param.Y_grad->raw_tensor(); + auto& skip = dout; + paddle::operators::ElemwiseExplicitGradCompute< + platform::CPUDeviceContext, T, SubGradDX, SubGradDY>( + *context.x86_execution_context(), skip, skip, skip, dout, param.axis, + &dx, &dy, SubGradDX(), SubGradDY()); + } + + virtual ~ElementwiseSubGradCompute() = default; +}; + +template +class ElementwiseAddCompute + : public KernelLite { + public: + using param_t = operators::ElementwiseParam; + void Run() override { + auto& param = *param_.get_mutable(); + auto& context = ctx_->As(); + CHECK(context.x86_device_context()); + param.Out->template mutable_data(); + paddle::operators::ElementwiseComputeEx, + platform::CPUDeviceContext, T>( + *context.x86_execution_context(), ¶m.X->raw_tensor(), + ¶m.Y->raw_tensor(), param.axis, AddFunctor(), + ¶m.Out->raw_tensor()); + } + + virtual ~ElementwiseAddCompute() = default; +}; + +} // namespace x86 +} // namespace kernels +} // namespace lite +} // namespace paddle diff --git a/paddle/fluid/lite/kernels/x86/elementwise_compute_test.cc 
b/paddle/fluid/lite/kernels/x86/elementwise_compute_test.cc new file mode 100644 index 0000000000000000000000000000000000000000..abb28e2bb5868e6188c13c6ae145de74881801ae --- /dev/null +++ b/paddle/fluid/lite/kernels/x86/elementwise_compute_test.cc @@ -0,0 +1,86 @@ +// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "paddle/fluid/lite/kernels/x86/elementwise_compute.h" +#include +#include +#include +#include "paddle/fluid/lite/core/op_registry.h" + +namespace paddle { +namespace lite { +namespace kernels { +namespace x86 { + +TEST(elementwise_add_x86, retrive_op) { + auto elementwise_add = + KernelRegistry::Global().Create( + "elementwise_add"); + ASSERT_FALSE(elementwise_add.empty()); + ASSERT_TRUE(elementwise_add.front()); +} + +TEST(elementwise_add_x86, init) { + ElementwiseAddCompute elementwise_add; + ASSERT_EQ(elementwise_add.precision(), PRECISION(kFloat)); + ASSERT_EQ(elementwise_add.target(), TARGET(kX86)); +} + +TEST(elementwise_add_x86, run_test) { + lite::Tensor x, y, out; + constexpr int batch_size = 1; + std::vector x_shape{batch_size, 3, 2, 2}; + x.Resize(lite::DDim(x_shape)); + std::vector y_shape{batch_size, 3, 2, 2}; + y.Resize(lite::DDim(y_shape)); + std::vector out_shape{batch_size, 3, 2, 2}; + out.Resize(lite::DDim(out_shape)); + + auto x_data = x.mutable_data(); + auto y_data = y.mutable_data(); + auto out_data = out.mutable_data(); + + for (int64_t i = 0; i < x.dims().production(); i++) { + x_data[i] = 1; + } + for (int64_t i = 0; i < y.dims().production(); i++) { + y_data[i] = 2; + } + + // ElementwiseAddCompute elementwise_add; + ElementwiseAddCompute elementwise_add; + operators::ElementwiseParam param; + + param.X = &x; + param.Y = &y; + param.Out = &out; + + std::unique_ptr ctx(new KernelContext); + ctx->As(); + elementwise_add.SetParam(param); + elementwise_add.SetContext(std::move(ctx)); + elementwise_add.Run(); + + LOG(INFO) << "output: "; + for (int i = 0; i < out.dims().production(); i++) { + LOG(INFO) << out_data[i]; + } +} + +} // namespace x86 +} // namespace kernels +} // namespace lite +} // namespace paddle + +USE_LITE_KERNEL(elementwise_add, kX86, kFloat, kNCHW, def); diff --git a/paddle/fluid/lite/kernels/x86/fc_compute.cc b/paddle/fluid/lite/kernels/x86/fc_compute.cc index dad37febc80433f0cf3a6859c985e22a5425b405..4d5399a90b2885046cb08948e32d1bb864876728 100644 --- a/paddle/fluid/lite/kernels/x86/fc_compute.cc +++ b/paddle/fluid/lite/kernels/x86/fc_compute.cc @@ -12,89 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. 
-#include -#include "paddle/fluid/framework/eigen.h" -#include "paddle/fluid/framework/operator.h" -#include "paddle/fluid/lite/core/kernel.h" -#include "paddle/fluid/lite/core/op_lite.h" -#include "paddle/fluid/lite/core/op_registry.h" -#include "paddle/fluid/lite/core/type_system.h" -#include "paddle/fluid/lite/operators/fc_op.h" - -namespace paddle { -namespace lite { -namespace kernels { -namespace x86 { - -template -void fc_compute_eigen(const T* x, int x_h, int x_w, // - const T* w, int w_h, int w_w, // - const T* b, // - T* out) { - using matrix_t = - Eigen::Matrix; - - Eigen::Map X(x, x_h, x_w); - Eigen::Map W(w, w_h, w_w); - Eigen::Map Out(out, x_h, w_w); - - Out = X * W; - - if (b) { - Eigen::Map> B(b, w_w); - Out = Out.array().rowwise() + B.transpose().array(); - } -} - -template -void fc_compute_naive(const T* x, int x_h, int x_w, // - const T* w, int w_h, int w_w, // - const T* b, // - T* out) { - CHECK_EQ(x_w, w_h); - // out shape: (x_h, w_w) - memset(out, 0, x_h * w_w * sizeof(T)); - for (int i = 0; i < x_h; i++) { - for (int j = 0; j < w_w; j++) { - T tmp = static_cast(0); - for (int k = 0; k < x_w; k++) { - tmp += x[i * x_w + k] * w[k * w_w + j]; - } - out[i * w_w + j] = tmp + b[j]; - } - } -} - -template -class FcCompute : public KernelLite { - public: - using param_t = operators::FcParam; - - void Run() override { - auto& param = *param_.get_mutable(); - CHECK_GE(param.input->dims().size(), 2UL); - CHECK_EQ(param.output->dims().size(), 2UL); - - fc_compute_eigen( - param.input->data(), // x - param.input->dims().Slice(0, param.in_num_col_dims).production(), - param.input->dims() - .Slice(param.in_num_col_dims, param.input->dims().size()) - .production(), - param.w->data(), // w - param.w->dims()[0], // w_h - param.w->dims()[1], // w_w - param.bias->data(), // b - param.output->mutable_data()); - } - - virtual ~FcCompute() = default; -}; - -} // namespace x86 -} // namespace kernels -} // namespace lite -} // namespace paddle +#include "paddle/fluid/lite/kernels/x86/fc_compute.h" REGISTER_LITE_KERNEL(fc, kX86, kFloat, kNCHW, paddle::lite::kernels::x86::FcCompute, def) diff --git a/paddle/fluid/lite/kernels/x86/fc_compute.h b/paddle/fluid/lite/kernels/x86/fc_compute.h new file mode 100644 index 0000000000000000000000000000000000000000..dc71ca25601c24ca55b1730edf6bd354eadfddf9 --- /dev/null +++ b/paddle/fluid/lite/kernels/x86/fc_compute.h @@ -0,0 +1,98 @@ +// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+#pragma once + +#include +#include "paddle/fluid/framework/eigen.h" +#include "paddle/fluid/framework/operator.h" +#include "paddle/fluid/lite/core/kernel.h" +#include "paddle/fluid/lite/core/op_lite.h" +#include "paddle/fluid/lite/core/op_registry.h" +#include "paddle/fluid/lite/core/type_system.h" +#include "paddle/fluid/lite/operators/fc_op.h" + +namespace paddle { +namespace lite { +namespace kernels { +namespace x86 { + +template +void fc_compute_eigen(const T* x, int x_h, int x_w, // + const T* w, int w_h, int w_w, // + const T* b, // + T* out) { + using matrix_t = + Eigen::Matrix; + + Eigen::Map X(x, x_h, x_w); + Eigen::Map W(w, w_h, w_w); + Eigen::Map Out(out, x_h, w_w); + + Out = X * W; + + if (b) { + Eigen::Map> B(b, w_w); + Out = Out.array().rowwise() + B.transpose().array(); + } +} + +template +void fc_compute_naive(const T* x, int x_h, int x_w, // + const T* w, int w_h, int w_w, // + const T* b, // + T* out) { + CHECK_EQ(x_w, w_h); + // out shape: (x_h, w_w) + memset(out, 0, x_h * w_w * sizeof(T)); + for (int i = 0; i < x_h; i++) { + for (int j = 0; j < w_w; j++) { + T tmp = static_cast(0); + for (int k = 0; k < x_w; k++) { + tmp += x[i * x_w + k] * w[k * w_w + j]; + } + out[i * w_w + j] = tmp + b[j]; + } + } +} + +template +class FcCompute : public KernelLite { + public: + using param_t = operators::FcParam; + + void Run() override { + auto& param = *param_.get_mutable(); + CHECK_GE(param.input->dims().size(), 2UL); + CHECK_EQ(param.output->dims().size(), 2UL); + + fc_compute_eigen( + param.input->data(), // x + param.input->dims().Slice(0, param.in_num_col_dims).production(), + param.input->dims() + .Slice(param.in_num_col_dims, param.input->dims().size()) + .production(), + param.w->data(), // w + param.w->dims()[0], // w_h + param.w->dims()[1], // w_w + param.bias->data(), // b + param.output->mutable_data()); + } + + virtual ~FcCompute() = default; +}; + +} // namespace x86 +} // namespace kernels +} // namespace lite +} // namespace paddle diff --git a/paddle/fluid/lite/kernels/x86/fc_compute_test.cc b/paddle/fluid/lite/kernels/x86/fc_compute_test.cc new file mode 100644 index 0000000000000000000000000000000000000000..ed6016d341e830c2d859c246dfbca3c0f20c9117 --- /dev/null +++ b/paddle/fluid/lite/kernels/x86/fc_compute_test.cc @@ -0,0 +1,100 @@ +// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+#include "paddle/fluid/lite/kernels/x86/fc_compute.h" +#include +#include +#include "paddle/fluid/lite/core/op_registry.h" + +namespace paddle { +namespace lite { +namespace kernels { +namespace x86 { + +TEST(fc_x86, retrive_op) { + auto fc = + KernelRegistry::Global().Create("fc"); + ASSERT_FALSE(fc.empty()); + ASSERT_TRUE(fc.front()); +} + +TEST(fc_x86, init) { + FcCompute fc; + ASSERT_EQ(fc.precision(), PRECISION(kFloat)); + ASSERT_EQ(fc.target(), TARGET(kX86)); +} + +TEST(fc_x86, run_test) { + lite::Tensor x, w, b, out; + constexpr int batch_size = 2; + std::vector x_shape{batch_size, 3}; + x.Resize(lite::DDim(x_shape)); + std::vector w_shape{3, 4}; + w.Resize(lite::DDim(w_shape)); + std::vector b_shape{1, 4}; + b.Resize(lite::DDim(b_shape)); + std::vector out_shape{1, 4}; + out.Resize(lite::DDim(out_shape)); + + auto x_data = x.mutable_data(); + auto w_data = w.mutable_data(); + auto b_data = b.mutable_data(); + auto out_data = out.mutable_data(); + + for (int64_t i = 0; i < x.dims().production(); i++) { + x_data[i] = static_cast(i); + } + for (int64_t i = 0; i < w.dims().production(); i++) { + w_data[i] = static_cast(i); + } + for (int64_t i = 0; i < b.dims().production(); i++) { + b_data[i] = static_cast(i); + } + + /* lite::x86::math::fc_compute_eigen(x_data, batch_size, 3, // + w_data, 3, 4, // + b_data, ref_data); */ + + // FcCompute fc; + FcCompute fc; + operators::FcParam param; + + param.in_num_col_dims = 1; + param.input = &x; + param.w = &w; + param.bias = &b; + param.output = &out; + param.in_mat_dims = x.dims(); + + // std::unique_ptr ctx(new KernelContext); + // ctx->As(); + fc.SetParam(param); + // fc.SetContext(std::move(ctx)); + fc.Run(); + + VLOG(3) << "output vs ref"; + for (int i = 0; i < out.dims().production(); i++) { + VLOG(3) << out_data[i]; + } + + /* for (int i = 0; i < out.dims().product(); ++i) { + EXPECT_NEAR(out_data[i], ref_data[i], 1e-5); + }*/ +} + +} // namespace x86 +} // namespace kernels +} // namespace lite +} // namespace paddle + +USE_LITE_KERNEL(fc, kX86, kFloat, kNCHW, def); diff --git a/paddle/fluid/lite/kernels/x86/mul_compute.cc b/paddle/fluid/lite/kernels/x86/mul_compute.cc index ad009893c8a7c78c17218d66790d292a5030535c..01dd2171061c44cab6d9cbeb306473eb5349c89e 100644 --- a/paddle/fluid/lite/kernels/x86/mul_compute.cc +++ b/paddle/fluid/lite/kernels/x86/mul_compute.cc @@ -12,122 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. -#include "paddle/fluid/lite/core/kernel.h" -#include "paddle/fluid/lite/core/op_registry.h" -#include "paddle/fluid/lite/core/types.h" -#include "paddle/fluid/operators/math/blas.h" - -namespace paddle { -namespace lite { -namespace kernels { -namespace x86 { - -using Tensor = framework::Tensor; - -template -class MulCompute : public KernelLite { - public: - using param_t = operators::MulParam; - - void Run() override { - auto& context = ctx_->As(); - auto& param = *param_.get_mutable(); - CHECK(context.x86_device_context()); - - param.output->template mutable_data(); - - auto* x = ¶m.x->raw_tensor(); - auto* y = ¶m.y->raw_tensor(); - - const Tensor x_matrix = x->dims().size() > 2 ? framework::ReshapeToMatrix( - *x, param.x_num_col_dims) - : *x; - const Tensor y_matrix = y->dims().size() > 2 ? 
framework::ReshapeToMatrix( - *y, param.y_num_col_dims) - : *y; - - auto* z = ¶m.output->raw_tensor(); - auto z_dim = z->dims(); - if (z_dim.size() != 2) { - z->Resize({x_matrix.dims()[0], y_matrix.dims()[1]}); - } - - auto blas = paddle::operators::math::GetBlas( - *context.x86_device_context()); - - blas.MatMul(x_matrix, y_matrix, z); - if (z_dim.size() != 2) { - z->Resize(z_dim); - } - } - - virtual ~MulCompute() = default; -}; - -template -class MulGradCompute : public KernelLite { - public: - void Run() override { - auto& context = ctx_->As(); - auto& param = *param_.get_mutable(); - CHECK(context.x86_device_context()); - - auto* x = ¶m.x->raw_tensor(); - auto* y = ¶m.y->raw_tensor(); - auto x_matrix = x->dims().size() > 2 - ? framework::ReshapeToMatrix(*x, param.x_num_col_dims) - : static_cast(*x); - auto y_matrix = y->dims().size() > 2 - ? framework::ReshapeToMatrix(*y, param.y_num_col_dims) - : static_cast(*y); - auto* dout = ¶m.output_grad->raw_tensor(); - - Tensor dout_mat; - dout_mat.ShareDataWith(*dout); - dout_mat.Resize( - {framework::flatten_to_2d(x->dims(), param.x_num_col_dims)[0], - framework::flatten_to_2d(y->dims(), param.y_num_col_dims)[1]}); - - auto* dx = ¶m.x_grad->raw_tensor(); - auto* dy = ¶m.y_grad->raw_tensor(); - - if (dx != nullptr) { - dx->set_lod(x->lod()); - } - if (dy != nullptr) { - dy->set_lod(y->lod()); - } - - auto blas = paddle::operators::math::GetBlas( - *context.x86_device_context()); - if (dx) { - // dx->mutable_data(context.x86_device_context->GetPlace()); - param.x_grad->template mutable_data(); - Tensor dx_matrix = dx->dims().size() > 2 ? framework::ReshapeToMatrix( - *dx, param.x_num_col_dims) - : *dx; - - // dx = dout * y'. dx: M x K, dout : M x N, y : K x N - blas.MatMul(dout_mat, false, y_matrix, true, &dx_matrix); - } - if (dy) { - // dy->yutable_data(context.x86_device_context->GetPlace()); - param.y_grad->template mutable_data(); - Tensor dy_matrix = dy->dims().size() > 2 ? framework::ReshapeToMatrix( - *dy, param.y_num_col_dims) - : *dy; - // dy = x' * dout. dy K x N, dout : M x N, x : M x K - blas.MatMul(x_matrix, true, dout_mat, false, &dy_matrix); - } - } - - virtual ~MulGradCompute() = default; -}; - -} // namespace x86 -} // namespace kernels -} // namespace lite -} // namespace paddle +#include "paddle/fluid/lite/kernels/x86/mul_compute.h" REGISTER_LITE_KERNEL(mul, kX86, kFloat, kNCHW, paddle::lite::kernels::x86::MulCompute, def) diff --git a/paddle/fluid/lite/kernels/x86/mul_compute.h b/paddle/fluid/lite/kernels/x86/mul_compute.h new file mode 100644 index 0000000000000000000000000000000000000000..96f90842f69f12a1c7baee9f66f055bb21d73126 --- /dev/null +++ b/paddle/fluid/lite/kernels/x86/mul_compute.h @@ -0,0 +1,131 @@ +// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+#pragma once
+
+#include "paddle/fluid/lite/core/kernel.h"
+#include "paddle/fluid/lite/core/op_registry.h"
+#include "paddle/fluid/lite/core/types.h"
+#include "paddle/fluid/operators/math/blas.h"
+
+namespace paddle {
+namespace lite {
+namespace kernels {
+namespace x86 {
+
+using Tensor = framework::Tensor;
+
+template <typename T>
+class MulCompute : public KernelLite<TARGET(kX86), PRECISION(kFloat)> {
+ public:
+  using param_t = operators::MulParam;
+
+  void Run() override {
+    auto& context = ctx_->As<X86Context>();
+    auto& param = *param_.get_mutable<param_t>();
+    CHECK(context.x86_device_context());
+
+    param.output->template mutable_data<T>();
+
+    auto* x = &param.x->raw_tensor();
+    auto* y = &param.y->raw_tensor();
+
+    const Tensor x_matrix = x->dims().size() > 2 ? framework::ReshapeToMatrix(
+                                                       *x, param.x_num_col_dims)
+                                                 : *x;
+    const Tensor y_matrix = y->dims().size() > 2 ? framework::ReshapeToMatrix(
+                                                       *y, param.y_num_col_dims)
+                                                 : *y;
+
+    auto* z = &param.output->raw_tensor();
+    auto z_dim = z->dims();
+    if (z_dim.size() != 2) {
+      z->Resize({x_matrix.dims()[0], y_matrix.dims()[1]});
+    }
+
+    auto blas = paddle::operators::math::GetBlas<platform::CPUDeviceContext, T>(
+        *context.x86_device_context());
+
+    blas.MatMul(x_matrix, y_matrix, z);
+    if (z_dim.size() != 2) {
+      z->Resize(z_dim);
+    }
+  }
+
+  virtual ~MulCompute() = default;
+};
+
+template <typename T>
+class MulGradCompute : public KernelLite<TARGET(kX86), PRECISION(kFloat)> {
+ public:
+  void Run() override {
+    auto& context = ctx_->As<X86Context>();
+    auto& param = *param_.get_mutable<operators::MulGradParam>();
+    CHECK(context.x86_device_context());
+
+    auto* x = &param.x->raw_tensor();
+    auto* y = &param.y->raw_tensor();
+    auto x_matrix = x->dims().size() > 2
+                        ? framework::ReshapeToMatrix(*x, param.x_num_col_dims)
+                        : static_cast<const Tensor&>(*x);
+    auto y_matrix = y->dims().size() > 2
+                        ? framework::ReshapeToMatrix(*y, param.y_num_col_dims)
+                        : static_cast<const Tensor&>(*y);
+    auto* dout = &param.output_grad->raw_tensor();
+
+    Tensor dout_mat;
+    dout_mat.ShareDataWith(*dout);
+    dout_mat.Resize(
+        {framework::flatten_to_2d(x->dims(), param.x_num_col_dims)[0],
+         framework::flatten_to_2d(y->dims(), param.y_num_col_dims)[1]});
+
+    auto* dx = &param.x_grad->raw_tensor();
+    auto* dy = &param.y_grad->raw_tensor();
+
+    if (dx != nullptr) {
+      dx->set_lod(x->lod());
+    }
+    if (dy != nullptr) {
+      dy->set_lod(y->lod());
+    }
+
+    auto blas = paddle::operators::math::GetBlas<platform::CPUDeviceContext, T>(
+        *context.x86_device_context());
+    if (dx) {
+      // dx->mutable_data(context.x86_device_context->GetPlace());
+      param.x_grad->template mutable_data<T>();
+      Tensor dx_matrix = dx->dims().size() > 2 ? framework::ReshapeToMatrix(
+                                                     *dx, param.x_num_col_dims)
+                                               : *dx;
+
+      // dx = dout * y'. dx: M x K, dout : M x N, y : K x N
+      blas.MatMul(dout_mat, false, y_matrix, true, &dx_matrix);
+    }
+    if (dy) {
+      // dy->yutable_data(context.x86_device_context->GetPlace());
+      param.y_grad->template mutable_data<T>();
+      Tensor dy_matrix = dy->dims().size() > 2 ? framework::ReshapeToMatrix(
+                                                     *dy, param.y_num_col_dims)
+                                               : *dy;
+      // dy = x' * dout. dy K x N, dout : M x N, x : M x K
+      blas.MatMul(x_matrix, true, dout_mat, false, &dy_matrix);
+    }
+  }
+
+  virtual ~MulGradCompute() = default;
+};
+
+}  // namespace x86
+}  // namespace kernels
+}  // namespace lite
+}  // namespace paddle
diff --git a/paddle/fluid/lite/kernels/x86/mul_compute_test.cc b/paddle/fluid/lite/kernels/x86/mul_compute_test.cc
new file mode 100644
index 0000000000000000000000000000000000000000..50854d29d0902baf28770a5320daee92408732c2
--- /dev/null
+++ b/paddle/fluid/lite/kernels/x86/mul_compute_test.cc
@@ -0,0 +1,84 @@
+// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
+// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "paddle/fluid/lite/kernels/x86/mul_compute.h" +#include +#include +#include +#include "paddle/fluid/lite/core/op_registry.h" + +namespace paddle { +namespace lite { +namespace kernels { +namespace x86 { + +TEST(mul_x86, retrive_op) { + auto mul = + KernelRegistry::Global().Create("mul"); + ASSERT_FALSE(mul.empty()); + ASSERT_TRUE(mul.front()); +} + +TEST(mul_x86, init) { + MulCompute mul; + ASSERT_EQ(mul.precision(), PRECISION(kFloat)); + ASSERT_EQ(mul.target(), TARGET(kX86)); +} + +TEST(mul_x86, run_test) { + lite::Tensor x, y, out; + constexpr int batch_size = 1; + std::vector x_shape{batch_size, 3}; + x.Resize(lite::DDim(x_shape)); + std::vector y_shape{3, 4}; + y.Resize(lite::DDim(y_shape)); + std::vector out_shape{batch_size, 4}; + out.Resize(lite::DDim(out_shape)); + + auto x_data = x.mutable_data(); + auto y_data = y.mutable_data(); + auto out_data = out.mutable_data(); + + for (int64_t i = 0; i < x.dims().production(); i++) { + x_data[i] = static_cast(i); + } + for (int64_t i = 0; i < y.dims().production(); i++) { + y_data[i] = static_cast(i); + } + // MulCompute mul; + MulCompute mul; + operators::MulParam param; + + param.x = &x; + param.y = &y; + param.output = &out; + + std::unique_ptr ctx(new KernelContext); + ctx->As(); + mul.SetContext(std::move(ctx)); + mul.SetParam(param); + mul.Run(); + + LOG(INFO) << "output: "; + for (int i = 0; i < out.dims().production(); i++) { + LOG(INFO) << out_data[i]; + } +} + +} // namespace x86 +} // namespace kernels +} // namespace lite +} // namespace paddle + +USE_LITE_KERNEL(mul, kX86, kFloat, kNCHW, def); diff --git a/paddle/fluid/lite/kernels/x86/pool_compute.cc b/paddle/fluid/lite/kernels/x86/pool_compute.cc index 745c2a787899070de9ab50601b7147c074b3d1c2..ee1bb9dbd5d57a82df6dfdda8997a39d1555d01b 100644 --- a/paddle/fluid/lite/kernels/x86/pool_compute.cc +++ b/paddle/fluid/lite/kernels/x86/pool_compute.cc @@ -12,69 +12,10 @@ // See the License for the specific language governing permissions and // limitations under the License. 
-#include -#include "paddle/fluid/framework/eigen.h" -#include "paddle/fluid/lite/core/kernel.h" -#include "paddle/fluid/lite/core/op_registry.h" -#include "paddle/fluid/lite/core/types.h" -#include "paddle/fluid/operators/math/math_function.h" -#include "paddle/fluid/operators/math/pooling.h" - -namespace paddle { -namespace lite { -namespace kernels { -namespace x86 { - -template -class PoolCompute : public KernelLite { - public: - using param_t = operators::PoolParam; - void Run() override { - auto& param = *param_.get_mutable(); - if (param.global_pooling) { - for (size_t i = 0; i < param.ksize.size(); ++i) { - param.paddings[i] = 0; - param.ksize[i] = static_cast(param.x->dims()[i + 2]); - } - } - switch (param.ksize.size()) { - case 2: { - if (param.pooling_type == "max") { - paddle::operators::math::Pool2dFunctor< - platform::CPUDeviceContext, paddle::operators::math::MaxPool, - T> - pool2d_forward; - paddle::operators::math::MaxPool pool_process; - pool2d_forward(platform::CPUDeviceContext(), param.x->raw_tensor(), - param.ksize, param.strides, param.paddings, - pool_process, true, false, - &(param.output->raw_tensor())); - } else if (param.pooling_type == "avg") { - paddle::operators::math::Pool2dFunctor< - platform::CPUDeviceContext, paddle::operators::math::AvgPool, - T> - pool2d_forward; - paddle::operators::math::AvgPool pool_process; - pool2d_forward(platform::CPUDeviceContext(), param.x->raw_tensor(), - param.ksize, param.strides, param.paddings, - pool_process, param.exclusive, param.adaptive, - &(param.output->raw_tensor())); - } - } break; - case 3: { - } break; - } - } - virtual ~PoolCompute() = default; -}; - -} // namespace x86 -} // namespace kernels -} // namespace lite -} // namespace paddle +#include "paddle/fluid/lite/kernels/x86/pool_compute.h" REGISTER_LITE_KERNEL(pool2d, kX86, kFloat, kNCHW, paddle::lite::kernels::x86::PoolCompute, def) - .BindInput("X", {LiteType::GetTensorTy(TARGET(kX86))}) + .BindInput("x", {LiteType::GetTensorTy(TARGET(kX86))}) .BindOutput("Out", {LiteType::GetTensorTy(TARGET(kX86))}) .Finalize(); diff --git a/paddle/fluid/lite/kernels/x86/pool_compute.h b/paddle/fluid/lite/kernels/x86/pool_compute.h new file mode 100644 index 0000000000000000000000000000000000000000..d024c5b84e38ee5791982c7f49348cb05c8d41ca --- /dev/null +++ b/paddle/fluid/lite/kernels/x86/pool_compute.h @@ -0,0 +1,75 @@ +// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+#pragma once + +#include +#include "paddle/fluid/framework/eigen.h" +#include "paddle/fluid/lite/core/kernel.h" +#include "paddle/fluid/lite/core/op_registry.h" +#include "paddle/fluid/lite/core/types.h" +#include "paddle/fluid/operators/math/math_function.h" +#include "paddle/fluid/operators/math/pooling.h" + +namespace paddle { +namespace lite { +namespace kernels { +namespace x86 { + +template +class PoolCompute : public KernelLite { + public: + using param_t = operators::PoolParam; + void Run() override { + auto& param = *param_.get_mutable(); + if (param.global_pooling) { + for (size_t i = 0; i < param.ksize.size(); ++i) { + param.paddings[i] = 0; + param.ksize[i] = static_cast(param.x->dims()[i + 2]); + } + } + switch (param.ksize.size()) { + case 2: { + if (param.pooling_type == "max") { + paddle::operators::math::Pool2dFunctor< + platform::CPUDeviceContext, paddle::operators::math::MaxPool, + T> + pool2d_forward; + paddle::operators::math::MaxPool pool_process; + pool2d_forward(platform::CPUDeviceContext(), param.x->raw_tensor(), + param.ksize, param.strides, param.paddings, + pool_process, true, false, + &(param.output->raw_tensor())); + } else if (param.pooling_type == "avg") { + paddle::operators::math::Pool2dFunctor< + platform::CPUDeviceContext, paddle::operators::math::AvgPool, + T> + pool2d_forward; + paddle::operators::math::AvgPool pool_process; + pool2d_forward(platform::CPUDeviceContext(), param.x->raw_tensor(), + param.ksize, param.strides, param.paddings, + pool_process, param.exclusive, param.adaptive, + &(param.output->raw_tensor())); + } + } break; + case 3: { + } break; + } + } + virtual ~PoolCompute() = default; +}; + +} // namespace x86 +} // namespace kernels +} // namespace lite +} // namespace paddle diff --git a/paddle/fluid/lite/kernels/x86/pool_compute_test.cc b/paddle/fluid/lite/kernels/x86/pool_compute_test.cc new file mode 100644 index 0000000000000000000000000000000000000000..b3d833509109b887b22dba60b2e16ba5698f2b45 --- /dev/null +++ b/paddle/fluid/lite/kernels/x86/pool_compute_test.cc @@ -0,0 +1,79 @@ +// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include "paddle/fluid/lite/kernels/x86/pool_compute.h" +#include +#include +#include +#include "paddle/fluid/lite/core/op_registry.h" + +namespace paddle { +namespace lite { +namespace kernels { +namespace x86 { + +TEST(pool_x86, retrive_op) { + auto pool2d = + KernelRegistry::Global().Create( + "pool2d"); + ASSERT_FALSE(pool2d.empty()); + ASSERT_TRUE(pool2d.front()); +} + +TEST(pool2d_x86, init) { + PoolCompute pool2d; + ASSERT_EQ(pool2d.precision(), PRECISION(kFloat)); + ASSERT_EQ(pool2d.target(), TARGET(kX86)); +} + +TEST(pool2d_x86, run_test) { + lite::Tensor x, out; + constexpr int batch_size = 1; + std::vector x_shape{batch_size, 3, 4, 4}; + x.Resize(lite::DDim(x_shape)); + std::vector out_shape{batch_size, 3, 2, 2}; + out.Resize(lite::DDim(out_shape)); + + auto x_data = x.mutable_data(); + auto out_data = out.mutable_data(); + + for (int64_t i = 0; i < x.dims().production(); i++) { + x_data[i] = static_cast(i); + } + + PoolCompute pool2d; + operators::PoolParam param; + + param.x = &x; + param.output = &out; + param.strides = {2, 2}; + param.paddings = {0, 0}; + param.ksize = {2, 2}; + param.pooling_type = "max"; + + pool2d.SetParam(param); + pool2d.Run(); + + LOG(INFO) << "output: "; + for (int i = 0; i < out.dims().production(); i++) { + LOG(INFO) << out_data[i]; + } +} + +} // namespace x86 +} // namespace kernels +} // namespace lite +} // namespace paddle + +USE_LITE_KERNEL(pool2d, kX86, kFloat, kNCHW, def); diff --git a/paddle/fluid/lite/kernels/x86/relu_compute.cc b/paddle/fluid/lite/kernels/x86/relu_compute.cc index 52fffb579816cd70a748d59cb3750ebaaadb10c7..326df35beffc53122fc7af4526e2148ead92bdf9 100644 --- a/paddle/fluid/lite/kernels/x86/relu_compute.cc +++ b/paddle/fluid/lite/kernels/x86/relu_compute.cc @@ -12,42 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. -#include -#include "paddle/fluid/framework/eigen.h" -#include "paddle/fluid/framework/operator.h" -#include "paddle/fluid/lite/core/kernel.h" -#include "paddle/fluid/lite/core/op_lite.h" -#include "paddle/fluid/lite/core/op_registry.h" -#include "paddle/fluid/lite/core/type_system.h" -#include "paddle/fluid/lite/operators/relu_op.h" - -namespace paddle { -namespace lite { -namespace kernels { -namespace x86 { - -template -class ReluCompute : public KernelLite { - public: - using param_t = operators::ReluParam; - - void Run() override { - auto& param = *param_.get_mutable(); - auto n = param.input->dims().production(); - const float* input = param.input->data(); - float* output = param.output->mutable_data(); - for (int i = 0; i < n; i++) { - output[i] = std::max(0.f, input[i]); - } - } - - virtual ~ReluCompute() = default; -}; - -} // namespace x86 -} // namespace kernels -} // namespace lite -} // namespace paddle +#include "paddle/fluid/lite/kernels/x86/relu_compute.h" REGISTER_LITE_KERNEL(relu, kX86, kFloat, kNCHW, paddle::lite::kernels::x86::ReluCompute, def) diff --git a/paddle/fluid/lite/kernels/x86/relu_compute.h b/paddle/fluid/lite/kernels/x86/relu_compute.h new file mode 100644 index 0000000000000000000000000000000000000000..89458fad45e2ee8782039d6a04f499932267991b --- /dev/null +++ b/paddle/fluid/lite/kernels/x86/relu_compute.h @@ -0,0 +1,52 @@ +// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +#pragma once + +#include +#include +#include "paddle/fluid/framework/eigen.h" +#include "paddle/fluid/framework/operator.h" +#include "paddle/fluid/lite/core/kernel.h" +#include "paddle/fluid/lite/core/op_lite.h" +#include "paddle/fluid/lite/core/op_registry.h" +#include "paddle/fluid/lite/core/type_system.h" +#include "paddle/fluid/lite/operators/relu_op.h" + +namespace paddle { +namespace lite { +namespace kernels { +namespace x86 { + +template +class ReluCompute : public KernelLite { + public: + using param_t = operators::ReluParam; + + void Run() override { + auto& param = *param_.get_mutable(); + auto n = param.input->dims().production(); + const float* input = param.input->data(); + float* output = param.output->mutable_data(); + for (int i = 0; i < n; i++) { + output[i] = std::max(0.f, input[i]); + } + } + + virtual ~ReluCompute() = default; +}; + +} // namespace x86 +} // namespace kernels +} // namespace lite +} // namespace paddle diff --git a/paddle/fluid/lite/kernels/x86/relu_compute_test.cc b/paddle/fluid/lite/kernels/x86/relu_compute_test.cc new file mode 100644 index 0000000000000000000000000000000000000000..e868947bbd7383cbb8b0a10d475ff3dbb9a6485f --- /dev/null +++ b/paddle/fluid/lite/kernels/x86/relu_compute_test.cc @@ -0,0 +1,75 @@ +// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "paddle/fluid/lite/kernels/x86/relu_compute.h" +#include +#include +#include +#include "paddle/fluid/lite/core/op_registry.h" + +namespace paddle { +namespace lite { +namespace kernels { +namespace x86 { + +TEST(relu_x86, retrive_op) { + auto relu = + KernelRegistry::Global().Create("relu"); + ASSERT_FALSE(relu.empty()); + ASSERT_TRUE(relu.front()); +} + +TEST(relu_x86, init) { + ReluCompute relu; + ASSERT_EQ(relu.precision(), PRECISION(kFloat)); + ASSERT_EQ(relu.target(), TARGET(kX86)); +} + +TEST(relu_x86, run_test) { + lite::Tensor x, out; + constexpr int batch_size = 1; + std::vector x_shape{batch_size, 3, 2, 2}; + x.Resize(lite::DDim(x_shape)); + std::vector out_shape{batch_size, 3, 2, 2}; + out.Resize(lite::DDim(out_shape)); + + auto x_data = x.mutable_data(); + auto out_data = out.mutable_data(); + + for (int64_t i = 0; i < x.dims().production(); i++) { + int sign = i % 2 == 0 ? 
1 : -1; + x_data[i] = static_cast(i * sign); + } + // ReluCompute relu; + ReluCompute relu; + operators::ReluParam param; + + param.input = &x; + param.output = &out; + + relu.SetParam(param); + relu.Run(); + + LOG(INFO) << "output: "; + for (int i = 0; i < out.dims().production(); i++) { + LOG(INFO) << out_data[i]; + } +} + +} // namespace x86 +} // namespace kernels +} // namespace lite +} // namespace paddle + +USE_LITE_KERNEL(relu, kX86, kFloat, kNCHW, def); diff --git a/paddle/fluid/lite/kernels/x86/scale_compute.cc b/paddle/fluid/lite/kernels/x86/scale_compute.cc index 0135a6f614ef4bee841cf21ce946d82e5d50628a..9a71750cf1ed93f641b74e92cf1590be9dd75377 100644 --- a/paddle/fluid/lite/kernels/x86/scale_compute.cc +++ b/paddle/fluid/lite/kernels/x86/scale_compute.cc @@ -12,48 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. -#include -#include "paddle/fluid/framework/eigen.h" -#include "paddle/fluid/framework/operator.h" -#include "paddle/fluid/lite/core/kernel.h" -#include "paddle/fluid/lite/core/op_lite.h" -#include "paddle/fluid/lite/core/op_registry.h" -#include "paddle/fluid/lite/core/type_system.h" -#include "paddle/fluid/lite/operators/relu_op.h" - -namespace paddle { -namespace lite { -namespace kernels { -namespace x86 { - -template -void scale_compute(const T* x, T* out, int size, float scale, float bias, - bool bias_before) { - if (bias_before) bias *= scale; - for (int i = 0; i < size; i++) { - out[i] = x[i] * scale + bias; - } -} - -template -class ScaleCompute : public KernelLite { - public: - using param_t = operators::ScaleParam; - - void Run() override { - auto& param = *param_.get_mutable(); - scale_compute(param.x->data(), param.output->mutable_data(), - param.x->dims().production(), param.scale, param.bias, - param.bias_after_scale); - } - - virtual ~ScaleCompute() = default; -}; - -} // namespace x86 -} // namespace kernels -} // namespace lite -} // namespace paddle +#include "paddle/fluid/lite/kernels/x86/scale_compute.h" REGISTER_LITE_KERNEL(scale, kX86, kFloat, kNCHW, paddle::lite::kernels::x86::ScaleCompute, def) diff --git a/paddle/fluid/lite/kernels/x86/scale_compute.h b/paddle/fluid/lite/kernels/x86/scale_compute.h new file mode 100644 index 0000000000000000000000000000000000000000..dc54cc07bd81faae19e346a66e1f83edaa39b1e0 --- /dev/null +++ b/paddle/fluid/lite/kernels/x86/scale_compute.h @@ -0,0 +1,57 @@ +// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+#pragma once + +#include +#include "paddle/fluid/framework/eigen.h" +#include "paddle/fluid/framework/operator.h" +#include "paddle/fluid/lite/core/kernel.h" +#include "paddle/fluid/lite/core/op_lite.h" +#include "paddle/fluid/lite/core/op_registry.h" +#include "paddle/fluid/lite/core/type_system.h" +#include "paddle/fluid/lite/operators/relu_op.h" + +namespace paddle { +namespace lite { +namespace kernels { +namespace x86 { + +template +void scale_compute(const T* x, T* out, int size, float scale, float bias, + bool bias_before) { + if (bias_before) bias *= scale; + for (int i = 0; i < size; i++) { + out[i] = x[i] * scale + bias; + } +} + +template +class ScaleCompute : public KernelLite { + public: + using param_t = operators::ScaleParam; + + void Run() override { + auto& param = *param_.get_mutable(); + scale_compute(param.x->data(), param.output->mutable_data(), + param.x->dims().production(), param.scale, param.bias, + param.bias_after_scale); + } + + virtual ~ScaleCompute() = default; +}; + +} // namespace x86 +} // namespace kernels +} // namespace lite +} // namespace paddle diff --git a/paddle/fluid/lite/kernels/x86/scale_compute_test.cc b/paddle/fluid/lite/kernels/x86/scale_compute_test.cc new file mode 100644 index 0000000000000000000000000000000000000000..68d0e67cdf6770be1e09d8e5365e5045cd93c6b5 --- /dev/null +++ b/paddle/fluid/lite/kernels/x86/scale_compute_test.cc @@ -0,0 +1,76 @@ +// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include "paddle/fluid/lite/kernels/x86/scale_compute.h" +#include +#include +#include +#include "paddle/fluid/lite/core/op_registry.h" + +namespace paddle { +namespace lite { +namespace kernels { +namespace x86 { + +TEST(scale_x86, retrive_op) { + auto scale = + KernelRegistry::Global().Create("scale"); + ASSERT_FALSE(scale.empty()); + ASSERT_TRUE(scale.front()); +} + +TEST(scale_x86, init) { + ScaleCompute scale; + ASSERT_EQ(scale.precision(), PRECISION(kFloat)); + ASSERT_EQ(scale.target(), TARGET(kX86)); +} + +TEST(scale_x86, run_test) { + lite::Tensor x, y, out; + constexpr int batch_size = 1; + std::vector x_shape{batch_size, 3, 2, 2}; + x.Resize(lite::DDim(x_shape)); + std::vector out_shape{batch_size, 3, 2, 2}; + out.Resize(lite::DDim(out_shape)); + + auto x_data = x.mutable_data(); + auto out_data = out.mutable_data(); + + for (int64_t i = 0; i < x.dims().production(); i++) { + x_data[i] = static_cast(i); + } + // ScaleCompute scale; + ScaleCompute scale; + operators::ScaleParam param; + + param.x = &x; + param.scale = 0.5; + param.bias = 0; + param.output = &out; + + scale.SetParam(param); + scale.Run(); + + LOG(INFO) << "output: "; + for (int i = 0; i < out.dims().production(); i++) { + LOG(INFO) << out_data[i]; + } +} + +} // namespace x86 +} // namespace kernels +} // namespace lite +} // namespace paddle + +USE_LITE_KERNEL(scale, kX86, kFloat, kNCHW, def); diff --git a/paddle/fluid/lite/kernels/x86/softmax_compute.cc b/paddle/fluid/lite/kernels/x86/softmax_compute.cc index fe408aa3c842396388ceb385802e75bcfeea94d5..5bdb58b6887f5700ba79e9717cf8dc9b67fa07e0 100644 --- a/paddle/fluid/lite/kernels/x86/softmax_compute.cc +++ b/paddle/fluid/lite/kernels/x86/softmax_compute.cc @@ -12,76 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. 
-#include "paddle/fluid/operators/math/softmax.h" -#include "paddle/fluid/framework/eigen.h" -#include "paddle/fluid/framework/lod_tensor.h" -#include "paddle/fluid/framework/operator.h" -#include "paddle/fluid/lite/core/kernel.h" -#include "paddle/fluid/lite/core/op_registry.h" -namespace paddle { -namespace lite { -namespace kernels { -namespace x86 { - -static inline int CanonicalAxis(const int axis, const int rank) { - if (axis < 0) { - return axis + rank; - } - return axis; -} - -static inline int SizeToAxis(const int axis, lite::DDim dims) { - int size = 1; - for (int i = 0; i < axis; i++) { - size *= dims[i]; - } - return size; -} - -static inline int SizeFromAxis(const int axis, lite::DDim dims) { - int size = 1; - for (int i = axis; i < dims.size(); i++) { - size *= dims[i]; - } - return size; -} - -template -class SoftmaxCompute : public KernelLite { - public: - using param_t = operators::SoftmaxParam; - - void Run() override { - auto& param = *param_.get_mutable(); - // auto& context = context_->As(); - CHECK(param.output); - CHECK(param.x); - const int rank = param.x->dims().size(); - const int axis = CanonicalAxis(param.axis, rank); - int axis_dim = param.x->dims()[axis]; - const int n = SizeToAxis(axis, param.x->dims()); - const int d = SizeFromAxis(axis, param.x->dims()); - std::vector shape{n, d}; - - lite::Tensor input_2d, out_2d; - input_2d.ShareDataWith(*param.x); - input_2d.Resize(lite::DDim(shape)); - out_2d.ShareDataWith(*param.output); - out_2d.Resize(lite::DDim(shape)); - - paddle::operators::math::SoftmaxFunctor()( - platform::CPUDeviceContext(), axis_dim, &input_2d.raw_tensor(), - &out_2d.raw_tensor()); - } - - virtual ~SoftmaxCompute() = default; -}; - -} // namespace x86 -} // namespace kernels -} // namespace lite -} // namespace paddle +#include "paddle/fluid/lite/kernels/x86/softmax_compute.h" REGISTER_LITE_KERNEL(softmax, kX86, kFloat, kNCHW, paddle::lite::kernels::x86::SoftmaxCompute, def) diff --git a/paddle/fluid/lite/kernels/x86/softmax_compute.h b/paddle/fluid/lite/kernels/x86/softmax_compute.h new file mode 100644 index 0000000000000000000000000000000000000000..984a56965a822cf567e69a2c12523fefbc94a9d2 --- /dev/null +++ b/paddle/fluid/lite/kernels/x86/softmax_compute.h @@ -0,0 +1,86 @@ +// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+#pragma once + +#include +#include "paddle/fluid/framework/eigen.h" +#include "paddle/fluid/framework/lod_tensor.h" +#include "paddle/fluid/framework/operator.h" +#include "paddle/fluid/lite/core/kernel.h" +#include "paddle/fluid/lite/core/op_registry.h" +#include "paddle/fluid/operators/math/softmax.h" +namespace paddle { +namespace lite { +namespace kernels { +namespace x86 { + +static inline int CanonicalAxis(const int axis, const int rank) { + if (axis < 0) { + return axis + rank; + } + return axis; +} + +static inline int SizeToAxis(const int axis, lite::DDim dims) { + int size = 1; + for (int i = 0; i < axis; i++) { + size *= dims[i]; + } + return size; +} + +static inline int SizeFromAxis(const int axis, lite::DDim dims) { + int size = 1; + for (size_t i = axis; i < dims.size(); i++) { + size *= dims[i]; + } + return size; +} + +template +class SoftmaxCompute : public KernelLite { + public: + using param_t = operators::SoftmaxParam; + + void Run() override { + auto& param = *param_.get_mutable(); + // auto& context = context_->As(); + CHECK(param.output); + CHECK(param.x); + const int rank = param.x->dims().size(); + const int axis = CanonicalAxis(param.axis, rank); + int axis_dim = param.x->dims()[axis]; + const int n = SizeToAxis(axis, param.x->dims()); + const int d = SizeFromAxis(axis, param.x->dims()); + std::vector shape{n, d}; + + lite::Tensor input_2d, out_2d; + input_2d.ShareDataWith(*param.x); + input_2d.Resize(lite::DDim(shape)); + out_2d.ShareDataWith(*param.output); + out_2d.Resize(lite::DDim(shape)); + + paddle::operators::math::SoftmaxFunctor()( + platform::CPUDeviceContext(), axis_dim, &input_2d.raw_tensor(), + &out_2d.raw_tensor()); + } + + virtual ~SoftmaxCompute() = default; +}; + +} // namespace x86 +} // namespace kernels +} // namespace lite +} // namespace paddle diff --git a/paddle/fluid/lite/kernels/x86/softmax_compute_test.cc b/paddle/fluid/lite/kernels/x86/softmax_compute_test.cc new file mode 100644 index 0000000000000000000000000000000000000000..daab7e82a5361105f1e40eea8e0418b26e393848 --- /dev/null +++ b/paddle/fluid/lite/kernels/x86/softmax_compute_test.cc @@ -0,0 +1,74 @@ +// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include "paddle/fluid/lite/kernels/x86/softmax_compute.h" +#include +#include +#include +#include "paddle/fluid/lite/core/op_registry.h" + +namespace paddle { +namespace lite { +namespace kernels { +namespace x86 { + +TEST(softmax_x86, retrive_op) { + auto softmax = + KernelRegistry::Global().Create( + "softmax"); + ASSERT_FALSE(softmax.empty()); + ASSERT_TRUE(softmax.front()); +} + +TEST(softmax_x86, init) { + SoftmaxCompute softmax; + ASSERT_EQ(softmax.precision(), PRECISION(kFloat)); + ASSERT_EQ(softmax.target(), TARGET(kX86)); +} + +TEST(softmax_x86, run_test) { + lite::Tensor x, out; + constexpr int batch_size = 1; + std::vector x_shape{batch_size, 3, 3, 3}; + x.Resize(lite::DDim(x_shape)); + std::vector out_shape{batch_size, 3, 3, 3}; + out.Resize(lite::DDim(out_shape)); + + auto x_data = x.mutable_data(); + auto out_data = out.mutable_data(); + + for (int64_t i = 0; i < x.dims().production(); i++) { + x_data[i] = static_cast(i); + } + SoftmaxCompute softmax; + operators::SoftmaxParam param; + + param.x = &x; + param.output = &out; + + softmax.SetParam(param); + softmax.Run(); + + LOG(INFO) << "output: "; + for (int i = 0; i < out.dims().production(); i++) { + LOG(INFO) << out_data[i]; + } +} + +} // namespace x86 +} // namespace kernels +} // namespace lite +} // namespace paddle + +USE_LITE_KERNEL(softmax, kX86, kFloat, kNCHW, def); diff --git a/paddle/fluid/lite/model_parser/CMakeLists.txt b/paddle/fluid/lite/model_parser/CMakeLists.txt index d179e0350ac0edd89912377cc668c6b8888c2638..c539e409a655d73136b3c5c5ebc84ce1ecc697bd 100644 --- a/paddle/fluid/lite/model_parser/CMakeLists.txt +++ b/paddle/fluid/lite/model_parser/CMakeLists.txt @@ -1,7 +1,7 @@ #cc_library(runtime_lite SRCS runtime.cc) #TODO(Superjomn) enable it again. -if(NOT LITE_WITH_LIGHT_WEIGHT_FRAMEWORK) +if(NOT LITE_ON_MOBILE) lite_cc_test(test_model_parser_lite SRCS model_parser_test.cc DEPS model_parser_lite framework_proto_lite ARGS --model_dir=${LITE_MODEL_DIR}/lite_naive_model) @@ -13,18 +13,15 @@ endif() cc_library(compatible_pb_lite SRCS compatible_pb.cc DEPS op_desc_lite framework_proto_lite var_desc_lite) -set(model_parser_deps variable_lite scope_lite ${tensor_lite} scope_lite - target_wrapper_host - compatible_pb_lite - memory_lite - ) -if (LITE_WITH_CUDA) - set(model_parser_deps ${model_parser_deps} target_wrapper_cuda) -endif() -cc_library(model_parser_lite SRCS model_parser.cc DEPS ${model_parser_deps}) +lite_cc_library(model_parser_lite SRCS model_parser.cc DEPS + variable_lite scope_lite ${tensor_lite} scope_lite + target_wrapper_host + compatible_pb_lite + memory_lite + CUDA_DEPS target_wrapper_cuda) lite_cc_test(test_op_desc_lite SRCS op_desc_test.cc DEPS cpp_op_desc_lite op_desc_lite compatible_pb_lite) + add_subdirectory(pb) add_subdirectory(cpp) - diff --git a/paddle/fluid/lite/model_parser/model_parser.cc b/paddle/fluid/lite/model_parser/model_parser.cc index 1b30ca772f872de6fec2b427eee1ad2e96d24576..d69fe4d7f7f61208e8c8a4973dcc648d79ed1cac 100644 --- a/paddle/fluid/lite/model_parser/model_parser.cc +++ b/paddle/fluid/lite/model_parser/model_parser.cc @@ -209,7 +209,7 @@ void TensorToStream(std::ostream &os, const lite::Tensor &tensor) { os.write(out.data(), size); } { // the 3rd field, tensor data - uint64_t size = tensor.data_size(); + uint64_t size = tensor.memory_size(); CHECK_LT(size, std::numeric_limits::max()) << "Index overflow when writing tensor"; diff --git a/paddle/fluid/lite/operators/CMakeLists.txt b/paddle/fluid/lite/operators/CMakeLists.txt index 
9269e46e6624770aceab439ef5eb85505643e950..c4347c46f7a070239064e8f1d4a54de51ce3c6e7 100644 --- a/paddle/fluid/lite/operators/CMakeLists.txt +++ b/paddle/fluid/lite/operators/CMakeLists.txt @@ -14,13 +14,15 @@ cc_library(fetch_op_lite SRCS fetch_op.cc DEPS ${op_DEPS}) cc_library(io_copy_op_lite SRCS io_copy_op.cc DEPS ${op_DEPS}) cc_library(activation_ops_lite SRCS activation_ops.cc DEPS ${op_DEPS}) cc_library(elementwise_ops_lite SRCS elementwise_ops.cc DEPS ${op_DEPS}) +cc_library(fusion_elementwise_activation_ops_lite SRCS fusion_elementwise_activation_ops.cc DEPS elementwise_ops_lite ${op_DEPS}) cc_library(mean_op_lite SRCS mean_op.cc DEPS ${op_DEPS}) cc_library(fill_constant_op_lite SRCS fill_constant_op.cc DEPS ${op_DEPS}) #cc_library(sgd_op_lite SRCS sgd_op.cc DEPS ${op_DEPS}) cc_library(op_params_lite SRCS op_params.cc DEPS ${tensor_lite} any_lite framework_proto_lite) cc_library(dropout_op_lite SRCS dropout_op.cc DEPS ${op_DEPS}) cc_library(concat_op_lite SRCS concat_op.cc DEPS ${op_DEPS}) -# cc_library(split_op_lite SRCS split_op.cc DEPS ${op_DEPS}) +cc_library(split_op_lite SRCS split_op.cc DEPS ${op_DEPS}) +cc_library(transpose_op_lite SRCS transpose_op.cc DEPS ${op_DEPS}) cc_library(fake_quant SRCS fake_quantize_moving_avg_max_abs.cc DEPS ${op_DEPS}) cc_library(fake_dequant SRCS fake_dequantize_max_abs.cc DEPS ${op_DEPS}) @@ -38,12 +40,14 @@ set(ops_lite fetch_op_lite io_copy_op_lite elementwise_ops_lite + fusion_elementwise_activation_ops_lite mean_op_lite fill_constant_op_lite activation_ops_lite dropout_op_lite concat_op_lite - #split_op_lite + split_op_lite + transpose_op_lite fake_quant fake_dequant PARENT_SCOPE) @@ -60,3 +64,7 @@ lite_cc_test(test_softmax_op_lite SRCS softmax_op_test.cc DEPS softmax_op_lite m lite_cc_test(test_reshape_op_lite SRCS reshape_op_test.cc DEPS reshape_op_lite memory_lite) lite_cc_test(test_batch_norm_op_lite SRCS batch_norm_op_test.cc DEPS batch_norm_op_lite memory_lite) lite_cc_test(test_concat_op_lite SRCS concat_op_test.cc DEPS concat_op_lite memory_lite) +lite_cc_test(test_fusion_elementwise_activation_ops_lite + SRCS fusion_elementwise_activation_ops_test.cc + DEPS fusion_elementwise_activation_ops_lite memory_lite) +lite_cc_test(test_transpose_op_lite SRCS transpose_op_test.cc DEPS transpose_op_lite memory_lite) diff --git a/paddle/fluid/lite/operators/dropout_op.cc b/paddle/fluid/lite/operators/dropout_op.cc index b5b50dc3d1668712cdbe1af6b809485d9689d588..7c9fb2d0b0ce03739d7058d040348df4841a8f04 100644 --- a/paddle/fluid/lite/operators/dropout_op.cc +++ b/paddle/fluid/lite/operators/dropout_op.cc @@ -52,7 +52,7 @@ class DropoutOpLite : public OpLite { param_.mask = GetMutableVar(scope, Mask); param_.dropout_prob = op_desc.GetAttr("dropout_prob"); - if (op_desc.HasAttr("axis")) { + if (op_desc.HasAttr("is_test")) { param_.is_test = op_desc.GetAttr("is_test"); } param_.fix_seed = op_desc.GetAttr("fix_seed"); diff --git a/paddle/fluid/lite/operators/elementwise_ops.cc b/paddle/fluid/lite/operators/elementwise_ops.cc index b400b1ab26c137fbbee830e1992706e586ae152e..2c6d4e709082b11ab643d6d8b8571efcba4e5f7b 100644 --- a/paddle/fluid/lite/operators/elementwise_ops.cc +++ b/paddle/fluid/lite/operators/elementwise_ops.cc @@ -12,92 +12,67 @@ // See the License for the specific language governing permissions and // limitations under the License. 
-#include "paddle/fluid/lite/core/op_lite.h" +#include "paddle/fluid/lite/operators/elementwise_ops.h" #include "paddle/fluid/lite/core/op_registry.h" namespace paddle { namespace lite { namespace operators { -class ElementwiseOp : public OpLite { - public: - explicit ElementwiseOp(const std::string& type) : OpLite(type) {} - - bool CheckShape() const override { - CHECK_OR_FALSE(param_.X); - CHECK_OR_FALSE(param_.Y); - CHECK_OR_FALSE(param_.Out); - return true; - } - - bool InferShape() const override { - CHECK_OR_FALSE(param_.X->dims().size() >= param_.Y->dims().size()); - param_.Out->Resize(param_.X->dims()); - return true; - } - - bool AttachImpl(const cpp::OpDesc& opdesc, lite::Scope* scope) override { - auto X_name = opdesc.Input("X").front(); - auto Y_name = opdesc.Input("Y").front(); - auto Out_name = opdesc.Output("Out").front(); - - param_.X = GetVar(scope, X_name); - param_.Y = GetVar(scope, Y_name); - param_.Out = GetMutableVar(scope, Out_name); - param_.axis = opdesc.GetAttr("axis"); - return true; - } - - void AttachKernel(KernelBase* kernel) override { kernel->SetParam(param_); } - - std::string DebugString() const override { return "elementwise_op"; } - - private: - mutable operators::ElementwiseParam param_; -}; +bool ElementwiseOp::CheckShape() const { + CHECK_OR_FALSE(param_.X); + CHECK_OR_FALSE(param_.Y); + CHECK_OR_FALSE(param_.Out); + return true; +} + +bool ElementwiseOp::InferShape() const { + CHECK_OR_FALSE(param_.X->dims().size() >= param_.Y->dims().size()); + param_.Out->Resize(param_.X->dims()); + return true; +} + +bool ElementwiseOp::AttachImpl(const cpp::OpDesc& opdesc, lite::Scope* scope) { + auto X_name = opdesc.Input("X").front(); + auto Y_name = opdesc.Input("Y").front(); + auto Out_name = opdesc.Output("Out").front(); + + param_.X = GetVar(scope, X_name); + param_.Y = GetVar(scope, Y_name); + param_.Out = GetMutableVar(scope, Out_name); + param_.axis = opdesc.GetAttr("axis"); + return true; +} #ifdef LITE_WITH_X86 -class ElementwiseGradExplicitOp : public OpLite { - public: - explicit ElementwiseGradExplicitOp(const std::string& type) : OpLite(type) {} - - bool CheckShape() const override { - CHECK_OR_FALSE(param_.Y); - CHECK_OR_FALSE(param_.X_grad); - CHECK_OR_FALSE(param_.Y_grad); - CHECK_OR_FALSE(param_.Out_grad); - return true; - } - - bool InferShape() const override { - param_.X_grad->Resize(param_.Out_grad->dims()); - param_.Y_grad->Resize(param_.Y->dims()); - return true; - } - - bool AttachImpl(const cpp::OpDesc& opdesc, lite::Scope* scope) override { - CHECK_EQ(opdesc.InputArgumentNames().size(), 1UL); - auto Out_name = opdesc.Input(framework::GradVarName("Out")).front(); - auto X_name = opdesc.Output(framework::GradVarName("X")).front(); - auto Y_name = opdesc.Output(framework::GradVarName("Y")).front(); - - param_.Out_grad = GetVar(scope, Out_name); - param_.X_grad = GetMutableVar(scope, X_name); - param_.Y_grad = GetMutableVar(scope, Y_name); - param_.axis = opdesc.GetAttr("axis"); - - return true; - } - - void AttachKernel(KernelBase* kernel) override { kernel->SetParam(param_); } - - std::string DebugString() const override { - return "elementwise_grad_explicit_op"; - } - - private: - mutable operators::ElementwiseGradParam param_; -}; +bool ElementwiseGradExplicitOp::CheckShape() const { + CHECK_OR_FALSE(param_.Y); + CHECK_OR_FALSE(param_.X_grad); + CHECK_OR_FALSE(param_.Y_grad); + CHECK_OR_FALSE(param_.Out_grad); + return true; +} + +bool ElementwiseGradExplicitOp::InferShape() const { + param_.X_grad->Resize(param_.Out_grad->dims()); + 
param_.Y_grad->Resize(param_.Y->dims()); + return true; +} + +bool ElementwiseGradExplicitOp::AttachImpl(const cpp::OpDesc& opdesc, + lite::Scope* scope) { + CHECK_EQ(opdesc.InputArgumentNames().size(), 1UL); + auto Out_name = opdesc.Input(framework::GradVarName("Out")).front(); + auto X_name = opdesc.Output(framework::GradVarName("X")).front(); + auto Y_name = opdesc.Output(framework::GradVarName("Y")).front(); + + param_.Out_grad = GetVar(scope, Out_name); + param_.X_grad = GetMutableVar(scope, X_name); + param_.Y_grad = GetMutableVar(scope, Y_name); + param_.axis = opdesc.GetAttr("axis"); + + return true; +} #endif } // namespace operators diff --git a/paddle/fluid/lite/operators/elementwise_ops.h b/paddle/fluid/lite/operators/elementwise_ops.h new file mode 100644 index 0000000000000000000000000000000000000000..8e427f708fcab5a74052a5ea13776709d7f4f72e --- /dev/null +++ b/paddle/fluid/lite/operators/elementwise_ops.h @@ -0,0 +1,65 @@ +// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once +#include +#include +#include "paddle/fluid/lite/core/op_lite.h" + +namespace paddle { +namespace lite { +namespace operators { + +class ElementwiseOp : public OpLite { + public: + explicit ElementwiseOp(const std::string& op_type) : OpLite(op_type) {} + + bool CheckShape() const override; + + bool InferShape() const override; + + bool AttachImpl(const cpp::OpDesc& opdesc, lite::Scope* scope) override; + + void AttachKernel(KernelBase* kernel) override { kernel->SetParam(param_); } + std::string DebugString() const override { return "elementwise_op"; } + + private: + mutable operators::ElementwiseParam param_; +}; + +#ifdef LITE_WITH_X86 +class ElementwiseGradExplicitOp : public OpLite { + public: + explicit ElementwiseGradExplicitOp(const std::string& type) : OpLite(type) {} + + bool CheckShape() const override; + + bool InferShape() const override; + + bool AttachImpl(const cpp::OpDesc& opdesc, lite::Scope* scope) override; + + void AttachKernel(KernelBase* kernel) override { kernel->SetParam(param_); } + + std::string DebugString() const override { + return "elementwise_grad_explicit_op"; + } + + private: + mutable operators::ElementwiseGradParam param_; +}; +#endif + +} // namespace operators +} // namespace lite +} // namespace paddle diff --git a/paddle/fluid/lite/operators/fusion_elementwise_activation_ops.cc b/paddle/fluid/lite/operators/fusion_elementwise_activation_ops.cc new file mode 100644 index 0000000000000000000000000000000000000000..c7c57810fe6f6b4c1ed04883ec736eca6abc297d --- /dev/null +++ b/paddle/fluid/lite/operators/fusion_elementwise_activation_ops.cc @@ -0,0 +1,57 @@ +// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "paddle/fluid/lite/operators/fusion_elementwise_activation_ops.h"
+#include <string>
+#include "paddle/fluid/lite/core/op_registry.h"
+
+namespace paddle {
+namespace lite {
+namespace operators {
+
+bool FusionElementwiseActivationOp::AttachImpl(const cpp::OpDesc& opdesc,
+                                               lite::Scope* scope) {
+  ElementwiseOp::AttachImpl(opdesc, scope);
+  param_.act_type = opdesc.GetAttr<std::string>("act_type");
+  // TODO(sangoly): support more activation types.
+  CHECK(param_.act_type == "relu") << "Only relu activation is supported now";
+
+  return true;
+}
+
+#ifdef LITE_WITH_X86
+bool FusionElementwiseActivationGradExplicitOp::AttachImpl(
+    const cpp::OpDesc& opdesc, lite::Scope* scope) {
+  ElementwiseGradExplicitOp::AttachImpl(opdesc, scope);
+  param_.act_type = opdesc.GetAttr<std::string>("act_type");
+  // TODO(sangoly): support more activation types.
+  CHECK(param_.act_type == "relu") << "Only relu activation is supported now";
+
+  return true;
+}
+#endif
+
+} // namespace operators
+} // namespace lite
+} // namespace paddle
+
+REGISTER_LITE_OP(fusion_elementwise_sub_activation,
+                 paddle::lite::operators::FusionElementwiseActivationOp);
+#ifdef LITE_WITH_X86
+REGISTER_LITE_OP(
+    fusion_elementwise_sub_activation_grad,
+    paddle::lite::operators::FusionElementwiseActivationGradExplicitOp);
+#endif
+REGISTER_LITE_OP(fusion_elementwise_add_activation,
+                 paddle::lite::operators::FusionElementwiseActivationOp);
diff --git a/paddle/fluid/lite/operators/fusion_elementwise_activation_ops.h b/paddle/fluid/lite/operators/fusion_elementwise_activation_ops.h
new file mode 100644
index 0000000000000000000000000000000000000000..78ec419925f3d23d5eac0a9a62d82588e52e0d2c
--- /dev/null
+++ b/paddle/fluid/lite/operators/fusion_elementwise_activation_ops.h
@@ -0,0 +1,60 @@
+// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
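
The fused op above does not duplicate the elementwise attach logic: it chains to ElementwiseOp::AttachImpl and only adds the act_type attribute, which is also why elementwise_ops.cc had to be split into a header plus implementation. A standalone sketch of that chaining pattern, with simplified stand-in classes rather than the real OpLite hierarchy:

#include <iostream>
#include <string>

// Base sketch: parses the shared elementwise inputs and attributes.
class ElementwiseOpSketch {
 public:
  virtual ~ElementwiseOpSketch() = default;
  virtual bool AttachImpl() {
    std::cout << "parse X, Y, Out and axis" << std::endl;
    return true;
  }
};

// Derived sketch: reuses the base parsing, then reads only the extra attribute.
class FusionElementwiseActivationOpSketch : public ElementwiseOpSketch {
 public:
  bool AttachImpl() override {
    ElementwiseOpSketch::AttachImpl();  // reuse the base parsing first
    act_type_ = "relu";                 // then handle the fused activation type
    return act_type_ == "relu";         // only relu is accepted for now
  }

 private:
  std::string act_type_;
};

int main() {
  FusionElementwiseActivationOpSketch op;
  std::cout << std::boolalpha << op.AttachImpl() << std::endl;  // prints: true
}

The same chaining is used for the x86-only gradient variant, which derives from ElementwiseGradExplicitOp.
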
+ +#pragma once +#include +#include "paddle/fluid/lite/core/op_lite.h" +#include "paddle/fluid/lite/core/op_registry.h" +#include "paddle/fluid/lite/operators/elementwise_ops.h" + +namespace paddle { +namespace lite { +namespace operators { + +class FusionElementwiseActivationOp : public ElementwiseOp { + public: + explicit FusionElementwiseActivationOp(const std::string& type) + : ElementwiseOp(type) {} + + bool AttachImpl(const cpp::OpDesc& opdesc, lite::Scope* scope) override; + + std::string DebugString() const override { + return "fusion_elementwise_activation_op"; + } + + private: + mutable operators::FusionElementwiseActivationParam param_; +}; + +#ifdef LITE_WITH_X86 +class FusionElementwiseActivationGradExplicitOp + : public ElementwiseGradExplicitOp { + public: + explicit FusionElementwiseActivationGradExplicitOp(const std::string& type) + : ElementwiseGradExplicitOp(type) {} + + bool AttachImpl(const cpp::OpDesc& opdesc, lite::Scope* scope) override; + + std::string DebugString() const override { + return "fusion_elementwise_activation_grad_explicit_op"; + } + + private: + mutable operators::FusionElementwiseActivationGradParam param_; +}; +#endif + +} // namespace operators +} // namespace lite +} // namespace paddle diff --git a/paddle/fluid/lite/operators/fusion_elementwise_activation_ops_test.cc b/paddle/fluid/lite/operators/fusion_elementwise_activation_ops_test.cc new file mode 100644 index 0000000000000000000000000000000000000000..07566e25fc1133bc09c62a97d2cfcb4c823164a0 --- /dev/null +++ b/paddle/fluid/lite/operators/fusion_elementwise_activation_ops_test.cc @@ -0,0 +1,63 @@ +// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
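
The header above only declares the classes; what the fused kernel is expected to compute is the activation applied to the elementwise result. A small reference sketch of the add + relu case exercised by the test that follows (equal shapes assumed, axis broadcasting omitted, and FusedAddRelu is a made-up name for illustration):

#include <algorithm>
#include <cstddef>
#include <iostream>
#include <vector>

// Reference semantics of fusion_elementwise_add_activation with act_type "relu":
// one pass computing relu(x + y) instead of an elementwise_add kernel followed
// by a separate relu kernel.
std::vector<float> FusedAddRelu(const std::vector<float>& x,
                                const std::vector<float>& y) {
  std::vector<float> out(x.size());
  for (size_t i = 0; i < x.size(); ++i) {
    out[i] = std::max(0.f, x[i] + y[i]);
  }
  return out;
}

int main() {
  for (float v : FusedAddRelu({-1.f, 2.f, 0.f}, {0.5f, -3.f, 4.f})) {
    std::cout << v << " ";  // prints: 0 0 4
  }
  std::cout << std::endl;
}
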
+ +#include "paddle/fluid/lite/operators/fusion_elementwise_activation_ops.h" +#include +#include +#include +#include "paddle/fluid/lite/core/op_registry.h" + +namespace paddle { +namespace lite { +namespace operators { + +TEST(fusion_elementwise_activation_op_lite, test) { + // prepare variables + lite::Scope scope; + auto* x = scope.Var("x")->GetMutable(); + auto* y = scope.Var("y")->GetMutable(); + auto* out = scope.Var("out")->GetMutable(); + x->Resize(lite::DDim(std::vector({10, 20}))); + y->Resize(lite::DDim(std::vector({10, 20}))); + out->Resize(lite::DDim(std::vector{10, 20})); + + // set data + for (int i = 0; i < 10 * 20; i++) { + x->mutable_data()[i] = i; + } + for (int i = 0; i < 10 * 20; i++) { + y->mutable_data()[i] = i; + } + for (int i = 0; i < 10 * 20; i++) { + out->mutable_data()[i] = 0.; + } + + // prepare op desc + cpp::OpDesc desc; + desc.SetType("fusion_elementwise_add_activation"); + desc.SetInput("X", {"x"}); + desc.SetInput("Y", {"y"}); + desc.SetOutput("Out", {"out"}); + desc.SetAttr("axis", static_cast(1)); + desc.SetAttr("act_type", std::string("relu")); + + FusionElementwiseActivationOp fuse_op("fusion_elementwise_add_activation"); + + fuse_op.SetValidPlaces({Place{TARGET(kX86), PRECISION(kFloat)}}); + fuse_op.Attach(desc, &scope); +} + +} // namespace operators +} // namespace lite +} // namespace paddle diff --git a/paddle/fluid/lite/operators/op_params.h b/paddle/fluid/lite/operators/op_params.h index bf10c717c49d0b63aa68e54c9d26bd5798517706..5c00c8a292c371c6f657fc0cebaa74e6bdc4b9f3 100644 --- a/paddle/fluid/lite/operators/op_params.h +++ b/paddle/fluid/lite/operators/op_params.h @@ -203,6 +203,15 @@ struct SplitParam { std::vector sections; }; +// For Transpose op +struct TransposeParam { + const lite::Tensor* x{}; + lite::Tensor* output{}; + std::vector axis; + bool use_mkldnn{false}; + std::string data_format{"AnyLayout"}; +}; + /// ----------------------- element wise operators ---------------------- struct ElementwiseParam { const lite::Tensor* X{}; @@ -219,6 +228,14 @@ struct ElementwiseGradParam { int axis{-1}; // for broadcasting. 
}; +struct FusionElementwiseActivationParam : public ElementwiseParam { + std::string act_type; +}; + +struct FusionElementwiseActivationGradParam : public ElementwiseGradParam { + std::string act_type; +}; + /// ----------------------- activation operators ---------------------- struct ActivationParam { const lite::Tensor* X{}; diff --git a/paddle/fluid/lite/operators/split_op.cc b/paddle/fluid/lite/operators/split_op.cc index 58768276377edd9ea92356a808a6f46c3b5c6a80..1f220819db61d17b27cb06d4928ae52c5eb7f7eb 100644 --- a/paddle/fluid/lite/operators/split_op.cc +++ b/paddle/fluid/lite/operators/split_op.cc @@ -48,7 +48,7 @@ bool SplitOp::InferShape() const { outs_dims.push_back(dim); } } else if (sections.size() > 0) { - for (size_t i = 0; i < outs_number; ++i) { + for (int i = 0; i < outs_number; ++i) { auto dim = in_dims; dim[axis] = sections[i]; outs_dims.push_back(dim); @@ -66,9 +66,9 @@ bool SplitOp::AttachImpl(const cpp::OpDesc &opdesc, lite::Scope *scope) { param_.axis = opdesc.GetAttr("axis"); param_.num = opdesc.GetAttr("num"); param_.sections = opdesc.GetAttr>("sections"); - param_.x = const_cast( - &scope->FindVar(opdesc.Input("X").front())->Get()); + auto input = opdesc.Input("Input").front(); auto outs = opdesc.Output("Out"); + param_.x = scope->FindVar(input)->GetMutable(); for (auto var : outs) { param_.output.push_back(scope->FindVar(var)->GetMutable()); } diff --git a/paddle/fluid/lite/operators/transpose_op.cc b/paddle/fluid/lite/operators/transpose_op.cc new file mode 100644 index 0000000000000000000000000000000000000000..6b422bbb277e8ea5b337ffe2cc2b7d2511a86f34 --- /dev/null +++ b/paddle/fluid/lite/operators/transpose_op.cc @@ -0,0 +1,165 @@ +// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
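
The split_op change above also adjusts the sections loop in InferShape; the shape rule it implements is simple: each output copies the input shape and overwrites the split axis with its section size. A standalone sketch of that computation (plain vectors stand in for lite::DDim):

#include <cstdint>
#include <iostream>
#include <vector>

int main() {
  // Mirrors the sections branch of SplitOp::InferShape.
  std::vector<int64_t> in_dims = {6, 4};
  const int axis = 0;
  const std::vector<int> sections = {1, 2, 3};  // must sum to in_dims[axis]

  std::vector<std::vector<int64_t>> outs_dims;
  for (int i = 0; i < static_cast<int>(sections.size()); ++i) {
    auto dim = in_dims;
    dim[axis] = sections[i];
    outs_dims.push_back(dim);
  }

  for (const auto& d : outs_dims) {
    std::cout << d[0] << "x" << d[1] << std::endl;  // prints: 1x4, 2x4, 3x4
  }
}
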
+ +#include "paddle/fluid/lite/operators/transpose_op.h" +#include "paddle/fluid/lite/core/op_registry.h" + +namespace paddle { +namespace lite { +namespace operators { + +// Transpose +bool TransposeOp::CheckShape() const { + CHECK_OR_FALSE(param_.x); + CHECK_OR_FALSE(param_.output); + auto x_dims = param_.x->dims(); + auto x_rank = x_dims.size(); + std::vector axis = param_.axis; + size_t axis_size = axis.size(); + // "The input tensor's rank(%d) should be equal to the axis's size(%d)", + // x_rank, axis_size + CHECK_OR_FALSE(x_rank == axis_size); + + std::vector count(axis_size, 0); + for (size_t i = 0; i < axis_size; i++) { + // Each element of Attribute axis should be a unique value + // range from 0 to (dims - 1), + // where the dims is the axis's size + CHECK_OR_FALSE(axis[i] < static_cast(axis_size) && + ++count[axis[i]] == 1); + } + return true; +} + +bool TransposeOp::InferShape() const { + CHECK_OR_FALSE(param_.x); + CHECK_OR_FALSE(param_.output); + auto x_dims = param_.x->dims(); + auto x_rank = x_dims.size(); + std::vector axis = param_.axis; + size_t axis_size = axis.size(); + // "The input tensor's rank(%d) should be equal to the axis's size(%d)", + // x_rank, axis_size + CHECK_OR_FALSE(x_rank == axis_size); + + std::vector count(axis_size, 0); + for (size_t i = 0; i < axis_size; i++) { + // Each element of Attribute axis should be a unique value + // range from 0 to (dims - 1), + // where the dims is the axis's size + CHECK_OR_FALSE(axis[i] < static_cast(axis_size) && + ++count[axis[i]] == 1); + } + lite::DDim out_dims(x_dims); + for (size_t i = 0; i < axis_size; i++) { + out_dims[i] = x_dims[axis[i]]; + } + param_.output->Resize(out_dims); + return true; +} + +bool TransposeOp::AttachImpl(const cpp::OpDesc &op_desc, lite::Scope *scope) { + auto x = op_desc.Input("X").front(); + auto out = op_desc.Output("Out").front(); + + CHECK(scope->FindVar(x)); + CHECK(scope->FindVar(out)); + param_.x = GetVar(scope, x); + param_.output = GetMutableVar(scope, out); + + param_.axis = op_desc.GetAttr>("axis"); + if (op_desc.HasAttr("use_mkldnn")) { + param_.use_mkldnn = op_desc.GetAttr("use_mkldnn"); + } + if (op_desc.HasAttr("data_format")) { + param_.data_format = op_desc.GetAttr("data_format"); + } + return true; +} + +// Transpose2 +bool Transpose2Op::CheckShape() const { + CHECK_OR_FALSE(param_.x); + CHECK_OR_FALSE(param_.output); + auto x_dims = param_.x->dims(); + auto x_rank = x_dims.size(); + std::vector axis = param_.axis; + size_t axis_size = axis.size(); + // "The input tensor's rank(%d) should be equal to the axis's size(%d)", + // x_rank, axis_size + CHECK_OR_FALSE(x_rank == axis_size); + + std::vector count(axis_size, 0); + for (size_t i = 0; i < axis_size; i++) { + // Each element of Attribute axis should be a unique value + // range from 0 to (dims - 1), + // where the dims is the axis's size + CHECK_OR_FALSE(axis[i] < static_cast(axis_size) && + ++count[axis[i]] == 1); + } + return true; +} + +bool Transpose2Op::InferShape() const { + CHECK_OR_FALSE(param_.x); + CHECK_OR_FALSE(param_.output); + auto x_dims = param_.x->dims(); + auto x_rank = x_dims.size(); + std::vector axis = param_.axis; + size_t axis_size = axis.size(); + // "The input tensor's rank(%d) should be equal to the axis's size(%d)", + // x_rank, axis_size + CHECK_OR_FALSE(x_rank == axis_size); + + std::vector count(axis_size, 0); + for (size_t i = 0; i < axis_size; i++) { + // Each element of Attribute axis should be a unique value + // range from 0 to (dims - 1), + // where the dims is the axis's size + 
CHECK_OR_FALSE(axis[i] < static_cast(axis_size) && + ++count[axis[i]] == 1); + } + lite::DDim out_dims(x_dims); + for (size_t i = 0; i < axis_size; i++) { + out_dims[i] = x_dims[axis[i]]; + } + param_.output->Resize(out_dims); + return true; +} + +bool Transpose2Op::AttachImpl(const cpp::OpDesc &op_desc, lite::Scope *scope) { + auto x = op_desc.Input("X").front(); + auto out = op_desc.Output("Out").front(); + + CHECK(scope->FindVar(x)); + CHECK(scope->FindVar(out)); + param_.x = GetVar(scope, x); + param_.output = GetMutableVar(scope, out); + + param_.axis = op_desc.GetAttr>("axis"); + if (op_desc.HasAttr("use_mkldnn")) { + param_.use_mkldnn = op_desc.GetAttr("use_mkldnn"); + } + if (op_desc.HasAttr("data_format")) { + param_.data_format = op_desc.GetAttr("data_format"); + } + return true; +} + +} // namespace operators +} // namespace lite +} // namespace paddle + +REGISTER_LITE_OP(transpose, paddle::lite::operators::TransposeOp); +REGISTER_LITE_OP(transpose2, paddle::lite::operators::Transpose2Op); diff --git a/paddle/fluid/lite/operators/transpose_op.h b/paddle/fluid/lite/operators/transpose_op.h new file mode 100644 index 0000000000000000000000000000000000000000..f51acb61e1be2eb0ff778668b3b4a1f79467cabb --- /dev/null +++ b/paddle/fluid/lite/operators/transpose_op.h @@ -0,0 +1,66 @@ +// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
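
What the transpose CheckShape/InferShape code above enforces can be summarized in a few lines: axis must be a permutation of [0, rank), and the output shape is the input shape permuted by axis. A standalone sketch using the shape mentioned in the tests, [1, 24, 2, 2] with axis {0, 2, 3, 1} (the helper name is illustrative):

#include <cstdint>
#include <iostream>
#include <vector>

// True if axis has the same length as the tensor rank and each value in
// [0, rank) appears exactly once.
bool IsValidPermutation(const std::vector<int>& axis, size_t rank) {
  if (axis.size() != rank) return false;
  std::vector<int> count(rank, 0);
  for (int a : axis) {
    if (a < 0 || a >= static_cast<int>(rank) || ++count[a] != 1) return false;
  }
  return true;
}

int main() {
  std::vector<int64_t> x_dims = {1, 24, 2, 2};
  std::vector<int> axis = {0, 2, 3, 1};
  if (!IsValidPermutation(axis, x_dims.size())) return 1;

  std::vector<int64_t> out_dims(x_dims.size());
  for (size_t i = 0; i < axis.size(); ++i) {
    out_dims[i] = x_dims[axis[i]];  // permute the input shape
  }
  for (auto d : out_dims) std::cout << d << " ";  // prints: 1 2 2 24
  std::cout << std::endl;
}
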
+ +#pragma once +#include +#include +#include "paddle/fluid/lite/core/op_lite.h" +#include "paddle/fluid/lite/core/scope.h" +#include "paddle/fluid/lite/utils/all.h" + +namespace paddle { +namespace lite { +namespace operators { + +// Transpose +class TransposeOp : public OpLite { + public: + TransposeOp() {} + explicit TransposeOp(const std::string &op_type) : OpLite(op_type) {} + + bool CheckShape() const override; + + bool InferShape() const override; + + bool AttachImpl(const cpp::OpDesc &opdesc, lite::Scope *scope) override; + + void AttachKernel(KernelBase *kernel) override { kernel->SetParam(param_); } + std::string DebugString() const override { return "transpose"; } + + private: + mutable TransposeParam param_; +}; + +// Transpose2 +class Transpose2Op : public OpLite { + public: + Transpose2Op() {} + explicit Transpose2Op(const std::string &op_type) : OpLite(op_type) {} + + bool CheckShape() const override; + + bool InferShape() const override; + + bool AttachImpl(const cpp::OpDesc &opdesc, lite::Scope *scope) override; + + void AttachKernel(KernelBase *kernel) override { kernel->SetParam(param_); } + std::string DebugString() const override { return "transpose2"; } + + private: + mutable TransposeParam param_; +}; + +} // namespace operators +} // namespace lite +} // namespace paddle diff --git a/paddle/fluid/lite/operators/transpose_op_test.cc b/paddle/fluid/lite/operators/transpose_op_test.cc new file mode 100644 index 0000000000000000000000000000000000000000..8962c1e4921452c68ee85b18034e4b8887f68527 --- /dev/null +++ b/paddle/fluid/lite/operators/transpose_op_test.cc @@ -0,0 +1,93 @@ +// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
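
Both TransposeOp and Transpose2Op follow the same op/param/kernel wiring as the other lite ops in this change: AttachImpl fills a mutable param struct, and AttachKernel hands that struct to the selected kernel via SetParam. A simplified standalone sketch of that flow (these *Sketch types are illustrative, not the real OpLite or KernelBase interfaces):

#include <iostream>
#include <vector>

struct TransposeParamSketch {
  std::vector<int> axis;
};

// Stand-in kernel: receives its parameters once, then can run repeatedly.
class KernelSketch {
 public:
  void SetParam(const TransposeParamSketch& p) { param_ = p; }
  void Run() const {
    std::cout << "transpose over " << param_.axis.size() << " axes" << std::endl;
  }

 private:
  TransposeParamSketch param_;
};

// Stand-in op: parses attributes into param_, then forwards them to the kernel.
class TransposeOpSketch {
 public:
  void AttachImpl(const std::vector<int>& axis) { param_.axis = axis; }
  void AttachKernel(KernelSketch* kernel) const { kernel->SetParam(param_); }

 private:
  mutable TransposeParamSketch param_;
};

int main() {
  TransposeOpSketch op;
  KernelSketch kernel;
  op.AttachImpl({0, 2, 3, 1});
  op.AttachKernel(&kernel);
  kernel.Run();  // prints: transpose over 4 axes
}
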
+ +#include "paddle/fluid/lite/operators/transpose_op.h" +#include +#include "paddle/fluid/lite/core/op_registry.h" + +namespace paddle { +namespace lite { +namespace operators { + +// Transpose +TEST(transpose_op_lite, test) { + // prepare variables + Scope scope; + auto* x = scope.Var("x")->GetMutable(); + auto* output = scope.Var("output")->GetMutable(); + const int h = 10; + const int w = 20; + x->Resize(DDim(std::vector({h, w}))); + output->Resize(DDim(std::vector{w, h})); + + // set data + for (int i = 0; i < h * w; i++) { + x->mutable_data()[i] = i; + } + for (int i = 0; i < w * h; i++) { + output->mutable_data()[i] = 0.; + } + + // prepare op desc + cpp::OpDesc desc; + desc.SetType("transpose"); + desc.SetInput("X", {"x"}); + desc.SetOutput("Out", {"output"}); + // axis change for shape in mobilenetssd: [1, 24, 2, 2] => [1, 2, 2, 24] + std::vector axis{0, 2, 3, 1}; + desc.SetAttr("axis", axis); + + TransposeOp transpose("transpose"); + + transpose.SetValidPlaces({Place{TARGET(kARM), PRECISION(kFloat)}}); + transpose.Attach(desc, &scope); +} + +// Transpose2 +TEST(transpose2_op_lite, test) { + // prepare variables + Scope scope; + auto* x = scope.Var("x")->GetMutable(); + auto* output = scope.Var("output")->GetMutable(); + const int h = 10; + const int w = 20; + x->Resize(DDim(std::vector({h, w}))); + output->Resize(DDim(std::vector{w, h})); + + // set data + for (int i = 0; i < h * w; i++) { + x->mutable_data()[i] = i; + } + for (int i = 0; i < w * h; i++) { + output->mutable_data()[i] = 0.; + } + + // prepare op desc + cpp::OpDesc desc; + desc.SetType("transpose2"); + desc.SetInput("X", {"x"}); + desc.SetOutput("Out", {"output"}); + // axis change for shape in mobilenetssd: [1, 24, 2, 2] => [1, 2, 2, 24] + std::vector axis{0, 2, 3, 1}; + desc.SetAttr("axis", axis); + + Transpose2Op transpose2("transpose2"); + + transpose2.SetValidPlaces({Place{TARGET(kARM), PRECISION(kFloat)}}); + transpose2.Attach(desc, &scope); +} + +} // namespace operators +} // namespace lite +} // namespace paddle diff --git a/paddle/fluid/lite/operators/use_ops.h b/paddle/fluid/lite/operators/use_ops.h new file mode 100644 index 0000000000000000000000000000000000000000..8f7599042b5538a9bff248a84c5f3f3980c9500b --- /dev/null +++ b/paddle/fluid/lite/operators/use_ops.h @@ -0,0 +1,36 @@ +// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once +/* + * ATTENTION this header file can only include in .cc file. 
+ */ + +USE_LITE_OP(mul); +USE_LITE_OP(fc); +USE_LITE_OP(relu); +USE_LITE_OP(scale); +USE_LITE_OP(feed); +USE_LITE_OP(fetch); +USE_LITE_OP(io_copy); +USE_LITE_OP(elementwise_add) +USE_LITE_OP(elementwise_sub) +USE_LITE_OP(square) +USE_LITE_OP(softmax) +USE_LITE_OP(dropout) +USE_LITE_OP(concat) +USE_LITE_OP(conv2d) +USE_LITE_OP(depthwise_conv2d) +USE_LITE_OP(pool2d) +USE_LITE_OP(batch_norm) diff --git a/paddle/fluid/lite/tools/build.sh b/paddle/fluid/lite/tools/build.sh index a02cdc0385dfc50374cf99e4e0759717ff00092f..29fa9d9ad0b6c0b46e41ad12cee615bee4928bcc 100755 --- a/paddle/fluid/lite/tools/build.sh +++ b/paddle/fluid/lite/tools/build.sh @@ -56,7 +56,8 @@ function check_style { function cmake_arm { # $1: ARM_TARGET_OS in "android" , "armlinux" - # $2: ARM_TARGET_ARCH_ABI in "arm64-v8a", "armeabi-v7a" ,"armeabi-v7a-hf" + # $2: ARM_TARGET_ARCH_ABI in "armv8", "armv7" ,"armv7hf" + # $3: ARM_TARGET_LANG in "gcc" "clang" cmake .. \ -DWITH_GPU=OFF \ -DWITH_MKL=OFF \ @@ -66,7 +67,7 @@ function cmake_arm { -DLITE_WITH_ARM=ON \ -DLITE_WITH_LIGHT_WEIGHT_FRAMEWORK=ON \ -DWITH_TESTING=ON \ - -DARM_TARGET_OS=$1 -DARM_TARGET_ARCH_ABI=$2 + -DARM_TARGET_OS=$1 -DARM_TARGET_ARCH_ABI=$2 -DARM_TARGET_LANG=$3 } function build_single { @@ -75,7 +76,7 @@ function build_single { } function build { - make lite_compile_deps -j $NUM_CORES_FOR_COMPILE + make lite_compile_deps -j$NUM_CORES_FOR_COMPILE } # It will eagerly test all lite related unittests. @@ -113,22 +114,91 @@ function test_arm_android { echo "test name: ${test_name}" adb_work_dir="/data/local/tmp" - skip_list="test_model_parser_lite" # add more with space - [[ $skip_list =~ (^|[[:space:]])$test_name($|[[:space:]]) ]] && continue || echo 'skip $test_name' + + skip_list=("test_model_parser_lite" "test_cxx_api_lite") + for skip_name in ${skip_list[@]} ; do + [[ $skip_name =~ (^|[[:space:]])$test_name($|[[:space:]]) ]] && echo "skip $test_name" && return + done + testpath=$(find ./paddle/fluid -name ${test_name}) adb -s emulator-${port} push ${testpath} ${adb_work_dir} adb -s emulator-${port} shell chmod +x "${adb_work_dir}/${test_name}" adb -s emulator-${port} shell "./${adb_work_dir}/${test_name}" } +function test_arm_model { + local test_name=$1 + local port=$2 + local model_dir=$3 + + if [[ "${test_name}x" == "x" ]]; then + echo "test_name can not be empty" + exit 1 + fi + if [[ "${port}x" == "x" ]]; then + echo "Port can not be empty" + exit 1 + fi + if [[ "${model_dir}x" == "x" ]]; then + echo "Model dir can not be empty" + exit 1 + fi + + echo "test name: ${test_name}" + adb_work_dir="/data/local/tmp" + + testpath=$(find ./paddle/fluid -name ${test_name}) + adb -s emulator-${port} push ${model_dir} ${adb_work_dir} + adb -s emulator-${port} push ${testpath} ${adb_work_dir} + adb -s emulator-${port} shell chmod +x "${adb_work_dir}/${test_name}" + local adb_model_path="./${adb_work_dir}/`basename ${model_dir}`" + adb -s emulator-${port} shell "./${adb_work_dir}/${test_name} --eval_model_dir=$adb_model_path" +} + # Build the code and run lite arm tests. This is executed in the CI system. function build_test_arm { + # 1. 
Build goes first + cur_dir=$(pwd) + for lang in "gcc" "clang"; do + for os in "android" "armlinux" ; do + if [[ ${os} == "armlinux" && ${lang} == "clang" ]]; then + continue + fi + for abi in "armv8" "armv7" "armv7hf"; do + # TODO(hongming): enable compile armv7 and armv7hf on armlinux + if [[ ${abi} == "armv7hf" ]]; then + echo "armv7hf is not supported on both android and armlinux yet" + continue + fi + + # TODO(hongming): enable armv7 on armlinux + if [[ ${os} == "armlinux" && ${abi} == "armv7" ]]; then + echo "armv7 is not supported on armlinux yet" + continue + fi + + if [[ ${os} == "android" && ${abi} == "armv7hf" ]]; then + echo "android do not need armv7hf" + continue + fi + + build_dir=$cur_dir/build.lite.${os}.${abi}.${lang} + mkdir -p $build_dir + cd $build_dir + + cmake_arm ${os} ${abi} ${lang} + build $TESTS_FILE + done + done + done + + # 2. Then test port_armv8=5554 port_armv7=5556 adb kill-server adb devices | grep emulator | cut -f1 | while read line; do adb -s $line emu kill; done - # start android arm64-v8a armeabi-v7a emulators first + # start android armv8 and armv7 emulators first echo n | avdmanager create avd -f -n paddle-armv8 -k "system-images;android-24;google_apis;arm64-v8a" echo -ne '\n' | ${ANDROID_HOME}/emulator/emulator -avd paddle-armv8 -noaudio -no-window -gpu off -verbose -port ${port_armv8} & sleep 1m @@ -136,55 +206,37 @@ function build_test_arm { echo -ne '\n' | ${ANDROID_HOME}/emulator/emulator -avd paddle-armv7 -noaudio -no-window -gpu off -verbose -port ${port_armv7} & sleep 1m - cur_dir=$(pwd) - - for os in "android" "armlinux" ; do - for abi in "arm64-v8a" "armeabi-v7a" "armeabi-v7a-hf" ; do - # TODO(TJ): enable compile on v7-hf on andorid and all v7 on armlinux - if [[ ${abi} == "armeabi-v7a-hf" ]]; then - echo "armeabi-v7a-hf is not supported on both android and armlinux" - continue - fi - - if [[ ${os} == "armlinux" && ${abi} == "armeabi-v7a" ]]; then - echo "armeabi-v7a is not supported on armlinux yet" + # now can only test android. 
+ for lang in "gcc" "clang"; do + for abi in "armv8" "armv7" ; do + # TODO(yuanshuai): enable armv7 on android + if [[ ${abi} == "armv7" ]]; then continue fi - build_dir=$cur_dir/build.lite.${os}.${abi} - mkdir -p $build_dir + build_dir=$cur_dir/build.lite.android.${abi}.${lang} cd $build_dir - cmake_arm ${os} ${abi} - build $TESTS_FILE - - # armlinux need in another docker - # TODO(TJ): enable test with armlinux - if [[ ${os} == "android" ]]; then - adb_abi=${abi} - if [[ ${adb_abi} == "armeabi-v7a-hf" ]]; then - adb_abi="armeabi-v7a" - fi - if [[ ${adb_abi} == "armeabi-v7a" ]]; then - # skip all armv7 tests - # TODO(TJ): enable test with armv7 - continue - fi - local port= - if [[ ${adb_abi} == "armeabi-v7a" ]]; then - port=${port_armv7} - fi + local port= + if [[ ${abi} == "armv7" ]]; then + port=${port_armv7} + fi - if [[ ${adb_abi} == "arm64-v8a" ]]; then - port=${port_armv8} - fi - echo "test file: ${TESTS_FILE}" - for _test in $(cat $TESTS_FILE); do - test_arm_android $_test $port - done + if [[ ${abi} == "armv8" ]]; then + port=${port_armv8} fi + echo "test file: ${TESTS_FILE}" + for _test in $(cat $TESTS_FILE); do + test_arm_android $_test $port + done + # TODO(sangoly): refine this + test_arm_model "test_cxx_api_lite" $port "./third_party/install/mobilenet_v2_relu" done done + + # armlinux need in another docker + # TODO(hongming): enable test armlinux on armv8, armv7 and armv7hf + adb devices | grep emulator | cut -f1 | while read line; do adb -s $line emu kill; done echo "Done" } diff --git a/paddle/fluid/lite/tools/mobile_readme.md b/paddle/fluid/lite/tools/mobile_readme.md index b7ffbe6faa34860d029064246121e76c80fc06f0..08bd7b0f5d6728eb5ac0b5734a60befe66bd876b 100644 --- a/paddle/fluid/lite/tools/mobile_readme.md +++ b/paddle/fluid/lite/tools/mobile_readme.md @@ -17,8 +17,16 @@ $ git checkout incubate/lite ### 主要cmake选项 -- `ARM_TARGET_OS` 代表目标操作系统, 目前支持 "android" "armlinux", 模型是Android -- `ARM_TARGET_ARCH_ABI` 代表ARCH, 目前支持 "arm64-v8a" "armeabi-v7a"。 模型是arm64-v8a +- `ARM_TARGET_OS` 代表目标操作系统, 目前支持 "android" "armlinux", 默认是Android +- `ARM_TARGET_ARCH_ABI` 代表ARCH,支持输入"armv8"和"armv7",针对OS不一样选择不一样。 + - `-DARM_TARGET_OS="android"` 时 + - "armv8", 等效于 "arm64-v8a"。 default值为这个。 + - "armv7", 等效于 "armeabi-v7a"。 + - `-DARM_TARGET_OS="armlinux"` 时 + - "armv8", 等效于 "arm64"。 default值为这个。 + - "armv7hf", 等效于使用`eabihf`且`-march=armv7-a -mfloat-abi=hard -mfpu=neon-vfpv4 `。 + - "armv7", 等效于使用`eabi`且`-march=armv7-a -mfloat-abi=softfp -mfpu=neon-vfpv4`。 +- `ARM_TARGET_LANG` 代表目标编译的语言, 默认为gcc,支持 gcc和clang两种。 ### 编译 diff --git a/paddle/fluid/lite/x86/CMakeLists.txt b/paddle/fluid/lite/x86/CMakeLists.txt index 515933e2588844f2795ca676269965db9a9770fd..be772b921b4edc989e3ce25143bb88360fbb10b6 100644 --- a/paddle/fluid/lite/x86/CMakeLists.txt +++ b/paddle/fluid/lite/x86/CMakeLists.txt @@ -3,5 +3,3 @@ if (NOT LITE_WITH_X86) endif() cc_library(target_wrapper_x86 SRCS target_wrapper.cc) - -
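
As a rough illustration of what the armv8/armv7/armv7hf choices documented in the readme changes mean at the compiler level, the predefined ARM macros can be probed from C++. This probe is only a sketch for gcc/clang targets and is not part of the build scripts:

#include <iostream>

int main() {
#if defined(__aarch64__)
  std::cout << "armv8 build (arm64-v8a / arm64)" << std::endl;
#elif defined(__ARM_ARCH_7A__) && defined(__ARM_PCS_VFP)
  std::cout << "armv7hf build (eabihf, -mfloat-abi=hard)" << std::endl;
#elif defined(__ARM_ARCH_7A__)
  std::cout << "armv7 build (eabi, -mfloat-abi=softfp)" << std::endl;
#else
  std::cout << "host (non-ARM) build" << std::endl;
#endif
}
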