未验证 提交 92da5055 编写于 作者: L Liu-xiandong 提交者: GitHub

Add xpu2 compiler (#37254)

* Add XPU compiler for paddle, test=develop

* clean code

* clean useless code

* clean useless code

* clean useless code

* test

* add include path

* use clang compiler

* xpu2.cmake

* XPU2 compiler passed

* update

* update after pten

* combination the WITH_XPU and WITH_XPU2

* update the fuse operation in WITH_XPU and WITH_XPU2

* update

* update

* update

* fix the merge error

* update

* update the code

* update the code

* add run_kp_kernel flag

* update

* update

* fix prepared type_ bug

* clean and update the code

* reset the kernel_primitives

* update

* clean the code

* delete useless comment

* fix the bug in WITH_XPU

* update

* update

* modify the abi

* delete some useless code

* Parameter automation in xpu compilation

* Parameter automation in xpu compilation

* delete kps in cmake

* delete useless comment

* clean the code

* clean the code
上级 96bcf2df
......@@ -43,6 +43,7 @@ option(WITH_ONEMKL "Compile PaddlePaddle with oneMKL" OFF)
option(WITH_GPU "Compile PaddlePaddle with NVIDIA GPU" ${CUDA_FOUND})
option(WITH_TENSORRT "Compile PaddlePaddle with NVIDIA TensorRT" OFF)
option(WITH_XPU "Compile PaddlePaddle with BAIDU KUNLUN XPU" OFF)
option(WITH_XPU_KP "Compile PaddlePaddle with BAIDU XPU compiler " OFF)
option(WITH_MLU "Compile PaddlePaddle with CAMBRICON MLU" OFF)
option(WITH_WIN_DUMP_DBG "Compile with windows core dump debug mode" OFF)
option(WITH_ASCEND "Compile PaddlePaddle with ASCEND" OFF)
......@@ -59,6 +60,9 @@ include(generic) # simplify cmake module
if (WITH_GPU AND WITH_XPU)
message(FATAL_ERROR "Error when compile GPU and XPU at the same time")
endif()
if (WITH_GPU AND WITH_XPU_KP)
message(FATAL_ERROR "Error when compile GPU and XPU2 at the same time")
endif()
if (WITH_GPU AND WITH_ASCEND)
message(FATAL_ERROR "Error when compile GPU and ASCEND at the same time")
endif()
......@@ -273,6 +277,14 @@ if (NOT WITH_GPU AND WITH_NCCL)
"Disable NCCL when compiling without GPU" FORCE)
endif()
# force WITH_XPU on when WITH_XPU_KP
if (WITH_XPU_KP AND NOT WITH_XPU)
MESSAGE(WARNING
"Enable WITH_XPU when compiling with WITH_XPU_KP. Force WITH_XPU=ON.")
set(WITH_XPU ON CACHE STRING
"Enable WITH_XPU when compiling with WITH_XPU_KP" FORCE)
endif()
if (NOT WITH_XPU AND WITH_XPU_BKCL)
MESSAGE(WARNING
"Disable BKCL when compiling without XPU. Force WITH_XPU_BKCL=OFF.")
......@@ -317,6 +329,10 @@ if(WITH_ROCM)
include(miopen) # set miopen libraries, must before configure
endif(WITH_ROCM)
if(WITH_XPU_KP)
include(xpu_kp)
endif()
if (NOT WITH_ROCM AND WITH_RCCL)
MESSAGE(WARNING
"Disable RCCL when compiling without ROCM. Force WITH_RCCL=OFF.")
......
......@@ -99,6 +99,11 @@ if(WITH_XPU)
add_definitions(-DPADDLE_WITH_XPU)
endif()
if(WITH_XPU_KP)
message(STATUS "Compile with XPU_KP!")
add_definitions(-DPADDLE_WITH_XPU_KP)
endif()
if(WITH_IPU)
message(STATUS "Compile with IPU!")
add_definitions(-DPADDLE_WITH_IPU)
......
......@@ -654,6 +654,81 @@ function(hip_test TARGET_NAME)
endif()
endfunction(hip_test)
function(xpu_library TARGET_NAME)
if (WITH_XPU_KP)
set(options STATIC static SHARED shared)
set(oneValueArgs "")
set(multiValueArgs SRCS DEPS)
cmake_parse_arguments(xpu_library "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN})
if(xpu_library_SRCS)
if (xpu_library_SHARED OR xpu_library_shared) # build *.so
message(FATAL_ERROR "XPU kernel currently does not support dynamic links")
else()
xpu_add_library(${TARGET_NAME} STATIC ${xpu_library_SRCS} DEPENDS ${xpu_library_DEPS})
find_fluid_modules(${TARGET_NAME})
endif()
if (xpu_library_DEPS)
add_dependencies(${TARGET_NAME} ${xpu_library_DEPS})
target_link_libraries(${TARGET_NAME} ${xpu_library_DEPS})
endif()
# cpplint code style
foreach(source_file ${xpu_library_SRCS})
string(REGEX REPLACE "\\.[^.]*$" "" source ${source_file})
if(EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/${source}.h)
list(APPEND xpu_library_HEADERS ${CMAKE_CURRENT_SOURCE_DIR}/${source}.h)
endif()
endforeach()
else(xpu_library_SRCS)
if (xpu_library_DEPS)
list(REMOVE_DUPLICATES xpu_library_DEPS)
generate_dummy_static_lib(LIB_NAME ${TARGET_NAME} FILE_PATH ${target_SRCS} GENERATOR "generic.cmake:xpu_library")
target_link_libraries(${TARGET_NAME} ${xpu_library_DEPS})
add_dependencies(${TARGET_NAME} ${xpu_library_DEPS})
else()
message(FATAL "Please specify source file or library in xpu_library.")
endif()
endif(xpu_library_SRCS)
endif()
endfunction(xpu_library)
function(xpu_binary TARGET_NAME)
if (WITH_XPU_KP)
set(options "")
set(oneValueArgs "")
set(multiValueArgs SRCS DEPS)
cmake_parse_arguments(xpu_binary "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN})
add_executable(${TARGET_NAME} ${xpu_binary_SRCS})
if(xpu_binary_DEPS)
target_link_libraries(${TARGET_NAME} ${xpu_binary_DEPS})
add_dependencies(${TARGET_NAME} ${xpu_binary_DEPS})
common_link(${TARGET_NAME})
endif()
endif()
endfunction(xpu_binary)
function(xpu_test TARGET_NAME)
# The environment variable `CI_SKIP_CPP_TEST` is used to skip the compilation
# and execution of test in CI. `CI_SKIP_CPP_TEST` is set to ON when no files
# other than *.py are modified.
if (WITH_XPU_KP AND WITH_TESTING AND NOT "$ENV{CI_SKIP_CPP_TEST}" STREQUAL "ON")
set(oneValueArgs "")
set(multiValueArgs SRCS DEPS)
cmake_parse_arguments(xpu_test "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN})
add_executable(${TARGET_NAME} ${xpu_test_SRCS})
# "-pthread -ldl -lrt" is defined in CMAKE_CXX_LINK_EXECUTABLE
target_link_options(${TARGET_NAME} PRIVATE -pthread -ldl -lrt)
get_property(os_dependency_modules GLOBAL PROPERTY OS_DEPENDENCY_MODULES)
target_link_libraries(${TARGET_NAME} ${xpu_test_DEPS} paddle_gtest_main lod_tensor memory gtest gflags glog ${os_dependency_modules})
add_dependencies(${TARGET_NAME} ${xpu_test_DEPS} paddle_gtest_main lod_tensor memory gtest gflags glog)
common_link(${TARGET_NAME})
add_test(${TARGET_NAME} ${TARGET_NAME})
set_property(TEST ${TARGET_NAME} PROPERTY ENVIRONMENT FLAGS_cpu_deterministic=true)
set_property(TEST ${TARGET_NAME} PROPERTY ENVIRONMENT FLAGS_init_allocated_mem=true)
set_property(TEST ${TARGET_NAME} PROPERTY ENVIRONMENT FLAGS_cudnn_deterministic=true)
endif()
endfunction(xpu_test)
function(go_library TARGET_NAME)
set(options STATIC static SHARED shared)
set(oneValueArgs "")
......
......@@ -34,6 +34,7 @@ function(op_library TARGET)
set(cu_cc_srcs)
set(hip_cc_srcs)
set(xpu_cc_srcs)
set(xpu_kp_cc_srcs)
set(npu_cc_srcs)
set(mlu_cc_srcs)
set(cudnn_cu_cc_srcs)
......@@ -120,6 +121,11 @@ function(op_library TARGET)
list(APPEND xpu_cc_srcs ${XPU_FILE}.cc)
endif()
endif()
if(WITH_XPU_KP)
if (EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/${TARGET}.xpu)
list(APPEND xpu_kp_cc_srcs ${TARGET}.xpu)
endif()
endif()
if(WITH_ASCEND_CL)
string(REPLACE "_op" "_op_npu" NPU_FILE "${TARGET}")
if (EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/${NPU_FILE}.cc)
......@@ -154,6 +160,8 @@ function(op_library TARGET)
list(APPEND mkldnn_cc_srcs ${src})
elseif(WITH_XPU AND ${src} MATCHES ".*_op_xpu.cc$")
list(APPEND xpu_cc_srcs ${src})
elseif(WITH_XPU_KP AND ${src} MATCHES ".*\\.xpu$")
list(APPEND xpu_kp_cc_srcs ${src})
elseif(WITH_ASCEND_CL AND ${src} MATCHES ".*_op_npu.cc$")
list(APPEND npu_cc_srcs ${src})
elseif(WITH_MLU AND ${src} MATCHES ".*_op_mlu.cc$")
......@@ -161,11 +169,13 @@ function(op_library TARGET)
elseif(${src} MATCHES ".*\\.cc$")
list(APPEND cc_srcs ${src})
else()
message(FATAL_ERROR "${TARGET} Source file ${src} should only be .cc or .cu")
message(FATAL_ERROR "${TARGET} Source file ${src} should only be .cc or .cu or .xpu")
endif()
endforeach()
endif()
list(LENGTH xpu_cc_srcs xpu_cc_srcs_len)
list(LENGTH xpu_kp_cc_srcs xpu_kp_cc_srcs_len)
list(LENGTH cc_srcs cc_srcs_len)
if (${cc_srcs_len} EQUAL 0)
message(FATAL_ERROR "The op library ${TARGET} should contains at least one .cc file")
......@@ -231,6 +241,8 @@ function(op_library TARGET)
list(REMOVE_ITEM hip_srcs "decode_jpeg_op.cu")
hip_library(${TARGET} SRCS ${cc_srcs} ${hip_cc_srcs} ${miopen_cu_cc_srcs} ${miopen_cu_srcs} ${mkldnn_cc_srcs} ${hip_srcs} DEPS ${op_library_DEPS}
${op_common_deps})
elseif (WITH_XPU_KP AND ${xpu_kp_cc_srcs_len} GREATER 0)
xpu_library(${TARGET} SRCS ${cc_srcs} ${mkldnn_cc_srcs} ${xpu_cc_srcs} ${xpu_kp_cc_srcs} DEPS ${op_library_DEPS} ${op_common_deps})
else()
# Unity Build relies on global option `WITH_UNITY_BUILD` and local option `UNITY`.
if(WITH_UNITY_BUILD AND op_library_UNITY)
......@@ -359,6 +371,11 @@ function(op_library TARGET)
endif()
endif()
# pybind USE_OP_DEVICE_KERNEL for XPU KP
if (WITH_XPU_KP AND ${xpu_kp_cc_srcs_len} GREATER 0)
file(APPEND ${pybind_file} "USE_OP_DEVICE_KERNEL(${TARGET}, KP);\n")
endif()
# pybind USE_OP_DEVICE_KERNEL for NPU
if (WITH_ASCEND_CL AND ${npu_cc_srcs_len} GREATER 0)
foreach(npu_src ${npu_cc_srcs})
......@@ -438,7 +455,6 @@ function(op_library TARGET)
endif()
endfunction()
function(register_operators)
set(options "")
set(oneValueArgs "")
......
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
if(NOT WITH_XPU_KP)
return()
endif()
if(NOT XPU_TOOLCHAIN)
set(XPU_TOOLCHAIN /workspace/paddle/xpu-demo/XTDK)
get_filename_component(XPU_TOOLCHAIN ${XPU_TOOLCHAIN} REALPATH)
endif()
if(NOT IS_DIRECTORY ${XPU_TOOLCHAIN})
message(FATAL_ERROR "Directory ${XPU_TOOLCHAIN} not found!")
endif()
message(STATUS "Build with XPU_TOOLCHAIN=" ${XPU_TOOLCHAIN})
set(XPU_CLANG ${XPU_TOOLCHAIN}/bin/clang++)
message(STATUS "Build with XPU_CLANG=" ${XPU_CLANG})
# The host sysroot of XPU compiler is gcc-8.2
if(NOT HOST_SYSROOT)
set(HOST_SYSROOT /opt/compiler/gcc-8.2)
endif()
if(NOT IS_DIRECTORY ${HOST_SYSROOT})
message(FATAL_ERROR "Directory ${HOST_SYSROOT} not found!")
endif()
if(NOT API_ARCH)
set(API_ARCH x86_64-baidu-linux-gnu)
endif()
if(API_ARCH MATCHES "x86_64")
if(EXISTS ${HOST_SYSROOT}/bin/g++)
set(HOST_CXX ${HOST_SYSROOT}/bin/g++)
set(HOST_AR ${HOST_SYSROOT}/bin/ar)
else()
set(HOST_CXX /usr/bin/g++)
set(HOST_AR /usr/bin/ar)
endif()
else()
set(HOST_CXX ${CMAKE_CXX_COMPILER})
set(HOST_AR ${CMAKE_AR})
endif()
set(TOOLCHAIN_ARGS )
if(OPT_LEVEL)
set(OPT_LEVEL ${OPT_LEVEL})
else()
set(OPT_LEVEL "-O3")
endif()
message(STATUS "Build with API_ARCH=" ${API_ARCH})
message(STATUS "Build with TOOLCHAIN_ARGS=" ${TOOLCHAIN_ARGS})
message(STATUS "Build with HOST_SYSROOT=" ${HOST_SYSROOT})
message(STATUS "Build with HOST_CXX=" ${HOST_CXX})
message(STATUS "Build with HOST_AR=" ${HOST_AR})
macro(compile_kernel COMPILE_ARGS)
set(options "")
set(oneValueArgs "")
set(multiValueArgs KERNEL DIRPATH XNAME DEVICE HOST XPU DEPENDS)
cmake_parse_arguments(xpu_add_library "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN})
set(kernel_path ${xpu_add_library_DIRPATH})
set(kernel_name ${xpu_add_library_XNAME})
set(device_o_extra_flags ${xpu_add_library_DEVICE})
set(host_o_extra_flags ${xpu_add_library_HOST})
set(xpu_1_or_2 ${xpu_add_library_XPU})
set(cc_depends ${xpu_add_library_DEPENDS})
set(kernel_target ${kernel_name}_kernel)
add_custom_target(${kernel_target}
WORKING_DIRECTORY
${CMAKE_CURRENT_BINARY_DIR}
DEPENDS
kernel_build/${kernel_name}.host.o
kernel_build/${kernel_name}.bin.o
COMMENT
${kernel_target}
VERBATIM
)
if(cc_depends)
add_dependencies(${kernel_target} ${xpu_add_library_DEPENDS})
endif()
set(arg_device_o_extra_flags ${device_o_extra_flags})
separate_arguments(arg_device_o_extra_flags)
set(arg_host_o_extra_flags ${host_o_extra_flags})
separate_arguments(arg_host_o_extra_flags)
set(XTDK_DIR ${XPU_TOOLCHAIN})
set(CXX_DIR ${HOST_SYSROOT})
set(XPU_CXX_FLAGS -Wno-error=pessimizing-move -Wno-error=constant-conversion -Wno-error=c++11-narrowing -Wno-error=shift-count-overflow -Wno-error=unused-local-typedef -Wno-error=deprecated-declarations -Wno-deprecated-declarations -std=c++14 -m64 -fPIC -fno-omit-frame-pointer -Wall -Wno-inconsistent-missing-override -Wextra -Wnon-virtual-dtor -Wdelete-non-virtual-dtor -Wno-unused-parameter -Wno-unused-function -Wno-error=unused-local-typedefs -Wno-error=ignored-attributes -Wno-error=int-in-bool-context -Wno-error=parentheses -Wno-error=address -Wno-ignored-qualifiers -Wno-ignored-attributes -Wno-parentheses -DNDEBUG )
#include path
get_property(dirs DIRECTORY ${CMAKE_SOURCE_DIR} PROPERTY INCLUDE_DIRECTORIES)
set(XPU_CXX_INCLUDES "")
foreach(dir IN LISTS dirs)
list(APPEND XPU_CXX_INCLUDES "-I${dir}")
endforeach()
string(REPLACE ";" " " XPU_CXX_INCLUDES "${XPU_CXX_INCLUDES}" )
separate_arguments(XPU_CXX_INCLUDES UNIX_COMMAND "${XPU_CXX_INCLUDES}")
#related flags
get_directory_property( DirDefs DIRECTORY ${CMAKE_SOURCE_DIR} COMPILE_DEFINITIONS )
set(XPU_CXX_DEFINES "")
foreach(def IN LISTS DirDefs)
list(APPEND XPU_CXX_DEFINES "-D${def}")
endforeach()
string(REPLACE ";" " " XPU_CXX_DEFINES "${XPU_CXX_DEFINES}" )
separate_arguments(XPU_CXX_DEFINES UNIX_COMMAND "${XPU_CXX_DEFINES}")
add_custom_command(
OUTPUT
kernel_build/${kernel_name}.bin.o
COMMAND
${CMAKE_COMMAND} -E make_directory kernel_build
COMMAND
${XPU_CLANG} --sysroot=${CXX_DIR} -std=c++11 -D_GLIBCXX_USE_CXX11_ABI=1 ${OPT_LEVEL} -fno-builtin -mcpu=xpu2 -fPIC ${XPU_CXX_DEFINES} ${XPU_CXX_FLAGS} ${XPU_CXX_INCLUDES}
-I. -o kernel_build/${kernel_name}.bin.o.sec ${kernel_path}/${kernel_name}.xpu
--xpu-device-only -c -v
COMMAND
${XTDK_DIR}/bin/xpu2-elfconv kernel_build/${kernel_name}.bin.o.sec kernel_build/${kernel_name}.bin.o ${XPU_CLANG} --sysroot=${CXX_DIR}
WORKING_DIRECTORY
${CMAKE_CURRENT_BINARY_DIR}
DEPENDS
${xpu_add_library_DEPENDS}
COMMENT
kernel_build/${kernel_name}.bin.o
VERBATIM
)
list(APPEND xpu_kernel_depends kernel_build/${kernel_name}.bin.o)
add_custom_command(
OUTPUT
kernel_build/${kernel_name}.host.o
COMMAND
${CMAKE_COMMAND} -E make_directory kernel_build
COMMAND
${XPU_CLANG} --sysroot=${CXX_DIR} -std=c++11 -D_GLIBCXX_USE_CXX11_ABI=1 ${OPT_LEVEL} -fno-builtin -mcpu=xpu2 -fPIC ${XPU_CXX_DEFINES} ${XPU_CXX_FLAGS} ${XPU_CXX_INCLUDES}
-I. -o kernel_build/${kernel_name}.host.o ${kernel_path}/${kernel_name}.xpu
--xpu-host-only -c -v
WORKING_DIRECTORY
${CMAKE_CURRENT_BINARY_DIR}
DEPENDS
${xpu_add_library_DEPENDS}
COMMENT
kernel_build/${kernel_name}.host.o
VERBATIM
)
list(APPEND xpu_kernel_depends kernel_build/${kernel_name}.host.o)
endmacro()
###############################################################################
# XPU_ADD_LIBRARY
###############################################################################
macro(xpu_add_library TARGET_NAME)
# Separate the sources from the options
set(options "")
set(oneValueArgs "")
set(multiValueArgs STATIC DEPENDS)
cmake_parse_arguments(xpu_add_library "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN})
set(xpu_srcs ${xpu_add_library_STATIC})
set(xpu_target ${TARGET_NAME})
set(cc_srcs_depends ${xpu_add_library_DEPENDS})
file(GLOB_RECURSE xpu_srcs_lists ${xpu_srcs})
list(LENGTH xpu_srcs_lists xpu_srcs_lists_num)
set(XPU1_DEVICE_O_EXTRA_FLAGS " ")
set(XPU1_HOST_O_EXTRA_FLAGS " ")
# Distinguish .xpu file from other files
foreach(cur_xpu_src IN LISTS xpu_srcs_lists)
get_filename_component(language_type_name ${cur_xpu_src} EXT)
if(${language_type_name} STREQUAL ".xpu")
list(APPEND xpu_kernel_lists ${cur_xpu_src})
else()
list(APPEND cc_kernel_lists ${cur_xpu_src})
endif()
endforeach()
# Ensure that there is only one xpu kernel
list(LENGTH xpu_kernel_lists xpu_kernel_lists_num)
list(LENGTH cc_srcs_depends cc_srcs_depends_num)
if(${xpu_kernel_lists_num})
foreach(xpu_kernel IN LISTS xpu_kernel_lists)
get_filename_component(kernel_name ${xpu_kernel} NAME_WE)
get_filename_component(kernel_dir ${xpu_kernel} DIRECTORY)
set(kernel_rules ${kernel_dir}/${kernel_name}.rules)
set(kernel_name ${kernel_name})
compile_kernel( KERNEL ${xpu_kernel} DIRPATH ${kernel_dir} XNAME ${kernel_name} DEVICE ${XPU1_DEVICE_O_EXTRA_FLAGS} HOST ${XPU1_HOST_O_EXTRA_FLAGS} XPU "xpu2" DEPENDS ${cc_srcs_depends})
endforeach()
add_custom_target(${xpu_target}_src ALL
WORKING_DIRECTORY
${CMAKE_CURRENT_BINARY_DIR}
DEPENDS
${xpu_kernel_depends}
${CMAKE_CURRENT_BINARY_DIR}/lib${xpu_target}_xpu.a
COMMENT
${xpu_target}_src
VERBATIM
)
add_custom_command(
OUTPUT
${CMAKE_CURRENT_BINARY_DIR}/lib${xpu_target}_xpu.a
COMMAND
${HOST_AR} rcs ${CMAKE_CURRENT_BINARY_DIR}/lib${xpu_target}_xpu.a ${xpu_kernel_depends}
WORKING_DIRECTORY
${CMAKE_CURRENT_BINARY_DIR}
DEPENDS
${xpu_kernel_depends}
COMMENT
${CMAKE_CURRENT_BINARY_DIR}/lib${xpu_target}_xpu.a
VERBATIM
)
add_library(${xpu_target} STATIC ${cc_kernel_lists})
add_dependencies(${xpu_target} ${xpu_target}_src)
target_link_libraries(${TARGET_NAME} ${CMAKE_CURRENT_BINARY_DIR}/lib${xpu_target}_xpu.a)
else()
add_library(${xpu_target} STATIC ${cc_kernel_lists})
endif()
endmacro()
......@@ -26,6 +26,7 @@ enum class LibraryType {
kPlain = 0,
kMKLDNN = 1,
kCUDNN = 2,
kKP = 3,
};
inline std::string LibraryTypeToString(const LibraryType& library_type) {
......@@ -36,10 +37,12 @@ inline std::string LibraryTypeToString(const LibraryType& library_type) {
return "MKLDNN";
case LibraryType::kCUDNN:
return "CUDNN";
case LibraryType::kKP:
return "KP";
default:
PADDLE_THROW(platform::errors::Unimplemented(
"Unknown LibraryType code (%d), only supports library type include "
"PLAIN(0), MKLDNN(1), CUDNN(2).",
"PLAIN(0), MKLDNN(1), CUDNN(2), KP(3).",
static_cast<int>(library_type)));
}
}
......@@ -57,6 +60,8 @@ inline LibraryType StringToLibraryType(const char* ctype) {
return LibraryType::kCUDNN;
// To be compatible with register macro.
// CPU, CUDA, PLAIN are same library type.
} else if (s == std::string("KP")) {
return LibraryType::kKP;
} else if (s == std::string("CPU")) {
return LibraryType::kPlain;
} else if (s == std::string("XPU")) {
......
......@@ -1386,6 +1386,7 @@ void OperatorWithKernel::ChooseKernel(const ExecutionContext& ctx) const {
auto expected_kernel_key = InnerGetExpectedKernelType(ctx);
auto kernel_iter = kernels.find(expected_kernel_key);
#ifdef PADDLE_WITH_MKLDNN
// workaround for missing MKLDNN kernel when FLAGS_use_mkldnn env var is set
if (kernel_iter == kernels.end() &&
......@@ -1408,6 +1409,22 @@ void OperatorWithKernel::ChooseKernel(const ExecutionContext& ctx) const {
kernel_iter = kernels.find(expected_kernel_key);
}
#endif
#ifdef PADDLE_WITH_XPU_KP
bool use_xpu_kp_kernel_rt =
FLAGS_run_kp_kernel &&
paddle::platform::is_xpu_kp_support_op(type_, expected_kernel_key);
bool use_xpu_kp_kernel_debug =
paddle::platform::is_in_xpu_kpwhite_list(type_);
if (platform::is_xpu_place(expected_kernel_key.place_) &&
(use_xpu_kp_kernel_rt || use_xpu_kp_kernel_debug)) {
expected_kernel_key.library_type_ = LibraryType::kKP;
kernel_iter = kernels.find(expected_kernel_key);
VLOG(3) << "using XPU KP kernel: " << type_
<< ", using_kernel_key:" << expected_kernel_key;
}
#endif
#ifdef PADDLE_WITH_IPU
if (kernel_iter == kernels.end() &&
platform::is_ipu_place(expected_kernel_key.place_)) {
......
......@@ -690,6 +690,7 @@ class OperatorWithKernel : public OperatorBase {
// new pten kernel, if there is a better design in the future,
// we may polish the implementation here
mutable bool run_pten_kernel_ = false;
mutable bool run_kp_kernel = false;
mutable std::unique_ptr<pten::KernelSignature> pt_kernel_signature_;
mutable std::unique_ptr<pten::Kernel> pt_kernel_;
};
......
......@@ -24,6 +24,7 @@
#ifdef PADDLE_WITH_XPU
#include "paddle/fluid/platform/device/xpu/xpu_op_list.h"
#endif
#include "paddle/fluid/framework/library_type.h"
#include "paddle/fluid/platform/device/gpu/gpu_info.h"
#include "paddle/fluid/platform/profiler.h"
......@@ -145,7 +146,6 @@ PreparedOp PrepareImpl(const NameVarMap<VarType>& ins,
auto dygraph_exe_ctx = DygraphExecutionContext<VarType>(
op, framework::Scope(), *dev_ctx, ctx, ins, outs, attrs, default_attrs);
auto expected_kernel_key = op.GetExpectedKernelType(dygraph_exe_ctx);
VLOG(3) << "expected_kernel_key:" << expected_kernel_key;
framework::KernelSignature pt_kernel_signature;
pten::KernelKey pt_kernel_key;
......@@ -228,7 +228,31 @@ PreparedOp PrepareImpl(const NameVarMap<VarType>& ins,
expected_kernel_key.place_ = platform::CPUPlace();
kernel_iter = kernels.find(expected_kernel_key);
}
#endif
#ifdef PADDLE_WITH_XPU_KP
bool use_xpu_kp_kernel_rt =
FLAGS_run_kp_kernel &&
paddle::platform::is_xpu_kp_support_op(op.Type(), expected_kernel_key);
bool use_xpu_kp_kernel_debug =
paddle::platform::is_in_xpu_kpwhite_list(op.Type());
if (use_xpu_kp_kernel_rt) {
VLOG(3) << "xpu_kp using rt mode ";
}
if (use_xpu_kp_kernel_debug) {
VLOG(3) << "xpu_kp using debug mode ";
}
if (paddle::platform::is_xpu_place(expected_kernel_key.place_) &&
(use_xpu_kp_kernel_rt || use_xpu_kp_kernel_debug)) {
expected_kernel_key.place_ = platform::XPUPlace();
expected_kernel_key.library_type_ = paddle::framework::LibraryType::kKP;
kernel_iter = kernels.find(expected_kernel_key);
VLOG(3) << "using XPU KP kernel: " << op.Type()
<< ", using_kernel_key:" << expected_kernel_key;
}
#endif
#ifdef PADDLE_WITH_ASCEND_CL
if (kernel_iter == kernels.end() &&
paddle::platform::is_npu_place(expected_kernel_key.place_)) {
......
......@@ -190,6 +190,7 @@ class PreparedOp {
// new pten kernel, if there is a better design in the future,
// we may polish the implementation here
bool run_pten_kernel_{false};
bool run_kp_kernel_{false};
framework::KernelSignature pt_kernel_signature_;
pten::Kernel pt_kernel_;
};
......
/* Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#ifdef PADDLE_WITH_XPU_KP
#include <string>
#include <unordered_map>
#include <unordered_set>
#include "paddle/fluid/framework/op_kernel_type.h"
namespace paddle {
namespace platform {
using vartype = paddle::framework::proto::VarType;
using pOpKernelType = paddle::framework::OpKernelType;
using XPUKernelSet =
std::unordered_set<pOpKernelType, paddle::framework::OpKernelType::Hash>;
using XPUOpMap = std::unordered_map<std::string, XPUKernelSet>;
XPUOpMap& get_kp_ops() {
static XPUOpMap s_xpu_kp_kernels{};
return s_xpu_kp_kernels;
}
} // namespace platform
} // namespace paddle
#endif
......@@ -16,6 +16,7 @@ limitations under the License. */
#include "paddle/fluid/platform/device/xpu/xpu1_op_list.h"
#include "paddle/fluid/platform/device/xpu/xpu2_op_list.h"
#include "paddle/fluid/platform/device/xpu/xpu_info.h"
#include "paddle/fluid/platform/device/xpu/xpu_op_kpfirst_list.h"
#include "paddle/fluid/platform/device/xpu/xpu_op_list.h"
namespace paddle {
......@@ -74,6 +75,48 @@ bool is_in_xpu_black_list(const std::string& op_name) {
return false;
}
#ifdef PADDLE_WITH_XPU_KP
bool is_xpu_kp_support_op(const std::string& op_name,
const pOpKernelType& type) {
auto& ops = get_kl1_ops();
auto v = get_xpu_version(type.place_.device);
if (v == pten::backends::xpu::XPUVersion::XPU2) {
ops = get_kp_ops();
}
if (ops.find(op_name) != ops.end() &&
ops[op_name].find(type) != ops[op_name].end()) {
return true;
}
return false;
}
bool is_in_xpu_kpwhite_list(const std::string& op_name) {
static bool inited = false;
static std::unordered_set<std::string> xpu_kpwhite_list;
static std::mutex s_mtx;
if (!inited) {
std::lock_guard<std::mutex> guard(s_mtx);
if (!inited) {
if (std::getenv("XPU_KPWHITE_LIST") != nullptr) {
std::string ops(std::getenv("XPU_KPWHITE_LIST"));
tokenize(ops, ',', &xpu_kpwhite_list);
}
inited = true;
VLOG(3) << "XPU kpwhite List: ";
for (auto iter = xpu_kpwhite_list.begin(); iter != xpu_kpwhite_list.end();
++iter) {
VLOG(3) << *iter << " ";
}
}
}
if (xpu_kpwhite_list.find(op_name) != xpu_kpwhite_list.end()) {
return true;
}
return false;
}
#endif
std::vector<vartype::Type> get_xpu_op_support_type(
const std::string& op_name, pten::backends::xpu::XPUVersion version) {
std::vector<vartype::Type> res;
......@@ -101,7 +144,6 @@ XPUOpListMap get_xpu_op_list(pten::backends::xpu::XPUVersion version) {
}
return res;
}
} // namespace platform
} // namespace paddle
#endif
......@@ -27,6 +27,12 @@ using XPUOpListMap =
bool is_xpu_support_op(const std::string& op_name, const pOpKernelType& type);
bool is_in_xpu_black_list(const std::string& op_name);
#ifdef PADDLE_WITH_XPU_KP
bool is_xpu_kp_support_op(const std::string& op_name,
const pOpKernelType& type);
bool is_in_xpu_kpwhite_list(const std::string& op_name);
#endif
std::vector<vartype::Type> get_xpu_op_support_type(
const std::string& op_name, pten::backends::xpu::XPUVersion version);
XPUOpListMap get_xpu_op_list(pten::backends::xpu::XPUVersion version);
......
......@@ -700,16 +700,16 @@ PADDLE_DEFINE_EXPORTED_bool(run_pten_kernel, true,
"It controls whether to use pten kernel");
/**
* Pt kernel related FLAG
* KP kernel related FLAG
* Name: FLAGS_run_kp_kernel
* Since Version: 2.3.0
* Value Range: bool, default=false
* Example: FLAGS_run_kp_kernel=true would use the kp kernel to compute in
* the Op for XPU2.
* Example: FLAGS_run_kp_kernel=true would use the kp kernel to compute in the
* Op.
* Note:
*/
PADDLE_DEFINE_EXPORTED_bool(run_kp_kernel, true,
"It controls whether to use kp kernel for xpu2");
PADDLE_DEFINE_EXPORTED_bool(run_kp_kernel, false,
"It controls whether to run PaddlePaddle using KP");
/**
* Distributed related FLAG
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册