未验证 提交 6d5d9f23 编写于 作者: H hong19860320 提交者: GitHub

[XPU] Add XPU plugin support (#55101)

* Add XPU plugin to support the customized ops or improve the performance of the fusion ops based on hand-written xpu micro kernels.

* refine README.md
上级 57cb1c99
......@@ -56,6 +56,7 @@ option(WITH_TENSORRT "Compile PaddlePaddle with NVIDIA TensorRT" OFF)
option(WITH_XPU "Compile PaddlePaddle with BAIDU KUNLUN XPU" OFF)
option(WITH_XPU_KP "Compile PaddlePaddle with BAIDU XPU compiler " OFF)
option(WITH_XPU_XFT "Compile PaddlePaddle with BAIDU XPU-XFT" OFF)
option(WITH_XPU_PLUGIN "Compile PaddlePaddle with BAIDU XPU plugin" OFF)
option(WITH_WIN_DUMP_DBG "Compile with windows core dump debug mode" OFF)
option(WITH_ROCM "Compile PaddlePaddle with ROCM platform" OFF)
option(WITH_IPU "Compile PaddlePaddle with Graphcore IPU" OFF)
......
......@@ -170,6 +170,12 @@ if(WITH_XPTI)
set(XPU_XPTI_LIB "${XPU_LIB_DIR}/${XPU_XPTI_LIB_NAME}")
endif()
if(WITH_XPU_PLUGIN)
message(STATUS "Compile with XPU PLUGIN!")
add_definitions(-DPADDLE_WITH_XPU_PLUGIN)
include_directories(${CMAKE_SOURCE_DIR}/paddle/phi/kernels/xpu/plugin/include)
endif()
if(WITH_XPU_BKCL AND WITH_XPU_XFT)
target_link_libraries(xpulib ${XPU_API_LIB} ${XPU_RT_LIB} ${XPU_BKCL_LIB}
${XPU_XFT_LIB})
......
......@@ -86,6 +86,10 @@ endif()
if(WITH_XPU)
list(APPEND PHI_DEPS xpulib)
if(WITH_XPU_PLUGIN)
add_subdirectory(kernels/xpu/plugin)
list(APPEND PHI_DEPS xpuplugin)
endif()
endif()
set(PHI_SRCS
......
......@@ -24,6 +24,9 @@ limitations under the License. */
#include "xpu/runtime.h"
#include "xpu/runtime_ex.h"
#include "xpu/xdnn.h"
#ifdef PADDLE_WITH_XPU_PLUGIN
#include "xpu/plugin.h"
#endif
namespace xpu = baidu::xpu::api;
......
cmake_minimum_required(VERSION 3.16)
project(xpuplugin LANGUAGES CXX)
if(NOT DEFINED BUILD_STANDALONE)
if(NOT DEFINED XPU_INC_DIR)
message(
FATAL_ERROR
"XPU_INC_DIR not set, or directory ${XPU_INC_DIR} not found, please compile with PaddlePaddle."
)
endif()
if(NOT DEFINED XPU_LIB_DIR)
message(
FATAL_ERROR
"XPU_LIB_DIR not set, or directory ${XPU_LIB_DIR} not found, please compile with PaddlePaddle."
)
endif()
set(XDNN_INC_DIR ${XPU_INC_DIR})
set(XDNN_LIB_DIR ${XPU_LIB_DIR})
set(XRE_INC_DIR ${XPU_INC_DIR})
set(XRE_LIB_DIR ${XPU_LIB_DIR})
set(XPU_DEPS xpulib) # Depends cmake/external/xpu.cmake
else()
if(NOT DEFINED XDNN_PATH)
set(XDNN_PATH $ENV{XDNN_PATH})
endif()
if(NOT DEFINED XRE_PATH)
set(XRE_PATH $ENV{XRE_PATH})
endif()
if(NOT IS_DIRECTORY ${XDNN_PATH})
message(
FATAL_ERROR
"XDNN_PATH not set, or directory ${XDNN_PATH} not found, please export XDNN_PATH=<path_to_xdnn>."
)
endif()
if(NOT IS_DIRECTORY ${XRE_PATH})
message(
FATAL_ERROR
"XRE_PATH not set, or directory ${XRE_PATH} not found, please export XRE_PATH=<path_to_xre>."
)
endif()
set(XDNN_INC_DIR ${XDNN_PATH}/include)
set(XDNN_LIB_DIR ${XDNN_PATH}/so)
set(XRE_INC_DIR ${XRE_PATH}/include)
set(XRE_LIB_DIR ${XRE_PATH}/so)
endif()
if(NOT DEFINED CLANG_PATH)
set(CLANG_PATH $ENV{CLANG_PATH})
endif()
if(NOT IS_DIRECTORY ${CLANG_PATH})
message(
FATAL_ERROR
"Directory ${CLANG_PATH} not found, please export CLANG_PATH=<path_to_xtdk>."
)
endif()
message(STATUS "Build with CLANG_PATH=" ${CLANG_PATH})
set(XPU_CLANG ${CLANG_PATH}/bin/clang++)
message(STATUS "Build with XPU_CLANG=" ${XPU_CLANG})
if(NOT DEFINED HOST_SYSROOT)
set(HOST_SYSROOT $ENV{HOST_SYSROOT})
endif()
if(NOT HOST_SYSROOT)
set(HOST_SYSROOT /opt/compiler/gcc-8.2)
endif()
if(NOT IS_DIRECTORY ${HOST_SYSROOT})
message(
FATAL_ERROR
"Directory ${HOST_SYSROOT} not found, please export HOST_SYSROOT=<path_to_gcc>."
)
endif()
if(NOT DEFINED HOST_ARCH)
set(HOST_ARCH $ENV{HOST_ARCH})
endif()
if(NOT HOST_ARCH)
set(HOST_ARCH x86_64-baidu-linux-gnu)
endif()
if(NOT DEFINED TARGET_ARCH)
set(TARGET_ARCH $ENV{TARGET_ARCH})
endif()
if(NOT TARGET_ARCH)
set(TARGET_ARCH x86_64-baidu-linux-gnu)
endif()
if(NOT DEFINED TOOLCHAIN_ARGS)
set(TOOLCHAIN_ARGS $ENV{TOOLCHAIN_ARGS})
endif()
set(TOOLCHAIN_ARGS -isystem ${HOST_SYSROOT}/include/c++/8.2.0 -isystem
/usr/include/ -isystem /usr/include/x86_64-linux-gnu)
if(HOST_ARCH MATCHES "x86_64")
if(TARGET_ARCH MATCHES "x86_64")
if(EXISTS ${HOST_SYSROOT}/bin/g++)
set(HOST_CXX ${HOST_SYSROOT}/bin/g++)
if(NOT EXISTS ${HOST_SYSROOT}/bin/ar)
# try gcc-ar
set(HOST_AR ${HOST_SYSROOT}/bin/gcc-ar)
endif()
else()
set(HOST_CXX /usr/bin/g++)
set(HOST_AR /usr/bin/ar)
endif()
endif()
if(TARGET_ARCH MATCHES "aarch64")
set(TOOLCHAIN_ARGS --gcc-toolchain=${HOST_SYSROOT})
set(HOST_SYSROOT ${HOST_SYSROOT}/aarch64-linux-gnu/libc)
set(HOST_CXX ${CMAKE_CXX_COMPILER})
set(HOST_AR ${CMAKE_AR})
endif()
endif()
if(HOST_ARCH MATCHES "aarch64")
if(TARGET_ARCH MATCHES "aarch64")
if(EXISTS ${HOST_SYSROOT}/bin/g++)
set(HOST_CXX ${HOST_SYSROOT}/bin/g++)
set(HOST_AR ${HOST_SYSROOT}/bin/ar)
else()
set(HOST_CXX /usr/bin/g++)
set(HOST_AR /usr/bin/ar)
endif()
endif()
endif()
set(OPT_LEVEL "-O2")
message(STATUS "Build with TARGET_ARCH=" ${TARGET_ARCH})
message(STATUS "Build with TOOLCHAIN_ARGS=" ${TOOLCHAIN_ARGS})
message(STATUS "Build with HOST_SYSROOT=" ${HOST_SYSROOT})
message(STATUS "Build with HOST_CXX=" ${HOST_CXX})
message(STATUS "Build with HOST_AR=" ${HOST_AR})
# compile xpu kernel macro function
macro(
compile_kernel
kernel_path
kernel_name
xpu_n
rule
device_o_extra_flags
host_o_extra_flags
xpu_n_macro)
set(arg_rule ${rule})
separate_arguments(arg_rule)
set(arg_device_o_extra_flags ${device_o_extra_flags})
separate_arguments(arg_device_o_extra_flags)
set(arg_host_o_extra_flags ${host_o_extra_flags})
separate_arguments(arg_host_o_extra_flags)
add_custom_command(
OUTPUT ${kernel_name}.device.bin.o ${kernel_name}.o
COMMAND
${XPU_CLANG} -std=c++11 ${OPT_LEVEL} ${arg_device_o_extra_flags} -c
${kernel_path} -D ${xpu_n_macro} --target=${TARGET_ARCH} ${HOST_XPU_FLAGS}
--basename ${kernel_name} -fno-builtin --xpu-arch=${xpu_n} -fPIC
-Wno-int-to-void-pointer-cast -Wno-int-to-pointer-cast -Werror -mllvm
--xpu-inline-cost -mllvm --xpu-inline-hot-call
-I${CMAKE_CURRENT_SOURCE_DIR}/include -I${CMAKE_CURRENT_SOURCE_DIR}/src
-I${CMAKE_CURRENT_SOURCE_DIR}/src/kernel
-I${CMAKE_CURRENT_SOURCE_DIR}/src/kernel/include ${arg_rule}
WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}
DEPENDS ${kernel_path}
COMMENT ${kernel_name}.device.bin.o ${kernel_name}.o
VERBATIM)
list(APPEND xpuplugin_kernels_depends ${kernel_name}.device.bin.o
${kernel_name}.o)
endmacro()
macro(
__compile_kernel_with_rules
kernel_path
kernel_name
xpu_n
rules_path
device_o_extra_flags
host_o_extra_flags
xpu_n_macro)
file(STRINGS ${rules_path} rules)
foreach(rule IN LISTS rules)
message(STATUS " Instantiate with '${rule}'")
execute_process(
COMMAND bash "-c" "echo -n ${rule} | md5sum | cut -c1-6"
OUTPUT_VARIABLE rule_md5
OUTPUT_STRIP_TRAILING_WHITESPACE)
set(kernel_name_md5 ${kernel_name}_${rule_md5})
compile_kernel(
${kernel_path}
${kernel_name_md5}
${xpu_n}
${rule}
${device_o_extra_flags}
${host_o_extra_flags}
${xpu_n_macro})
endforeach()
endmacro()
macro(
compile_kernel_with_rules
kernel_path
kernel_name
xpu_n
rules_path
device_o_extra_flags
host_o_extra_flags
xpu_n_macro)
# reconfigure if file |rules_path| was modified
set_property(
DIRECTORY
APPEND
PROPERTY CMAKE_CONFIGURE_DEPENDS ${rules_path})
__compile_kernel_with_rules(
${kernel_path}
${kernel_name}
${xpu_n}
${rules_path}
${device_o_extra_flags}
${host_o_extra_flags}
${xpu_n_macro})
endmacro()
macro(search_and_compile_kernel xpu_n)
if(${xpu_n} STREQUAL "xpu1")
set(XPU_DEVICE_O_EXTRA_FLAGS " ")
set(XPU_HOST_O_EXTRA_FLAGS " ")
set(XPU_KERNEL_PATH "src/kernel/cpp/*.xpu")
set(xpu_n_macro "__XPU1__")
elseif(${xpu_n} STREQUAL "xpu2")
set(XPU_DEVICE_O_EXTRA_FLAGS "--xpu-arch=xpu2")
set(XPU_HOST_O_EXTRA_FLAGS "--xpu-arch=xpu2")
set(XPU_KERNEL_PATH "src/kernel/kunlun2cpp/*.xpu")
set(xpu_n_macro "__XPU2__")
elseif(${xpu_n} STREQUAL "xpu3")
set(XPU_DEVICE_O_EXTRA_FLAGS "--xpu-arch=xpu3")
set(XPU_HOST_O_EXTRA_FLAGS "--xpu-arch=xpu3")
set(XPU_KERNEL_PATH "src/kernel/kunlun3cpp/*.xpu")
set(xpu_n_macro "__XPU3__")
else()
message(FATAL_ERROR "Are you sure? ${xpu_n}")
endif()
file(GLOB_RECURSE xpu_kernels ${XPU_KERNEL_PATH})
list(LENGTH xpu_kernels xpu_kernels_num)
message(STATUS "Found ${xpu_kernels_num} ${xpu_n} kernels")
foreach(xpu_kernel IN LISTS xpu_kernels)
message(STATUS "Process ${xpu_kernel}")
get_filename_component(kernel_name ${xpu_kernel} NAME_WE)
get_filename_component(kernel_dir ${xpu_kernel} DIRECTORY)
set(kernel_rules ${kernel_dir}/${kernel_name}.rules)
set(kernel_name ${xpu_n}_${kernel_name})
if(EXISTS ${kernel_rules})
compile_kernel_with_rules(
${xpu_kernel}
${kernel_name}
${xpu_n}
${kernel_rules}
${XPU_DEVICE_O_EXTRA_FLAGS}
${XPU_HOST_O_EXTRA_FLAGS}
${xpu_n_macro})
else()
compile_kernel(
${xpu_kernel}
${kernel_name}
${xpu_n}
" "
${XPU_DEVICE_O_EXTRA_FLAGS}
${XPU_HOST_O_EXTRA_FLAGS}
${xpu_n_macro})
endif()
endforeach()
endmacro()
# compile xpu kernels
search_and_compile_kernel("xpu1")
search_and_compile_kernel("xpu2")
search_and_compile_kernel("xpu3")
# compile xpu wrappers
file(GLOB_RECURSE xpu_wrappers src/wrapper/*.cpp)
list(LENGTH xpu_wrappers xpu_wrappers_num)
message(STATUS "Found ${xpu_wrappers_num} XPU wrappers")
foreach(xpu_wrapper IN LISTS xpu_wrappers)
message(STATUS "Process ${xpu_wrapper}")
get_filename_component(wrapper_name ${xpu_wrapper} NAME_WE)
set(wrapper_target ${wrapper_name}_wrapper)
add_custom_target(
${wrapper_target}
WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}
DEPENDS wrapper_build/${wrapper_name}.wrapper.d
wrapper_build/${wrapper_name}.wrapper.o
COMMENT ${wrapper_target}
VERBATIM)
add_custom_command(
OUTPUT wrapper_build/${wrapper_name}.wrapper.d
COMMAND ${CMAKE_COMMAND} -E make_directory wrapper_build
COMMAND
${XPU_CLANG} -M -MQ wrapper_build/${wrapper_name}.wrapper.o -MF
wrapper_build/${wrapper_name}.wrapper.d -std=c++11 -x xpu -c
${xpu_wrapper} -I${XDNN_INC_DIR} -I${XRE_INC_DIR}
-I${CMAKE_CURRENT_SOURCE_DIR}/include -I${CMAKE_CURRENT_SOURCE_DIR}/src
-I${CMAKE_CURRENT_SOURCE_DIR}/src/wrapper -D_GNU_SOURCE
-D__STDC_LIMIT_MACROS -DNDEBUG --sysroot=${HOST_SYSROOT} ${TOOLCHAIN_ARGS}
--target=${TARGET_ARCH} -fPIC -Werror -Wreorder -fvisibility=hidden
--xpu-host-only ${XPU_MF_FLAGS}
COMMAND
${CMAKE_COMMAND} -E cmake_depends "Unix Makefiles" ${CMAKE_SOURCE_DIR}
${CMAKE_SOURCE_DIR} ${CMAKE_BINARY_DIR} ${CMAKE_BINARY_DIR}
${CMAKE_BINARY_DIR}/CMakeFiles/${wrapper_target}.dir/DependInfo.cmake
--color=$(COLOR)
WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}
DEPENDS ${xpu_wrapper} ${XPU_DEPS}
COMMENT wrapper_build/${wrapper_name}.wrapper.d
VERBATIM)
add_custom_command(
OUTPUT wrapper_build/${wrapper_name}.wrapper.o
COMMAND ${CMAKE_COMMAND} -E make_directory wrapper_build
COMMAND
${XPU_CLANG} -std=c++11 ${EXTRA_FLAGS} ${OPT_LEVEL} -x xpu -c
${xpu_wrapper} -o wrapper_build/${wrapper_name}.wrapper.o
-I${XDNN_INC_DIR} -I${XRE_INC_DIR} -I${CMAKE_CURRENT_SOURCE_DIR}/include
-I${CMAKE_CURRENT_SOURCE_DIR}/src
-I${CMAKE_CURRENT_SOURCE_DIR}/src/wrapper -D_GNU_SOURCE
-D__STDC_LIMIT_MACROS -DNDEBUG --sysroot=${HOST_SYSROOT} ${TOOLCHAIN_ARGS}
--target=${TARGET_ARCH} -fPIC -Wunused-variable -Werror -Wreorder
-fvisibility=hidden --xpu-host-only ${HOST_XPU_FLAGS}
WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}
DEPENDS wrapper_build/${wrapper_name}.wrapper.d
COMMENT wrapper_build/${wrapper_name}.wrapper.o
VERBATIM)
list(APPEND xpuplugin_wrapper_depends wrapper_build/${wrapper_name}.wrapper.o)
endforeach()
add_custom_command(
OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/libxpuplugin.a
COMMAND ${HOST_AR} rcs ${CMAKE_CURRENT_BINARY_DIR}/libxpuplugin.a
${xpuplugin_kernels_depends} ${xpuplugin_wrapper_depends}
WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}
DEPENDS ${xpuplugin_kernels_depends} ${xpuplugin_wrapper_depends}
COMMENT ${CMAKE_CURRENT_BINARY_DIR}/libxpuplugin.a
VERBATIM)
add_custom_target(
xpuplugin_a
WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}
DEPENDS ${xpuplugin_kernels_depends} ${xpuplugin_wrapper_depends}
${CMAKE_CURRENT_BINARY_DIR}/libxpuplugin.a
COMMENT xpuplugin_a
VERBATIM)
add_custom_target(
xpuplugin_so ALL
WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}
DEPENDS xpuplugin_a ${CMAKE_CURRENT_BINARY_DIR}/libxpuplugin.so
COMMENT xpuplugin_so)
add_custom_command(
OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/libxpuplugin.so
COMMAND
${HOST_CXX} -shared -o ${CMAKE_CURRENT_BINARY_DIR}/libxpuplugin.so -Xlinker
\"-\(\" -Wl,--whole-archive ${CMAKE_CURRENT_BINARY_DIR}/libxpuplugin.a
-Wl,--no-whole-archive -L${XDNN_LIB_DIR} -L${XRE_LIB_DIR} -lxpurt -lxpuapi
-Wl,--no-undefined -Wl,-soname,libxpuplugin.so -lstdc++ -ldl -lm -lpthread
-specs=${CMAKE_CURRENT_SOURCE_DIR}/src/linker.specs -Xlinker \"-\)\"\;
WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}
DEPENDS ${CMAKE_CURRENT_BINARY_DIR}/libxpuplugin.a
COMMENT ${CMAKE_CURRENT_BINARY_DIR}/libxpuplugin.so)
if(NOT DEFINED BUILD_STANDALONE)
add_library(xpuplugin STATIC IMPORTED GLOBAL)
add_dependencies(xpuplugin xpuplugin_a)
set_target_properties(
xpuplugin PROPERTIES IMPORTED_LOCATION
${CMAKE_CURRENT_BINARY_DIR}/libxpuplugin.a)
endif()
# XPU PLUGIN
## Standalone build and test.
```
$ cd plugin
Modify ./build.sh to set the path of XDNN, XRE and XTDK.
$ ./build.sh
$ cd example
Modify ./example/build.sh to set the path of XDNN and XRE.
$ ./build.sh
$ ./run.sh
```
## Build with PaddlePaddle.
### Copy to the source code of PaddlePaddle.
```
$ cp -rf plugin <path_to_paddle_source_code>/paddle/phi/xpu
```
### Add -DWITH_XPU_PLUGIN=ON as extra cmake arguments.
```
$ cmake .. <other_cmake_args> -DWITH_XPU_PLUGIN=ON
```
#!/bin/bash
# Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
set -e
export XDNN_PATH=/opt/xdnn # <path_to_xdnn>
export XRE_PATH=/opt/xre # <path_to_xre>
export CLANG_PATH=/opt/xtdk # <path_to_xtdk>
export HOST_SYSROOT=/opt/compiler/gcc-8.2 # <path_to_gcc>
rm -rf build
mkdir build
cd build
cmake -DCMAKE_VERBOSE_MAKEFILE=ON -DBUILD_STANDALONE=ON ..
make
cmake_minimum_required(VERSION 3.16)
project(example LANGUAGES CXX)
set(CMAKE_SYSTEM_NAME Linux)
set(CMAKE_SYSTEM_PROCESSOR x86_64)
set(CMAKE_C_COMPILER "gcc")
set(CMAKE_CXX_COMPILER "g++")
if(NOT DEFINED XDNN_PATH)
set(XDNN_PATH $ENV{XDNN_PATH})
endif()
if(NOT DEFINED XRE_PATH)
set(XRE_PATH $ENV{XRE_PATH})
endif()
if(NOT IS_DIRECTORY ${XDNN_PATH})
message(
FATAL_ERROR
"XDNN_PATH not set, or directory ${XDNN_PATH} not found, please export XDNN_PATH=<path_to_xdnn>."
)
endif()
if(NOT IS_DIRECTORY ${XRE_PATH})
message(
FATAL_ERROR
"XRE_PATH not set, or directory ${XRE_PATH} not found, please export XRE_PATH=<path_to_xre>."
)
endif()
set(XDNN_INC_DIR ${XDNN_PATH}/include)
set(XDNN_LIB_DIR ${XDNN_PATH}/so)
set(XRE_INC_DIR ${XRE_PATH}/include)
set(XRE_LIB_DIR ${XRE_PATH}/so)
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++11 -Wl,--allow-shlib-undefined")
include_directories(${XDNN_INC_DIR})
include_directories(${XRE_INC_DIR})
link_directories(${XDNN_LIB_DIR})
link_directories(${XRE_LIB_DIR})
set(DEPS ${DEPS} xpurt xpuapi)
include_directories(${CMAKE_CURRENT_SOURCE_DIR}/../include)
if(NOT DEFINED LINK_TYPE)
set(LINK_TYPE $ENV{LINK_TYPE})
endif()
if(LINK_TYPE STREQUAL "static")
set(DEPS ${DEPS} ${CMAKE_CURRENT_SOURCE_DIR}/../build/libxpuplugin.a)
elseif(LINK_TYPE STREQUAL "shared")
link_directories(${CMAKE_CURRENT_SOURCE_DIR}/../build)
set(DEPS ${DEPS} xpuplugin)
else()
message(
FATAL_ERROR
"Unknown LINK_TYPE ${LINK_TYPE}, only supports static or shared.")
return()
endif()
add_executable(example example.cc)
target_link_libraries(example ${DEPS})
#!/bin/bash
# Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
set -e
export XDNN_PATH=/opt/xdnn # <path_to_xdnn>
export XRE_PATH=/opt/xre # <path_to_xre>
export LINK_TYPE=static # shared/static
rm -rf build
mkdir build
cd build
cmake -DCMAKE_VERBOSE_MAKEFILE=ON ..
make
// Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
/*
* copyright (C) 2022 KUNLUNXIN, Inc
*/
#include <assert.h>
#include "xpu/plugin.h"
#include "xpu/xdnn.h"
namespace xdnn = baidu::xpu::api;
int main() {
int num = 5;
int errcode = 0;
auto ctx = xdnn::create_context();
float* A = nullptr;
errcode = xpu_malloc(reinterpret_cast<void**>(&A), num * sizeof(float));
assert(errcode == 0);
float* B = nullptr;
errcode = xpu_malloc(reinterpret_cast<void**>(&B), num * sizeof(float));
assert(errcode == 0);
std::vector<float> A_cpu = {1, 2, 3, 4, 5};
std::vector<float> B_cpu(num, 0.0f);
std::vector<float> B_ref = {3, 4, 5, 6, 7};
xpu_memcpy(reinterpret_cast<void*>(A),
reinterpret_cast<void*>(&(A_cpu[0])),
num * sizeof(float),
XPUMemcpyKind::XPU_HOST_TO_DEVICE);
errcode = xdnn::plugin::add2(ctx, A, B, num);
assert(errcode == 0);
xpu_memcpy(reinterpret_cast<void*>(&(B_cpu[0])),
reinterpret_cast<void*>(B),
num * sizeof(float),
XPUMemcpyKind::XPU_DEVICE_TO_HOST);
printf("A(%p):\n", A);
for (size_t i = 0; i < num; i++) {
printf("%f ", A_cpu[i]);
}
printf("\nB(%p):\n", B);
for (size_t i = 0; i < num; i++) {
printf("%f ", B_cpu[i]);
}
bool pass = true;
for (size_t i = 0; i < num; i++) {
if (fabs(B_cpu[i] - B_ref[i]) > 1e-5f) {
pass = false;
break;
}
}
printf("\nCheck %s! \n", pass ? "pass" : "fail");
destroy_context(ctx);
errcode = xpu_free(A);
assert(errcode == 0);
errcode = xpu_free(B);
assert(errcode == 0);
return 0;
}
#!/bin/bash
# Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
set -e
XDNN_PATH=/opt/xdnn # <path_to_xdnn>
XRE_PATH=/opt/xre # <path_to_xre>
:<<!
export GLOG_v=0
export XPU_VISIBLE_DEVICES=0;
export XPUAPI_DEBUG=1;
export LD_LIBRARY_PATH=$XDNN_PATH/so:$XRE_PATH/so:$LD_LIBRARY_PATH
chmod +x ./build/example
./build/example
!
#:<<!
SSH_IP_ADDR=localhost
SSH_PORT=9031
SSH_USR_ID=root
SSH_USR_PWD=root
WORK_SPACE="/var/tmp/example"
EXPORT_ENVIRONMENT_VARIABLES="export GLOG_v=0;export XPU_VISIBLE_DEVICES=0;export XPUAPI_DEBUG=1;"
EXPORT_ENVIRONMENT_VARIABLES="${EXPORT_ENVIRONMENT_VARIABLES}export LD_LIBRARY_PATH=.:\$LD_LIBRARY_PATH;"
sshpass -p $SSH_USR_PWD ssh -v -o ConnectTimeout=60 -o StrictHostKeyChecking=no -p $SSH_PORT $SSH_USR_ID@$SSH_IP_ADDR "rm -rf $WORK_SPACE"
sshpass -p $SSH_USR_PWD ssh -v -o ConnectTimeout=60 -o StrictHostKeyChecking=no -p $SSH_PORT $SSH_USR_ID@$SSH_IP_ADDR "mkdir -p $WORK_SPACE"
sshpass -p $SSH_USR_PWD scp -v -r -o ConnectTimeout=60 -o StrictHostKeyChecking=no -P $SSH_PORT $XDNN_PATH/so/* $SSH_USR_ID@$SSH_IP_ADDR:$WORK_SPACE
sshpass -p $SSH_USR_PWD scp -v -r -o ConnectTimeout=60 -o StrictHostKeyChecking=no -P $SSH_PORT $XRE_PATH/so/* $SSH_USR_ID@$SSH_IP_ADDR:$WORK_SPACE
sshpass -p $SSH_USR_PWD scp -v -r -o ConnectTimeout=60 -o StrictHostKeyChecking=no -P $SSH_PORT ../build/libxpuplugin.so $SSH_USR_ID@$SSH_IP_ADDR:$WORK_SPACE
sshpass -p $SSH_USR_PWD scp -v -r -o ConnectTimeout=60 -o StrictHostKeyChecking=no -P $SSH_PORT build/example $SSH_USR_ID@$SSH_IP_ADDR:$WORK_SPACE
sshpass -p $SSH_USR_PWD ssh -v -o ConnectTimeout=60 -o StrictHostKeyChecking=no -p $SSH_PORT $SSH_USR_ID@$SSH_IP_ADDR "cd $WORK_SPACE; ${EXPORT_ENVIRONMENT_VARIABLES} chmod +x ./example; ./example"
#!
// Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
/*
* copyright (C) 2022 KUNLUNXIN, Inc
*/
#pragma once
#include "xpu/xdnn.h"
namespace baidu {
namespace xpu {
namespace api {
namespace plugin {
DLL_EXPORT int add2(Context* ctx, const float* x, float* y, int len);
} // namespace plugin
} // namespace api
} // namespace xpu
} // namespace baidu
// Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
/*
* copyright (C) 2022 KUNLUNXIN, Inc
*/
#include "xpu/kernel/xtdk.h"
#include "xpu/kernel/xtdk_math.h"
#include "xpu/kernel/xtdk_simd.h"
namespace xpu2 {
namespace plugin {
__global__ void add1(const float* x, float* y, int len) {
int cid = core_id();
int ncores = core_num();
if (cid >= ncores) {
return;
}
int thread_id = ncores * cluster_id() + cid;
int nthreads = ncores * cluster_num();
const int buf_size = 128;
__simd__ float local_x[buf_size];
__simd__ float local_y[buf_size];
float32x16_t v_x;
float32x16_t v_y;
int len_per_loop = 128;
for (int i = thread_id * len_per_loop; i < len;
i += nthreads * len_per_loop) {
int read_len = min(len_per_loop, len - i);
GM2LM(x + i, local_x, read_len * sizeof(float));
for (int k = 0; k < read_len; k += 16) {
v_x = vload_lm_float32x16(local_x + k);
v_y = svadd_float32x16(1.0f, v_x);
vstore_lm_float32x16((local_y + k), v_y);
}
mfence();
LM2GM(local_y, y + i, read_len * sizeof(float));
}
}
} // namespace plugin
} // namespace xpu2
# overwrite incorrect rpath arguments
# its original value is:
# -rpath $ORIGIN:$ORIGIN/lib:$ORIGIN/lib64:$ORIGIN/../lib:$ORIGIN/../lib64:/opt/compiler/gcc-4.8.2/lib:/opt/compiler/gcc-4.8.2/lib64
# specify your own rpath if needed.
*linker:
collect2 -rpath $ORIGIN
// Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
/*
* copyright (C) 2022 KUNLUNXIN, Inc
*/
#include "xpu/plugin.h"
#include "xpu/refactor/impl_public/wrapper_check.h"
namespace xpu2 {
namespace plugin {
__attribute__((global)) void add1(const float* x, float* y, int len);
}
} // namespace xpu2
namespace baidu {
namespace xpu {
namespace api {
namespace plugin {
static int cpu_wrapper(Context* ctx, const float* x, float* y, int len) {
for (int i = 0; i < len; i++) {
y[i] = x[i] + 2.0f;
}
return SUCCESS;
}
static int xpu2_wrapper(Context* ctx, const float* x, float* y, int len) {
ctx_guard RAII_GUARD(ctx);
float* tensor_one = RAII_GUARD.alloc<float>(len);
WRAPPER_ASSERT_WORKSPACE(ctx, tensor_one);
int ret = constant<float>(ctx, tensor_one, len, 1.0f);
WRAPPER_ASSERT_SUCCESS(ctx, ret);
ret = add<float>(ctx, x, tensor_one, y, len);
WRAPPER_ASSERT_SUCCESS(ctx, ret);
xpu2::plugin::add1<<<ctx->ncluster(), 64, ctx->xpu_stream>>>(y, y, len);
return api::SUCCESS;
}
int add2(Context* ctx, const float* x, float* y, int len) {
WRAPPER_CHECK_CTX(ctx);
WRAPPER_DUMP_FUNCTION_T1(ctx, "add2", float);
WRAPPER_DUMP_PARAM3(ctx, x, y, len);
WRAPPER_DUMP(ctx);
WRAPPER_ASSERT_GT(ctx, len, 0);
WRAPPER_CHECK_2PTRS(ctx, float, len, x, y);
if (ctx->dev().type() == api::kCPU) {
return cpu_wrapper(ctx, x, y, len);
}
if (ctx->dev().type() == api::kXPU2) {
return xpu2_wrapper(ctx, x, y, len);
}
return NOT_IMPLEMENT;
}
} // namespace plugin
} // namespace api
} // namespace xpu
} // namespace baidu
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册