From 2d0f3d9bec62ba480863e3fede365c7b55cce8f1 Mon Sep 17 00:00:00 2001 From: QingshuChen Date: Tue, 3 Aug 2021 15:22:35 +0800 Subject: [PATCH] support Kunlun2 (#34459) * support Kunlun2 * support KL2 * support KL2 --- cmake/external/xpu.cmake | 2 +- paddle/fluid/framework/CMakeLists.txt | 5 + paddle/fluid/framework/operator.cc | 6 +- paddle/fluid/imperative/CMakeLists.txt | 4 + paddle/fluid/imperative/prepared_operator.cc | 7 +- .../memory/allocation/allocator_facade.cc | 2 +- .../allocation/naive_best_fit_allocator.cc | 2 +- paddle/fluid/memory/memcpy.cc | 2 +- paddle/fluid/operators/activation_op_xpu.cc | 2 +- paddle/fluid/operators/concat_op_xpu.cc | 2 +- .../fluid/operators/deformable_conv_op_xpu.cc | 2 +- paddle/fluid/operators/dropout_op_xpu.cc | 2 +- .../operators/metrics/accuracy_op_xpu.cc | 2 +- .../operators/reduce_ops/logsumexp_op_xpu.cc | 2 +- .../operators/reduce_ops/reduce_max_op_xpu.cc | 2 +- .../operators/reduce_ops/reduce_op_xpu.h | 2 +- .../operators/reduce_ops/reduce_sum_op_xpu.cc | 2 +- paddle/fluid/operators/rnn_op_xpu.cc | 2 +- paddle/fluid/operators/scale_op_xpu.cc | 2 +- paddle/fluid/operators/sign_op_xpu.cc | 2 +- paddle/fluid/operators/sum_op_xpu.cc | 2 +- paddle/fluid/operators/transpose_op_xpu.cc | 2 +- paddle/fluid/platform/CMakeLists.txt | 3 +- paddle/fluid/platform/device_context.cc | 5 +- paddle/fluid/platform/device_context.h | 7 +- paddle/fluid/platform/init.cc | 4 +- paddle/fluid/platform/xpu/xpu1_op_list.h | 230 ++++++++++++++++++ paddle/fluid/platform/xpu/xpu2_op_list.h | 42 ++++ paddle/fluid/platform/{ => xpu}/xpu_header.h | 5 +- paddle/fluid/platform/{ => xpu}/xpu_info.cc | 20 +- paddle/fluid/platform/{ => xpu}/xpu_info.h | 3 + paddle/fluid/platform/xpu/xpu_op_list.cc | 39 +++ paddle/fluid/platform/xpu/xpu_op_list.h | 27 ++ paddle/fluid/pybind/pybind.cc | 2 +- 34 files changed, 410 insertions(+), 35 deletions(-) create mode 100644 paddle/fluid/platform/xpu/xpu1_op_list.h create mode 100644 
paddle/fluid/platform/xpu/xpu2_op_list.h rename paddle/fluid/platform/{ => xpu}/xpu_header.h (95%) rename paddle/fluid/platform/{ => xpu}/xpu_info.cc (86%) rename paddle/fluid/platform/{ => xpu}/xpu_info.h (95%) create mode 100644 paddle/fluid/platform/xpu/xpu_op_list.cc create mode 100644 paddle/fluid/platform/xpu/xpu_op_list.h diff --git a/cmake/external/xpu.cmake b/cmake/external/xpu.cmake index 42de34fb52..640e2e37ad 100644 --- a/cmake/external/xpu.cmake +++ b/cmake/external/xpu.cmake @@ -35,7 +35,7 @@ ELSE () ENDIF() SET(XPU_BASE_URL_WITHOUT_DATE "https://baidu-kunlun-product.cdn.bcebos.com/KL-SDK/klsdk-dev") -SET(XPU_BASE_URL "${XPU_BASE_URL_WITHOUT_DATE}/20210701") +SET(XPU_BASE_URL "${XPU_BASE_URL_WITHOUT_DATE}/20210729") SET(XPU_XRE_URL "${XPU_BASE_URL}/${XPU_XRE_DIR_NAME}.tar.gz" CACHE STRING "" FORCE) SET(XPU_XDNN_URL "${XPU_BASE_URL}/${XPU_XDNN_DIR_NAME}.tar.gz" CACHE STRING "" FORCE) SET(XPU_XCCL_URL "${XPU_BASE_URL_WITHOUT_DATE}/20210623/${XPU_XCCL_DIR_NAME}.tar.gz" CACHE STRING "" FORCE) diff --git a/paddle/fluid/framework/CMakeLists.txt b/paddle/fluid/framework/CMakeLists.txt index 1e4d813c6d..08e912f52c 100644 --- a/paddle/fluid/framework/CMakeLists.txt +++ b/paddle/fluid/framework/CMakeLists.txt @@ -188,8 +188,13 @@ cc_library(op_kernel_type SRCS op_kernel_type.cc DEPS device_context place) cc_library(unused_var_check SRCS unused_var_check.cc DEPS glog no_need_buffer_vars_inference) +IF(WITH_XPU) +cc_library(operator SRCS operator.cc DEPS xpu_op_list op_info device_context tensor scope glog trainer_desc_proto data_feed_proto + shape_inference data_transform lod_tensor profiler transfer_scope_cache op_kernel_type op_call_stack unused_var_check nan_inf_utils) +ELSE() cc_library(operator SRCS operator.cc DEPS op_info device_context tensor scope glog trainer_desc_proto data_feed_proto shape_inference data_transform lod_tensor profiler transfer_scope_cache op_kernel_type op_call_stack unused_var_check nan_inf_utils) +ENDIF() cc_test(operator_test SRCS 
operator_test.cc DEPS operator op_registry device_context) cc_test(operator_exception_test SRCS operator_exception_test.cc DEPS operator op_registry device_context) diff --git a/paddle/fluid/framework/operator.cc b/paddle/fluid/framework/operator.cc index 20cffaa959..0f7012940d 100644 --- a/paddle/fluid/framework/operator.cc +++ b/paddle/fluid/framework/operator.cc @@ -36,7 +36,8 @@ class LoDTensor; } // namespace framework } // namespace paddle #ifdef PADDLE_WITH_XPU -#include "paddle/fluid/platform/xpu_info.h" +#include "paddle/fluid/platform/xpu/xpu_info.h" +#include "paddle/fluid/platform/xpu/xpu_op_list.h" #endif #ifdef PADDLE_WITH_MKLDNN @@ -1254,7 +1255,8 @@ void OperatorWithKernel::ChooseKernel(const RuntimeContext& ctx, #endif #ifdef PADDLE_WITH_XPU if (kernel_iter == kernels.end() && - is_xpu_place(expected_kernel_key.place_)) { + is_xpu_place(expected_kernel_key.place_) && + !paddle::platform::is_xpu_support_op(type_, expected_kernel_key)) { VLOG(3) << "missing XPU kernel: " << type_ << ", expected_kernel_key:" << expected_kernel_key << ", fallbacking to CPU one!"; diff --git a/paddle/fluid/imperative/CMakeLists.txt b/paddle/fluid/imperative/CMakeLists.txt index c9dffe2d76..93b18e7e55 100644 --- a/paddle/fluid/imperative/CMakeLists.txt +++ b/paddle/fluid/imperative/CMakeLists.txt @@ -1,6 +1,10 @@ cc_library(imperative_flag SRCS flags.cc DEPS gflags) +IF(WITH_XPU) +cc_library(prepared_operator SRCS prepared_operator.cc DEPS xpu_op_list proto_desc operator device_context lod_tensor selected_rows var_type_traits op_kernel_type data_transform nan_inf_utils) +ELSE() cc_library(prepared_operator SRCS prepared_operator.cc DEPS proto_desc operator device_context lod_tensor selected_rows var_type_traits op_kernel_type data_transform nan_inf_utils) +ENDIF() cc_library(layer SRCS layer.cc DEPS prepared_operator math_function imperative_flag variable_helper op_registry) add_subdirectory(jit) cc_library(amp SRCS amp_auto_cast.cc DEPS layer ) diff --git 
a/paddle/fluid/imperative/prepared_operator.cc b/paddle/fluid/imperative/prepared_operator.cc index f1e1f79b99..619d31c4f5 100644 --- a/paddle/fluid/imperative/prepared_operator.cc +++ b/paddle/fluid/imperative/prepared_operator.cc @@ -17,7 +17,9 @@ #include "paddle/fluid/framework/data_type_transform.h" #include "paddle/fluid/framework/details/nan_inf_utils.h" #include "paddle/fluid/imperative/infer_shape_context.h" - +#ifdef PADDLE_WITH_XPU +#include "paddle/fluid/platform/xpu/xpu_op_list.h" +#endif DECLARE_bool(check_nan_inf); namespace paddle { @@ -130,7 +132,8 @@ PreparedOp PrepareImpl(const NameVarMap& ins, auto kernel_iter = kernels.find(expected_kernel_key); #ifdef PADDLE_WITH_XPU if (kernel_iter == kernels.end() && - is_xpu_place(expected_kernel_key.place_)) { + is_xpu_place(expected_kernel_key.place_) && + !paddle::platform::is_xpu_support_op(op.Type(), expected_kernel_key)) { VLOG(3) << "missing XPU kernel: " << op.Type() << ", expected_kernel_key:" << expected_kernel_key << ", fallbacking to CPU one!"; diff --git a/paddle/fluid/memory/allocation/allocator_facade.cc b/paddle/fluid/memory/allocation/allocator_facade.cc index 8bc9775381..bfc4a1d598 100644 --- a/paddle/fluid/memory/allocation/allocator_facade.cc +++ b/paddle/fluid/memory/allocation/allocator_facade.cc @@ -33,7 +33,7 @@ #include "paddle/fluid/platform/gpu_info.h" #endif #ifdef PADDLE_WITH_XPU -#include "paddle/fluid/platform/xpu_info.h" +#include "paddle/fluid/platform/xpu/xpu_info.h" #endif #include "paddle/fluid/platform/npu_info.h" diff --git a/paddle/fluid/memory/allocation/naive_best_fit_allocator.cc b/paddle/fluid/memory/allocation/naive_best_fit_allocator.cc index bc72b4b20d..6c2fb82cb7 100644 --- a/paddle/fluid/memory/allocation/naive_best_fit_allocator.cc +++ b/paddle/fluid/memory/allocation/naive_best_fit_allocator.cc @@ -31,7 +31,7 @@ #include "paddle/fluid/platform/cuda_device_guard.h" #endif #ifdef PADDLE_WITH_XPU -#include "paddle/fluid/platform/xpu_header.h" +#include 
"paddle/fluid/platform/xpu/xpu_header.h" #endif DEFINE_bool(init_allocated_mem, false, diff --git a/paddle/fluid/memory/memcpy.cc b/paddle/fluid/memory/memcpy.cc index f2f8c5d1fb..3b3be9776c 100644 --- a/paddle/fluid/memory/memcpy.cc +++ b/paddle/fluid/memory/memcpy.cc @@ -19,7 +19,7 @@ limitations under the License. */ #include "paddle/fluid/platform/profiler.h" #ifdef PADDLE_WITH_XPU -#include "paddle/fluid/platform/xpu_header.h" +#include "paddle/fluid/platform/xpu/xpu_header.h" #endif namespace paddle { diff --git a/paddle/fluid/operators/activation_op_xpu.cc b/paddle/fluid/operators/activation_op_xpu.cc index 2c7219ef68..257a91d7c1 100644 --- a/paddle/fluid/operators/activation_op_xpu.cc +++ b/paddle/fluid/operators/activation_op_xpu.cc @@ -16,7 +16,7 @@ limitations under the License. */ #include "paddle/fluid/operators/activation_op.h" #include -#include "paddle/fluid/platform/xpu_header.h" +#include "paddle/fluid/platform/xpu/xpu_header.h" namespace paddle { namespace operators { diff --git a/paddle/fluid/operators/concat_op_xpu.cc b/paddle/fluid/operators/concat_op_xpu.cc index be299babdb..dc9359ecf5 100644 --- a/paddle/fluid/operators/concat_op_xpu.cc +++ b/paddle/fluid/operators/concat_op_xpu.cc @@ -16,7 +16,7 @@ limitations under the License. */ #include #include #include -#include "paddle/fluid/platform/xpu_header.h" +#include "paddle/fluid/platform/xpu/xpu_header.h" namespace paddle { namespace operators { diff --git a/paddle/fluid/operators/deformable_conv_op_xpu.cc b/paddle/fluid/operators/deformable_conv_op_xpu.cc index 18bab83b0e..4576167562 100644 --- a/paddle/fluid/operators/deformable_conv_op_xpu.cc +++ b/paddle/fluid/operators/deformable_conv_op_xpu.cc @@ -16,7 +16,7 @@ limitations under the License. 
*/ #include #include #include "paddle/fluid/framework/op_registry.h" -#include "paddle/fluid/platform/xpu_header.h" +#include "paddle/fluid/platform/xpu/xpu_header.h" namespace paddle { namespace operators { diff --git a/paddle/fluid/operators/dropout_op_xpu.cc b/paddle/fluid/operators/dropout_op_xpu.cc index 79d2390748..0b0b7095bd 100644 --- a/paddle/fluid/operators/dropout_op_xpu.cc +++ b/paddle/fluid/operators/dropout_op_xpu.cc @@ -11,7 +11,7 @@ limitations under the License. */ #include "paddle/fluid/operators/dropout_op.h" #include #include -#include "paddle/fluid/platform/xpu_header.h" +#include "paddle/fluid/platform/xpu/xpu_header.h" namespace paddle { namespace operators { diff --git a/paddle/fluid/operators/metrics/accuracy_op_xpu.cc b/paddle/fluid/operators/metrics/accuracy_op_xpu.cc index d73e46df34..cb75616221 100644 --- a/paddle/fluid/operators/metrics/accuracy_op_xpu.cc +++ b/paddle/fluid/operators/metrics/accuracy_op_xpu.cc @@ -15,7 +15,7 @@ limitations under the License. 
*/ #ifdef PADDLE_WITH_XPU #include "paddle/fluid/operators/metrics/accuracy_op.h" -#include "paddle/fluid/platform/xpu_header.h" +#include "paddle/fluid/platform/xpu/xpu_header.h" namespace paddle { namespace operators { diff --git a/paddle/fluid/operators/reduce_ops/logsumexp_op_xpu.cc b/paddle/fluid/operators/reduce_ops/logsumexp_op_xpu.cc index 9cc8ac200b..4f98dde210 100644 --- a/paddle/fluid/operators/reduce_ops/logsumexp_op_xpu.cc +++ b/paddle/fluid/operators/reduce_ops/logsumexp_op_xpu.cc @@ -16,7 +16,7 @@ #include "paddle/fluid/operators/reduce_ops/logsumexp_op.h" #include "paddle/fluid/platform/device_context.h" -#include "paddle/fluid/platform/xpu_header.h" +#include "paddle/fluid/platform/xpu/xpu_header.h" namespace paddle { namespace operators { diff --git a/paddle/fluid/operators/reduce_ops/reduce_max_op_xpu.cc b/paddle/fluid/operators/reduce_ops/reduce_max_op_xpu.cc index a4ed0c85f4..ae27a5d7df 100644 --- a/paddle/fluid/operators/reduce_ops/reduce_max_op_xpu.cc +++ b/paddle/fluid/operators/reduce_ops/reduce_max_op_xpu.cc @@ -16,7 +16,7 @@ #include #include #include "paddle/fluid/operators/reduce_ops/reduce_op_xpu.h" -#include "paddle/fluid/platform/xpu_header.h" +#include "paddle/fluid/platform/xpu/xpu_header.h" namespace paddle { namespace operators { diff --git a/paddle/fluid/operators/reduce_ops/reduce_op_xpu.h b/paddle/fluid/operators/reduce_ops/reduce_op_xpu.h index fa9503ec3f..5ae60713bc 100644 --- a/paddle/fluid/operators/reduce_ops/reduce_op_xpu.h +++ b/paddle/fluid/operators/reduce_ops/reduce_op_xpu.h @@ -21,7 +21,7 @@ #include #include #include "paddle/fluid/operators/reduce_ops/reduce_op.h" -#include "paddle/fluid/platform/xpu_header.h" +#include "paddle/fluid/platform/xpu/xpu_header.h" namespace paddle { namespace operators { diff --git a/paddle/fluid/operators/reduce_ops/reduce_sum_op_xpu.cc b/paddle/fluid/operators/reduce_ops/reduce_sum_op_xpu.cc index bf55221bd3..f759b104d0 100644 --- 
a/paddle/fluid/operators/reduce_ops/reduce_sum_op_xpu.cc +++ b/paddle/fluid/operators/reduce_ops/reduce_sum_op_xpu.cc @@ -16,7 +16,7 @@ #include #include #include "paddle/fluid/operators/reduce_ops/reduce_op_xpu.h" -#include "paddle/fluid/platform/xpu_header.h" +#include "paddle/fluid/platform/xpu/xpu_header.h" namespace paddle { namespace operators { diff --git a/paddle/fluid/operators/rnn_op_xpu.cc b/paddle/fluid/operators/rnn_op_xpu.cc index fb82d18e62..9d637e1cee 100644 --- a/paddle/fluid/operators/rnn_op_xpu.cc +++ b/paddle/fluid/operators/rnn_op_xpu.cc @@ -14,7 +14,7 @@ limitations under the License. */ #include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/operators/utils.h" #include "paddle/fluid/platform/device_context.h" -#include "paddle/fluid/platform/xpu_header.h" +#include "paddle/fluid/platform/xpu/xpu_header.h" namespace paddle { namespace operators { diff --git a/paddle/fluid/operators/scale_op_xpu.cc b/paddle/fluid/operators/scale_op_xpu.cc index fdb90797b6..e0dfad9157 100644 --- a/paddle/fluid/operators/scale_op_xpu.cc +++ b/paddle/fluid/operators/scale_op_xpu.cc @@ -16,7 +16,7 @@ limitations under the License. */ #include "paddle/fluid/operators/scale_op.h" #include -#include "paddle/fluid/platform/xpu_header.h" +#include "paddle/fluid/platform/xpu/xpu_header.h" namespace paddle { namespace operators { diff --git a/paddle/fluid/operators/sign_op_xpu.cc b/paddle/fluid/operators/sign_op_xpu.cc index 86fe826c65..a164a9b056 100644 --- a/paddle/fluid/operators/sign_op_xpu.cc +++ b/paddle/fluid/operators/sign_op_xpu.cc @@ -15,7 +15,7 @@ limitations under the License. 
*/ #ifdef PADDLE_WITH_XPU #include "paddle/fluid/operators/sign_op.h" -#include "paddle/fluid/platform/xpu_header.h" +#include "paddle/fluid/platform/xpu/xpu_header.h" namespace paddle { namespace operators { diff --git a/paddle/fluid/operators/sum_op_xpu.cc b/paddle/fluid/operators/sum_op_xpu.cc index 264cc4e2cf..d16bb5562e 100644 --- a/paddle/fluid/operators/sum_op_xpu.cc +++ b/paddle/fluid/operators/sum_op_xpu.cc @@ -13,7 +13,7 @@ limitations under the License. */ #include "paddle/fluid/operators/sum_op.h" #include -#include "paddle/fluid/platform/xpu_header.h" +#include "paddle/fluid/platform/xpu/xpu_header.h" namespace paddle { namespace operators { diff --git a/paddle/fluid/operators/transpose_op_xpu.cc b/paddle/fluid/operators/transpose_op_xpu.cc index 2748c07f9e..360c2125ed 100644 --- a/paddle/fluid/operators/transpose_op_xpu.cc +++ b/paddle/fluid/operators/transpose_op_xpu.cc @@ -17,7 +17,7 @@ limitations under the License. */ #include #include #include -#include "paddle/fluid/platform/xpu_header.h" +#include "paddle/fluid/platform/xpu/xpu_header.h" namespace paddle { namespace operators { diff --git a/paddle/fluid/platform/CMakeLists.txt b/paddle/fluid/platform/CMakeLists.txt index 36a9567621..efd25bc892 100644 --- a/paddle/fluid/platform/CMakeLists.txt +++ b/paddle/fluid/platform/CMakeLists.txt @@ -69,7 +69,8 @@ cc_library(place SRCS place.cc DEPS enforce boost) cc_test(place_test SRCS place_test.cc DEPS place glog gflags) if(WITH_XPU) -cc_library(xpu_info SRCS xpu_info.cc DEPS gflags glog enforce xpulib) +cc_library(xpu_info SRCS xpu/xpu_info.cc DEPS gflags glog enforce xpulib) +cc_library(xpu_op_list SRCS xpu/xpu_op_list.cc DEPS gflags glog enforce xpulib) endif() if(WITH_ASCEND) diff --git a/paddle/fluid/platform/device_context.cc b/paddle/fluid/platform/device_context.cc index 82f14c612d..c7162f58de 100644 --- a/paddle/fluid/platform/device_context.cc +++ b/paddle/fluid/platform/device_context.cc @@ -196,7 +196,10 @@ Eigen::DefaultDevice* 
CPUDeviceContext::eigen_device() const { Place CPUDeviceContext::GetPlace() const { return place_; } #ifdef PADDLE_WITH_XPU -XPUDeviceContext::XPUDeviceContext() { context_ = xpu::create_context(); }
+// Default constructor: creates the xpu::Context and records the XPU version
+// of place_.device. place_ has no initializer here, so it is
+// default-constructed at this point.
+// NOTE(review): this patch does not touch XPUDeviceContext(XPUPlace) — confirm
+// that constructor also sets xpu_version_ before xpu_version() is relied on.
+XPUDeviceContext::XPUDeviceContext() {
+  context_ = xpu::create_context();
+  xpu_version_ = get_xpu_version(place_.device);
+}
XPUDeviceContext::~XPUDeviceContext() {} diff --git a/paddle/fluid/platform/device_context.h b/paddle/fluid/platform/device_context.h index 68589f546d..abac12ff26 100644 --- a/paddle/fluid/platform/device_context.h +++ b/paddle/fluid/platform/device_context.h @@ -68,8 +68,8 @@ struct GpuDevice; } // namespace Eigen #ifdef PADDLE_WITH_XPU -#include "paddle/fluid/platform/xpu_header.h" -#include "paddle/fluid/platform/xpu_info.h" +#include "paddle/fluid/platform/xpu/xpu_header.h" +#include "paddle/fluid/platform/xpu/xpu_info.h" #endif #ifdef PADDLE_WITH_ASCEND_CL @@ -137,12 +137,14 @@ struct DefaultDeviceContextType { }; #ifdef PADDLE_WITH_XPU +namespace xpu = baidu::xpu::api; class XPUDeviceContext : public DeviceContext { public: XPUDeviceContext(); explicit XPUDeviceContext(XPUPlace place); virtual ~XPUDeviceContext(); Eigen::DefaultDevice* eigen_device() const { return nullptr; } + XPUVersion xpu_version() const { return xpu_version_; } Place GetPlace() const override; xpu::Context* x_context() const; @@ -159,6 +161,7 @@ class XPUDeviceContext : public DeviceContext { private: XPUPlace place_; + XPUVersion xpu_version_; xpu::Context* context_; #ifdef PADDLE_WITH_XPU_BKCL BKCLContext_t bkcl_context_; diff --git a/paddle/fluid/platform/init.cc b/paddle/fluid/platform/init.cc index ac6988d350..2e0ba9d241 100644 --- a/paddle/fluid/platform/init.cc +++ b/paddle/fluid/platform/init.cc @@ -29,8 +29,8 @@ limitations under the License. 
*/ #include "paddle/fluid/platform/place.h" #ifdef PADDLE_WITH_XPU -#include "paddle/fluid/platform/xpu_header.h" -#include "paddle/fluid/platform/xpu_info.h" +#include "paddle/fluid/platform/xpu/xpu_header.h" +#include "paddle/fluid/platform/xpu/xpu_info.h" #endif #ifdef WITH_WIN_DUMP_DBG diff --git a/paddle/fluid/platform/xpu/xpu1_op_list.h b/paddle/fluid/platform/xpu/xpu1_op_list.h new file mode 100644 index 0000000000..131525718c --- /dev/null +++ b/paddle/fluid/platform/xpu/xpu1_op_list.h @@ -0,0 +1,230 @@ +/* Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + http://www.apache.org/licenses/LICENSE-2.0 +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
*/
+#pragma once
+
+#ifdef PADDLE_WITH_XPU
+#include <string>
+#include <unordered_map>
+#include <unordered_set>
+
+#include "paddle/fluid/framework/op_kernel_type.h"
+
+namespace paddle {
+namespace platform {
+
+using vartype = paddle::framework::proto::VarType;
+using pOpKernelType = paddle::framework::OpKernelType;
+using XPUKernelSet =
+    std::unordered_set<pOpKernelType, paddle::framework::OpKernelType::Hash>;
+using XPUOpMap = std::unordered_map<std::string, XPUKernelSet>;
+
+inline XPUOpMap& get_kl1_ops() {
+  // Ops supported by KL1, indexed by op_name, data_type and place.
+  static XPUOpMap s_xpu1_kernels{
+      {"relu", XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})},
+      {"relu_grad", XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})},
+      {"tanh", XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})},
+      {"tanh_grad", XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})},
+      {"sigmoid", XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})},
+      {"sigmoid_grad",
+       XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})},
+      {"gelu", XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})},
+      {"gelu_grad", XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})},
+      {"sqrt", XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})},
+      {"sqrt_grad", XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})},
+      {"square", XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})},
+      {"square_grad", XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})},
+      {"hard_switch", XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})},
+      {"hard_switch_grad",
+       XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})},
+      {"leaky_relu", XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})},
+      {"leaky_relu_grad",
+       XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})},
+      {"log", XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})},
+      {"pow", XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})},
+      {"abs", XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})},
+      {"affine_channel",
+       XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})},
+      {"affine_channel_grad",
+       XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})},
+      {"assign", XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})},
+      {"batch_norm", XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})},
+      {"batch_norm_grad",
+       XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})},
+      {"cast", XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})},
+      {"clip_by_norm",
+       XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})},
+      {"coalesce_tensor",
+       XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})},
+      {"c_reduce_sum",
+       XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})},
+      {"c_allreduce_sum",
+       XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})},
+      {"broadcast", XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})},
+      {"concat", XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})},
+      {"concat_grad", XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})},
+      {"logicalor", XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})},
+      {"logicaland", XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})},
+      {"logicalnot", XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})},
+      {"depthwise_conv2d",
+       XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})},
+      {"depthwise_conv2d_grad",
+       XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})},
+      {"conv2d", XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})},
+      {"conv2d_grad", XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})},
+      {"deformable_conv",
+       XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})},
+      {"deformable_conv_grad",
+       XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})},
+      {"dropout", XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})},
+      {"dropout_grad",
+       XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})},
+      {"elementwise_sub",
+       XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})},
+      {"elementwise_sub_grad",
+       XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})},
+      {"elementwise_add",
+       XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})},
+      {"elementwise_add_grad",
+       XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})},
+      {"elementwise_div",
+       XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})},
+      {"elementwise_div_grad",
+       XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})},
+      {"elementwise_pow",
+       XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})},
+      {"elementwise_floordiv",
+       XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})},
+      {"elementwise_mul",
+       XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})},
+      {"elementwise_mul_grad",
+       XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})},
+      {"elementwise_max",
+       XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})},
+      {"elementwise_max_grad",
+       XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})},
+      {"elementwise_min",
+       XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})},
+      {"elementwise_min_grad",
+       XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})},
+      {"fill_constant",
+       XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})},
+      {"gather", XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})},
+      {"gather_grad", XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})},
+      {"gaussian_random",
+       XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})},
+      {"bilinear_interp",
+       XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})},
+      {"bilinear_interp_grad",
+       XPUKernelSet({pOpKernel Type(vartype::FP32, XPUPlace())})},
+      {"nearest_interp",
+       XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})},
+      {"nearest_interp_grad",
+       XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})},
+      {"bilinear_interp_v2",
+       XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})},
+      {"bilinear_interp_v2_grad",
+       XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})},
+      {"nearest_interp_v2",
+       XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})},
+      {"nearest_interp_v2_grad",
+       XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})},
+      {"layer_norm", XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})},
+      {"layer_norm_grad",
+       XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})},
+      {"load", XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})},
+      {"log_loss", XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})},
+      {"log_loss_grad",
+       XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})},
+      {"lookup_table_v2",
+       XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})},
+      {"lookup_table_v2_grad",
+       XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})},
+      {"matmul", XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})},
+      {"matmul_grad", XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})},
+      {"matmul_v2", XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})},
+      {"matmul_v2_grad",
+       XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})},
+      {"mean", XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})},
+      {"mean_grad", XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})},
+      {"accuracy", XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})},
+      {"mul", XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})},
+      {"mul_grad", XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})},
+      {"one_hot", XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})},
+      {"one_hot_v2", XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})},
+      {"sgd", XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})},
+      {"adam", XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})},
+      {"rmsprop", XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})},
+      {"lamb", XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})},
+      {"pool2d", XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})},
+      {"pool2d_grad", XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})},
+      {"range", XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})},
+      {"reduce_sum", XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})},
+      {"reduce_sum_grad",
+       XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})},
+      {"reduce_mean", XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})},
+      {"logsumexp", XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})},
+      {"reduce_max", XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})},
+      {"reduce_max_grad",
+       XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})},
+      {"reshape2", XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})},
+      {"reshape2_grad",
+       XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})},
+      {"rnn", XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})},
+      {"rnn_grad", XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})},
+      {"roi_align", XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})},
+      {"roi_align_grad",
+       XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})},
+      {"scale", XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})},
+      {"shape", XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})},
+      {"sign", XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})},
+      {"slice", XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})},
+      {"slice_grad", XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})},
+      {"softmax", XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})},
+      {"softmax_grad",
+       XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})},
+      {"softmax_with_cross_entropy",
+       XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})},
+      {"squeeze", XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})},
+      {"squeeze_grad",
+       XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})},
+      {"squeeze2", XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})},
+      {"squeeze2_grad",
+       XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})},
+      {"stack", XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})},
+      {"sum", XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})},
+      {"top_k", XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})},
+      {"transpose", XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})},
+      {"transpose_grad",
+       XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})},
+      {"transpose2", XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})},
+      {"transpose2_grad",
+       XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})},
+      {"truncated_gaussian_random",
+       XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})},
+      {"uniform_random",
+       XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})},
+      {"unsqueeze", XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})},
+      {"unsqueeze_grad",
+       XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})},
+      {"unsqueeze2", XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})},
+      {"unsqueeze2_grad",
+       XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})},
+      {"momentum", XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})}
+      // AddMore
+  };
+
+  return s_xpu1_kernels;
+}
+
+}  // namespace platform
+}  // namespace paddle
+#endif
diff --git a/paddle/fluid/platform/xpu/xpu2_op_list.h b/paddle/fluid/platform/xpu/xpu2_op_list.h new file mode 100644 index 0000000000..fc80e5ee96 --- /dev/null +++ b/paddle/fluid/platform/xpu/xpu2_op_list.h @@ -0,0 +1,42 @@ +/* Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + http://www.apache.org/licenses/LICENSE-2.0 +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
*/
+#pragma once
+
+#ifdef PADDLE_WITH_XPU
+#include <string>
+#include <unordered_map>
+#include <unordered_set>
+
+#include "paddle/fluid/framework/op_kernel_type.h"
+
+namespace paddle {
+namespace platform {
+
+using vartype = paddle::framework::proto::VarType;
+using pOpKernelType = paddle::framework::OpKernelType;
+using XPUKernelSet =
+    std::unordered_set<pOpKernelType, paddle::framework::OpKernelType::Hash>;
+using XPUOpMap = std::unordered_map<std::string, XPUKernelSet>;
+
+inline XPUOpMap& get_kl2_ops() {
+  // Ops supported by KL2, indexed by op_name, data_type and place.
+  static XPUOpMap s_xpu2_kernels{
+      {"mul", XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace()),
+                            pOpKernelType(vartype::FP16, XPUPlace())})},
+      // AddMore
+  };
+
+  return s_xpu2_kernels;
+}
+
+}  // namespace platform
+}  // namespace paddle
+#endif
diff --git a/paddle/fluid/platform/xpu_header.h b/paddle/fluid/platform/xpu/xpu_header.h similarity index 95% rename from paddle/fluid/platform/xpu_header.h rename to paddle/fluid/platform/xpu/xpu_header.h index 99f4224b5d..caee41ae29 100644 --- a/paddle/fluid/platform/xpu_header.h +++ b/paddle/fluid/platform/xpu/xpu_header.h @@ -21,12 +21,9 @@ #include "paddle/fluid/platform/errors.h" #include "paddle/fluid/platform/float16.h" -#include "xpu/api.h" -#include "xpu/refactor/fusion.h" -#include "xpu/refactor/math.h" -#include "xpu/refactor/nn.h" #include "xpu/runtime.h" #include "xpu/runtime_ex.h" +#include "xpu/xdnn.h" namespace xpu = baidu::xpu::api; diff --git a/paddle/fluid/platform/xpu_info.cc b/paddle/fluid/platform/xpu/xpu_info.cc similarity index 86% rename from paddle/fluid/platform/xpu_info.cc rename to paddle/fluid/platform/xpu/xpu_info.cc index f88248fda7..6b8ab16b47 100644 --- a/paddle/fluid/platform/xpu_info.cc +++ b/paddle/fluid/platform/xpu/xpu_info.cc @@ -8,14 +8,14 @@ distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. 
*/
-#include "paddle/fluid/platform/xpu_info.h"
+#include "paddle/fluid/platform/xpu/xpu_info.h"
 
 #include
 #include
 #include
 #include "gflags/gflags.h"
 #include "paddle/fluid/platform/enforce.h"
-#include "paddle/fluid/platform/xpu_header.h"
+#include "paddle/fluid/platform/xpu/xpu_header.h"
 #include "paddle/fluid/string/split.h"
 
 DEFINE_string(selected_xpus, "",
@@ -103,5 +103,25 @@ void SetXPUDeviceId(int id) {
                         ret));
 }
 
+// Classifies device `dev_id` as XPU1 or XPU2 from the XPUATTR_MODEL attribute
+// reported by xpu_device_get_attr(). K100 and K200 map to XPU1; every other
+// model value is treated as XPU2. Raises an External error if the query fails.
+XPUVersion get_xpu_version(int dev_id) {
+  uint64_t v = 0;
+  int ret = xpu_device_get_attr(&v, XPUATTR_MODEL, dev_id);
+  PADDLE_ENFORCE_EQ(ret, XPU_SUCCESS,
+                    platform::errors::External(
+                        "xpu_device_get_attr return wrong value[%d]", ret));
+
+  // The logger ends each message itself, so no trailing "\n" is streamed.
+  if (v == K100 || v == K200) {
+    VLOG(1) << "KUNLUN device " << dev_id << " is XPU1";
+    return XPU1;
+  } else {
+    VLOG(1) << "KUNLUN device " << dev_id << " is XPU2";
+    return XPU2;
+  }
+}
+
 }  // namespace platform
 }  // namespace paddle
diff --git a/paddle/fluid/platform/xpu_info.h b/paddle/fluid/platform/xpu/xpu_info.h
similarity index 95%
rename from paddle/fluid/platform/xpu_info.h
rename to paddle/fluid/platform/xpu/xpu_info.h
index 2bf7b0b5cb..3cb79d51eb 100644
--- a/paddle/fluid/platform/xpu_info.h
+++ b/paddle/fluid/platform/xpu/xpu_info.h
@@ -51,6 +51,11 @@ class XPUDeviceGuard {
   int prev_id_{-1};
 };
 
+// Kunlun hardware generations reported by get_xpu_version().
+enum XPUVersion { XPU1, XPU2 };
+// Returns the generation of XPU device `dev_id`.
+XPUVersion get_xpu_version(int dev_id);
+
 }  // namespace platform
 }  // namespace paddle
 #endif
diff --git a/paddle/fluid/platform/xpu/xpu_op_list.cc b/paddle/fluid/platform/xpu/xpu_op_list.cc
new file mode 100644
index 0000000000..b334940794
--- /dev/null
+++ b/paddle/fluid/platform/xpu/xpu_op_list.cc
@@ -0,0 +1,39 @@
+/* Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+    http://www.apache.org/licenses/LICENSE-2.0
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+#ifdef PADDLE_WITH_XPU
+#include <string>
+
+#include "paddle/fluid/platform/xpu/xpu1_op_list.h"
+#include "paddle/fluid/platform/xpu/xpu2_op_list.h"
+#include "paddle/fluid/platform/xpu/xpu_info.h"
+#include "paddle/fluid/platform/xpu/xpu_op_list.h"
+
+namespace paddle {
+namespace platform {
+
+// Returns whether `op_name` has a kernel of `type` listed for the generation
+// of the XPU device in type.place_ (which is read as an XPUPlace).
+bool is_xpu_support_op(std::string op_name, const pOpKernelType& type) {
+  auto v =
+      get_xpu_version(BOOST_GET_CONST(platform::XPUPlace, type.place_).device);
+  // Bind to the table for this generation. Assigning get_kl2_ops() through a
+  // reference to the KL1 table would overwrite the KL1 list for the process.
+  auto& ops = (v == XPU2) ? get_kl2_ops() : get_kl1_ops();
+
+  // One lookup per level; unknown ops and unlisted types both yield false.
+  auto op_it = ops.find(op_name);
+  if (op_it == ops.end()) return false;
+  return op_it->second.find(type) != op_it->second.end();
+}
+
+}  // namespace platform
+}  // namespace paddle
+#endif
diff --git a/paddle/fluid/platform/xpu/xpu_op_list.h b/paddle/fluid/platform/xpu/xpu_op_list.h
new file mode 100644
index 0000000000..487bc8ac48
--- /dev/null
+++ b/paddle/fluid/platform/xpu/xpu_op_list.h
@@ -0,0 +1,27 @@
+/* Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+    http://www.apache.org/licenses/LICENSE-2.0
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
*/
+#pragma once
+
+#ifdef PADDLE_WITH_XPU
+#include <string>
+
+#include "paddle/fluid/framework/op_kernel_type.h"
+
+namespace paddle {
+namespace platform {
+
+using pOpKernelType = paddle::framework::OpKernelType;
+// True if `op_name` with kernel `type` is listed for the XPU generation in use.
+bool is_xpu_support_op(std::string op_name, const pOpKernelType& type);
+
+}  // namespace platform
+}  // namespace paddle
+#endif
diff --git a/paddle/fluid/pybind/pybind.cc b/paddle/fluid/pybind/pybind.cc
index 3bbe6d6ef4..b58e905040 100644
--- a/paddle/fluid/pybind/pybind.cc
+++ b/paddle/fluid/pybind/pybind.cc
@@ -117,7 +117,7 @@ limitations under the License. */
 #endif
 
 #ifdef PADDLE_WITH_XPU
-#include "paddle/fluid/platform/xpu_info.h"
+#include "paddle/fluid/platform/xpu/xpu_info.h"
 #endif
 
 #ifdef PADDLE_WITH_CRYPTO
-- 
GitLab