From 2d0f3d9bec62ba480863e3fede365c7b55cce8f1 Mon Sep 17 00:00:00 2001 From: QingshuChen Date: Tue, 3 Aug 2021 15:22:35 +0800 Subject: [PATCH] support Kunlun2 (#34459) * support Kunlun2 * support KL2 * support KL2 --- cmake/external/xpu.cmake | 2 +- paddle/fluid/framework/CMakeLists.txt | 5 + paddle/fluid/framework/operator.cc | 6 +- paddle/fluid/imperative/CMakeLists.txt | 4 + paddle/fluid/imperative/prepared_operator.cc | 7 +- .../memory/allocation/allocator_facade.cc | 2 +- .../allocation/naive_best_fit_allocator.cc | 2 +- paddle/fluid/memory/memcpy.cc | 2 +- paddle/fluid/operators/activation_op_xpu.cc | 2 +- paddle/fluid/operators/concat_op_xpu.cc | 2 +- .../fluid/operators/deformable_conv_op_xpu.cc | 2 +- paddle/fluid/operators/dropout_op_xpu.cc | 2 +- .../operators/metrics/accuracy_op_xpu.cc | 2 +- .../operators/reduce_ops/logsumexp_op_xpu.cc | 2 +- .../operators/reduce_ops/reduce_max_op_xpu.cc | 2 +- .../operators/reduce_ops/reduce_op_xpu.h | 2 +- .../operators/reduce_ops/reduce_sum_op_xpu.cc | 2 +- paddle/fluid/operators/rnn_op_xpu.cc | 2 +- paddle/fluid/operators/scale_op_xpu.cc | 2 +- paddle/fluid/operators/sign_op_xpu.cc | 2 +- paddle/fluid/operators/sum_op_xpu.cc | 2 +- paddle/fluid/operators/transpose_op_xpu.cc | 2 +- paddle/fluid/platform/CMakeLists.txt | 3 +- paddle/fluid/platform/device_context.cc | 5 +- paddle/fluid/platform/device_context.h | 7 +- paddle/fluid/platform/init.cc | 4 +- paddle/fluid/platform/xpu/xpu1_op_list.h | 230 ++++++++++++++++++ paddle/fluid/platform/xpu/xpu2_op_list.h | 42 ++++ paddle/fluid/platform/{ => xpu}/xpu_header.h | 5 +- paddle/fluid/platform/{ => xpu}/xpu_info.cc | 20 +- paddle/fluid/platform/{ => xpu}/xpu_info.h | 3 + paddle/fluid/platform/xpu/xpu_op_list.cc | 39 +++ paddle/fluid/platform/xpu/xpu_op_list.h | 27 ++ paddle/fluid/pybind/pybind.cc | 2 +- 34 files changed, 410 insertions(+), 35 deletions(-) create mode 100644 paddle/fluid/platform/xpu/xpu1_op_list.h create mode 100644 
paddle/fluid/platform/xpu/xpu2_op_list.h rename paddle/fluid/platform/{ => xpu}/xpu_header.h (95%) rename paddle/fluid/platform/{ => xpu}/xpu_info.cc (86%) rename paddle/fluid/platform/{ => xpu}/xpu_info.h (95%) create mode 100644 paddle/fluid/platform/xpu/xpu_op_list.cc create mode 100644 paddle/fluid/platform/xpu/xpu_op_list.h diff --git a/cmake/external/xpu.cmake b/cmake/external/xpu.cmake index 42de34fb520..640e2e37ad4 100644 --- a/cmake/external/xpu.cmake +++ b/cmake/external/xpu.cmake @@ -35,7 +35,7 @@ ELSE () ENDIF() SET(XPU_BASE_URL_WITHOUT_DATE "https://baidu-kunlun-product.cdn.bcebos.com/KL-SDK/klsdk-dev") -SET(XPU_BASE_URL "${XPU_BASE_URL_WITHOUT_DATE}/20210701") +SET(XPU_BASE_URL "${XPU_BASE_URL_WITHOUT_DATE}/20210729") SET(XPU_XRE_URL "${XPU_BASE_URL}/${XPU_XRE_DIR_NAME}.tar.gz" CACHE STRING "" FORCE) SET(XPU_XDNN_URL "${XPU_BASE_URL}/${XPU_XDNN_DIR_NAME}.tar.gz" CACHE STRING "" FORCE) SET(XPU_XCCL_URL "${XPU_BASE_URL_WITHOUT_DATE}/20210623/${XPU_XCCL_DIR_NAME}.tar.gz" CACHE STRING "" FORCE) diff --git a/paddle/fluid/framework/CMakeLists.txt b/paddle/fluid/framework/CMakeLists.txt index 1e4d813c6d3..08e912f52cc 100644 --- a/paddle/fluid/framework/CMakeLists.txt +++ b/paddle/fluid/framework/CMakeLists.txt @@ -188,8 +188,13 @@ cc_library(op_kernel_type SRCS op_kernel_type.cc DEPS device_context place) cc_library(unused_var_check SRCS unused_var_check.cc DEPS glog no_need_buffer_vars_inference) +IF(WITH_XPU) +cc_library(operator SRCS operator.cc DEPS xpu_op_list op_info device_context tensor scope glog trainer_desc_proto data_feed_proto + shape_inference data_transform lod_tensor profiler transfer_scope_cache op_kernel_type op_call_stack unused_var_check nan_inf_utils) +ELSE() cc_library(operator SRCS operator.cc DEPS op_info device_context tensor scope glog trainer_desc_proto data_feed_proto shape_inference data_transform lod_tensor profiler transfer_scope_cache op_kernel_type op_call_stack unused_var_check nan_inf_utils) +ENDIF() cc_test(operator_test 
SRCS operator_test.cc DEPS operator op_registry device_context) cc_test(operator_exception_test SRCS operator_exception_test.cc DEPS operator op_registry device_context) diff --git a/paddle/fluid/framework/operator.cc b/paddle/fluid/framework/operator.cc index 20cffaa9590..0f7012940d7 100644 --- a/paddle/fluid/framework/operator.cc +++ b/paddle/fluid/framework/operator.cc @@ -36,7 +36,8 @@ class LoDTensor; } // namespace framework } // namespace paddle #ifdef PADDLE_WITH_XPU -#include "paddle/fluid/platform/xpu_info.h" +#include "paddle/fluid/platform/xpu/xpu_info.h" +#include "paddle/fluid/platform/xpu/xpu_op_list.h" #endif #ifdef PADDLE_WITH_MKLDNN @@ -1254,7 +1255,8 @@ void OperatorWithKernel::ChooseKernel(const RuntimeContext& ctx, #endif #ifdef PADDLE_WITH_XPU if (kernel_iter == kernels.end() && - is_xpu_place(expected_kernel_key.place_)) { + is_xpu_place(expected_kernel_key.place_) && + !paddle::platform::is_xpu_support_op(type_, expected_kernel_key)) { VLOG(3) << "missing XPU kernel: " << type_ << ", expected_kernel_key:" << expected_kernel_key << ", fallbacking to CPU one!"; diff --git a/paddle/fluid/imperative/CMakeLists.txt b/paddle/fluid/imperative/CMakeLists.txt index c9dffe2d76a..93b18e7e553 100644 --- a/paddle/fluid/imperative/CMakeLists.txt +++ b/paddle/fluid/imperative/CMakeLists.txt @@ -1,6 +1,10 @@ cc_library(imperative_flag SRCS flags.cc DEPS gflags) +IF(WITH_XPU) +cc_library(prepared_operator SRCS prepared_operator.cc DEPS xpu_op_list proto_desc operator device_context lod_tensor selected_rows var_type_traits op_kernel_type data_transform nan_inf_utils) +ELSE() cc_library(prepared_operator SRCS prepared_operator.cc DEPS proto_desc operator device_context lod_tensor selected_rows var_type_traits op_kernel_type data_transform nan_inf_utils) +ENDIF() cc_library(layer SRCS layer.cc DEPS prepared_operator math_function imperative_flag variable_helper op_registry) add_subdirectory(jit) cc_library(amp SRCS amp_auto_cast.cc DEPS layer ) diff --git 
a/paddle/fluid/imperative/prepared_operator.cc b/paddle/fluid/imperative/prepared_operator.cc index f1e1f79b99e..619d31c4f5b 100644 --- a/paddle/fluid/imperative/prepared_operator.cc +++ b/paddle/fluid/imperative/prepared_operator.cc @@ -17,7 +17,9 @@ #include "paddle/fluid/framework/data_type_transform.h" #include "paddle/fluid/framework/details/nan_inf_utils.h" #include "paddle/fluid/imperative/infer_shape_context.h" - +#ifdef PADDLE_WITH_XPU +#include "paddle/fluid/platform/xpu/xpu_op_list.h" +#endif DECLARE_bool(check_nan_inf); namespace paddle { @@ -130,7 +132,8 @@ PreparedOp PrepareImpl(const NameVarMap& ins, auto kernel_iter = kernels.find(expected_kernel_key); #ifdef PADDLE_WITH_XPU if (kernel_iter == kernels.end() && - is_xpu_place(expected_kernel_key.place_)) { + is_xpu_place(expected_kernel_key.place_) && + !paddle::platform::is_xpu_support_op(op.Type(), expected_kernel_key)) { VLOG(3) << "missing XPU kernel: " << op.Type() << ", expected_kernel_key:" << expected_kernel_key << ", fallbacking to CPU one!"; diff --git a/paddle/fluid/memory/allocation/allocator_facade.cc b/paddle/fluid/memory/allocation/allocator_facade.cc index 8bc9775381b..bfc4a1d5982 100644 --- a/paddle/fluid/memory/allocation/allocator_facade.cc +++ b/paddle/fluid/memory/allocation/allocator_facade.cc @@ -33,7 +33,7 @@ #include "paddle/fluid/platform/gpu_info.h" #endif #ifdef PADDLE_WITH_XPU -#include "paddle/fluid/platform/xpu_info.h" +#include "paddle/fluid/platform/xpu/xpu_info.h" #endif #include "paddle/fluid/platform/npu_info.h" diff --git a/paddle/fluid/memory/allocation/naive_best_fit_allocator.cc b/paddle/fluid/memory/allocation/naive_best_fit_allocator.cc index bc72b4b20d0..6c2fb82cb7c 100644 --- a/paddle/fluid/memory/allocation/naive_best_fit_allocator.cc +++ b/paddle/fluid/memory/allocation/naive_best_fit_allocator.cc @@ -31,7 +31,7 @@ #include "paddle/fluid/platform/cuda_device_guard.h" #endif #ifdef PADDLE_WITH_XPU -#include "paddle/fluid/platform/xpu_header.h" +#include 
"paddle/fluid/platform/xpu/xpu_header.h" #endif DEFINE_bool(init_allocated_mem, false, diff --git a/paddle/fluid/memory/memcpy.cc b/paddle/fluid/memory/memcpy.cc index f2f8c5d1fb5..3b3be9776c4 100644 --- a/paddle/fluid/memory/memcpy.cc +++ b/paddle/fluid/memory/memcpy.cc @@ -19,7 +19,7 @@ limitations under the License. */ #include "paddle/fluid/platform/profiler.h" #ifdef PADDLE_WITH_XPU -#include "paddle/fluid/platform/xpu_header.h" +#include "paddle/fluid/platform/xpu/xpu_header.h" #endif namespace paddle { diff --git a/paddle/fluid/operators/activation_op_xpu.cc b/paddle/fluid/operators/activation_op_xpu.cc index 2c7219ef688..257a91d7c15 100644 --- a/paddle/fluid/operators/activation_op_xpu.cc +++ b/paddle/fluid/operators/activation_op_xpu.cc @@ -16,7 +16,7 @@ limitations under the License. */ #include "paddle/fluid/operators/activation_op.h" #include -#include "paddle/fluid/platform/xpu_header.h" +#include "paddle/fluid/platform/xpu/xpu_header.h" namespace paddle { namespace operators { diff --git a/paddle/fluid/operators/concat_op_xpu.cc b/paddle/fluid/operators/concat_op_xpu.cc index be299babdba..dc9359ecf5c 100644 --- a/paddle/fluid/operators/concat_op_xpu.cc +++ b/paddle/fluid/operators/concat_op_xpu.cc @@ -16,7 +16,7 @@ limitations under the License. */ #include #include #include -#include "paddle/fluid/platform/xpu_header.h" +#include "paddle/fluid/platform/xpu/xpu_header.h" namespace paddle { namespace operators { diff --git a/paddle/fluid/operators/deformable_conv_op_xpu.cc b/paddle/fluid/operators/deformable_conv_op_xpu.cc index 18bab83b0ed..45761675621 100644 --- a/paddle/fluid/operators/deformable_conv_op_xpu.cc +++ b/paddle/fluid/operators/deformable_conv_op_xpu.cc @@ -16,7 +16,7 @@ limitations under the License. 
*/ #include #include #include "paddle/fluid/framework/op_registry.h" -#include "paddle/fluid/platform/xpu_header.h" +#include "paddle/fluid/platform/xpu/xpu_header.h" namespace paddle { namespace operators { diff --git a/paddle/fluid/operators/dropout_op_xpu.cc b/paddle/fluid/operators/dropout_op_xpu.cc index 79d23907484..0b0b7095bd5 100644 --- a/paddle/fluid/operators/dropout_op_xpu.cc +++ b/paddle/fluid/operators/dropout_op_xpu.cc @@ -11,7 +11,7 @@ limitations under the License. */ #include "paddle/fluid/operators/dropout_op.h" #include #include -#include "paddle/fluid/platform/xpu_header.h" +#include "paddle/fluid/platform/xpu/xpu_header.h" namespace paddle { namespace operators { diff --git a/paddle/fluid/operators/metrics/accuracy_op_xpu.cc b/paddle/fluid/operators/metrics/accuracy_op_xpu.cc index d73e46df349..cb75616221b 100644 --- a/paddle/fluid/operators/metrics/accuracy_op_xpu.cc +++ b/paddle/fluid/operators/metrics/accuracy_op_xpu.cc @@ -15,7 +15,7 @@ limitations under the License. 
*/ #ifdef PADDLE_WITH_XPU #include "paddle/fluid/operators/metrics/accuracy_op.h" -#include "paddle/fluid/platform/xpu_header.h" +#include "paddle/fluid/platform/xpu/xpu_header.h" namespace paddle { namespace operators { diff --git a/paddle/fluid/operators/reduce_ops/logsumexp_op_xpu.cc b/paddle/fluid/operators/reduce_ops/logsumexp_op_xpu.cc index 9cc8ac200b8..4f98dde210f 100644 --- a/paddle/fluid/operators/reduce_ops/logsumexp_op_xpu.cc +++ b/paddle/fluid/operators/reduce_ops/logsumexp_op_xpu.cc @@ -16,7 +16,7 @@ #include "paddle/fluid/operators/reduce_ops/logsumexp_op.h" #include "paddle/fluid/platform/device_context.h" -#include "paddle/fluid/platform/xpu_header.h" +#include "paddle/fluid/platform/xpu/xpu_header.h" namespace paddle { namespace operators { diff --git a/paddle/fluid/operators/reduce_ops/reduce_max_op_xpu.cc b/paddle/fluid/operators/reduce_ops/reduce_max_op_xpu.cc index a4ed0c85f4f..ae27a5d7df4 100644 --- a/paddle/fluid/operators/reduce_ops/reduce_max_op_xpu.cc +++ b/paddle/fluid/operators/reduce_ops/reduce_max_op_xpu.cc @@ -16,7 +16,7 @@ #include #include #include "paddle/fluid/operators/reduce_ops/reduce_op_xpu.h" -#include "paddle/fluid/platform/xpu_header.h" +#include "paddle/fluid/platform/xpu/xpu_header.h" namespace paddle { namespace operators { diff --git a/paddle/fluid/operators/reduce_ops/reduce_op_xpu.h b/paddle/fluid/operators/reduce_ops/reduce_op_xpu.h index fa9503ec3f0..5ae60713bc9 100644 --- a/paddle/fluid/operators/reduce_ops/reduce_op_xpu.h +++ b/paddle/fluid/operators/reduce_ops/reduce_op_xpu.h @@ -21,7 +21,7 @@ #include #include #include "paddle/fluid/operators/reduce_ops/reduce_op.h" -#include "paddle/fluid/platform/xpu_header.h" +#include "paddle/fluid/platform/xpu/xpu_header.h" namespace paddle { namespace operators { diff --git a/paddle/fluid/operators/reduce_ops/reduce_sum_op_xpu.cc b/paddle/fluid/operators/reduce_ops/reduce_sum_op_xpu.cc index bf55221bd3f..f759b104d01 100644 --- 
a/paddle/fluid/operators/reduce_ops/reduce_sum_op_xpu.cc +++ b/paddle/fluid/operators/reduce_ops/reduce_sum_op_xpu.cc @@ -16,7 +16,7 @@ #include #include #include "paddle/fluid/operators/reduce_ops/reduce_op_xpu.h" -#include "paddle/fluid/platform/xpu_header.h" +#include "paddle/fluid/platform/xpu/xpu_header.h" namespace paddle { namespace operators { diff --git a/paddle/fluid/operators/rnn_op_xpu.cc b/paddle/fluid/operators/rnn_op_xpu.cc index fb82d18e62f..9d637e1cee1 100644 --- a/paddle/fluid/operators/rnn_op_xpu.cc +++ b/paddle/fluid/operators/rnn_op_xpu.cc @@ -14,7 +14,7 @@ limitations under the License. */ #include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/operators/utils.h" #include "paddle/fluid/platform/device_context.h" -#include "paddle/fluid/platform/xpu_header.h" +#include "paddle/fluid/platform/xpu/xpu_header.h" namespace paddle { namespace operators { diff --git a/paddle/fluid/operators/scale_op_xpu.cc b/paddle/fluid/operators/scale_op_xpu.cc index fdb90797b69..e0dfad91570 100644 --- a/paddle/fluid/operators/scale_op_xpu.cc +++ b/paddle/fluid/operators/scale_op_xpu.cc @@ -16,7 +16,7 @@ limitations under the License. */ #include "paddle/fluid/operators/scale_op.h" #include -#include "paddle/fluid/platform/xpu_header.h" +#include "paddle/fluid/platform/xpu/xpu_header.h" namespace paddle { namespace operators { diff --git a/paddle/fluid/operators/sign_op_xpu.cc b/paddle/fluid/operators/sign_op_xpu.cc index 86fe826c659..a164a9b0566 100644 --- a/paddle/fluid/operators/sign_op_xpu.cc +++ b/paddle/fluid/operators/sign_op_xpu.cc @@ -15,7 +15,7 @@ limitations under the License. 
*/ #ifdef PADDLE_WITH_XPU #include "paddle/fluid/operators/sign_op.h" -#include "paddle/fluid/platform/xpu_header.h" +#include "paddle/fluid/platform/xpu/xpu_header.h" namespace paddle { namespace operators { diff --git a/paddle/fluid/operators/sum_op_xpu.cc b/paddle/fluid/operators/sum_op_xpu.cc index 264cc4e2cf7..d16bb5562ed 100644 --- a/paddle/fluid/operators/sum_op_xpu.cc +++ b/paddle/fluid/operators/sum_op_xpu.cc @@ -13,7 +13,7 @@ limitations under the License. */ #include "paddle/fluid/operators/sum_op.h" #include -#include "paddle/fluid/platform/xpu_header.h" +#include "paddle/fluid/platform/xpu/xpu_header.h" namespace paddle { namespace operators { diff --git a/paddle/fluid/operators/transpose_op_xpu.cc b/paddle/fluid/operators/transpose_op_xpu.cc index 2748c07f9e6..360c2125ed1 100644 --- a/paddle/fluid/operators/transpose_op_xpu.cc +++ b/paddle/fluid/operators/transpose_op_xpu.cc @@ -17,7 +17,7 @@ limitations under the License. */ #include #include #include -#include "paddle/fluid/platform/xpu_header.h" +#include "paddle/fluid/platform/xpu/xpu_header.h" namespace paddle { namespace operators { diff --git a/paddle/fluid/platform/CMakeLists.txt b/paddle/fluid/platform/CMakeLists.txt index 36a95676217..efd25bc8929 100644 --- a/paddle/fluid/platform/CMakeLists.txt +++ b/paddle/fluid/platform/CMakeLists.txt @@ -69,7 +69,8 @@ cc_library(place SRCS place.cc DEPS enforce boost) cc_test(place_test SRCS place_test.cc DEPS place glog gflags) if(WITH_XPU) -cc_library(xpu_info SRCS xpu_info.cc DEPS gflags glog enforce xpulib) +cc_library(xpu_info SRCS xpu/xpu_info.cc DEPS gflags glog enforce xpulib) +cc_library(xpu_op_list SRCS xpu/xpu_op_list.cc DEPS gflags glog enforce xpulib) endif() if(WITH_ASCEND) diff --git a/paddle/fluid/platform/device_context.cc b/paddle/fluid/platform/device_context.cc index 82f14c612d1..c7162f58de2 100644 --- a/paddle/fluid/platform/device_context.cc +++ b/paddle/fluid/platform/device_context.cc @@ -196,7 +196,10 @@ Eigen::DefaultDevice* 
CPUDeviceContext::eigen_device() const { Place CPUDeviceContext::GetPlace() const { return place_; } #ifdef PADDLE_WITH_XPU -XPUDeviceContext::XPUDeviceContext() { context_ = xpu::create_context(); } +XPUDeviceContext::XPUDeviceContext() { + context_ = xpu::create_context(); + xpu_version_ = get_xpu_version(place_.device); +} XPUDeviceContext::~XPUDeviceContext() {} diff --git a/paddle/fluid/platform/device_context.h b/paddle/fluid/platform/device_context.h index 68589f546dc..abac12ff266 100644 --- a/paddle/fluid/platform/device_context.h +++ b/paddle/fluid/platform/device_context.h @@ -68,8 +68,8 @@ struct GpuDevice; } // namespace Eigen #ifdef PADDLE_WITH_XPU -#include "paddle/fluid/platform/xpu_header.h" -#include "paddle/fluid/platform/xpu_info.h" +#include "paddle/fluid/platform/xpu/xpu_header.h" +#include "paddle/fluid/platform/xpu/xpu_info.h" #endif #ifdef PADDLE_WITH_ASCEND_CL @@ -137,12 +137,14 @@ struct DefaultDeviceContextType { }; #ifdef PADDLE_WITH_XPU +namespace xpu = baidu::xpu::api; class XPUDeviceContext : public DeviceContext { public: XPUDeviceContext(); explicit XPUDeviceContext(XPUPlace place); virtual ~XPUDeviceContext(); Eigen::DefaultDevice* eigen_device() const { return nullptr; } + XPUVersion xpu_version() const { return xpu_version_; } Place GetPlace() const override; xpu::Context* x_context() const; @@ -159,6 +161,7 @@ class XPUDeviceContext : public DeviceContext { private: XPUPlace place_; + XPUVersion xpu_version_; xpu::Context* context_; #ifdef PADDLE_WITH_XPU_BKCL BKCLContext_t bkcl_context_; diff --git a/paddle/fluid/platform/init.cc b/paddle/fluid/platform/init.cc index ac6988d350f..2e0ba9d241c 100644 --- a/paddle/fluid/platform/init.cc +++ b/paddle/fluid/platform/init.cc @@ -29,8 +29,8 @@ limitations under the License. 
*/ #include "paddle/fluid/platform/place.h" #ifdef PADDLE_WITH_XPU -#include "paddle/fluid/platform/xpu_header.h" -#include "paddle/fluid/platform/xpu_info.h" +#include "paddle/fluid/platform/xpu/xpu_header.h" +#include "paddle/fluid/platform/xpu/xpu_info.h" #endif #ifdef WITH_WIN_DUMP_DBG diff --git a/paddle/fluid/platform/xpu/xpu1_op_list.h b/paddle/fluid/platform/xpu/xpu1_op_list.h new file mode 100644 index 00000000000..131525718ca --- /dev/null +++ b/paddle/fluid/platform/xpu/xpu1_op_list.h @@ -0,0 +1,230 @@ +/* Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + http://www.apache.org/licenses/LICENSE-2.0 +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
*/ +#pragma once + +#ifdef PADDLE_WITH_XPU +#include +#include +#include + +#include "paddle/fluid/framework/op_kernel_type.h" + +namespace paddle { +namespace platform { + +using vartype = paddle::framework::proto::VarType; +using pOpKernelType = paddle::framework::OpKernelType; +using XPUKernelSet = + std::unordered_set; +using XPUOpMap = std::unordered_map; + +XPUOpMap& get_kl1_ops() { + // KL1支持的op,通过op_name, data_type, place来索引 + static XPUOpMap s_xpu1_kernels{ + {"relu", XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})}, + {"relu_grad", XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})}, + {"tanh", XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})}, + {"tanh_grad", XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})}, + {"sigmoid", XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})}, + {"sigmoid_grad", + XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})}, + {"gelu", XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})}, + {"gelu_grad", XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})}, + {"sqrt", XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})}, + {"sqrt_grad", XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})}, + {"square", XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})}, + {"square_grad", XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})}, + {"hard_switch", XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})}, + {"hard_switch_grad", + XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})}, + {"leaky_relu", XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})}, + {"leaky_relu_grad", + XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})}, + {"log", XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})}, + {"pow", XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})}, + {"abs", XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})}, + {"affine_channel", + XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})}, + {"affine_channel_grad", + 
XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})}, + {"assign", XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})}, + {"batch_norm", XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})}, + {"batch_norm_grad", + XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})}, + {"cast", XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})}, + {"clip_by_norm", + XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})}, + {"coalesce_tensor", + XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})}, + {"c_reduce_sum", + XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})}, + {"c_allreduce_sum", + XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})}, + {"broadcast", XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})}, + {"concat", XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})}, + {"concat_grad", XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})}, + {"logicalor", XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})}, + {"logicaland", XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})}, + {"logicalnot", XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})}, + {"depthwise_conv2d", + XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})}, + {"depthwise_conv2d_grad", + XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})}, + {"conv2d", XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})}, + {"conv2d_grad", XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})}, + {"deformable_conv", + XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})}, + {"deformable_conv_grad", + XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})}, + {"dropout", XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})}, + {"dropout_grad", + XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})}, + {"elementwise_sub", + XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})}, + {"elementwise_sub_grad", + XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})}, + {"elementwise_add", + 
XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})}, + {"elementwise_add_grad", + XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})}, + {"elementwise_div", + XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})}, + {"elementwise_div_grad", + XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})}, + {"elementwise_pow", + XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})}, + {"elementwise_floordiv", + XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})}, + {"elementwise_mul", + XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})}, + {"elementwise_mul_grad", + XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})}, + {"elementwise_max", + XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})}, + {"elementwise_max_grad", + XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})}, + {"elementwise_min", + XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})}, + {"elementwise_min_grad", + XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})}, + {"fill_constant", + XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})}, + {"gather", XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})}, + {"gather_grad", XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})}, + {"gaussian_random", + XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})}, + {"bilinear_interp", + XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})}, + {"bilinear_interp_grad", + XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})}, + {"nearest_interp", + XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})}, + {"nearest_interp_grad", + XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})}, + {"bilinear_interp_v2", + XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})}, + {"bilinear_interp_v2_grad", + XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})}, + {"nearest_interp_v2", + XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})}, + {"nearest_interp_v2_grad", + XPUKernelSet({pOpKernelType(vartype::FP32, 
XPUPlace())})}, + {"layer_norm", XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})}, + {"layer_norm_grad", + XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})}, + {"load", XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})}, + {"log_loss", XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})}, + {"log_loss_grad", + XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})}, + {"lookup_table_v2", + XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})}, + {"lookup_table_v2_grad", + XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})}, + {"matmul", XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})}, + {"matmul_grad", XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})}, + {"matmul_v2", XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})}, + {"matmul_v2_grad", + XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})}, + {"mean", XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})}, + {"mean_grad", XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})}, + {"accuracy", XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})}, + {"mul", XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})}, + {"mul_grad", XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})}, + {"one_hot", XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})}, + {"one_hot_v2", XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})}, + {"sgd", XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})}, + {"adam", XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})}, + {"rmsprop", XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})}, + {"lamb", XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})}, + {"pool2d", XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})}, + {"pool2d_grad", XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})}, + {"range", XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})}, + {"reduce_sum", XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})}, + {"reduce_sum_grad", + 
XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})}, + {"reduce_mean", XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})}, + {"logsumexp", XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})}, + {"reduce_max", XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})}, + {"reduce_max_grad", + XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})}, + {"reshape2", XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})}, + {"reshape2_grad", + XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})}, + {"rnn", XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})}, + {"rnn_grad", XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})}, + {"roi_align", XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})}, + {"roi_align_grad", + XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})}, + {"scale", XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})}, + {"shape", XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})}, + {"sign", XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})}, + {"slice", XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})}, + {"slice_grad", XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})}, + {"softmax", XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})}, + {"softmax_grad", + XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})}, + {"softmax_with_cross_entropy", + XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})}, + {"squeeze", XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})}, + {"squeeze_grad", + XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})}, + {"squeeze2", XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})}, + {"squeeze2_grad", + XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})}, + {"stack", XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})}, + {"sum", XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})}, + {"top_k", XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})}, + {"transpose", 
XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})}, + {"transpose_grad", + XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})}, + {"transpose2", XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})}, + {"transpose2_grad", + XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})}, + {"truncated_gaussian_random", + XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})}, + {"uniform_random", + XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})}, + {"unsqueeze", XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})}, + {"unsqueeze_grad", + XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})}, + {"unsqueeze2", XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})}, + {"unsqueeze2_grad", + XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})}, + {"momuntem", XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})} + // AddMore + }; + + return s_xpu1_kernels; +} + +} // namespace platform +} // namespace paddle +#endif diff --git a/paddle/fluid/platform/xpu/xpu2_op_list.h b/paddle/fluid/platform/xpu/xpu2_op_list.h new file mode 100644 index 00000000000..fc80e5ee962 --- /dev/null +++ b/paddle/fluid/platform/xpu/xpu2_op_list.h @@ -0,0 +1,42 @@ +/* Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + http://www.apache.org/licenses/LICENSE-2.0 +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
*/ +#pragma once + +#ifdef PADDLE_WITH_XPU +#include +#include +#include + +#include "paddle/fluid/framework/op_kernel_type.h" + +namespace paddle { +namespace platform { + +using vartype = paddle::framework::proto::VarType; +using pOpKernelType = paddle::framework::OpKernelType; +using XPUKernelSet = + std::unordered_set; +using XPUOpMap = std::unordered_map; + +XPUOpMap& get_kl2_ops() { + // KL1支持的op,通过op_name, data_type, place来索引 + static XPUOpMap s_xpu2_kernels{ + {"mul", XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace()), + pOpKernelType(vartype::FP16, XPUPlace())})}, + // AddMore + }; + + return s_xpu2_kernels; +} + +} // namespace platform +} // namespace paddle +#endif diff --git a/paddle/fluid/platform/xpu_header.h b/paddle/fluid/platform/xpu/xpu_header.h similarity index 95% rename from paddle/fluid/platform/xpu_header.h rename to paddle/fluid/platform/xpu/xpu_header.h index 99f4224b5d4..caee41ae299 100644 --- a/paddle/fluid/platform/xpu_header.h +++ b/paddle/fluid/platform/xpu/xpu_header.h @@ -21,12 +21,9 @@ #include "paddle/fluid/platform/errors.h" #include "paddle/fluid/platform/float16.h" -#include "xpu/api.h" -#include "xpu/refactor/fusion.h" -#include "xpu/refactor/math.h" -#include "xpu/refactor/nn.h" #include "xpu/runtime.h" #include "xpu/runtime_ex.h" +#include "xpu/xdnn.h" namespace xpu = baidu::xpu::api; diff --git a/paddle/fluid/platform/xpu_info.cc b/paddle/fluid/platform/xpu/xpu_info.cc similarity index 86% rename from paddle/fluid/platform/xpu_info.cc rename to paddle/fluid/platform/xpu/xpu_info.cc index f88248fda7e..6b8ab16b47d 100644 --- a/paddle/fluid/platform/xpu_info.cc +++ b/paddle/fluid/platform/xpu/xpu_info.cc @@ -8,14 +8,14 @@ distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. 
*/ -#include "paddle/fluid/platform/xpu_info.h" +#include "paddle/fluid/platform/xpu/xpu_info.h" #include #include #include #include "gflags/gflags.h" #include "paddle/fluid/platform/enforce.h" -#include "paddle/fluid/platform/xpu_header.h" +#include "paddle/fluid/platform/xpu/xpu_header.h" #include "paddle/fluid/string/split.h" DEFINE_string(selected_xpus, "", @@ -103,5 +103,21 @@ void SetXPUDeviceId(int id) { ret)); } +XPUVersion get_xpu_version(int dev_id) { + uint64_t v = 0; + int ret = xpu_device_get_attr(&v, XPUATTR_MODEL, dev_id); + PADDLE_ENFORCE_EQ(ret, XPU_SUCCESS, + platform::errors::External( + "xpu_device_get_attr return wrong value[%d]", ret)); + + if (v == K100 || v == K200) { + VLOG(1) << "KUNLUN device " << dev_id << " is XPU1\n"; + return XPU1; + } else { + VLOG(1) << "KUNLUN device " << dev_id << " is XPU2\n"; + return XPU2; + } +} + } // namespace platform } // namespace paddle diff --git a/paddle/fluid/platform/xpu_info.h b/paddle/fluid/platform/xpu/xpu_info.h similarity index 95% rename from paddle/fluid/platform/xpu_info.h rename to paddle/fluid/platform/xpu/xpu_info.h index 2bf7b0b5cb6..3cb79d51eb7 100644 --- a/paddle/fluid/platform/xpu_info.h +++ b/paddle/fluid/platform/xpu/xpu_info.h @@ -51,6 +51,9 @@ class XPUDeviceGuard { int prev_id_{-1}; }; +enum XPUVersion { XPU1, XPU2 }; +XPUVersion get_xpu_version(int dev_id); + } // namespace platform } // namespace paddle #endif diff --git a/paddle/fluid/platform/xpu/xpu_op_list.cc b/paddle/fluid/platform/xpu/xpu_op_list.cc new file mode 100644 index 00000000000..b3349407942 --- /dev/null +++ b/paddle/fluid/platform/xpu/xpu_op_list.cc @@ -0,0 +1,39 @@ +/* Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at + http://www.apache.org/licenses/LICENSE-2.0 +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ +#ifdef PADDLE_WITH_XPU +#include <string> + +#include "paddle/fluid/platform/xpu/xpu1_op_list.h" +#include "paddle/fluid/platform/xpu/xpu2_op_list.h" +#include "paddle/fluid/platform/xpu/xpu_info.h" +#include "paddle/fluid/platform/xpu/xpu_op_list.h" + +namespace paddle { +namespace platform { + +bool is_xpu_support_op(std::string op_name, const pOpKernelType& type) { + auto v = + get_xpu_version(BOOST_GET_CONST(platform::XPUPlace, type.place_).device); + // NOTE(review): bind the reference to the correct table up front. The old + // form "auto& ops = get_kl1_ops(); ... ops = get_kl2_ops();" assigned + // through the reference, overwriting the static KL1 map with the KL2 ops. + auto& ops = (v == XPU2) ? get_kl2_ops() : get_kl1_ops(); + + auto iter = ops.find(op_name); + if (iter != ops.end() && + iter->second.find(type) != iter->second.end()) { + return true; + } + return false; +} + +} // namespace platform +} // namespace paddle +#endif diff --git a/paddle/fluid/platform/xpu/xpu_op_list.h b/paddle/fluid/platform/xpu/xpu_op_list.h new file mode 100644 index 00000000000..487bc8ac48b --- /dev/null +++ b/paddle/fluid/platform/xpu/xpu_op_list.h @@ -0,0 +1,27 @@ +/* Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + http://www.apache.org/licenses/LICENSE-2.0 +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
 */ +#pragma once + +#ifdef PADDLE_WITH_XPU +#include <string> + +#include "paddle/fluid/framework/op_kernel_type.h" + +namespace paddle { +namespace platform { + +using pOpKernelType = paddle::framework::OpKernelType; + +bool is_xpu_support_op(std::string op_name, const pOpKernelType& type); + +} // namespace platform +} // namespace paddle +#endif diff --git a/paddle/fluid/pybind/pybind.cc b/paddle/fluid/pybind/pybind.cc index 3bbe6d6ef4b..b58e9050402 100644 --- a/paddle/fluid/pybind/pybind.cc +++ b/paddle/fluid/pybind/pybind.cc @@ -117,7 +117,7 @@ limitations under the License. */ #endif #ifdef PADDLE_WITH_XPU -#include "paddle/fluid/platform/xpu_info.h" +#include "paddle/fluid/platform/xpu/xpu_info.h" #endif #ifdef PADDLE_WITH_CRYPTO -- GitLab