diff --git a/cmake/external/xpu.cmake b/cmake/external/xpu.cmake index 42de34fb52061af23eee28377659ed4cbbb4de0a..640e2e37ad434d9e3a6f36eecd34708dceccf164 100644 --- a/cmake/external/xpu.cmake +++ b/cmake/external/xpu.cmake @@ -35,7 +35,7 @@ ELSE () ENDIF() SET(XPU_BASE_URL_WITHOUT_DATE "https://baidu-kunlun-product.cdn.bcebos.com/KL-SDK/klsdk-dev") -SET(XPU_BASE_URL "${XPU_BASE_URL_WITHOUT_DATE}/20210701") +SET(XPU_BASE_URL "${XPU_BASE_URL_WITHOUT_DATE}/20210729") SET(XPU_XRE_URL "${XPU_BASE_URL}/${XPU_XRE_DIR_NAME}.tar.gz" CACHE STRING "" FORCE) SET(XPU_XDNN_URL "${XPU_BASE_URL}/${XPU_XDNN_DIR_NAME}.tar.gz" CACHE STRING "" FORCE) SET(XPU_XCCL_URL "${XPU_BASE_URL_WITHOUT_DATE}/20210623/${XPU_XCCL_DIR_NAME}.tar.gz" CACHE STRING "" FORCE) diff --git a/paddle/fluid/framework/CMakeLists.txt b/paddle/fluid/framework/CMakeLists.txt index 1e4d813c6d3bd71f239cce9fd52549a8214915f0..08e912f52ccb570af8e1c10fc95480d479a2c6eb 100644 --- a/paddle/fluid/framework/CMakeLists.txt +++ b/paddle/fluid/framework/CMakeLists.txt @@ -188,8 +188,13 @@ cc_library(op_kernel_type SRCS op_kernel_type.cc DEPS device_context place) cc_library(unused_var_check SRCS unused_var_check.cc DEPS glog no_need_buffer_vars_inference) +IF(WITH_XPU) +cc_library(operator SRCS operator.cc DEPS xpu_op_list op_info device_context tensor scope glog trainer_desc_proto data_feed_proto + shape_inference data_transform lod_tensor profiler transfer_scope_cache op_kernel_type op_call_stack unused_var_check nan_inf_utils) +ELSE() cc_library(operator SRCS operator.cc DEPS op_info device_context tensor scope glog trainer_desc_proto data_feed_proto shape_inference data_transform lod_tensor profiler transfer_scope_cache op_kernel_type op_call_stack unused_var_check nan_inf_utils) +ENDIF() cc_test(operator_test SRCS operator_test.cc DEPS operator op_registry device_context) cc_test(operator_exception_test SRCS operator_exception_test.cc DEPS operator op_registry device_context) diff --git a/paddle/fluid/framework/operator.cc b/paddle/fluid/framework/operator.cc index 20cffaa9590196c5c54ae4f4448f04185ad0c276..0f7012940d76b0f2846a11710e082db22204bbb9 100644 --- a/paddle/fluid/framework/operator.cc +++ b/paddle/fluid/framework/operator.cc @@ -36,7 +36,8 @@ class LoDTensor; } // namespace framework } // namespace paddle #ifdef PADDLE_WITH_XPU -#include "paddle/fluid/platform/xpu_info.h" +#include "paddle/fluid/platform/xpu/xpu_info.h" +#include "paddle/fluid/platform/xpu/xpu_op_list.h" #endif #ifdef PADDLE_WITH_MKLDNN @@ -1254,7 +1255,8 @@ void OperatorWithKernel::ChooseKernel(const RuntimeContext& ctx, #endif #ifdef PADDLE_WITH_XPU if (kernel_iter == kernels.end() && - is_xpu_place(expected_kernel_key.place_)) { + is_xpu_place(expected_kernel_key.place_) && + !paddle::platform::is_xpu_support_op(type_, expected_kernel_key)) { VLOG(3) << "missing XPU kernel: " << type_ << ", expected_kernel_key:" << expected_kernel_key << ", fallbacking to CPU one!"; diff --git a/paddle/fluid/imperative/CMakeLists.txt b/paddle/fluid/imperative/CMakeLists.txt index c9dffe2d76a436e9888b91caf10e311e5c771572..93b18e7e553b5e1d80fdd70dc9c6df02e04d0adb 100644 --- a/paddle/fluid/imperative/CMakeLists.txt +++ b/paddle/fluid/imperative/CMakeLists.txt @@ -1,6 +1,10 @@ cc_library(imperative_flag SRCS flags.cc DEPS gflags) +IF(WITH_XPU) +cc_library(prepared_operator SRCS prepared_operator.cc DEPS xpu_op_list proto_desc operator device_context lod_tensor selected_rows var_type_traits op_kernel_type data_transform nan_inf_utils) +ELSE() cc_library(prepared_operator SRCS 
prepared_operator.cc DEPS proto_desc operator device_context lod_tensor selected_rows var_type_traits op_kernel_type data_transform nan_inf_utils) +ENDIF() cc_library(layer SRCS layer.cc DEPS prepared_operator math_function imperative_flag variable_helper op_registry) add_subdirectory(jit) cc_library(amp SRCS amp_auto_cast.cc DEPS layer ) diff --git a/paddle/fluid/imperative/prepared_operator.cc b/paddle/fluid/imperative/prepared_operator.cc index f1e1f79b99ef74df77eb69d5469c73a73b81731b..619d31c4f5b257d841ea3410d4f96067b34f320c 100644 --- a/paddle/fluid/imperative/prepared_operator.cc +++ b/paddle/fluid/imperative/prepared_operator.cc @@ -17,7 +17,9 @@ #include "paddle/fluid/framework/data_type_transform.h" #include "paddle/fluid/framework/details/nan_inf_utils.h" #include "paddle/fluid/imperative/infer_shape_context.h" - +#ifdef PADDLE_WITH_XPU +#include "paddle/fluid/platform/xpu/xpu_op_list.h" +#endif DECLARE_bool(check_nan_inf); namespace paddle { @@ -130,7 +132,8 @@ PreparedOp PrepareImpl(const NameVarMap& ins, auto kernel_iter = kernels.find(expected_kernel_key); #ifdef PADDLE_WITH_XPU if (kernel_iter == kernels.end() && - is_xpu_place(expected_kernel_key.place_)) { + is_xpu_place(expected_kernel_key.place_) && + !paddle::platform::is_xpu_support_op(op.Type(), expected_kernel_key)) { VLOG(3) << "missing XPU kernel: " << op.Type() << ", expected_kernel_key:" << expected_kernel_key << ", fallbacking to CPU one!"; diff --git a/paddle/fluid/memory/allocation/allocator_facade.cc b/paddle/fluid/memory/allocation/allocator_facade.cc index 8bc9775381be572a07501bff2a60a4942048362b..bfc4a1d598200ed296bdb17e29c48bed2bca1e16 100644 --- a/paddle/fluid/memory/allocation/allocator_facade.cc +++ b/paddle/fluid/memory/allocation/allocator_facade.cc @@ -33,7 +33,7 @@ #include "paddle/fluid/platform/gpu_info.h" #endif #ifdef PADDLE_WITH_XPU -#include "paddle/fluid/platform/xpu_info.h" +#include "paddle/fluid/platform/xpu/xpu_info.h" #endif #include "paddle/fluid/platform/npu_info.h" diff --git a/paddle/fluid/memory/allocation/naive_best_fit_allocator.cc b/paddle/fluid/memory/allocation/naive_best_fit_allocator.cc index bc72b4b20d061445932d877417f02917dfd613cf..6c2fb82cb7cbe1f8600e177e1843ddc134f0c443 100644 --- a/paddle/fluid/memory/allocation/naive_best_fit_allocator.cc +++ b/paddle/fluid/memory/allocation/naive_best_fit_allocator.cc @@ -31,7 +31,7 @@ #include "paddle/fluid/platform/cuda_device_guard.h" #endif #ifdef PADDLE_WITH_XPU -#include "paddle/fluid/platform/xpu_header.h" +#include "paddle/fluid/platform/xpu/xpu_header.h" #endif DEFINE_bool(init_allocated_mem, false, diff --git a/paddle/fluid/memory/memcpy.cc b/paddle/fluid/memory/memcpy.cc index f2f8c5d1fb5551b4d41cb8d283a2f6b65e493269..3b3be9776c4c54362ed55cd1208e7d17c9e10315 100644 --- a/paddle/fluid/memory/memcpy.cc +++ b/paddle/fluid/memory/memcpy.cc @@ -19,7 +19,7 @@ limitations under the License. */ #include "paddle/fluid/platform/profiler.h" #ifdef PADDLE_WITH_XPU -#include "paddle/fluid/platform/xpu_header.h" +#include "paddle/fluid/platform/xpu/xpu_header.h" #endif namespace paddle { diff --git a/paddle/fluid/operators/activation_op_xpu.cc b/paddle/fluid/operators/activation_op_xpu.cc index 2c7219ef6885b5d2b1aa62303feb33ca6289e254..257a91d7c15d7326bdcfe41cfd778f192a558aee 100644 --- a/paddle/fluid/operators/activation_op_xpu.cc +++ b/paddle/fluid/operators/activation_op_xpu.cc @@ -16,7 +16,7 @@ limitations under the License. 
*/ #include "paddle/fluid/operators/activation_op.h" #include -#include "paddle/fluid/platform/xpu_header.h" +#include "paddle/fluid/platform/xpu/xpu_header.h" namespace paddle { namespace operators { diff --git a/paddle/fluid/operators/concat_op_xpu.cc b/paddle/fluid/operators/concat_op_xpu.cc index be299babdba7a4f450bafdf5dce8e686f0493fce..dc9359ecf5c3d1647423a0828347f5e5bf0181d7 100644 --- a/paddle/fluid/operators/concat_op_xpu.cc +++ b/paddle/fluid/operators/concat_op_xpu.cc @@ -16,7 +16,7 @@ limitations under the License. */ #include #include #include -#include "paddle/fluid/platform/xpu_header.h" +#include "paddle/fluid/platform/xpu/xpu_header.h" namespace paddle { namespace operators { diff --git a/paddle/fluid/operators/deformable_conv_op_xpu.cc b/paddle/fluid/operators/deformable_conv_op_xpu.cc index 18bab83b0edb84589b44b8abb9c784e62fc16cbf..457616756215c2afb26f3238d44774f35b433136 100644 --- a/paddle/fluid/operators/deformable_conv_op_xpu.cc +++ b/paddle/fluid/operators/deformable_conv_op_xpu.cc @@ -16,7 +16,7 @@ limitations under the License. */ #include #include #include "paddle/fluid/framework/op_registry.h" -#include "paddle/fluid/platform/xpu_header.h" +#include "paddle/fluid/platform/xpu/xpu_header.h" namespace paddle { namespace operators { diff --git a/paddle/fluid/operators/dropout_op_xpu.cc b/paddle/fluid/operators/dropout_op_xpu.cc index 79d239074845ad29f4f40e64a7d1ecc9f19168bb..0b0b7095bd5d163ff79733c2f99a2b93f4d811a7 100644 --- a/paddle/fluid/operators/dropout_op_xpu.cc +++ b/paddle/fluid/operators/dropout_op_xpu.cc @@ -11,7 +11,7 @@ limitations under the License. */ #include "paddle/fluid/operators/dropout_op.h" #include #include -#include "paddle/fluid/platform/xpu_header.h" +#include "paddle/fluid/platform/xpu/xpu_header.h" namespace paddle { namespace operators { diff --git a/paddle/fluid/operators/metrics/accuracy_op_xpu.cc b/paddle/fluid/operators/metrics/accuracy_op_xpu.cc index d73e46df3491be04c70a17abb9cd33b4da1dfdbd..cb75616221bc4d998b2bb0da7458e4692010ff26 100644 --- a/paddle/fluid/operators/metrics/accuracy_op_xpu.cc +++ b/paddle/fluid/operators/metrics/accuracy_op_xpu.cc @@ -15,7 +15,7 @@ limitations under the License. 
*/ #ifdef PADDLE_WITH_XPU #include "paddle/fluid/operators/metrics/accuracy_op.h" -#include "paddle/fluid/platform/xpu_header.h" +#include "paddle/fluid/platform/xpu/xpu_header.h" namespace paddle { namespace operators { diff --git a/paddle/fluid/operators/reduce_ops/logsumexp_op_xpu.cc b/paddle/fluid/operators/reduce_ops/logsumexp_op_xpu.cc index 9cc8ac200b8eec1505177ce752ed8f103908f46a..4f98dde210f7a7b9d6fbcf6e697fbc940b45e258 100644 --- a/paddle/fluid/operators/reduce_ops/logsumexp_op_xpu.cc +++ b/paddle/fluid/operators/reduce_ops/logsumexp_op_xpu.cc @@ -16,7 +16,7 @@ #include "paddle/fluid/operators/reduce_ops/logsumexp_op.h" #include "paddle/fluid/platform/device_context.h" -#include "paddle/fluid/platform/xpu_header.h" +#include "paddle/fluid/platform/xpu/xpu_header.h" namespace paddle { namespace operators { diff --git a/paddle/fluid/operators/reduce_ops/reduce_max_op_xpu.cc b/paddle/fluid/operators/reduce_ops/reduce_max_op_xpu.cc index a4ed0c85f4f9d9b48414f8101919abba36378bbe..ae27a5d7df473417cd8a1c48d8903dd6e887335f 100644 --- a/paddle/fluid/operators/reduce_ops/reduce_max_op_xpu.cc +++ b/paddle/fluid/operators/reduce_ops/reduce_max_op_xpu.cc @@ -16,7 +16,7 @@ #include #include #include "paddle/fluid/operators/reduce_ops/reduce_op_xpu.h" -#include "paddle/fluid/platform/xpu_header.h" +#include "paddle/fluid/platform/xpu/xpu_header.h" namespace paddle { namespace operators { diff --git a/paddle/fluid/operators/reduce_ops/reduce_op_xpu.h b/paddle/fluid/operators/reduce_ops/reduce_op_xpu.h index fa9503ec3f0aeca3960f0e8d1c98b73ef5bdc6dc..5ae60713bc912bf477ec1f918183d2317b8c7d4a 100644 --- a/paddle/fluid/operators/reduce_ops/reduce_op_xpu.h +++ b/paddle/fluid/operators/reduce_ops/reduce_op_xpu.h @@ -21,7 +21,7 @@ #include #include #include "paddle/fluid/operators/reduce_ops/reduce_op.h" -#include "paddle/fluid/platform/xpu_header.h" +#include "paddle/fluid/platform/xpu/xpu_header.h" namespace paddle { namespace operators { diff --git a/paddle/fluid/operators/reduce_ops/reduce_sum_op_xpu.cc b/paddle/fluid/operators/reduce_ops/reduce_sum_op_xpu.cc index bf55221bd3ffdda95c964094abbe4f714fa79ca0..f759b104d01d1824713b05687dbeadf595cd4abe 100644 --- a/paddle/fluid/operators/reduce_ops/reduce_sum_op_xpu.cc +++ b/paddle/fluid/operators/reduce_ops/reduce_sum_op_xpu.cc @@ -16,7 +16,7 @@ #include #include #include "paddle/fluid/operators/reduce_ops/reduce_op_xpu.h" -#include "paddle/fluid/platform/xpu_header.h" +#include "paddle/fluid/platform/xpu/xpu_header.h" namespace paddle { namespace operators { diff --git a/paddle/fluid/operators/rnn_op_xpu.cc b/paddle/fluid/operators/rnn_op_xpu.cc index fb82d18e62f3bfd8b100c7110b50c10ebe74ba30..9d637e1cee11763dd0af61d64102537bf0aaa371 100644 --- a/paddle/fluid/operators/rnn_op_xpu.cc +++ b/paddle/fluid/operators/rnn_op_xpu.cc @@ -14,7 +14,7 @@ limitations under the License. */ #include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/operators/utils.h" #include "paddle/fluid/platform/device_context.h" -#include "paddle/fluid/platform/xpu_header.h" +#include "paddle/fluid/platform/xpu/xpu_header.h" namespace paddle { namespace operators { diff --git a/paddle/fluid/operators/scale_op_xpu.cc b/paddle/fluid/operators/scale_op_xpu.cc index fdb90797b69db5b4ca325eeeddf4ccc63353ce64..e0dfad91570ad689755e1a3e4a0a59fbedbede37 100644 --- a/paddle/fluid/operators/scale_op_xpu.cc +++ b/paddle/fluid/operators/scale_op_xpu.cc @@ -16,7 +16,7 @@ limitations under the License. 
*/ #include "paddle/fluid/operators/scale_op.h" #include -#include "paddle/fluid/platform/xpu_header.h" +#include "paddle/fluid/platform/xpu/xpu_header.h" namespace paddle { namespace operators { diff --git a/paddle/fluid/operators/sign_op_xpu.cc b/paddle/fluid/operators/sign_op_xpu.cc index 86fe826c659ef94a2473048f580d7cb698075522..a164a9b056677ad4c115941386ac7079582ac655 100644 --- a/paddle/fluid/operators/sign_op_xpu.cc +++ b/paddle/fluid/operators/sign_op_xpu.cc @@ -15,7 +15,7 @@ limitations under the License. */ #ifdef PADDLE_WITH_XPU #include "paddle/fluid/operators/sign_op.h" -#include "paddle/fluid/platform/xpu_header.h" +#include "paddle/fluid/platform/xpu/xpu_header.h" namespace paddle { namespace operators { diff --git a/paddle/fluid/operators/sum_op_xpu.cc b/paddle/fluid/operators/sum_op_xpu.cc index 264cc4e2cf794107bdcd717e963ea7d48c740020..d16bb5562ed3a7b2dfcca37646a0849a964dee1f 100644 --- a/paddle/fluid/operators/sum_op_xpu.cc +++ b/paddle/fluid/operators/sum_op_xpu.cc @@ -13,7 +13,7 @@ limitations under the License. */ #include "paddle/fluid/operators/sum_op.h" #include -#include "paddle/fluid/platform/xpu_header.h" +#include "paddle/fluid/platform/xpu/xpu_header.h" namespace paddle { namespace operators { diff --git a/paddle/fluid/operators/transpose_op_xpu.cc b/paddle/fluid/operators/transpose_op_xpu.cc index 2748c07f9e6d7ce9628eb9eec8447ff87e52044a..360c2125ed1f6f110b58b8febfeb775e5516b0eb 100644 --- a/paddle/fluid/operators/transpose_op_xpu.cc +++ b/paddle/fluid/operators/transpose_op_xpu.cc @@ -17,7 +17,7 @@ limitations under the License. */ #include #include #include -#include "paddle/fluid/platform/xpu_header.h" +#include "paddle/fluid/platform/xpu/xpu_header.h" namespace paddle { namespace operators { diff --git a/paddle/fluid/platform/CMakeLists.txt b/paddle/fluid/platform/CMakeLists.txt index 36a956762174e18ed7eef1d6e1158b82bf3ceeae..efd25bc89294097c7d60c802395d4d4d05dcab7a 100644 --- a/paddle/fluid/platform/CMakeLists.txt +++ b/paddle/fluid/platform/CMakeLists.txt @@ -69,7 +69,8 @@ cc_library(place SRCS place.cc DEPS enforce boost) cc_test(place_test SRCS place_test.cc DEPS place glog gflags) if(WITH_XPU) -cc_library(xpu_info SRCS xpu_info.cc DEPS gflags glog enforce xpulib) +cc_library(xpu_info SRCS xpu/xpu_info.cc DEPS gflags glog enforce xpulib) +cc_library(xpu_op_list SRCS xpu/xpu_op_list.cc DEPS gflags glog enforce xpulib) endif() if(WITH_ASCEND) diff --git a/paddle/fluid/platform/device_context.cc b/paddle/fluid/platform/device_context.cc index 82f14c612d1fa2bd303822fc1f787ea8863dcc30..c7162f58de284ccf20df269b26f6810ffedaf7e7 100644 --- a/paddle/fluid/platform/device_context.cc +++ b/paddle/fluid/platform/device_context.cc @@ -196,7 +196,10 @@ Eigen::DefaultDevice* CPUDeviceContext::eigen_device() const { Place CPUDeviceContext::GetPlace() const { return place_; } #ifdef PADDLE_WITH_XPU -XPUDeviceContext::XPUDeviceContext() { context_ = xpu::create_context(); } +XPUDeviceContext::XPUDeviceContext() { + context_ = xpu::create_context(); + xpu_version_ = get_xpu_version(place_.device); +} XPUDeviceContext::~XPUDeviceContext() {} diff --git a/paddle/fluid/platform/device_context.h b/paddle/fluid/platform/device_context.h index 68589f546dc78773df715161c488b3eb0d50fc5d..abac12ff266486a74a0ee63188cbb20ed58f9bd8 100644 --- a/paddle/fluid/platform/device_context.h +++ b/paddle/fluid/platform/device_context.h @@ -68,8 +68,8 @@ struct GpuDevice; } // namespace Eigen #ifdef PADDLE_WITH_XPU -#include "paddle/fluid/platform/xpu_header.h" -#include 
"paddle/fluid/platform/xpu_info.h" +#include "paddle/fluid/platform/xpu/xpu_header.h" +#include "paddle/fluid/platform/xpu/xpu_info.h" #endif #ifdef PADDLE_WITH_ASCEND_CL @@ -137,12 +137,14 @@ struct DefaultDeviceContextType { }; #ifdef PADDLE_WITH_XPU +namespace xpu = baidu::xpu::api; class XPUDeviceContext : public DeviceContext { public: XPUDeviceContext(); explicit XPUDeviceContext(XPUPlace place); virtual ~XPUDeviceContext(); Eigen::DefaultDevice* eigen_device() const { return nullptr; } + XPUVersion xpu_version() const { return xpu_version_; } Place GetPlace() const override; xpu::Context* x_context() const; @@ -159,6 +161,7 @@ class XPUDeviceContext : public DeviceContext { private: XPUPlace place_; + XPUVersion xpu_version_; xpu::Context* context_; #ifdef PADDLE_WITH_XPU_BKCL BKCLContext_t bkcl_context_; diff --git a/paddle/fluid/platform/init.cc b/paddle/fluid/platform/init.cc index ac6988d350f4f38c6e8da2a655c29069b8d0eda6..2e0ba9d241c72b2ec0e99f54b2241ba11fcc8590 100644 --- a/paddle/fluid/platform/init.cc +++ b/paddle/fluid/platform/init.cc @@ -29,8 +29,8 @@ limitations under the License. */ #include "paddle/fluid/platform/place.h" #ifdef PADDLE_WITH_XPU -#include "paddle/fluid/platform/xpu_header.h" -#include "paddle/fluid/platform/xpu_info.h" +#include "paddle/fluid/platform/xpu/xpu_header.h" +#include "paddle/fluid/platform/xpu/xpu_info.h" #endif #ifdef WITH_WIN_DUMP_DBG diff --git a/paddle/fluid/platform/xpu/xpu1_op_list.h b/paddle/fluid/platform/xpu/xpu1_op_list.h new file mode 100644 index 0000000000000000000000000000000000000000..131525718cac759f9310831e47f29caff9945f5c --- /dev/null +++ b/paddle/fluid/platform/xpu/xpu1_op_list.h @@ -0,0 +1,230 @@ +/* Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + http://www.apache.org/licenses/LICENSE-2.0 +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
*/ +#pragma once + +#ifdef PADDLE_WITH_XPU +#include <string> +#include <unordered_map> +#include <unordered_set> + +#include "paddle/fluid/framework/op_kernel_type.h" + +namespace paddle { +namespace platform { + +using vartype = paddle::framework::proto::VarType; +using pOpKernelType = paddle::framework::OpKernelType; +using XPUKernelSet = + std::unordered_set<pOpKernelType, paddle::framework::OpKernelType::Hash>; +using XPUOpMap = std::unordered_map<std::string, XPUKernelSet>; + +XPUOpMap& get_kl1_ops() { + // Ops supported by KL1, indexed by op_name, data_type and place + static XPUOpMap s_xpu1_kernels{ + {"relu", XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})}, + {"relu_grad", XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})}, + {"tanh", XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})}, + {"tanh_grad", XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})}, + {"sigmoid", XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})}, + {"sigmoid_grad", + XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})}, + {"gelu", XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})}, + {"gelu_grad", XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})}, + {"sqrt", XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})}, + {"sqrt_grad", XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})}, + {"square", XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})}, + {"square_grad", XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})}, + {"hard_swish", XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})}, + {"hard_swish_grad", + XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})}, + {"leaky_relu", XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})}, + {"leaky_relu_grad", + XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})}, + {"log", XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})}, + {"pow", XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})}, + {"abs", XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})}, + {"affine_channel", + XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})}, + {"affine_channel_grad", + XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})}, + {"assign", XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})}, + {"batch_norm", XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})}, + {"batch_norm_grad", + XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})}, + {"cast", XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})}, + {"clip_by_norm", + XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})}, + {"coalesce_tensor", + XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})}, + {"c_reduce_sum", + XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})}, + {"c_allreduce_sum", + XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})}, + {"broadcast", XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})}, + {"concat", XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})}, + {"concat_grad", XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})}, + {"logical_or", XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})}, + {"logical_and", XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})}, + {"logical_not", XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})}, + {"depthwise_conv2d", + XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})}, + {"depthwise_conv2d_grad", + XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})}, + {"conv2d", XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})}, + {"conv2d_grad", XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})}, + {"deformable_conv", + XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})}, +
{"deformable_conv_grad", + XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})}, + {"dropout", XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})}, + {"dropout_grad", + XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})}, + {"elementwise_sub", + XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})}, + {"elementwise_sub_grad", + XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})}, + {"elementwise_add", + XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})}, + {"elementwise_add_grad", + XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})}, + {"elementwise_div", + XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})}, + {"elementwise_div_grad", + XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})}, + {"elementwise_pow", + XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})}, + {"elementwise_floordiv", + XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})}, + {"elementwise_mul", + XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})}, + {"elementwise_mul_grad", + XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})}, + {"elementwise_max", + XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})}, + {"elementwise_max_grad", + XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})}, + {"elementwise_min", + XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})}, + {"elementwise_min_grad", + XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})}, + {"fill_constant", + XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})}, + {"gather", XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})}, + {"gather_grad", XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})}, + {"gaussian_random", + XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})}, + {"bilinear_interp", + XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})}, + {"bilinear_interp_grad", + XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})}, + {"nearest_interp", + XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})}, + {"nearest_interp_grad", + XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})}, + {"bilinear_interp_v2", + XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})}, + {"bilinear_interp_v2_grad", + XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})}, + {"nearest_interp_v2", + XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})}, + {"nearest_interp_v2_grad", + XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})}, + {"layer_norm", XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})}, + {"layer_norm_grad", + XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})}, + {"load", XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})}, + {"log_loss", XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})}, + {"log_loss_grad", + XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})}, + {"lookup_table_v2", + XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})}, + {"lookup_table_v2_grad", + XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})}, + {"matmul", XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})}, + {"matmul_grad", XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})}, + {"matmul_v2", XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})}, + {"matmul_v2_grad", + XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})}, + {"mean", XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})}, + {"mean_grad", XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})}, + {"accuracy", XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})}, + {"mul", 
XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})}, + {"mul_grad", XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})}, + {"one_hot", XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})}, + {"one_hot_v2", XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})}, + {"sgd", XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})}, + {"adam", XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})}, + {"rmsprop", XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})}, + {"lamb", XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})}, + {"pool2d", XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})}, + {"pool2d_grad", XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})}, + {"range", XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})}, + {"reduce_sum", XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})}, + {"reduce_sum_grad", + XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})}, + {"reduce_mean", XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})}, + {"logsumexp", XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})}, + {"reduce_max", XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})}, + {"reduce_max_grad", + XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})}, + {"reshape2", XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})}, + {"reshape2_grad", + XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})}, + {"rnn", XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})}, + {"rnn_grad", XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})}, + {"roi_align", XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})}, + {"roi_align_grad", + XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})}, + {"scale", XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})}, + {"shape", XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})}, + {"sign", XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})}, + {"slice", XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})}, + {"slice_grad", XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})}, + {"softmax", XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})}, + {"softmax_grad", + XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})}, + {"softmax_with_cross_entropy", + XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})}, + {"squeeze", XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})}, + {"squeeze_grad", + XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})}, + {"squeeze2", XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})}, + {"squeeze2_grad", + XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})}, + {"stack", XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})}, + {"sum", XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})}, + {"top_k", XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})}, + {"transpose", XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})}, + {"transpose_grad", + XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})}, + {"transpose2", XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})}, + {"transpose2_grad", + XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})}, + {"truncated_gaussian_random", + XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})}, + {"uniform_random", + XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})}, + {"unsqueeze", XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})}, + {"unsqueeze_grad", + XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})}, + {"unsqueeze2", XPUKernelSet({pOpKernelType(vartype::FP32, 
XPUPlace())})}, + {"unsqueeze2_grad", + XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})}, + {"momentum", XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})} + // AddMore + }; + + return s_xpu1_kernels; +} + +} // namespace platform +} // namespace paddle +#endif diff --git a/paddle/fluid/platform/xpu/xpu2_op_list.h b/paddle/fluid/platform/xpu/xpu2_op_list.h new file mode 100644 index 0000000000000000000000000000000000000000..fc80e5ee962f99f65cc7d722ff50dcbc1325790a --- /dev/null +++ b/paddle/fluid/platform/xpu/xpu2_op_list.h @@ -0,0 +1,42 @@ +/* Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + http://www.apache.org/licenses/LICENSE-2.0 +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ +#pragma once + +#ifdef PADDLE_WITH_XPU +#include <string> +#include <unordered_map> +#include <unordered_set> + +#include "paddle/fluid/framework/op_kernel_type.h" + +namespace paddle { +namespace platform { + +using vartype = paddle::framework::proto::VarType; +using pOpKernelType = paddle::framework::OpKernelType; +using XPUKernelSet = + std::unordered_set<pOpKernelType, paddle::framework::OpKernelType::Hash>; +using XPUOpMap = std::unordered_map<std::string, XPUKernelSet>; + +XPUOpMap& get_kl2_ops() { + // Ops supported by KL2, indexed by op_name, data_type and place + static XPUOpMap s_xpu2_kernels{ + {"mul", XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace()), + pOpKernelType(vartype::FP16, XPUPlace())})}, + // AddMore + }; + + return s_xpu2_kernels; +} + +} // namespace platform +} // namespace paddle +#endif diff --git a/paddle/fluid/platform/xpu_header.h b/paddle/fluid/platform/xpu/xpu_header.h similarity index 95% rename from paddle/fluid/platform/xpu_header.h rename to paddle/fluid/platform/xpu/xpu_header.h index 99f4224b5d408a6450d801ff643f658b74333387..caee41ae299c75a5019baf3411e5aef33fe46753 100644 --- a/paddle/fluid/platform/xpu_header.h +++ b/paddle/fluid/platform/xpu/xpu_header.h @@ -21,12 +21,9 @@ #include "paddle/fluid/platform/errors.h" #include "paddle/fluid/platform/float16.h" -#include "xpu/api.h" -#include "xpu/refactor/fusion.h" -#include "xpu/refactor/math.h" -#include "xpu/refactor/nn.h" #include "xpu/runtime.h" #include "xpu/runtime_ex.h" +#include "xpu/xdnn.h" namespace xpu = baidu::xpu::api; diff --git a/paddle/fluid/platform/xpu_info.cc b/paddle/fluid/platform/xpu/xpu_info.cc similarity index 86% rename from paddle/fluid/platform/xpu_info.cc rename to paddle/fluid/platform/xpu/xpu_info.cc index f88248fda7e65e1b96448c0576880a18a9d8a4a9..6b8ab16b47d68c3d1cd8fb961aaf3bc6caa5b9b8 100644 --- a/paddle/fluid/platform/xpu_info.cc +++ b/paddle/fluid/platform/xpu/xpu_info.cc @@ -8,14 +8,14 @@ distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License.
*/ -#include "paddle/fluid/platform/xpu_info.h" +#include "paddle/fluid/platform/xpu/xpu_info.h" #include #include #include #include "gflags/gflags.h" #include "paddle/fluid/platform/enforce.h" -#include "paddle/fluid/platform/xpu_header.h" +#include "paddle/fluid/platform/xpu/xpu_header.h" #include "paddle/fluid/string/split.h" DEFINE_string(selected_xpus, "", @@ -103,5 +103,21 @@ void SetXPUDeviceId(int id) { ret)); } +XPUVersion get_xpu_version(int dev_id) { + uint64_t v = 0; + int ret = xpu_device_get_attr(&v, XPUATTR_MODEL, dev_id); + PADDLE_ENFORCE_EQ(ret, XPU_SUCCESS, + platform::errors::External( + "xpu_device_get_attr return wrong value[%d]", ret)); + + if (v == K100 || v == K200) { + VLOG(1) << "KUNLUN device " << dev_id << " is XPU1\n"; + return XPU1; + } else { + VLOG(1) << "KUNLUN device " << dev_id << " is XPU2\n"; + return XPU2; + } +} + } // namespace platform } // namespace paddle diff --git a/paddle/fluid/platform/xpu_info.h b/paddle/fluid/platform/xpu/xpu_info.h similarity index 95% rename from paddle/fluid/platform/xpu_info.h rename to paddle/fluid/platform/xpu/xpu_info.h index 2bf7b0b5cb647aa1994f8de6d96dbb14be0edf36..3cb79d51eb7bb6857f6f3c6ccb6203b4231c8307 100644 --- a/paddle/fluid/platform/xpu_info.h +++ b/paddle/fluid/platform/xpu/xpu_info.h @@ -51,6 +51,9 @@ class XPUDeviceGuard { int prev_id_{-1}; }; +enum XPUVersion { XPU1, XPU2 }; +XPUVersion get_xpu_version(int dev_id); + } // namespace platform } // namespace paddle #endif diff --git a/paddle/fluid/platform/xpu/xpu_op_list.cc b/paddle/fluid/platform/xpu/xpu_op_list.cc new file mode 100644 index 0000000000000000000000000000000000000000..b3349407942bd17e2e4597c3a60aec833e14f839 --- /dev/null +++ b/paddle/fluid/platform/xpu/xpu_op_list.cc @@ -0,0 +1,39 @@ +/* Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + http://www.apache.org/licenses/LICENSE-2.0 +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ +#ifdef PADDLE_WITH_XPU +#include + +#include "paddle/fluid/platform/xpu/xpu1_op_list.h" +#include "paddle/fluid/platform/xpu/xpu2_op_list.h" +#include "paddle/fluid/platform/xpu/xpu_info.h" +#include "paddle/fluid/platform/xpu/xpu_op_list.h" + +namespace paddle { +namespace platform { + +bool is_xpu_support_op(std::string op_name, const pOpKernelType& type) { + auto& ops = get_kl1_ops(); + auto v = + get_xpu_version(BOOST_GET_CONST(platform::XPUPlace, type.place_).device); + if (v == XPU2) { + ops = get_kl2_ops(); + } + + if (ops.find(op_name) != ops.end() && + ops[op_name].find(type) != ops[op_name].end()) { + return true; + } + return false; +} + +} // namespace platform +} // namespace paddle +#endif diff --git a/paddle/fluid/platform/xpu/xpu_op_list.h b/paddle/fluid/platform/xpu/xpu_op_list.h new file mode 100644 index 0000000000000000000000000000000000000000..487bc8ac48b66feefc6016632ffd5bfc0f09d56a --- /dev/null +++ b/paddle/fluid/platform/xpu/xpu_op_list.h @@ -0,0 +1,27 @@ +/* Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at + http://www.apache.org/licenses/LICENSE-2.0 +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ +#pragma once + +#ifdef PADDLE_WITH_XPU +#include <string> + +#include "paddle/fluid/framework/op_kernel_type.h" + +namespace paddle { +namespace platform { + +using pOpKernelType = paddle::framework::OpKernelType; + +bool is_xpu_support_op(std::string op_name, const pOpKernelType& type); + +} // namespace platform +} // namespace paddle +#endif diff --git a/paddle/fluid/pybind/pybind.cc b/paddle/fluid/pybind/pybind.cc index 3bbe6d6ef4b1f28bac142e4c9c9088de1f7f1810..b58e9050402bb7d584e0b5e9215a3af54718aa3b 100644 --- a/paddle/fluid/pybind/pybind.cc +++ b/paddle/fluid/pybind/pybind.cc @@ -117,7 +117,7 @@ limitations under the License. */ #endif #ifdef PADDLE_WITH_XPU -#include "paddle/fluid/platform/xpu_info.h" +#include "paddle/fluid/platform/xpu/xpu_info.h" #endif #ifdef PADDLE_WITH_CRYPTO
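
The new xpu_op_list module above gates the CPU fallback in OperatorWithKernel::ChooseKernel and the imperative PrepareImpl: an op stays on the XPU path only if the whitelist for the detected KUNLUN generation (get_kl1_ops() / get_kl2_ops(), selected via get_xpu_version()) contains the op name together with the requested kernel type. The sketch below is a minimal, standalone illustration of that lookup pattern, not the real paddle::platform API: every name in it (XpuVersion, DataType, KernelKey, KernelKeyHash, KernelSet, OpMap, Kl1Ops, Kl2Ops, Supports, and the sample table entries) is invented for the example, and it compiles on its own with any C++14 compiler.

// Standalone sketch of the version-aware XPU op whitelist lookup.
// Stand-in types only; the real code lives in paddle/fluid/platform/xpu/.
#include <cstddef>
#include <functional>
#include <iostream>
#include <string>
#include <unordered_map>
#include <unordered_set>

enum class XpuVersion { kXpu1, kXpu2 };
enum class DataType { kFP32, kFP16 };

// Analogue of pOpKernelType, reduced to the one field needed for the lookup.
struct KernelKey {
  DataType dtype;
  bool operator==(const KernelKey& other) const { return dtype == other.dtype; }
};

struct KernelKeyHash {
  std::size_t operator()(const KernelKey& k) const {
    return std::hash<int>()(static_cast<int>(k.dtype));
  }
};

using KernelSet = std::unordered_set<KernelKey, KernelKeyHash>;
using OpMap = std::unordered_map<std::string, KernelSet>;

// Analogue of get_kl1_ops(): KL1 entries are FP32-only in the sample.
const OpMap& Kl1Ops() {
  static const OpMap ops{
      {"mul", {{DataType::kFP32}}},
      {"relu", {{DataType::kFP32}}},
  };
  return ops;
}

// Analogue of get_kl2_ops(): KL2 additionally lists FP16 for "mul".
const OpMap& Kl2Ops() {
  static const OpMap ops{
      {"mul", {{DataType::kFP32}, {DataType::kFP16}}},
  };
  return ops;
}

// Analogue of is_xpu_support_op(): choose the table for the device
// generation, then require both the op name and the exact kernel key.
bool Supports(XpuVersion version, const std::string& op, const KernelKey& key) {
  const OpMap& ops = (version == XpuVersion::kXpu2) ? Kl2Ops() : Kl1Ops();
  auto it = ops.find(op);
  return it != ops.end() && it->second.count(key) > 0;
}

int main() {
  // Mirrors the fallback test in ChooseKernel/PrepareImpl: fall back to the
  // CPU kernel only when the XPU kernel is missing AND the op is not listed.
  const bool xpu_kernel_found = false;  // pretend the kernel lookup failed
  const bool listed = Supports(XpuVersion::kXpu2, "mul", {DataType::kFP16});
  const bool fall_back_to_cpu = !xpu_kernel_found && !listed;
  std::cout << "mul/FP16 listed for XPU2: " << listed
            << ", fall back to CPU: " << fall_back_to_cpu << "\n";
  return 0;
}

In the sketch, the version choice binds the selected table through a const reference, so neither map is copied when picking between the KL1 and KL2 tables; the sample query then reproduces the condition added in the diff, where the CPU kernel is used only when the XPU kernel is missing and the op/type pair is absent from the whitelist.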