Unverified commit 2d0f3d9b, authored by QingshuChen, committed by GitHub

support Kunlun2 (#34459)

* support Kunlun2

* support KL2

* support KL2
Parent 2aedf169
@@ -35,7 +35,7 @@ ELSE ()
 ENDIF()
 SET(XPU_BASE_URL_WITHOUT_DATE "https://baidu-kunlun-product.cdn.bcebos.com/KL-SDK/klsdk-dev")
-SET(XPU_BASE_URL "${XPU_BASE_URL_WITHOUT_DATE}/20210701")
+SET(XPU_BASE_URL "${XPU_BASE_URL_WITHOUT_DATE}/20210729")
 SET(XPU_XRE_URL "${XPU_BASE_URL}/${XPU_XRE_DIR_NAME}.tar.gz" CACHE STRING "" FORCE)
 SET(XPU_XDNN_URL "${XPU_BASE_URL}/${XPU_XDNN_DIR_NAME}.tar.gz" CACHE STRING "" FORCE)
 SET(XPU_XCCL_URL "${XPU_BASE_URL_WITHOUT_DATE}/20210623/${XPU_XCCL_DIR_NAME}.tar.gz" CACHE STRING "" FORCE)
...
@@ -188,8 +188,13 @@ cc_library(op_kernel_type SRCS op_kernel_type.cc DEPS device_context place)
 cc_library(unused_var_check SRCS unused_var_check.cc DEPS glog no_need_buffer_vars_inference)
+IF(WITH_XPU)
+cc_library(operator SRCS operator.cc DEPS xpu_op_list op_info device_context tensor scope glog trainer_desc_proto data_feed_proto
+    shape_inference data_transform lod_tensor profiler transfer_scope_cache op_kernel_type op_call_stack unused_var_check nan_inf_utils)
+ELSE()
 cc_library(operator SRCS operator.cc DEPS op_info device_context tensor scope glog trainer_desc_proto data_feed_proto
     shape_inference data_transform lod_tensor profiler transfer_scope_cache op_kernel_type op_call_stack unused_var_check nan_inf_utils)
+ENDIF()
 cc_test(operator_test SRCS operator_test.cc DEPS operator op_registry device_context)
 cc_test(operator_exception_test SRCS operator_exception_test.cc DEPS operator op_registry device_context)
...
@@ -36,7 +36,8 @@ class LoDTensor;
 }  // namespace framework
 }  // namespace paddle
 #ifdef PADDLE_WITH_XPU
-#include "paddle/fluid/platform/xpu_info.h"
+#include "paddle/fluid/platform/xpu/xpu_info.h"
+#include "paddle/fluid/platform/xpu/xpu_op_list.h"
 #endif
 #ifdef PADDLE_WITH_MKLDNN
@@ -1254,7 +1255,8 @@ void OperatorWithKernel::ChooseKernel(const RuntimeContext& ctx,
 #endif
 #ifdef PADDLE_WITH_XPU
   if (kernel_iter == kernels.end() &&
-      is_xpu_place(expected_kernel_key.place_)) {
+      is_xpu_place(expected_kernel_key.place_) &&
+      !paddle::platform::is_xpu_support_op(type_, expected_kernel_key)) {
     VLOG(3) << "missing XPU kernel: " << type_
             << ", expected_kernel_key:" << expected_kernel_key
             << ", fallbacking to CPU one!";
...
 cc_library(imperative_flag SRCS flags.cc DEPS gflags)
+IF(WITH_XPU)
+cc_library(prepared_operator SRCS prepared_operator.cc DEPS xpu_op_list proto_desc operator device_context lod_tensor selected_rows var_type_traits op_kernel_type data_transform nan_inf_utils)
+ELSE()
 cc_library(prepared_operator SRCS prepared_operator.cc DEPS proto_desc operator device_context lod_tensor selected_rows var_type_traits op_kernel_type data_transform nan_inf_utils)
+ENDIF()
 cc_library(layer SRCS layer.cc DEPS prepared_operator math_function imperative_flag variable_helper op_registry)
 add_subdirectory(jit)
 cc_library(amp SRCS amp_auto_cast.cc DEPS layer )
...
@@ -17,7 +17,9 @@
 #include "paddle/fluid/framework/data_type_transform.h"
 #include "paddle/fluid/framework/details/nan_inf_utils.h"
 #include "paddle/fluid/imperative/infer_shape_context.h"
+#ifdef PADDLE_WITH_XPU
+#include "paddle/fluid/platform/xpu/xpu_op_list.h"
+#endif
 DECLARE_bool(check_nan_inf);
 namespace paddle {
@@ -130,7 +132,8 @@ PreparedOp PrepareImpl(const NameVarMap<VarType>& ins,
   auto kernel_iter = kernels.find(expected_kernel_key);
 #ifdef PADDLE_WITH_XPU
   if (kernel_iter == kernels.end() &&
-      is_xpu_place(expected_kernel_key.place_)) {
+      is_xpu_place(expected_kernel_key.place_) &&
+      !paddle::platform::is_xpu_support_op(op.Type(), expected_kernel_key)) {
     VLOG(3) << "missing XPU kernel: " << op.Type()
             << ", expected_kernel_key:" << expected_kernel_key
             << ", fallbacking to CPU one!";
...
@@ -33,7 +33,7 @@
 #include "paddle/fluid/platform/gpu_info.h"
 #endif
 #ifdef PADDLE_WITH_XPU
-#include "paddle/fluid/platform/xpu_info.h"
+#include "paddle/fluid/platform/xpu/xpu_info.h"
 #endif
 #include "paddle/fluid/platform/npu_info.h"
...
@@ -31,7 +31,7 @@
 #include "paddle/fluid/platform/cuda_device_guard.h"
 #endif
 #ifdef PADDLE_WITH_XPU
-#include "paddle/fluid/platform/xpu_header.h"
+#include "paddle/fluid/platform/xpu/xpu_header.h"
 #endif
 DEFINE_bool(init_allocated_mem, false,
...
@@ -19,7 +19,7 @@ limitations under the License. */
 #include "paddle/fluid/platform/profiler.h"
 #ifdef PADDLE_WITH_XPU
-#include "paddle/fluid/platform/xpu_header.h"
+#include "paddle/fluid/platform/xpu/xpu_header.h"
 #endif
 namespace paddle {
...
@@ -16,7 +16,7 @@ limitations under the License. */
 #include "paddle/fluid/operators/activation_op.h"
 #include <string>
-#include "paddle/fluid/platform/xpu_header.h"
+#include "paddle/fluid/platform/xpu/xpu_header.h"
 namespace paddle {
 namespace operators {
...
@@ -16,7 +16,7 @@ limitations under the License. */
 #include <memory>
 #include <string>
 #include <vector>
-#include "paddle/fluid/platform/xpu_header.h"
+#include "paddle/fluid/platform/xpu/xpu_header.h"
 namespace paddle {
 namespace operators {
...
@@ -16,7 +16,7 @@ limitations under the License. */
 #include <algorithm>
 #include <vector>
 #include "paddle/fluid/framework/op_registry.h"
-#include "paddle/fluid/platform/xpu_header.h"
+#include "paddle/fluid/platform/xpu/xpu_header.h"
 namespace paddle {
 namespace operators {
...
@@ -11,7 +11,7 @@ limitations under the License. */
 #include "paddle/fluid/operators/dropout_op.h"
 #include <memory>
 #include <string>
-#include "paddle/fluid/platform/xpu_header.h"
+#include "paddle/fluid/platform/xpu/xpu_header.h"
 namespace paddle {
 namespace operators {
...
@@ -15,7 +15,7 @@ limitations under the License. */
 #ifdef PADDLE_WITH_XPU
 #include "paddle/fluid/operators/metrics/accuracy_op.h"
-#include "paddle/fluid/platform/xpu_header.h"
+#include "paddle/fluid/platform/xpu/xpu_header.h"
 namespace paddle {
 namespace operators {
...
@@ -16,7 +16,7 @@
 #include "paddle/fluid/operators/reduce_ops/logsumexp_op.h"
 #include "paddle/fluid/platform/device_context.h"
-#include "paddle/fluid/platform/xpu_header.h"
+#include "paddle/fluid/platform/xpu/xpu_header.h"
 namespace paddle {
 namespace operators {
...
@@ -16,7 +16,7 @@
 #include <memory>
 #include <string>
 #include "paddle/fluid/operators/reduce_ops/reduce_op_xpu.h"
-#include "paddle/fluid/platform/xpu_header.h"
+#include "paddle/fluid/platform/xpu/xpu_header.h"
 namespace paddle {
 namespace operators {
...
@@ -21,7 +21,7 @@
 #include <string>
 #include <vector>
 #include "paddle/fluid/operators/reduce_ops/reduce_op.h"
-#include "paddle/fluid/platform/xpu_header.h"
+#include "paddle/fluid/platform/xpu/xpu_header.h"
 namespace paddle {
 namespace operators {
...
@@ -16,7 +16,7 @@
 #include <memory>
 #include <string>
 #include "paddle/fluid/operators/reduce_ops/reduce_op_xpu.h"
-#include "paddle/fluid/platform/xpu_header.h"
+#include "paddle/fluid/platform/xpu/xpu_header.h"
 namespace paddle {
 namespace operators {
...
@@ -14,7 +14,7 @@ limitations under the License. */
 #include "paddle/fluid/framework/op_registry.h"
 #include "paddle/fluid/operators/utils.h"
 #include "paddle/fluid/platform/device_context.h"
-#include "paddle/fluid/platform/xpu_header.h"
+#include "paddle/fluid/platform/xpu/xpu_header.h"
 namespace paddle {
 namespace operators {
...
@@ -16,7 +16,7 @@ limitations under the License. */
 #include "paddle/fluid/operators/scale_op.h"
 #include <string>
-#include "paddle/fluid/platform/xpu_header.h"
+#include "paddle/fluid/platform/xpu/xpu_header.h"
 namespace paddle {
 namespace operators {
...
@@ -15,7 +15,7 @@ limitations under the License. */
 #ifdef PADDLE_WITH_XPU
 #include "paddle/fluid/operators/sign_op.h"
-#include "paddle/fluid/platform/xpu_header.h"
+#include "paddle/fluid/platform/xpu/xpu_header.h"
 namespace paddle {
 namespace operators {
...
@@ -13,7 +13,7 @@ limitations under the License. */
 #include "paddle/fluid/operators/sum_op.h"
 #include <vector>
-#include "paddle/fluid/platform/xpu_header.h"
+#include "paddle/fluid/platform/xpu/xpu_header.h"
 namespace paddle {
 namespace operators {
...
@@ -17,7 +17,7 @@ limitations under the License. */
 #include <memory>
 #include <string>
 #include <vector>
-#include "paddle/fluid/platform/xpu_header.h"
+#include "paddle/fluid/platform/xpu/xpu_header.h"
 namespace paddle {
 namespace operators {
...
@@ -69,7 +69,8 @@ cc_library(place SRCS place.cc DEPS enforce boost)
 cc_test(place_test SRCS place_test.cc DEPS place glog gflags)
 if(WITH_XPU)
-    cc_library(xpu_info SRCS xpu_info.cc DEPS gflags glog enforce xpulib)
+    cc_library(xpu_info SRCS xpu/xpu_info.cc DEPS gflags glog enforce xpulib)
+    cc_library(xpu_op_list SRCS xpu/xpu_op_list.cc DEPS gflags glog enforce xpulib)
 endif()
 if(WITH_ASCEND)
...
@@ -196,7 +196,10 @@ Eigen::DefaultDevice* CPUDeviceContext::eigen_device() const {
 Place CPUDeviceContext::GetPlace() const { return place_; }
 #ifdef PADDLE_WITH_XPU
-XPUDeviceContext::XPUDeviceContext() { context_ = xpu::create_context(); }
+XPUDeviceContext::XPUDeviceContext() {
+  context_ = xpu::create_context();
+  xpu_version_ = get_xpu_version(place_.device);
+}
 XPUDeviceContext::~XPUDeviceContext() {}
...
@@ -68,8 +68,8 @@ struct GpuDevice;
 }  // namespace Eigen
 #ifdef PADDLE_WITH_XPU
-#include "paddle/fluid/platform/xpu_header.h"
-#include "paddle/fluid/platform/xpu_info.h"
+#include "paddle/fluid/platform/xpu/xpu_header.h"
+#include "paddle/fluid/platform/xpu/xpu_info.h"
 #endif
 #ifdef PADDLE_WITH_ASCEND_CL
@@ -137,12 +137,14 @@ struct DefaultDeviceContextType<platform::CPUPlace> {
 };
 #ifdef PADDLE_WITH_XPU
+namespace xpu = baidu::xpu::api;
 class XPUDeviceContext : public DeviceContext {
  public:
   XPUDeviceContext();
   explicit XPUDeviceContext(XPUPlace place);
   virtual ~XPUDeviceContext();
   Eigen::DefaultDevice* eigen_device() const { return nullptr; }
+  XPUVersion xpu_version() const { return xpu_version_; }
   Place GetPlace() const override;
   xpu::Context* x_context() const;
@@ -159,6 +161,7 @@ class XPUDeviceContext : public DeviceContext {
  private:
   XPUPlace place_;
+  XPUVersion xpu_version_;
   xpu::Context* context_;
 #ifdef PADDLE_WITH_XPU_BKCL
   BKCLContext_t bkcl_context_;
...
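For orientation, here is a minimal hypothetical sketch (not part of this commit) of how a caller might branch on the new xpu_version() accessor; it assumes an XPU build and device 0:

#include "paddle/fluid/platform/device_context.h"

// Hypothetical illustration: query the generation of device 0 and branch.
void DispatchByXPUVersion() {
  paddle::platform::XPUDeviceContext ctx(paddle::platform::XPUPlace(0));
  if (ctx.xpu_version() == paddle::platform::XPU2) {
    // KL2 (XPU2): the KL2 op map may also advertise FP16 kernels.
  } else {
    // KL1 (XPU1): only the FP32 entries in the KL1 op map apply.
  }
}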
@@ -29,8 +29,8 @@ limitations under the License. */
 #include "paddle/fluid/platform/place.h"
 #ifdef PADDLE_WITH_XPU
-#include "paddle/fluid/platform/xpu_header.h"
-#include "paddle/fluid/platform/xpu_info.h"
+#include "paddle/fluid/platform/xpu/xpu_header.h"
+#include "paddle/fluid/platform/xpu/xpu_info.h"
 #endif
 #ifdef WITH_WIN_DUMP_DBG
...
/* Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#ifdef PADDLE_WITH_XPU
#include <string>
#include <unordered_map>
#include <unordered_set>
#include "paddle/fluid/framework/op_kernel_type.h"
namespace paddle {
namespace platform {
using vartype = paddle::framework::proto::VarType;
using pOpKernelType = paddle::framework::OpKernelType;
using XPUKernelSet =
std::unordered_set<pOpKernelType, paddle::framework::OpKernelType::Hash>;
using XPUOpMap = std::unordered_map<std::string, XPUKernelSet>;
XPUOpMap& get_kl1_ops() {
// Ops supported on KL1, indexed by op_name, data_type, and place.
static XPUOpMap s_xpu1_kernels{
{"relu", XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})},
{"relu_grad", XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})},
{"tanh", XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})},
{"tanh_grad", XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})},
{"sigmoid", XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})},
{"sigmoid_grad",
XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})},
{"gelu", XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})},
{"gelu_grad", XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})},
{"sqrt", XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})},
{"sqrt_grad", XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})},
{"square", XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})},
{"square_grad", XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})},
{"hard_switch", XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})},
{"hard_switch_grad",
XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})},
{"leaky_relu", XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})},
{"leaky_relu_grad",
XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})},
{"log", XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})},
{"pow", XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})},
{"abs", XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})},
{"affine_channel",
XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})},
{"affine_channel_grad",
XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})},
{"assign", XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})},
{"batch_norm", XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})},
{"batch_norm_grad",
XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})},
{"cast", XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})},
{"clip_by_norm",
XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})},
{"coalesce_tensor",
XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})},
{"c_reduce_sum",
XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})},
{"c_allreduce_sum",
XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})},
{"broadcast", XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})},
{"concat", XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})},
{"concat_grad", XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})},
{"logicalor", XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})},
{"logicaland", XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})},
{"logicalnot", XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})},
{"depthwise_conv2d",
XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})},
{"depthwise_conv2d_grad",
XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})},
{"conv2d", XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})},
{"conv2d_grad", XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})},
{"deformable_conv",
XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})},
{"deformable_conv_grad",
XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})},
{"dropout", XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})},
{"dropout_grad",
XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})},
{"elementwise_sub",
XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})},
{"elementwise_sub_grad",
XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})},
{"elementwise_add",
XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})},
{"elementwise_add_grad",
XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})},
{"elementwise_div",
XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})},
{"elementwise_div_grad",
XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})},
{"elementwise_pow",
XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})},
{"elementwise_floordiv",
XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})},
{"elementwise_mul",
XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})},
{"elementwise_mul_grad",
XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})},
{"elementwise_max",
XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})},
{"elementwise_max_grad",
XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})},
{"elementwise_min",
XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})},
{"elementwise_min_grad",
XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})},
{"fill_constant",
XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})},
{"gather", XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})},
{"gather_grad", XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})},
{"gaussian_random",
XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})},
{"bilinear_interp",
XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})},
{"bilinear_interp_grad",
XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})},
{"nearest_interp",
XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})},
{"nearest_interp_grad",
XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})},
{"bilinear_interp_v2",
XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})},
{"bilinear_interp_v2_grad",
XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})},
{"nearest_interp_v2",
XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})},
{"nearest_interp_v2_grad",
XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})},
{"layer_norm", XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})},
{"layer_norm_grad",
XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})},
{"load", XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})},
{"log_loss", XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})},
{"log_loss_grad",
XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})},
{"lookup_table_v2",
XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})},
{"lookup_table_v2_grad",
XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})},
{"matmul", XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})},
{"matmul_grad", XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})},
{"matmul_v2", XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})},
{"matmul_v2_grad",
XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})},
{"mean", XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})},
{"mean_grad", XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})},
{"accuracy", XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})},
{"mul", XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})},
{"mul_grad", XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})},
{"one_hot", XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})},
{"one_hot_v2", XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})},
{"sgd", XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})},
{"adam", XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})},
{"rmsprop", XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})},
{"lamb", XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})},
{"pool2d", XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})},
{"pool2d_grad", XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})},
{"range", XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})},
{"reduce_sum", XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})},
{"reduce_sum_grad",
XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})},
{"reduce_mean", XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})},
{"logsumexp", XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})},
{"reduce_max", XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})},
{"reduce_max_grad",
XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})},
{"reshape2", XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})},
{"reshape2_grad",
XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})},
{"rnn", XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})},
{"rnn_grad", XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})},
{"roi_align", XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})},
{"roi_align_grad",
XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})},
{"scale", XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})},
{"shape", XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})},
{"sign", XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})},
{"slice", XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})},
{"slice_grad", XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})},
{"softmax", XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})},
{"softmax_grad",
XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})},
{"softmax_with_cross_entropy",
XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})},
{"squeeze", XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})},
{"squeeze_grad",
XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})},
{"squeeze2", XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})},
{"squeeze2_grad",
XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})},
{"stack", XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})},
{"sum", XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})},
{"top_k", XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})},
{"transpose", XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})},
{"transpose_grad",
XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})},
{"transpose2", XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})},
{"transpose2_grad",
XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})},
{"truncated_gaussian_random",
XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})},
{"uniform_random",
XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})},
{"unsqueeze", XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})},
{"unsqueeze_grad",
XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})},
{"unsqueeze2", XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})},
{"unsqueeze2_grad",
XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})},
{"momuntem", XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})}
// AddMore
};
return s_xpu1_kernels;
}
} // namespace platform
} // namespace paddle
#endif
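The "// AddMore" marker above shows where new entries belong; as a purely illustrative example (hypothetical, not in this commit), an FP32 registration for a "log_grad" kernel would follow the same pattern:

// Hypothetical extra entry at the "// AddMore" marker:
{"log_grad", XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace())})},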
/* Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#ifdef PADDLE_WITH_XPU
#include <string>
#include <unordered_map>
#include <unordered_set>
#include "paddle/fluid/framework/op_kernel_type.h"
namespace paddle {
namespace platform {
using vartype = paddle::framework::proto::VarType;
using pOpKernelType = paddle::framework::OpKernelType;
using XPUKernelSet =
std::unordered_set<pOpKernelType, paddle::framework::OpKernelType::Hash>;
using XPUOpMap = std::unordered_map<std::string, XPUKernelSet>;
XPUOpMap& get_kl2_ops() {
// Ops supported on KL2, indexed by op_name, data_type, and place.
static XPUOpMap s_xpu2_kernels{
{"mul", XPUKernelSet({pOpKernelType(vartype::FP32, XPUPlace()),
pOpKernelType(vartype::FP16, XPUPlace())})},
// AddMore
};
return s_xpu2_kernels;
}
} // namespace platform
} // namespace paddle
#endif
@@ -21,12 +21,9 @@
 #include "paddle/fluid/platform/errors.h"
 #include "paddle/fluid/platform/float16.h"
-#include "xpu/api.h"
-#include "xpu/refactor/fusion.h"
-#include "xpu/refactor/math.h"
-#include "xpu/refactor/nn.h"
 #include "xpu/runtime.h"
 #include "xpu/runtime_ex.h"
+#include "xpu/xdnn.h"
 namespace xpu = baidu::xpu::api;
...
@@ -8,14 +8,14 @@ distributed under the License is distributed on an "AS IS" BASIS,
 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License. */
-#include "paddle/fluid/platform/xpu_info.h"
+#include "paddle/fluid/platform/xpu/xpu_info.h"
 #include <algorithm>
 #include <cstdlib>
 #include <string>
 #include "gflags/gflags.h"
 #include "paddle/fluid/platform/enforce.h"
-#include "paddle/fluid/platform/xpu_header.h"
+#include "paddle/fluid/platform/xpu/xpu_header.h"
 #include "paddle/fluid/string/split.h"
 DEFINE_string(selected_xpus, "",
@@ -103,5 +103,21 @@ void SetXPUDeviceId(int id) {
       ret));
 }
+XPUVersion get_xpu_version(int dev_id) {
+  uint64_t v = 0;
+  int ret = xpu_device_get_attr(&v, XPUATTR_MODEL, dev_id);
+  PADDLE_ENFORCE_EQ(ret, XPU_SUCCESS,
+                    platform::errors::External(
+                        "xpu_device_get_attr return wrong value[%d]", ret));
+  if (v == K100 || v == K200) {
+    VLOG(1) << "KUNLUN device " << dev_id << " is XPU1\n";
+    return XPU1;
+  } else {
+    VLOG(1) << "KUNLUN device " << dev_id << " is XPU2\n";
+    return XPU2;
+  }
+}
 }  // namespace platform
 }  // namespace paddle
@@ -51,6 +51,9 @@ class XPUDeviceGuard {
   int prev_id_{-1};
 };
+enum XPUVersion { XPU1, XPU2 };
+XPUVersion get_xpu_version(int dev_id);
 }  // namespace platform
 }  // namespace paddle
 #endif
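As a quick hypothetical illustration (not from the commit), the new enum and query function compose like this:

#include "paddle/fluid/platform/xpu/xpu_info.h"

// Hypothetical helper: a readable name for a device's generation.
const char* XPUGenerationName(int dev_id) {
  return paddle::platform::get_xpu_version(dev_id) == paddle::platform::XPU1
             ? "KL1 (XPU1)"
             : "KL2 (XPU2)";
}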
/* Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#ifdef PADDLE_WITH_XPU
#include <string>
#include "paddle/fluid/platform/xpu/xpu1_op_list.h"
#include "paddle/fluid/platform/xpu/xpu2_op_list.h"
#include "paddle/fluid/platform/xpu/xpu_info.h"
#include "paddle/fluid/platform/xpu/xpu_op_list.h"
namespace paddle {
namespace platform {
bool is_xpu_support_op(std::string op_name, const pOpKernelType& type) {
  auto v =
      get_xpu_version(BOOST_GET_CONST(platform::XPUPlace, type.place_).device);
  // Select the op table for this device generation. Binding a mutable
  // reference to get_kl1_ops() and then assigning get_kl2_ops() to it would
  // copy the KL2 table over the static KL1 table, so choose up front.
  auto& ops = (v == XPU2) ? get_kl2_ops() : get_kl1_ops();
  auto iter = ops.find(op_name);
  return iter != ops.end() && iter->second.find(type) != iter->second.end();
}
} // namespace platform
} // namespace paddle
#endif
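For reference, a simplified hypothetical sketch of the fallback decision that the operator.cc and prepared_operator.cc hunks implement with this predicate:

#include "paddle/fluid/platform/xpu/xpu_op_list.h"

// Hypothetical condensation of the fallback condition added above: fall back
// to a CPU kernel only when no XPU kernel was found and the version-specific
// op list does not claim support for this (op, dtype, place) combination.
bool ShouldFallbackToCPU(const std::string& op_type,
                         const paddle::framework::OpKernelType& key,
                         bool xpu_kernel_found) {
  return !xpu_kernel_found &&
         paddle::platform::is_xpu_place(key.place_) &&
         !paddle::platform::is_xpu_support_op(op_type, key);
}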
/* Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#ifdef PADDLE_WITH_XPU
#include <string>
#include "paddle/fluid/framework/op_kernel_type.h"
namespace paddle {
namespace platform {
using pOpKernelType = paddle::framework::OpKernelType;
bool is_xpu_support_op(std::string op_name, const pOpKernelType& type);
} // namespace platform
} // namespace paddle
#endif
@@ -117,7 +117,7 @@ limitations under the License. */
 #endif
 #ifdef PADDLE_WITH_XPU
-#include "paddle/fluid/platform/xpu_info.h"
+#include "paddle/fluid/platform/xpu/xpu_info.h"
 #endif
 #ifdef PADDLE_WITH_CRYPTO
...