From c9a3c6696dec5aaefb5952bb6238035c5ccb4fef Mon Sep 17 00:00:00 2001 From: jianghaicheng Date: Tue, 7 Dec 2021 10:32:49 +0800 Subject: [PATCH] add ipu device p1 (#37841) --- paddle/fluid/framework/garbage_collector.cc | 9 +++++ paddle/fluid/framework/garbage_collector.h | 10 +++++ paddle/fluid/framework/library_type.h | 4 +- paddle/fluid/memory/allocation/CMakeLists.txt | 2 + .../memory/allocation/allocator_facade.cc | 38 +++++++++++++++++++ paddle/fluid/memory/memcpy.cc | 26 +++++++++++++ paddle/fluid/platform/CMakeLists.txt | 8 +++- .../fluid/platform/device/ipu/CMakeLists.txt | 20 ++++++---- paddle/fluid/platform/device/ipu/device.cc | 2 +- paddle/fluid/platform/device/ipu/ipu_info.cc | 32 ++++++++++++++++ paddle/fluid/platform/device/ipu/ipu_info.h | 24 ++++++++++++ .../platform/device/ipu/ipu_optimizer.cc | 2 +- paddle/fluid/platform/device_context.h | 32 +++++++++++++++- paddle/fluid/platform/init.cc | 16 ++++++++ paddle/fluid/pybind/CMakeLists.txt | 3 ++ 15 files changed, 214 insertions(+), 14 deletions(-) create mode 100644 paddle/fluid/platform/device/ipu/ipu_info.cc create mode 100644 paddle/fluid/platform/device/ipu/ipu_info.h diff --git a/paddle/fluid/framework/garbage_collector.cc b/paddle/fluid/framework/garbage_collector.cc index 8b6a5747dbf..06d1ef84c19 100644 --- a/paddle/fluid/framework/garbage_collector.cc +++ b/paddle/fluid/framework/garbage_collector.cc @@ -53,6 +53,15 @@ void XPUGarbageCollector::ClearCallback(const std::function &callback) { } #endif +#ifdef PADDLE_WITH_IPU +IPUGarbageCollector::IPUGarbageCollector(const platform::IPUPlace &place, + size_t max_memory_size) + : GarbageCollector(place, max_memory_size) {} +void IPUGarbageCollector::ClearCallback(const std::function &callback) { + callback(); +} +#endif + #if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP) UnsafeFastGPUGarbageCollector::UnsafeFastGPUGarbageCollector( const platform::CUDAPlace &place, size_t max_memory_size) diff --git a/paddle/fluid/framework/garbage_collector.h b/paddle/fluid/framework/garbage_collector.h index 2c2b57bbe42..0cfeda37c22 100644 --- a/paddle/fluid/framework/garbage_collector.h +++ b/paddle/fluid/framework/garbage_collector.h @@ -80,6 +80,16 @@ class XPUGarbageCollector : public GarbageCollector { }; #endif +#ifdef PADDLE_WITH_IPU +class IPUGarbageCollector : public GarbageCollector { + public: + IPUGarbageCollector(const platform::IPUPlace &place, size_t max_memory_size); + + protected: + void ClearCallback(const std::function &callback) override; +}; +#endif + #if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP) class UnsafeFastGPUGarbageCollector : public GarbageCollector { public: diff --git a/paddle/fluid/framework/library_type.h b/paddle/fluid/framework/library_type.h index 8fe314cf5f1..f7539aa4859 100644 --- a/paddle/fluid/framework/library_type.h +++ b/paddle/fluid/framework/library_type.h @@ -61,6 +61,8 @@ inline LibraryType StringToLibraryType(const char* ctype) { return LibraryType::kPlain; } else if (s == std::string("XPU")) { return LibraryType::kPlain; + } else if (s == std::string("IPU")) { + return LibraryType::kPlain; } else if (s == std::string("NPU")) { return LibraryType::kPlain; } else if (s == std::string("CUDA")) { @@ -68,7 +70,7 @@ inline LibraryType StringToLibraryType(const char* ctype) { } else { PADDLE_THROW(platform::errors::Unimplemented( "Unknown LibraryType string (%s), only support library type string " - "include PLAIN, MKLDNN, CUDNN, CPU and CUDA.", + "include PLAIN, MKLDNN, CUDNN, CPU, CUDA and IPU.", s.c_str())); } } diff --git a/paddle/fluid/memory/allocation/CMakeLists.txt b/paddle/fluid/memory/allocation/CMakeLists.txt index 4d44c533b74..b3351f44dc3 100644 --- a/paddle/fluid/memory/allocation/CMakeLists.txt +++ b/paddle/fluid/memory/allocation/CMakeLists.txt @@ -48,6 +48,8 @@ if (WITH_GPU OR WITH_ROCM) endif() elseif(WITH_XPU) set(AllocatorFacadeDeps xpu_info) +elseif(WITH_IPU) + set(AllocatorFacadeDeps ipu_info) elseif(WITH_ASCEND) set(AllocatorFacadeDeps ascend_npu_info) else () diff --git a/paddle/fluid/memory/allocation/allocator_facade.cc b/paddle/fluid/memory/allocation/allocator_facade.cc index 8314a1df931..13cd980881b 100644 --- a/paddle/fluid/memory/allocation/allocator_facade.cc +++ b/paddle/fluid/memory/allocation/allocator_facade.cc @@ -51,6 +51,10 @@ #include "paddle/fluid/memory/allocation/npu_pinned_allocator.h" #endif +#ifdef PADDLE_WITH_IPU +#include "paddle/fluid/platform/device/ipu/ipu_info.h" +#endif + PADDLE_DEFINE_EXPORTED_int64( gpu_allocator_retry_time, 10000, "The retry time (milliseconds) when allocator fails " @@ -136,6 +140,11 @@ class AllocatorFacadePrivate { switch (strategy_) { case AllocatorStrategy::kNaiveBestFit: { InitNaiveBestFitCPUAllocator(); +#ifdef PADDLE_WITH_IPU + for (int dev_id = 0; dev_id < platform::GetIPUDeviceCount(); ++dev_id) { + InitNaiveBestFitIPUAllocator(platform::IPUPlace(dev_id)); + } +#endif #if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP) if (FLAGS_use_stream_safe_cuda_allocator) { LOG(WARNING) << "FLAGS_use_stream_safe_cuda_allocator is invalid for " @@ -186,6 +195,11 @@ class AllocatorFacadePrivate { for (int dev_id = 0; dev_id < platform::GetXPUDeviceCount(); ++dev_id) { InitNaiveBestFitXPUAllocator(platform::XPUPlace(dev_id)); } +#endif +#ifdef PADDLE_WITH_IPU + for (int dev_id = 0; dev_id < platform::GetIPUDeviceCount(); ++dev_id) { + InitNaiveBestFitIPUAllocator(platform::IPUPlace(dev_id)); + } #endif break; } @@ -197,6 +211,11 @@ class AllocatorFacadePrivate { InitNaiveBestFitXPUAllocator(platform::XPUPlace(dev_id)); } #endif +#ifdef PADDLE_WITH_IPU + for (int dev_id = 0; dev_id < platform::GetIPUDeviceCount(); ++dev_id) { + InitNaiveBestFitIPUAllocator(platform::IPUPlace(dev_id)); + } +#endif #if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP) if (FLAGS_use_stream_safe_cuda_allocator) { LOG(WARNING) << "FLAGS_use_stream_safe_cuda_allocator is invalid for " @@ -570,6 +589,12 @@ class AllocatorFacadePrivate { } #endif +#ifdef PADDLE_WITH_IPU + void InitNaiveBestFitIPUAllocator(platform::IPUPlace p) { + allocators_[p] = std::make_shared(p); + } +#endif + #ifdef PADDLE_WITH_ASCEND_CL void InitNaiveBestFitNPUAllocator(platform::NPUPlace p) { allocators_[p] = std::make_shared(p); @@ -591,6 +616,13 @@ class AllocatorFacadePrivate { system_allocators_[p] = std::make_shared(p); } #endif +#ifdef PADDLE_WITH_IPU + int device_count = platform::GetIPUDeviceCount(); + for (int i = 0; i < device_count; ++i) { + platform::IPUPlace p(i); + system_allocators_[p] = std::make_shared(p); + } +#endif #if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP) system_allocators_[platform::CUDAPinnedPlace()] = std::make_shared(); @@ -625,6 +657,12 @@ class AllocatorFacadePrivate { places.emplace_back(platform::NPUPlace(dev_id)); } #endif +#ifdef PADDLE_WITH_IPU + int device_count = platform::GetIPUDeviceCount(); + for (int dev_id = 0; dev_id < device_count; ++dev_id) { + places.emplace_back(platform::IPUPlace(dev_id)); + } +#endif for (auto& p : places) { zero_size_allocators_[p] = std::make_shared(p); diff --git a/paddle/fluid/memory/memcpy.cc b/paddle/fluid/memory/memcpy.cc index 574b1520543..fe38200efa8 100644 --- a/paddle/fluid/memory/memcpy.cc +++ b/paddle/fluid/memory/memcpy.cc @@ -33,6 +33,32 @@ void Copy(platform::CPUPlace, void* dst, VLOG(4) << "src: " << src << ", dst: " << dst << ", num: " << num; std::memcpy(dst, src, num); } +#ifdef PADDLE_WITH_IPU +template <> +void Copy(platform::IPUPlace dst_place, + void* dst, + platform::CPUPlace src_place, + const void* src, size_t num) { + if (UNLIKELY(num == 0)) return; + std::memcpy(dst, src, num); +} +template <> +void Copy(platform::CPUPlace dst_place, + void* dst, + platform::IPUPlace src_place, + const void* src, size_t num) { + if (UNLIKELY(num == 0)) return; + std::memcpy(dst, src, num); +} +template <> +void Copy(platform::IPUPlace dst_place, + void* dst, + platform::IPUPlace src_place, + const void* src, size_t num) { + if (UNLIKELY(num == 0)) return; + std::memcpy(dst, src, num); +} +#endif #ifdef PADDLE_WITH_XPU template <> diff --git a/paddle/fluid/platform/CMakeLists.txt b/paddle/fluid/platform/CMakeLists.txt index 4f3c70f5ea0..d8d41e9d918 100644 --- a/paddle/fluid/platform/CMakeLists.txt +++ b/paddle/fluid/platform/CMakeLists.txt @@ -71,6 +71,12 @@ IF(WITH_GPU OR WITH_ROCM) set(GPU_CTX_DEPS dynload_cuda dynamic_loader cuda_stream) ENDIF() +IF(WITH_IPU) + set(IPU_CTX_DEPS ipu_backend) +ELSE() + set(IPU_CTX_DEPS) +ENDIF(WITH_IPU) + IF(WITH_ASCEND_CL) set(NPU_CTX_DEPS npu_stream npu_info) ENDIF() @@ -109,7 +115,7 @@ cc_library(cudnn_workspace_helper SRCS cudnn_workspace_helper.cc DEPS boost) # memcpy depends on device_context, here add deps individually for # avoiding cycle dependencies cc_library(device_context SRCS device_context.cc init.cc DEPS simple_threadpool malloc xxhash ${STREAM_CALLBACK_DEPS} - place eigen3 stringpiece cpu_helper cpu_info framework_proto ${GPU_CTX_DEPS} ${NPU_CTX_DEPS} ${MKLDNN_CTX_DEPS} + place eigen3 stringpiece cpu_helper cpu_info framework_proto ${IPU_CTX_DEPS} ${GPU_CTX_DEPS} ${NPU_CTX_DEPS} ${MKLDNN_CTX_DEPS} ${dgc_deps} dlpack cudnn_workspace_helper ${XPU_CTX_DEPS}) cc_library(collective_helper SRCS collective_helper.cc gen_comm_id_helper.cc DEPS framework_proto device_context enforce) diff --git a/paddle/fluid/platform/device/ipu/CMakeLists.txt b/paddle/fluid/platform/device/ipu/CMakeLists.txt index c4595e22d6c..25629ba74d9 100644 --- a/paddle/fluid/platform/device/ipu/CMakeLists.txt +++ b/paddle/fluid/platform/device/ipu/CMakeLists.txt @@ -1,8 +1,12 @@ -cc_library(ipu_device SRCS device.cc DEPS enforce popart) -cc_library(ipu_utils SRCS ipu_utils.cc DEPS memory framework_proto popart) -cc_library(ipu_strategy SRCS ipu_strategy.cc DEPS popart graph framework_proto enforce) -cc_library(ipu_optimizer SRCS ipu_optimizer.cc DEPS popart enforce) -cc_library(ipu_executor SRCS ipu_executor.cc DEPS ipu_optimizer ipu_utils popart graph framework_proto) -cc_library(popart_canonicalization_utils SRCS ${POPART_CANONICALIZATION_SRC} DEPS framework_proto enforce ipu_utils) -cc_library(ipu_compiler SRCS ipu_compiler.cc DEPS popart graph ipu_utils graph_helper) -cc_library(ipu_backend SRCS ipu_backend.cc DEPS popart ipu_compiler graph framework_proto enforce ipu_utils ipu_strategy ipu_device ipu_executor graph_helper) +# IPU +IF(WITH_IPU) + cc_library(ipu_device SRCS device.cc DEPS enforce popart) + cc_library(ipu_utils SRCS ipu_utils.cc DEPS memory framework_proto popart) + cc_library(ipu_strategy SRCS ipu_strategy.cc DEPS popart graph framework_proto enforce) + cc_library(ipu_optimizer SRCS ipu_optimizer.cc DEPS popart enforce) + cc_library(ipu_executor SRCS ipu_executor.cc DEPS ipu_optimizer ipu_utils popart graph framework_proto) + cc_library(popart_canonicalization_utils SRCS ${POPART_CANONICALIZATION_SRC} DEPS framework_proto enforce ipu_utils) + cc_library(ipu_compiler SRCS ipu_compiler.cc DEPS popart graph ipu_utils graph_helper) + cc_library(ipu_backend SRCS ipu_backend.cc DEPS popart ipu_compiler graph framework_proto enforce ipu_utils ipu_strategy ipu_device ipu_executor graph_helper) + cc_library(ipu_info SRCS ipu_info.cc DEPS ipu_backend) +ENDIF() diff --git a/paddle/fluid/platform/device/ipu/device.cc b/paddle/fluid/platform/device/ipu/device.cc index 4aa9ab56d92..47e6475089d 100644 --- a/paddle/fluid/platform/device/ipu/device.cc +++ b/paddle/fluid/platform/device/ipu/device.cc @@ -12,7 +12,7 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */ -#include "paddle/fluid/platform/ipu/device.h" +#include "paddle/fluid/platform/device/ipu/device.h" namespace paddle { namespace platform { diff --git a/paddle/fluid/platform/device/ipu/ipu_info.cc b/paddle/fluid/platform/device/ipu/ipu_info.cc new file mode 100644 index 00000000000..c184149a9d3 --- /dev/null +++ b/paddle/fluid/platform/device/ipu/ipu_info.cc @@ -0,0 +1,32 @@ +/* Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + http://www.apache.org/licenses/LICENSE-2.0 +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#include "paddle/fluid/platform/device/ipu/ipu_info.h" +#include "paddle/fluid/platform/device/ipu/ipu_backend.h" + +namespace paddle { +namespace platform { + +//! Get a list of device ids from environment variable or use all. +std::vector GetSelectedIPUDevices() { + std::shared_ptr ipu_backend = + platform::ipu::IpuBackend::GetInstance(); + return ipu_backend->GetDeviceIds(); +} + +//! Get the total number of IPU devices in system. +int GetIPUDeviceCount() { + std::shared_ptr ipu_backend = + platform::ipu::IpuBackend::GetInstance(); + return ipu_backend->GetNumDevices(); +} +} // namespace platform +} // namespace paddle diff --git a/paddle/fluid/platform/device/ipu/ipu_info.h b/paddle/fluid/platform/device/ipu/ipu_info.h new file mode 100644 index 00000000000..3d032eeb4bf --- /dev/null +++ b/paddle/fluid/platform/device/ipu/ipu_info.h @@ -0,0 +1,24 @@ +/* Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + http://www.apache.org/licenses/LICENSE-2.0 +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ +#pragma once + +#ifdef PADDLE_WITH_IPU +#include +#include +#include "glog/logging.h" + +namespace paddle { +namespace platform { +std::vector GetSelectedIPUDevices(); +int GetIPUDeviceCount(); +} // namespace platform +} // namespace paddle +#endif diff --git a/paddle/fluid/platform/device/ipu/ipu_optimizer.cc b/paddle/fluid/platform/device/ipu/ipu_optimizer.cc index ea8ae8e1f02..92bb2ca3afc 100644 --- a/paddle/fluid/platform/device/ipu/ipu_optimizer.cc +++ b/paddle/fluid/platform/device/ipu/ipu_optimizer.cc @@ -12,7 +12,7 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */ -#include "paddle/fluid/platform/ipu/ipu_optimizer.h" +#include "paddle/fluid/platform/device/ipu/ipu_optimizer.h" namespace paddle { namespace platform { diff --git a/paddle/fluid/platform/device_context.h b/paddle/fluid/platform/device_context.h index 552d8f1a8c4..875132dfe89 100644 --- a/paddle/fluid/platform/device_context.h +++ b/paddle/fluid/platform/device_context.h @@ -62,6 +62,9 @@ limitations under the License. */ #include "paddle/fluid/platform/device/npu/enforce_npu.h" #include "paddle/fluid/platform/device/npu/npu_stream.h" #endif +#ifdef PADDLE_WITH_IPU +#include "paddle/fluid/platform/device/ipu/device.h" +#endif #include "unsupported/Eigen/CXX11/Tensor" namespace Eigen { @@ -99,8 +102,8 @@ enum DeviceType { CUDA = 1, XPU = 2, NPU = 3, - - MAX_DEVICE_TYPES = 4, + IPU = 4, + MAX_DEVICE_TYPES = 5, }; DeviceType Place2DeviceType(const platform::Place& place); @@ -109,6 +112,7 @@ constexpr DeviceType kCPU = DeviceType::CPU; constexpr DeviceType kCUDA = DeviceType::CUDA; constexpr DeviceType kXPU = DeviceType::XPU; constexpr DeviceType kNPU = DeviceType::NPU; +constexpr DeviceType kIPU = DeviceType::IPU; class DeviceContext { public: @@ -140,6 +144,30 @@ struct DefaultDeviceContextType { using TYPE = CPUDeviceContext; }; +// Graphcore IPU +#ifdef PADDLE_WITH_IPU +class IPUDeviceContext : public DeviceContext { + public: + IPUDeviceContext() = delete; + explicit IPUDeviceContext(IPUPlace place); + virtual ~IPUDeviceContext(); + Eigen::DefaultDevice* eigen_device() const { return nullptr; } + Place GetPlace() const override; + /*! \brief Wait for all operations completion in the stream. */ + void Wait() const override; + int DeviceId() const { return device_.getId(); } + + private: + IPUPlace place_; + platform::ipu::Device device_; +}; +template <> +struct DefaultDeviceContextType { + using TYPE = IPUDeviceContext; +}; + +#endif + #ifdef PADDLE_WITH_XPU namespace xpu = baidu::xpu::api; class XPUDeviceContext : public DeviceContext { diff --git a/paddle/fluid/platform/init.cc b/paddle/fluid/platform/init.cc index 698563a53d2..b642f160da2 100644 --- a/paddle/fluid/platform/init.cc +++ b/paddle/fluid/platform/init.cc @@ -45,6 +45,10 @@ limitations under the License. */ #include "DbgHelp.h" #endif +#ifdef PADDLE_WITH_IPU +#include "paddle/fluid/platform/device/ipu/ipu_info.h" +#endif + DECLARE_int32(paddle_num_threads); PADDLE_DEFINE_EXPORTED_int32( multiple_of_cupti_buffer_size, 1, @@ -164,6 +168,15 @@ void InitDevices() { LOG(WARNING) << "Compiled with PADDLE_WITH_ASCEND_CL, but no NPU found in runtime."; } +#endif +#ifdef PADDLE_WITH_IPU + try { + // use user specified IPUs. + devices = platform::GetSelectedIPUDevices(); + } catch (const std::exception &exp) { + LOG(WARNING) + << "Compiled with PADDLE_WITH_IPU, but no IPU found in runtime."; + } #endif InitDevices(devices); } @@ -185,6 +198,9 @@ void InitDevices(const std::vector devices) { #ifdef PADDLE_WITH_XPU places.emplace_back(platform::XPUPlace(devices[i])); #endif +#ifdef PADDLE_WITH_IPU + places.emplace_back(platform::IPUPlace(devices[i])); +#endif #ifdef PADDLE_WITH_ASCEND_CL places.emplace_back(platform::NPUPlace(devices[i])); #endif diff --git a/paddle/fluid/pybind/CMakeLists.txt b/paddle/fluid/pybind/CMakeLists.txt index 521ca032a50..4f896f852ff 100644 --- a/paddle/fluid/pybind/CMakeLists.txt +++ b/paddle/fluid/pybind/CMakeLists.txt @@ -16,6 +16,9 @@ endif() if (WITH_GPU) set(PYBIND_DEPS ${PYBIND_DEPS} cuda_profiler) endif() +if (WITH_IPU) + set(PYBIND_DEPS ${PYBIND_DEPS} ipu_info) +endif() if (WITH_NCCL OR WITH_RCCL) set(PYBIND_DEPS ${PYBIND_DEPS} nccl_wrapper) -- GitLab