未验证 提交 c9a3c669 编写于 作者: J jianghaicheng 提交者: GitHub

add ipu device p1 (#37841)

上级 de874cdd
......@@ -53,6 +53,15 @@ void XPUGarbageCollector::ClearCallback(const std::function<void()> &callback) {
}
#endif
#ifdef PADDLE_WITH_IPU
// Garbage collector for Graphcore IPU places. All memory accounting is
// inherited from the GarbageCollector base class.
IPUGarbageCollector::IPUGarbageCollector(const platform::IPUPlace &place,
size_t max_memory_size)
: GarbageCollector(place, max_memory_size) {}
// Executes the deallocation callback synchronously on the calling thread.
void IPUGarbageCollector::ClearCallback(const std::function<void()> &callback) {
callback();
}
#endif
#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
UnsafeFastGPUGarbageCollector::UnsafeFastGPUGarbageCollector(
const platform::CUDAPlace &place, size_t max_memory_size)
......
......@@ -80,6 +80,16 @@ class XPUGarbageCollector : public GarbageCollector {
};
#endif
#ifdef PADDLE_WITH_IPU
// GarbageCollector specialization for Graphcore IPU places. The callback is
// executed synchronously (see the implementation of ClearCallback).
class IPUGarbageCollector : public GarbageCollector {
public:
// Constructs a collector for `place`; see GarbageCollector for the
// semantics of max_memory_size.
IPUGarbageCollector(const platform::IPUPlace &place, size_t max_memory_size);
protected:
// Invoked by the base class to release garbage; runs `callback` directly.
void ClearCallback(const std::function<void()> &callback) override;
};
#endif
#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
class UnsafeFastGPUGarbageCollector : public GarbageCollector {
public:
......
......@@ -61,6 +61,8 @@ inline LibraryType StringToLibraryType(const char* ctype) {
return LibraryType::kPlain;
} else if (s == std::string("XPU")) {
return LibraryType::kPlain;
} else if (s == std::string("IPU")) {
return LibraryType::kPlain;
} else if (s == std::string("NPU")) {
return LibraryType::kPlain;
} else if (s == std::string("CUDA")) {
......@@ -68,7 +70,7 @@ inline LibraryType StringToLibraryType(const char* ctype) {
} else {
PADDLE_THROW(platform::errors::Unimplemented(
"Unknown LibraryType string (%s), only support library type string "
"include PLAIN, MKLDNN, CUDNN, CPU and CUDA.",
"include PLAIN, MKLDNN, CUDNN, CPU, CUDA and IPU.",
s.c_str()));
}
}
......
......@@ -48,6 +48,8 @@ if (WITH_GPU OR WITH_ROCM)
endif()
elseif(WITH_XPU)
set(AllocatorFacadeDeps xpu_info)
elseif(WITH_IPU)
set(AllocatorFacadeDeps ipu_info)
elseif(WITH_ASCEND)
set(AllocatorFacadeDeps ascend_npu_info)
else ()
......
......@@ -51,6 +51,10 @@
#include "paddle/fluid/memory/allocation/npu_pinned_allocator.h"
#endif
#ifdef PADDLE_WITH_IPU
#include "paddle/fluid/platform/device/ipu/ipu_info.h"
#endif
PADDLE_DEFINE_EXPORTED_int64(
gpu_allocator_retry_time, 10000,
"The retry time (milliseconds) when allocator fails "
......@@ -136,6 +140,11 @@ class AllocatorFacadePrivate {
switch (strategy_) {
case AllocatorStrategy::kNaiveBestFit: {
InitNaiveBestFitCPUAllocator();
#ifdef PADDLE_WITH_IPU
for (int dev_id = 0; dev_id < platform::GetIPUDeviceCount(); ++dev_id) {
InitNaiveBestFitIPUAllocator(platform::IPUPlace(dev_id));
}
#endif
#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
if (FLAGS_use_stream_safe_cuda_allocator) {
LOG(WARNING) << "FLAGS_use_stream_safe_cuda_allocator is invalid for "
......@@ -186,6 +195,11 @@ class AllocatorFacadePrivate {
for (int dev_id = 0; dev_id < platform::GetXPUDeviceCount(); ++dev_id) {
InitNaiveBestFitXPUAllocator(platform::XPUPlace(dev_id));
}
#endif
#ifdef PADDLE_WITH_IPU
for (int dev_id = 0; dev_id < platform::GetIPUDeviceCount(); ++dev_id) {
InitNaiveBestFitIPUAllocator(platform::IPUPlace(dev_id));
}
#endif
break;
}
......@@ -197,6 +211,11 @@ class AllocatorFacadePrivate {
InitNaiveBestFitXPUAllocator(platform::XPUPlace(dev_id));
}
#endif
#ifdef PADDLE_WITH_IPU
for (int dev_id = 0; dev_id < platform::GetIPUDeviceCount(); ++dev_id) {
InitNaiveBestFitIPUAllocator(platform::IPUPlace(dev_id));
}
#endif
#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
if (FLAGS_use_stream_safe_cuda_allocator) {
LOG(WARNING) << "FLAGS_use_stream_safe_cuda_allocator is invalid for "
......@@ -570,6 +589,12 @@ class AllocatorFacadePrivate {
}
#endif
#ifdef PADDLE_WITH_IPU
// Registers a NaiveBestFitAllocator for the given IPU place in the
// per-place allocator map.
void InitNaiveBestFitIPUAllocator(platform::IPUPlace p) {
allocators_[p] = std::make_shared<NaiveBestFitAllocator>(p);
}
#endif
#ifdef PADDLE_WITH_ASCEND_CL
void InitNaiveBestFitNPUAllocator(platform::NPUPlace p) {
allocators_[p] = std::make_shared<NaiveBestFitAllocator>(p);
......@@ -591,6 +616,13 @@ class AllocatorFacadePrivate {
system_allocators_[p] = std::make_shared<NaiveBestFitAllocator>(p);
}
#endif
#ifdef PADDLE_WITH_IPU
int device_count = platform::GetIPUDeviceCount();
for (int i = 0; i < device_count; ++i) {
platform::IPUPlace p(i);
system_allocators_[p] = std::make_shared<NaiveBestFitAllocator>(p);
}
#endif
#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
system_allocators_[platform::CUDAPinnedPlace()] =
std::make_shared<CPUPinnedAllocator>();
......@@ -625,6 +657,12 @@ class AllocatorFacadePrivate {
places.emplace_back(platform::NPUPlace(dev_id));
}
#endif
#ifdef PADDLE_WITH_IPU
int device_count = platform::GetIPUDeviceCount();
for (int dev_id = 0; dev_id < device_count; ++dev_id) {
places.emplace_back(platform::IPUPlace(dev_id));
}
#endif
for (auto& p : places) {
zero_size_allocators_[p] = std::make_shared<ZeroSizeAllocator>(p);
......
......@@ -33,6 +33,32 @@ void Copy<platform::CPUPlace, platform::CPUPlace>(platform::CPUPlace, void* dst,
VLOG(4) << "src: " << src << ", dst: " << dst << ", num: " << num;
std::memcpy(dst, src, num);
}
#ifdef PADDLE_WITH_IPU
// Memory copies involving IPU places. All three variants go through host
// memory with std::memcpy; a zero-byte request is a no-op.
// NOTE(review): device-resident transfers are presumably handled by the IPU
// backend rather than these host-side copies — confirm against callers.

// Copy host (CPU) memory into an IPU-place buffer.
template <>
void Copy<platform::IPUPlace, platform::CPUPlace>(platform::IPUPlace dst_place,
                                                  void* dst,
                                                  platform::CPUPlace src_place,
                                                  const void* src, size_t num) {
  if (UNLIKELY(num == 0)) {
    return;
  }
  std::memcpy(dst, src, num);
}

// Copy an IPU-place buffer back into host (CPU) memory.
template <>
void Copy<platform::CPUPlace, platform::IPUPlace>(platform::CPUPlace dst_place,
                                                  void* dst,
                                                  platform::IPUPlace src_place,
                                                  const void* src, size_t num) {
  if (UNLIKELY(num == 0)) {
    return;
  }
  std::memcpy(dst, src, num);
}

// Copy between two IPU-place buffers.
template <>
void Copy<platform::IPUPlace, platform::IPUPlace>(platform::IPUPlace dst_place,
                                                  void* dst,
                                                  platform::IPUPlace src_place,
                                                  const void* src, size_t num) {
  if (UNLIKELY(num == 0)) {
    return;
  }
  std::memcpy(dst, src, num);
}
#endif
#ifdef PADDLE_WITH_XPU
template <>
......
......@@ -71,6 +71,12 @@ IF(WITH_GPU OR WITH_ROCM)
set(GPU_CTX_DEPS dynload_cuda dynamic_loader cuda_stream)
ENDIF()
# Device-context dependencies for IPU: pull in the popart-backed ipu_backend
# library only when building with IPU support; otherwise leave the list empty
# so it expands to nothing in the device_context DEPS below.
IF(WITH_IPU)
set(IPU_CTX_DEPS ipu_backend)
ELSE()
set(IPU_CTX_DEPS)
ENDIF(WITH_IPU)
IF(WITH_ASCEND_CL)
set(NPU_CTX_DEPS npu_stream npu_info)
ENDIF()
......@@ -109,7 +115,7 @@ cc_library(cudnn_workspace_helper SRCS cudnn_workspace_helper.cc DEPS boost)
# memcpy depends on device_context, here add deps individually for
# avoiding cycle dependencies
cc_library(device_context SRCS device_context.cc init.cc DEPS simple_threadpool malloc xxhash ${STREAM_CALLBACK_DEPS}
place eigen3 stringpiece cpu_helper cpu_info framework_proto ${GPU_CTX_DEPS} ${NPU_CTX_DEPS} ${MKLDNN_CTX_DEPS}
place eigen3 stringpiece cpu_helper cpu_info framework_proto ${IPU_CTX_DEPS} ${GPU_CTX_DEPS} ${NPU_CTX_DEPS} ${MKLDNN_CTX_DEPS}
${dgc_deps} dlpack cudnn_workspace_helper ${XPU_CTX_DEPS})
cc_library(collective_helper SRCS collective_helper.cc gen_comm_id_helper.cc DEPS framework_proto device_context enforce)
......
# NOTE(review): this unguarded library list duplicates the IF(WITH_IPU)
# block that follows; it appears to be the pre-change side of a diff
# rendering and should not exist alongside the guarded copy — verify.
cc_library(ipu_device SRCS device.cc DEPS enforce popart)
cc_library(ipu_utils SRCS ipu_utils.cc DEPS memory framework_proto popart)
cc_library(ipu_strategy SRCS ipu_strategy.cc DEPS popart graph framework_proto enforce)
cc_library(ipu_optimizer SRCS ipu_optimizer.cc DEPS popart enforce)
cc_library(ipu_executor SRCS ipu_executor.cc DEPS ipu_optimizer ipu_utils popart graph framework_proto)
cc_library(popart_canonicalization_utils SRCS ${POPART_CANONICALIZATION_SRC} DEPS framework_proto enforce ipu_utils)
cc_library(ipu_compiler SRCS ipu_compiler.cc DEPS popart graph ipu_utils graph_helper)
cc_library(ipu_backend SRCS ipu_backend.cc DEPS popart ipu_compiler graph framework_proto enforce ipu_utils ipu_strategy ipu_device ipu_executor graph_helper)
# IPU
# Build the Graphcore IPU support libraries (device, popart compiler,
# executor, backend, and the canonicalization utilities) only when
# WITH_IPU is enabled; all of them link against the popart runtime.
IF(WITH_IPU)
cc_library(ipu_device SRCS device.cc DEPS enforce popart)
cc_library(ipu_utils SRCS ipu_utils.cc DEPS memory framework_proto popart)
cc_library(ipu_strategy SRCS ipu_strategy.cc DEPS popart graph framework_proto enforce)
cc_library(ipu_optimizer SRCS ipu_optimizer.cc DEPS popart enforce)
cc_library(ipu_executor SRCS ipu_executor.cc DEPS ipu_optimizer ipu_utils popart graph framework_proto)
cc_library(popart_canonicalization_utils SRCS ${POPART_CANONICALIZATION_SRC} DEPS framework_proto enforce ipu_utils)
cc_library(ipu_compiler SRCS ipu_compiler.cc DEPS popart graph ipu_utils graph_helper)
cc_library(ipu_backend SRCS ipu_backend.cc DEPS popart ipu_compiler graph framework_proto enforce ipu_utils ipu_strategy ipu_device ipu_executor graph_helper)
# ipu_info exposes device enumeration (see ipu_info.cc) on top of the backend.
cc_library(ipu_info SRCS ipu_info.cc DEPS ipu_backend)
ENDIF()
......@@ -12,7 +12,7 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/fluid/platform/ipu/device.h"
#include "paddle/fluid/platform/device/ipu/device.h"
namespace paddle {
namespace platform {
......
/* Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/fluid/platform/device/ipu/ipu_info.h"
#include "paddle/fluid/platform/device/ipu/ipu_backend.h"
namespace paddle {
namespace platform {
//! Get a list of device ids from environment variable or use all.
std::vector<int> GetSelectedIPUDevices() {
std::shared_ptr<platform::ipu::IpuBackend> ipu_backend =
platform::ipu::IpuBackend::GetInstance();
return ipu_backend->GetDeviceIds();
}
//! Get the total number of IPU devices in system.
int GetIPUDeviceCount() {
std::shared_ptr<platform::ipu::IpuBackend> ipu_backend =
platform::ipu::IpuBackend::GetInstance();
return ipu_backend->GetNumDevices();
}
} // namespace platform
} // namespace paddle
/* Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#ifdef PADDLE_WITH_IPU
#include <memory>
#include <vector>
#include "glog/logging.h"
namespace paddle {
namespace platform {
// Device ids of the IPUs selected for use (queried from the IPU backend).
std::vector<int> GetSelectedIPUDevices();
// Total number of IPU devices reported by the IPU backend.
int GetIPUDeviceCount();
} // namespace platform
} // namespace paddle
#endif
......@@ -12,7 +12,7 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/fluid/platform/ipu/ipu_optimizer.h"
#include "paddle/fluid/platform/device/ipu/ipu_optimizer.h"
namespace paddle {
namespace platform {
......
......@@ -62,6 +62,9 @@ limitations under the License. */
#include "paddle/fluid/platform/device/npu/enforce_npu.h"
#include "paddle/fluid/platform/device/npu/npu_stream.h"
#endif
#ifdef PADDLE_WITH_IPU
#include "paddle/fluid/platform/device/ipu/device.h"
#endif
#include "unsupported/Eigen/CXX11/Tensor"
namespace Eigen {
......@@ -99,8 +102,8 @@ enum DeviceType {
CUDA = 1,
XPU = 2,
NPU = 3,
MAX_DEVICE_TYPES = 4,
IPU = 4,
MAX_DEVICE_TYPES = 5,
};
DeviceType Place2DeviceType(const platform::Place& place);
......@@ -109,6 +112,7 @@ constexpr DeviceType kCPU = DeviceType::CPU;
constexpr DeviceType kCUDA = DeviceType::CUDA;
constexpr DeviceType kXPU = DeviceType::XPU;
constexpr DeviceType kNPU = DeviceType::NPU;
constexpr DeviceType kIPU = DeviceType::IPU;
class DeviceContext {
public:
......@@ -140,6 +144,30 @@ struct DefaultDeviceContextType<platform::CPUPlace> {
using TYPE = CPUDeviceContext;
};
// Graphcore IPU
#ifdef PADDLE_WITH_IPU
// DeviceContext bound to a single IPU place. Holds the place and a
// platform::ipu::Device handle for the underlying device.
class IPUDeviceContext : public DeviceContext {
public:
IPUDeviceContext() = delete;
// Binds the context to a concrete IPU place.
explicit IPUDeviceContext(IPUPlace place);
virtual ~IPUDeviceContext();
// IPU provides no Eigen device; callers must tolerate the nullptr.
Eigen::DefaultDevice* eigen_device() const { return nullptr; }
Place GetPlace() const override;
/*! \brief Wait for all operations completion in the stream. */
void Wait() const override;
// Id of the underlying device (delegates to platform::ipu::Device).
int DeviceId() const { return device_.getId(); }
private:
IPUPlace place_;
platform::ipu::Device device_;
};
// Default context type associated with IPUPlace.
template <>
struct DefaultDeviceContextType<platform::IPUPlace> {
using TYPE = IPUDeviceContext;
};
#endif
#ifdef PADDLE_WITH_XPU
namespace xpu = baidu::xpu::api;
class XPUDeviceContext : public DeviceContext {
......
......@@ -45,6 +45,10 @@ limitations under the License. */
#include "DbgHelp.h"
#endif
#ifdef PADDLE_WITH_IPU
#include "paddle/fluid/platform/device/ipu/ipu_info.h"
#endif
DECLARE_int32(paddle_num_threads);
PADDLE_DEFINE_EXPORTED_int32(
multiple_of_cupti_buffer_size, 1,
......@@ -164,6 +168,15 @@ void InitDevices() {
LOG(WARNING)
<< "Compiled with PADDLE_WITH_ASCEND_CL, but no NPU found in runtime.";
}
#endif
#ifdef PADDLE_WITH_IPU
try {
// use user specified IPUs.
devices = platform::GetSelectedIPUDevices();
} catch (const std::exception &exp) {
LOG(WARNING)
<< "Compiled with PADDLE_WITH_IPU, but no IPU found in runtime.";
}
#endif
InitDevices(devices);
}
......@@ -185,6 +198,9 @@ void InitDevices(const std::vector<int> devices) {
#ifdef PADDLE_WITH_XPU
places.emplace_back(platform::XPUPlace(devices[i]));
#endif
#ifdef PADDLE_WITH_IPU
places.emplace_back(platform::IPUPlace(devices[i]));
#endif
#ifdef PADDLE_WITH_ASCEND_CL
places.emplace_back(platform::NPUPlace(devices[i]));
#endif
......
......@@ -16,6 +16,9 @@ endif()
if (WITH_GPU)
set(PYBIND_DEPS ${PYBIND_DEPS} cuda_profiler)
endif()
if (WITH_IPU)
set(PYBIND_DEPS ${PYBIND_DEPS} ipu_info)
endif()
if (WITH_NCCL OR WITH_RCCL)
set(PYBIND_DEPS ${PYBIND_DEPS} nccl_wrapper)
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册