// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#pragma once

#include <array>
#include <atomic>
#include <functional>
#include <map>
#include <memory>
#include <mutex>

#include "paddle/fluid/platform/macros.h"
#include "paddle/phi/api/include/tensor.h"
#include "paddle/phi/backends/cpu/forwards.h"
#include "paddle/phi/common/place.h"
#include "unsupported/Eigen/CXX11/Tensor"

#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
#include "paddle/fluid/platform/device/gpu/gpu_types.h"
#include "paddle/phi/backends/gpu/forwards.h"
#include "paddle/phi/backends/gpu/gpu_decls.h"
#include "paddle/phi/backends/gpu/gpu_resources.h"
#endif

namespace paddle {
namespace internal {
class EigenGpuStreamDevice;
}  // namespace internal

class CPUContextResource {
 public:
  CPUContextResource();
  Eigen::DefaultDevice* GetCPUEigenDevice() const;

 private:
  void InitCPUResource();

 private:
  std::unique_ptr<Eigen::DefaultDevice> cpu_eigen_device_;
};

#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
class GPUContextResource {
 public:
  explicit GPUContextResource(const phi::Place& place, void* stream);
  ~GPUContextResource();
  phi::Place Place() const;

  std::function<dnnHandle_t()> GetDnnHandleCreator();
  std::function<blasHandle_t()> GetBlasHandleCreator();
  std::function<blasHandle_t()> GetBlasTensorCoreHandleCreator();
  std::function<blasHandle_t()> GetBlasTF32TensorCoreHandleCreator();
  std::function<blasLtHandle_t()> GetBlasLtHandleCreator();
  std::function<phi::solverHandle_t()> GetSolverDnHandleCreator();
  std::function<phi::sparseHandle_t()> GetSparseHandleCreator();
  std::function<Eigen::GpuDevice*()> GetGpuEigenDeviceCreator();

  gpuStream_t GetStream() const;
  dnnHandle_t GetDnnHandle() const;
  blasHandle_t GetBlasHandle() const;
  blasHandle_t GetBlasTensorCoreHandle() const;
  blasHandle_t GetBlasTF32Handle() const;
  blasLtHandle_t GetBlasLtHandle() const;
  phi::solverHandle_t GetSolverDnHandle() const;
  phi::sparseHandle_t GetSparseHandle() const;
  Eigen::GpuDevice* GetGpuEigenDevice() const;
  int GetGpuComputeCapability() const;
  int GetGpuRuntimeVersion() const;
  int GetGpuDriverVersion() const;
  int GetGPUMultiProcessors() const;
  int GetGpuMaxThreadsPerMp() const;
  int GetGpuMaxThreadsPerBlock() const;
  std::array<int, 3> GetGpuMaxGridDimSize() const;

  // If the stream changes, we need to rebind all handles to the new stream.
  void ReBindStream(gpuStream_t stream);
  void ReBindDnnHandle(gpuStream_t stream) const;
  void ReBindBlasHandle(gpuStream_t stream) const;
  void ReBindBlasTensorCoreHandle(gpuStream_t stream) const;
  void ReBindBlasTF32Handle(gpuStream_t stream) const;
  void ReBindSolverDnHandle(gpuStream_t stream) const;
  void ReBindSparseHandle(gpuStream_t stream) const;
  void ReBindEigenDevice(gpuStream_t stream, GPUPlace place) const;

 private:
  void InitGPUResource(void* stream);
  void DestroyGPUResource();
  void InitGpuProperties();
  void InitGpuEigenDevice();
  void InitDnnHanlde();
  void DestroyDnnHandle();
  void DestroyBlasHandle();
  void InitBlasLtHandle();
  void DestroyBlasLtHandle();
  void InitSolverHandle();
  void DestroySolverHandle();
  void InitSparseHandle();
  void DestroySparseHandle();

 private:
  phi::Place place_;

  int compute_capability_;
  int runtime_version_;
  int driver_version_;
  int multi_process_;
  int max_threads_per_mp_;
  int max_threads_per_block_;
  std::array<int, 3> max_grid_dim_size_;

  bool owned_stream_{true};
  gpuStream_t stream_;
  std::unique_ptr<Eigen::GpuDevice> gpu_eigen_device_;
  std::unique_ptr<internal::EigenGpuStreamDevice> eigen_stream_;

  blasHandle_t blas_handle_{nullptr};
  blasHandle_t blas_tensor_core_handle_{nullptr};
  blasHandle_t blas_tf32_tensor_core_handle_{nullptr};
  blasLtHandle_t blaslt_handle_{nullptr};
  dnnHandle_t dnn_handle_{nullptr};
  phi::solverHandle_t solver_handle_{nullptr};
  phi::sparseHandle_t sparse_handle_{nullptr};
  // DnnWorkspaceHandle
};
#endif

#if defined(PADDLE_WITH_XPU)
class XPUContextResource {
 public:
  explicit XPUContextResource(const phi::Place& place, void* stream);
  ~XPUContextResource();
  phi::Place Place() const;
  void* GetStream() const;
  int GetDriverVersion() const;
  int GetRuntimeVersion() const;
  int GetXpuVersion() const;
  void ReBindStream(void* stream);

 private:
  void InitXPUResource(void* stream);
  void InitXpuProperties();

 private:
  bool owned_stream_{true};
  void* stream_;
  phi::Place place_;
  int driver_version_;
  int runtime_version_;
  int xpu_version_;
};  // class XPUContextResource
#endif

class ResourceManager {
 public:
  ResourceManager() = default;
  static ResourceManager& Instance() {
    static ResourceManager* resource_manager = new ResourceManager;
    return *resource_manager;
  }

  // CPU Resource
 public:
  void InitCPUResource();
  CPUContextResource* GetCPUResource() const;

 private:
  std::mutex cpu_mutex_;
  std::unique_ptr<CPUContextResource> cpu_resource_{nullptr};

// GPU Resource
#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
 public:
  void* InitGPUResource(const phi::Place& place, void* stream);
  void DestroyGPUResource(void* stream);
  GPUContextResource* GetGPUResource(void* stream) const;
  int RefCount(void* stream) const;
  void GpuResourceReBindStream(void* old_stream, void* new_stream);

 private:
  void Decrease(void* stream);
  void Increase(void* stream);

 private:
  std::mutex gpu_mutex_;
  // Each stream corresponds to its own set of resources.
  std::map<void* /*stream*/, std::atomic<int>> ref_count_;
  std::map<void* /*stream*/, std::unique_ptr<GPUContextResource>>
      gpu_resources_;
#endif

// XPU Resource
#if defined(PADDLE_WITH_XPU)
 public:
  void* InitXPUResource(const phi::Place& place, void* stream);
  void DestroyXPUResource(void* stream);
  XPUContextResource* GetXPUResource(void* stream) const;
  int RefCount(void* stream) const;
  void XpuResourceReBindStream(void* old_stream, void* new_stream);

 private:
  void Decrease(void* stream);
  void Increase(void* stream);

 private:
  std::mutex xpu_mutex_;
  // Each stream corresponds to its own set of resources.
  std::map<void* /*stream*/, std::atomic<int>> ref_count_;
  std::map<void* /*stream*/, std::unique_ptr<XPUContextResource>>
      xpu_resources_;
#endif

 private:
  DISABLE_COPY_AND_ASSIGN(ResourceManager);
};

}  // namespace paddle
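
// Usage sketch (illustrative only, kept as a comment): it assumes a GPU build,
// an externally created CUDA/HIP stream, and that InitGPUResource() returns the
// stream key under which the per-stream resources are ref-counted. The device
// id and the function name below are placeholders, not part of this API.
//
//   #if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
//   void UseGpuResources(void* external_stream) {
//     auto& mgr = paddle::ResourceManager::Instance();
//     // Register (or bump the ref count of) the resources tied to this stream.
//     void* key = mgr.InitGPUResource(phi::GPUPlace(0), external_stream);
//     paddle::GPUContextResource* res = mgr.GetGPUResource(key);
//     // Handles such as res->GetBlasHandle() / res->GetDnnHandle() are bound
//     // to that stream; if the stream changes, the handles must be rebound
//     // (see GpuResourceReBindStream / ReBindStream above).
//     mgr.DestroyGPUResource(key);  // Drops one reference to the resources.
//   }
//   #endif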