PaddlePaddle / Paddle
Commit 907433a7 (unverified)
Authored on Mar 16, 2023 by Huang Jiyi; committed via GitHub on Mar 16, 2023
Parent: 3f3372b6

[phi decoupling] remove fluid gpu_info usage in phi (#51699)

* remove fluid thread_data_registry
* update
* fix bug
Showing 12 changed files with 79 additions and 204 deletions (+79 −204)
paddle/fluid/framework/new_executor/workqueue/thread_data_registry.h   +0   −177
paddle/fluid/memory/stats.h                                            +2   −2
paddle/fluid/platform/device/gpu/gpu_info.cc                           +3   −16
paddle/fluid/platform/device/gpu/gpu_info.h                            +2   −1
paddle/fluid/platform/init.cc                                          +4   −0
paddle/fluid/platform/profiler/host_event_recorder.h                   +0   −1
paddle/phi/backends/gpu/gpu_info.cc                                    +27  −0
paddle/phi/backends/gpu/gpu_info.h                                     +5   −4
paddle/phi/common/memory_utils.cc                                      +7   −0
paddle/phi/common/memory_utils.h                                       +28  −0
paddle/phi/kernels/gpudnn/conv_gpudnn_base.h                           +1   −2
paddle/phi/kernels/gpudnn/conv_grad_kernel.cu                          +0   −1
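In outline, the commit inverts a dependency: the implementation of GpuAvailableMemToAlloc moves from fluid into phi, and the one call phi still needs from the fluid layer, GpuMemoryUsage, is reached through a function pointer that fluid registers into phi's MemoryInterface during InitMemoryMethod, so no phi source has to include a fluid header. A minimal, self-contained sketch of that injection pattern (hypothetical names, not code from this repository):

// Sketch of the function-pointer injection used by this commit
// (hypothetical names; the real structs are phi::MemoryInterface
// and phi::MemoryUtils).
#include <cstddef>
#include <cstdio>

// Lower layer: declares a slot, never includes upper-layer headers.
struct Hooks {
  void (*gpu_memory_usage)(size_t* available, size_t* total) = nullptr;
};

Hooks& hooks() {
  static Hooks h;
  return h;
}

size_t AvailableToAlloc() {
  size_t available = 0, total = 0;
  hooks().gpu_memory_usage(&available, &total);  // dispatch upward
  size_t reserving = static_cast<size_t>(0.05f * available);  // reserve 5%
  return available > reserving ? available - reserving : 0;
}

// Upper layer: supplies the implementation at init time.
void QueryGpuMemory(size_t* available, size_t* total) {
  *available = 6ull << 30;  // stand-in for a real cudaMemGetInfo query
  *total = 8ull << 30;
}

int main() {
  hooks().gpu_memory_usage = QueryGpuMemory;  // registration, as in init.cc
  std::printf("%zu MiB available to alloc\n", AvailableToAlloc() >> 20);
}

The per-file diffs follow.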
paddle/fluid/framework/new_executor/workqueue/thread_data_registry.h
deleted (100644 → 0)
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <functional>
#include <memory>
#include <mutex>
#include <shared_mutex>
#include <thread>
#include <type_traits>
#include <unordered_map>
namespace paddle {
namespace framework {

template <typename T>
class ThreadDataRegistry {
 public:
  // Singleton
  static ThreadDataRegistry& GetInstance() {
    static ThreadDataRegistry instance;
    return instance;
  }

  T* GetMutableCurrentThreadData() { return &CurrentThreadData(); }

  const T& GetCurrentThreadData() { return CurrentThreadData(); }

  template <typename Alias = T,
            typename = std::enable_if_t<std::is_copy_assignable<Alias>::value>>
  void SetCurrentThreadData(const T& val) {
    CurrentThreadData() = val;
  }

  // Returns current snapshot of all threads. Make sure there is no thread
  // create/destory when using it.
  template <
      typename Alias = T,
      typename = std::enable_if_t<std::is_copy_constructible<Alias>::value>>
  std::unordered_map<uint64_t, T> GetAllThreadDataByValue() {
    return impl_->GetAllThreadDataByValue();
  }

  // Returns current snapshot of all threads. Make sure there is no thread
  // create/destory when using it.
  std::unordered_map<uint64_t, std::reference_wrapper<T>>
  GetAllThreadDataByRef() {
    return impl_->GetAllThreadDataByRef();
  }

 private:
  // types
  // Lock types
#if defined(__clang__) || defined(__GNUC__)  // CLANG or GCC
#ifndef __APPLE__
#if __cplusplus >= 201703L
  using LockType = std::shared_mutex;
  using SharedLockGuardType = std::shared_lock<std::shared_mutex>;
#elif __cplusplus >= 201402L
  using LockType = std::shared_timed_mutex;
  using SharedLockGuardType = std::shared_lock<std::shared_timed_mutex>;
#else
  using LockType = std::mutex;
  using SharedLockGuardType = std::lock_guard<std::mutex>;
#endif
// Special case : mac. https://github.com/facebook/react-native/issues/31250
#else
  using LockType = std::mutex;
  using SharedLockGuardType = std::lock_guard<std::mutex>;
#endif
#elif defined(_MSC_VER)  // MSVC
#if _MSVC_LANG >= 201703L
  using LockType = std::shared_mutex;
  using SharedLockGuardType = std::shared_lock<std::shared_mutex>;
#elif _MSVC_LANG >= 201402L
  using LockType = std::shared_timed_mutex;
  using SharedLockGuardType = std::shared_lock<std::shared_timed_mutex>;
#else
  using LockType = std::mutex;
  using SharedLockGuardType = std::lock_guard<std::mutex>;
#endif
#else  // other compilers
  using LockType = std::mutex;
  using SharedLockGuardType = std::lock_guard<std::mutex>;
#endif

  class ThreadDataHolder;
  class ThreadDataRegistryImpl {
   public:
    void RegisterData(uint64_t tid, ThreadDataHolder* tls_obj) {
      std::lock_guard<LockType> guard(lock_);
      tid_map_[tid] = tls_obj;
    }

    void UnregisterData(uint64_t tid) {
      std::lock_guard<LockType> guard(lock_);
      tid_map_.erase(tid);
    }

    template <
        typename Alias = T,
        typename = std::enable_if_t<std::is_copy_constructible<Alias>::value>>
    std::unordered_map<uint64_t, T> GetAllThreadDataByValue() {
      std::unordered_map<uint64_t, T> data_copy;
      SharedLockGuardType guard(lock_);
      data_copy.reserve(tid_map_.size());
      for (auto& kv : tid_map_) {
        data_copy.emplace(kv.first, kv.second->GetData());
      }
      return data_copy;
    }

    std::unordered_map<uint64_t, std::reference_wrapper<T>>
    GetAllThreadDataByRef() {
      std::unordered_map<uint64_t, std::reference_wrapper<T>> data_ref;
      SharedLockGuardType guard(lock_);
      data_ref.reserve(tid_map_.size());
      for (auto& kv : tid_map_) {
        data_ref.emplace(kv.first, std::ref(kv.second->GetData()));
      }
      return data_ref;
    }

   private:
    LockType lock_;
    std::unordered_map<uint64_t, ThreadDataHolder*> tid_map_;  // not owned
  };

  class ThreadDataHolder {
   public:
    explicit ThreadDataHolder(
        std::shared_ptr<ThreadDataRegistryImpl> registry) {
      registry_ = std::move(registry);
      tid_ = std::hash<std::thread::id>()(std::this_thread::get_id());
      registry_->RegisterData(tid_, this);
    }

    ~ThreadDataHolder() { registry_->UnregisterData(tid_); }

    T& GetData() { return data_; }

   private:
    std::shared_ptr<ThreadDataRegistryImpl> registry_;
    uint64_t tid_;
    T data_;
  };

  // methods
  ThreadDataRegistry() { impl_ = std::make_shared<ThreadDataRegistryImpl>(); }

  ThreadDataRegistry(const ThreadDataRegistry&) = delete;

  ThreadDataRegistry& operator=(const ThreadDataRegistry&) = delete;

  T& CurrentThreadData() {
    static thread_local ThreadDataHolder thread_data(impl_);
    return thread_data.GetData();
  }

  // data
  std::shared_ptr<ThreadDataRegistryImpl> impl_;
};

}  // namespace framework
}  // namespace paddle
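For context, this is roughly how call sites such as the memory statistics in paddle/fluid/memory/stats.h used the registry; a minimal sketch (illustrative only, not code from the repository):

#include <cstdint>

// Hypothetical per-thread counter, aggregated across threads on demand.
struct Counter {
  int64_t value{0};
};

void OnAlloc(int64_t bytes) {
  // First touch on a thread constructs a thread_local holder and registers
  // it (one lock); every later access is plain thread-local data.
  paddle::framework::ThreadDataRegistry<Counter>::GetInstance()
      .GetMutableCurrentThreadData()
      ->value += bytes;
}

int64_t TotalAllocated() {
  int64_t sum = 0;
  // Copies each thread's Counter under the shared lock.
  for (const auto& kv :
       paddle::framework::ThreadDataRegistry<Counter>::GetInstance()
           .GetAllThreadDataByValue()) {
    sum += kv.second.value;
  }
  return sum;
}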
paddle/fluid/memory/stats.h
@@ -18,15 +18,15 @@ limitations under the License. */
 #include <map>
 #include <string>
 
-#include "paddle/fluid/framework/new_executor/workqueue/thread_data_registry.h"
 #include "paddle/fluid/platform/enforce.h"
 #include "paddle/fluid/platform/errors.h"
 #include "paddle/fluid/platform/macros.h"
+#include "paddle/phi/common/thread_data_registry.h"
 
 namespace paddle {
 namespace memory {
 
-using framework::ThreadDataRegistry;
+using phi::ThreadDataRegistry;
 
 struct ThreadLocalStatBase {
   int64_t current{0};
paddle/fluid/platform/device/gpu/gpu_info.cc
@@ -61,8 +61,6 @@ PADDLE_DEFINE_EXPORTED_bool(enable_gpu_memory_usage_log_mb,
                             "Whether to print the message of gpu memory usage "
                             "MB as a unit of measurement.");
 
-constexpr static float fraction_reserve_gpu_memory = 0.05f;
-
 USE_GPU_MEM_STAT;
 namespace paddle {
 namespace platform {
@@ -77,20 +75,7 @@ void GpuMemoryUsage(size_t *available, size_t *total) {
 }
 
 size_t GpuAvailableMemToAlloc() {
-  size_t total = 0;
-  size_t available = 0;
-  GpuMemoryUsage(&available, &total);
-  size_t reserving =
-      static_cast<size_t>(fraction_reserve_gpu_memory * available);
-  // If available size is less than minimum chunk size, no usable memory exists
-  size_t available_to_alloc = available - reserving;
-  size_t min_chunk_size = GpuMinChunkSize();
-  if (available_to_alloc < min_chunk_size) {
-    available_to_alloc = 0;
-  }
-  VLOG(10) << "GPU usage " << (available >> 20) << "M/" << (total >> 20)
-           << "M, " << (available_to_alloc >> 20) << "M available to allocate";
-  return available_to_alloc;
+  return phi::backends::gpu::GpuAvailableMemToAlloc();
 }
 
 size_t GpuMaxAllocSize() {
@@ -124,6 +109,8 @@ size_t GpuInitAllocSize() { return GpuAllocSize(/* realloc = */ false); }
 
 size_t GpuReallocSize() { return GpuAllocSize(/* realloc = */ true); }
 
+size_t GpuMinChunkSize() { return phi::backends::gpu::GpuMinChunkSize(); }
+
 size_t GpuMaxChunkSize() {
   size_t max_chunk_size = GpuMaxAllocSize();
   VLOG(10) << "Max chunk size " << (max_chunk_size >> 20) << "M";
paddle/fluid/platform/device/gpu/gpu_info.h
@@ -82,7 +82,8 @@ size_t GpuInitAllocSize();
 //! Get the re-allocation size of current GPU device.
 size_t GpuReallocSize();
 
-using phi::backends::gpu::GpuMinChunkSize;
+//! Get the minimum chunk size for GPU buddy allocator.
+size_t GpuMinChunkSize();
 
 //! Get the maximum chunk size for GPU buddy allocator.
 size_t GpuMaxChunkSize();
paddle/fluid/platform/init.cc
@@ -21,6 +21,7 @@ limitations under the License. */
 #include "paddle/phi/backends/cpu/cpu_info.h"
 #if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
 #include "paddle/fluid/platform/cuda_device_guard.h"
+#include "paddle/fluid/platform/device/gpu/gpu_info.h"
 #endif
 #ifdef PADDLE_WITH_CUDA
 #include "paddle/fluid/platform/dynload/cupti.h"
@@ -468,6 +469,9 @@ void InitMemoryMethod() {
     memory_method->copy = paddle::memory::Copy<phi::Place, phi::Place>;
     memory_method->device_memory_stat_current_value =
         paddle::memory::DeviceMemoryStatCurrentValue;
+#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
+    memory_method->gpu_memory_usage = paddle::platform::GpuMemoryUsage;
+#endif
     memory_utils.Init(std::move(memory_method));
   });
 }
paddle/fluid/platform/profiler/host_event_recorder.h
@@ -18,7 +18,6 @@
 #include <type_traits>
 #include <vector>
 
-#include "paddle/fluid/framework/new_executor/workqueue/thread_data_registry.h"
 #include "paddle/fluid/platform/macros.h"
 #include "paddle/fluid/platform/os_info.h"
 #include "paddle/phi/api/profiler/host_event_recorder.h"
paddle/phi/backends/gpu/gpu_info.cc
@@ -18,6 +18,9 @@ limitations under the License. */
 #include <vector>
 
 #include "gflags/gflags.h"
 #include "glog/logging.h"
+#include "paddle/phi/common/memory_utils.h"
 
 DECLARE_string(selected_gpus);
@@ -56,6 +59,30 @@ std::vector<int> GetSelectedDevices() {
   return devices;
 }
 
+constexpr static float fraction_reserve_gpu_memory = 0.05f;
+
+size_t GpuAvailableMemToAlloc() {
+  size_t total = 0;
+  size_t available = 0;
+  memory_utils::GpuMemoryUsage(&available, &total);
+  size_t reserving =
+      static_cast<size_t>(fraction_reserve_gpu_memory * available);
+  // If available size is less than minimum chunk size, no usable memory exists
+  size_t available_to_alloc = available - reserving;
+  size_t min_chunk_size = GpuMinChunkSize();
+  if (available_to_alloc < min_chunk_size) {
+    available_to_alloc = 0;
+  }
+  VLOG(10) << "GPU usage " << (available >> 20) << "M/" << (total >> 20)
+           << "M, " << (available_to_alloc >> 20) << "M available to allocate";
+  return available_to_alloc;
+}
+
+size_t GpuMinChunkSize() {
+  // Allow to allocate the minimum chunk size is 256 bytes.
+  return 1 << 8;
+}
+
 }  // namespace gpu
 }  // namespace backends
 }  // namespace phi
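For concreteness, the reservation arithmetic above with hypothetical numbers:

size_t available = 8ull << 30;                              // 8 GiB reported free
size_t reserving = static_cast<size_t>(0.05f * available);  // ~410 MiB held back
size_t available_to_alloc = available - reserving;          // ~7.6 GiB
// 7.6 GiB is far above the 256-byte minimum chunk, so it is returned as-is;
// only a nearly exhausted device makes the function return 0.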
paddle/phi/backends/gpu/gpu_info.h
@@ -70,11 +70,12 @@ const gpuDeviceProp &GetDeviceProperties(int id);
 //! Set the GPU device id for next execution.
 void SetDeviceId(int device_id);
 
+//! Get the available memory to allocate, which is the size of available gpu
+//! minus reserving.
+size_t GpuAvailableMemToAlloc();
+
 //! Get the minimum chunk size for GPU buddy allocator.
-inline size_t GpuMinChunkSize() {
-  // Allow to allocate the minimum chunk size is 256 bytes.
-  return 1 << 8;
-}
+size_t GpuMinChunkSize();
 
 //! Copy memory from address src to dst asynchronously.
 void GpuMemcpyAsync(void *dst,
paddle/phi/common/memory_utils.cc
@@ -68,6 +68,13 @@ int64_t DeviceMemoryStatCurrentValue(const std::string& stat_type, int dev_id) {
   return MemoryUtils::Instance().DeviceMemoryStatCurrentValue(stat_type,
                                                               dev_id);
 }
 
+#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
+void GpuMemoryUsage(size_t* available, size_t* total) {
+  return MemoryUtils::Instance().GpuMemoryUsage(available, total);
+}
+#endif
+
 }  // namespace memory_utils
 }  // namespace phi
paddle/phi/common/memory_utils.h
@@ -113,6 +113,16 @@ struct MemoryInterface {
    */
   int64_t (*device_memory_stat_current_value)(const std::string& stat_type,
                                               int dev_id);
+
+#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
+  /**
+   * @brief get the memory usage of current GPU device.
+   *
+   * @param[size_t] available  device available memory to alloc
+   * @param[size_t] total      device total memory
+   */
+  void (*gpu_memory_usage)(size_t* available, size_t* total);
+#endif
 };
 
 class MemoryUtils {
@@ -234,6 +244,18 @@ class MemoryUtils {
     return memory_method_->device_memory_stat_current_value(stat_type, dev_id);
   }
 
+#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
+  void GpuMemoryUsage(size_t* available, size_t* total) {
+    CheckMemoryMethod();
+    PADDLE_ENFORCE_NOT_NULL(
+        memory_method_->gpu_memory_usage,
+        phi::errors::Unavailable(
+            "gpu_memory_usage method in memory_method_ is not initiazed "
+            "yet. You need init it first."));
+    return memory_method_->gpu_memory_usage(available, total);
+  }
+#endif
+
   void CheckMemoryMethod() {
     PADDLE_ENFORCE_NE(memory_method_.get(),
@@ -288,7 +310,13 @@ void Copy(const Place& dst_place,
           const Place& src_place,
           const void* src,
           size_t num);
 
 int64_t DeviceMemoryStatCurrentValue(const std::string& stat_type, int dev_id);
 
+#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
+void GpuMemoryUsage(size_t* available, size_t* total);
+#endif
+
 }  // namespace memory_utils
 }  // namespace phi
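Putting this together with the registration in paddle/fluid/platform/init.cc above, a phi-side query now resolves through the injected slot rather than a direct fluid call; a sketch of the chain (fluid-side body elided):

// phi::backends::gpu::GpuAvailableMemToAlloc()
//   -> phi::memory_utils::GpuMemoryUsage(&available, &total)
//     -> MemoryUtils::Instance().GpuMemoryUsage(available, total)
//       -> memory_method_->gpu_memory_usage(available, total)
//         -> paddle::platform::GpuMemoryUsage(...)  // registered in init.cc
size_t available = 0;
size_t total = 0;
phi::memory_utils::GpuMemoryUsage(&available, &total);

On CPU-only builds the slot is never filled, which is why the wrapper above guards the dispatch with PADDLE_ENFORCE_NOT_NULL behind the CUDA/HIP ifdef.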
paddle/phi/kernels/gpudnn/conv_gpudnn_base.h
@@ -20,7 +20,6 @@ limitations under the License. */
 #include <string>
 #include <vector>
 
-#include "paddle/fluid/platform/device/gpu/gpu_info.h"
 #include "paddle/phi/backends/gpu/gpu_context.h"
 #include "paddle/phi/common/memory_utils.h"
 #include "paddle/phi/kernels/autotune/cache.h"
@@ -53,7 +52,7 @@ static size_t CalcWorkspaceLimitInBytes(bool use_fixed_workspace) {
       memory_utils::DeviceMemoryStatCurrentValue("Allocated", device_id);
   int64_t reserved =
       memory_utils::DeviceMemoryStatCurrentValue("Reserved", device_id);
-  int64_t availble = paddle::platform::GpuAvailableMemToAlloc();
+  int64_t availble = phi::backends::gpu::GpuAvailableMemToAlloc();
   VLOG(3) << "[memory] allocated=" << ToMegaBytes(allocated)
           << " MB, reserved=" << ToMegaBytes(reserved)
           << " MB, available_to_alloc=" << ToMegaBytes(availble) << " MB.";
paddle/phi/kernels/gpudnn/conv_grad_kernel.cu
@@ -627,7 +627,6 @@ void ConvCudnnGradKernel(const Context& ctx,
       compute_format == phi::backends::gpu::DataLayout::kNHWC
           ? phi::backends::gpu::DataLayout::kNHWC
           : phi::backends::gpu::DataLayout::kNCHW;
-  // TODO(phlrain): replace paddle::platform::DataLaytout to phi::DataLayout
   if (transformed_input.dims().size() == 5) {
     layout = compute_format == phi::backends::gpu::DataLayout::kNHWC
                  ? phi::backends::gpu::DataLayout::kNDHWC