Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
PaddlePaddle
Paddle
提交
37f76407
P
Paddle
项目概览
PaddlePaddle
/
Paddle
大约 1 年 前同步成功
通知
2299
Star
20931
Fork
5422
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
1423
列表
看板
标记
里程碑
合并请求
543
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
1,423
Issue
1,423
列表
看板
标记
里程碑
合并请求
543
合并请求
543
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
未验证
提交
37f76407
编写于
9月 24, 2019
作者:
Z
Zeng Jinle
提交者:
GitHub
9月 24, 2019
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
fix cuda dev_ctx allocator cmake deps, test=develop (#19953)
上级
ebff68fa
变更
12
隐藏空白更改
内联
并排
Showing
12 changed file
with
159 addition
and
308 deletion
+159
-308
paddle/fluid/memory/CMakeLists.txt
paddle/fluid/memory/CMakeLists.txt
+0
-2
paddle/fluid/memory/allocation/CMakeLists.txt
paddle/fluid/memory/allocation/CMakeLists.txt
+0
-6
paddle/fluid/memory/allocation/cuda_device_context_allocation.cc
...fluid/memory/allocation/cuda_device_context_allocation.cc
+0
-47
paddle/fluid/memory/allocation/cuda_device_context_allocation.h
.../fluid/memory/allocation/cuda_device_context_allocation.h
+0
-42
paddle/fluid/memory/allocation/cuda_device_context_allocator.cc
.../fluid/memory/allocation/cuda_device_context_allocator.cc
+0
-66
paddle/fluid/memory/allocation/cuda_device_context_allocator.h
...e/fluid/memory/allocation/cuda_device_context_allocator.h
+120
-4
paddle/fluid/memory/allocation/cuda_device_context_allocator_pool.cc
...d/memory/allocation/cuda_device_context_allocator_pool.cc
+0
-59
paddle/fluid/memory/allocation/cuda_device_context_allocator_pool.h
...id/memory/allocation/cuda_device_context_allocator_pool.h
+0
-48
paddle/fluid/memory/malloc.cc
paddle/fluid/memory/malloc.cc
+0
-25
paddle/fluid/platform/CMakeLists.txt
paddle/fluid/platform/CMakeLists.txt
+1
-1
paddle/fluid/platform/device_context.cc
paddle/fluid/platform/device_context.cc
+37
-0
paddle/fluid/platform/device_context.h
paddle/fluid/platform/device_context.h
+1
-8
未找到文件。
paddle/fluid/memory/CMakeLists.txt
浏览文件 @
37f76407
...
...
@@ -17,8 +17,6 @@ cc_library(memory
memcpy
)
if
(
WITH_GPU
)
add_dependencies
(
malloc cuda_device_context_allocator_pool
)
target_link_libraries
(
malloc cuda_device_context_allocator_pool
)
nv_test
(
malloc_test
SRCS malloc_test.cu
DEPS device_context malloc
)
...
...
paddle/fluid/memory/allocation/CMakeLists.txt
浏览文件 @
37f76407
...
...
@@ -14,12 +14,6 @@ endif()
# CUDA allocator libraries; these targets are only declared when WITH_GPU is set.
if(WITH_GPU)
  nv_library(cuda_allocator
    SRCS cuda_allocator.cc
    DEPS allocator cuda_device_guard)
  nv_library(cuda_device_context_allocation
    SRCS cuda_device_context_allocation.cc
    DEPS allocator enforce place ${MKLDNN_CTX_DEPS})
  nv_library(cuda_device_context_allocator
    SRCS cuda_device_context_allocator.cc
    DEPS allocator enforce place cuda_device_context_allocation ${MKLDNN_CTX_DEPS})
  nv_library(cuda_device_context_allocator_pool
    SRCS cuda_device_context_allocator_pool.cc
    DEPS allocator enforce place cuda_device_context_allocation cuda_device_context_allocator ${MKLDNN_CTX_DEPS})
endif()
cc_library
(
retry_allocator SRCS retry_allocator.cc DEPS allocator
)
...
...
paddle/fluid/memory/allocation/cuda_device_context_allocation.cc
已删除
100644 → 0
浏览文件 @
ebff68fa
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/fluid/memory/allocation/cuda_device_context_allocation.h"
#include <utility>
#include "paddle/fluid/platform/enforce.h"
namespace
paddle
{
namespace
memory
{
namespace
allocation
{
// Wraps `allocation`. The Allocation base is initialised from the wrapped
// allocation's ptr(), size() and place(); ownership of the wrapped allocation
// is then moved into underlying_allocation_.
CUDADeviceContextAllocation::CUDADeviceContextAllocation(
    AllocationPtr allocation)
    : Allocation(allocation->ptr(), allocation->size(), allocation->place()),
      underlying_allocation_(std::move(allocation)) {}
// Instead of freeing the wrapped allocation directly, releases it from
// underlying_allocation_ and registers a stream callback on dev_ctx_ that
// runs AllocationDeleter on it.
//
// NOTE(review): PADDLE_ENFORCE_NOT_NULL presumably throws when dev_ctx_ is
// null; doing so from a destructor would terminate the program - confirm.
CUDADeviceContextAllocation::~CUDADeviceContextAllocation() {
  PADDLE_ENFORCE_NOT_NULL(
      dev_ctx_, "Didn't set device context for CUDADeviceContextAllocation");

  // From here on the raw pointer is owned by the callback registered below.
  auto *deferred = underlying_allocation_.release();
  VLOG(4) << "Adding callback to delete CUDADeviceContextAllocation at "
          << deferred;

  dev_ctx_->AddStreamCallback([deferred] {
    VLOG(4) << "Delete CUDADeviceContextAllocation at " << deferred;
    AllocationDeleter()(deferred);
  });
}
// Stores the device context pointer that the destructor later uses to
// register its stream callback. The pointer is stored as-is (not owned).
void CUDADeviceContextAllocation::SetCUDADeviceContext(
    const platform::CUDADeviceContext *dev_ctx) {
  dev_ctx_ = dev_ctx;
}
}
// namespace allocation
}
// namespace memory
}
// namespace paddle
paddle/fluid/memory/allocation/cuda_device_context_allocation.h
已删除
100644 → 0
浏览文件 @
ebff68fa
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include "paddle/fluid/memory/allocation/allocator.h"
#include "paddle/fluid/platform/device_context.h"
namespace
paddle
{
namespace
memory
{
namespace
allocation
{
/**
* CUDADeviceContextAllocation is a wrapper of the underlying allocation.
* CUDADeviceContextAllocation adds a CUDA stream callback for the underlying
* allocation so that it can be used on a CUDA stream, with the underlying
* allocation being deleted in the callback.
*/
// Allocation subclass that owns another allocation (underlying_allocation_)
// and holds a pointer to a CUDADeviceContext set via SetCUDADeviceContext().
class CUDADeviceContextAllocation : public Allocation {
 public:
  // Takes ownership of `allocation`.
  explicit CUDADeviceContextAllocation(AllocationPtr allocation);

  ~CUDADeviceContextAllocation();

  // Sets the device context pointer kept in dev_ctx_.
  void SetCUDADeviceContext(const platform::CUDADeviceContext *dev_ctx);

 private:
  // The wrapped allocation.
  AllocationPtr underlying_allocation_;

  // Null until SetCUDADeviceContext() is called.
  const platform::CUDADeviceContext *dev_ctx_ = nullptr;
};
}
// namespace allocation
}
// namespace memory
}
// namespace paddle
paddle/fluid/memory/allocation/cuda_device_context_allocator.cc
已删除
100644 → 0
浏览文件 @
ebff68fa
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/fluid/memory/allocation/cuda_device_context_allocator.h"
#include "paddle/fluid/memory/allocation/cuda_device_context_allocation.h"
#include "paddle/fluid/platform/cuda_device_guard.h"
#include "paddle/fluid/platform/enforce.h"
namespace
paddle
{
namespace
memory
{
namespace
allocation
{
// Remembers the place and the default stream, then creates event_ (with
// timing disabled) while a CUDADeviceGuard for place_.device is active.
CUDADeviceContextAllocator::CUDADeviceContextAllocator(
    const platform::CUDAPlace place, cudaStream_t default_stream)
    : place_(place), default_stream_(default_stream) {
  platform::CUDADeviceGuard guard(place_.device);
  PADDLE_ENFORCE_CUDA_SUCCESS(
      cudaEventCreate(&event_, cudaEventDisableTiming),
      "Create event failed in CUDADeviceContextAllocator");
}
// Destroys event_ (if one was created) under a CUDADeviceGuard for
// place_.device.
CUDADeviceContextAllocator::~CUDADeviceContextAllocator() {
  if (!event_) {
    return;  // nothing to destroy
  }
  platform::CUDADeviceGuard guard(place_.device);
  PADDLE_ENFORCE_CUDA_SUCCESS(
      cudaEventDestroy(event_),
      "Destory event failed in CUDADeviceContextAllocator destroctor");
}
// Allocates `size` bytes on place_ through memory::Alloc, wraps the result in
// a new CUDADeviceContextAllocation and returns it as a raw pointer. After
// allocating, event_ is recorded on default_stream_ and default_stream_ is
// made to wait on that event.
//
// NOTE(review): the wrapper is created with a raw `new` before the two
// enforce calls; if either of them fails the pointer is not released here -
// confirm whether that path can leak.
Allocation *CUDADeviceContextAllocator::AllocateImpl(size_t size) {
  PADDLE_ENFORCE_NOT_NULL(
      default_stream_,
      "Didn't set default stream for CUDADeviceContextAllocator");
  platform::CUDADeviceGuard guard(place_.device);

  auto wrapped = new CUDADeviceContextAllocation(memory::Alloc(place_, size));

  // Wait for the event on stream
  PADDLE_ENFORCE_CUDA_SUCCESS(
      cudaEventRecord(event_, default_stream_),
      "Failed to record event in CUDADeviceContextAllocator");
  PADDLE_ENFORCE_CUDA_SUCCESS(
      cudaStreamWaitEvent(default_stream_, event_, 0),
      "Failed to wait event in CUDADeviceContextAllocator");

  return wrapped;
}
// Frees an allocation by deleting it.
void CUDADeviceContextAllocator::FreeImpl(Allocation *allocation) {
  delete allocation;
}
}
// namespace allocation
}
// namespace memory
}
// namespace paddle
paddle/fluid/memory/allocation/cuda_device_context_allocator.h
浏览文件 @
37f76407
...
...
@@ -15,15 +15,58 @@
#pragma once
#include <cuda_runtime.h>
#include <map>
#include <memory>
#include <utility>
#include <vector>
#include "paddle/fluid/memory/allocation/allocator.h"
#include "paddle/fluid/platform/cuda_device_guard.h"
#include "paddle/fluid/platform/device_context.h"
#include "paddle/fluid/platform/place.h"
namespace
paddle
{
namespace
platform
{
class
CUDADeviceContext
;
}
// namespace platform
namespace
memory
{
namespace
allocation
{
/**
* CUDADeviceContextAllocation is a wrapper of the underlying allocation.
* CUDADeviceContextAllocation adds a CUDA stream callback for the underlying
* allocation so that it can be used on a CUDA stream, with the underlying
* allocation being deleted in the callback.
*/
class
CUDADeviceContextAllocation
:
public
Allocation
{
public:
explicit
CUDADeviceContextAllocation
(
AllocationPtr
allocation
)
:
Allocation
(
allocation
->
ptr
(),
allocation
->
size
(),
allocation
->
place
()),
underlying_allocation_
(
std
::
move
(
allocation
))
{}
~
CUDADeviceContextAllocation
()
{
PADDLE_ENFORCE_NOT_NULL
(
dev_ctx_
,
"Didn't set device context for CUDADeviceContextAllocation"
);
auto
*
p_allocation
=
underlying_allocation_
.
release
();
VLOG
(
4
)
<<
"Adding callback to delete CUDADeviceContextAllocation at "
<<
p_allocation
;
dev_ctx_
->
AddStreamCallback
([
p_allocation
]
{
VLOG
(
4
)
<<
"Delete CUDADeviceContextAllocation at "
<<
p_allocation
;
AllocationDeleter
()(
p_allocation
);
});
}
void
SetCUDADeviceContext
(
const
platform
::
CUDADeviceContext
*
dev_ctx
)
{
dev_ctx_
=
dev_ctx
;
}
private:
AllocationPtr
underlying_allocation_
;
const
platform
::
CUDADeviceContext
*
dev_ctx_
{
nullptr
};
};
/**
* CUDADeviceContextAllocator will allocate a CUDADeviceContextAllocation
* after waiting for a self-created event on the default stream. It does so to
...
...
@@ -33,12 +76,42 @@ namespace allocation {
class
CUDADeviceContextAllocator
:
public
Allocator
{
public:
explicit
CUDADeviceContextAllocator
(
platform
::
CUDAPlace
place
,
cudaStream_t
default_stream
);
~
CUDADeviceContextAllocator
();
cudaStream_t
default_stream
)
:
place_
(
place
),
default_stream_
(
default_stream
)
{
platform
::
CUDADeviceGuard
guard
(
place_
.
device
);
PADDLE_ENFORCE_CUDA_SUCCESS
(
cudaEventCreate
(
&
event_
,
cudaEventDisableTiming
),
"Create event failed in CUDADeviceContextAllocator"
);
}
~
CUDADeviceContextAllocator
()
{
if
(
event_
)
{
platform
::
CUDADeviceGuard
guard
(
place_
.
device
);
PADDLE_ENFORCE_CUDA_SUCCESS
(
cudaEventDestroy
(
event_
),
"Destory event failed in CUDADeviceContextAllocator destroctor"
);
}
}
protected:
Allocation
*
AllocateImpl
(
size_t
size
)
override
;
void
FreeImpl
(
Allocation
*
allocation
)
override
;
Allocation
*
AllocateImpl
(
size_t
size
)
override
{
PADDLE_ENFORCE_NOT_NULL
(
default_stream_
,
"Didn't set default stream for CUDADeviceContextAllocator"
);
platform
::
CUDADeviceGuard
guard
(
place_
.
device
);
auto
allocation
=
new
CUDADeviceContextAllocation
(
memory
::
Alloc
(
place_
,
size
));
// Wait for the event on stream
PADDLE_ENFORCE_CUDA_SUCCESS
(
cudaEventRecord
(
event_
,
default_stream_
),
"Failed to record event in CUDADeviceContextAllocator"
);
PADDLE_ENFORCE_CUDA_SUCCESS
(
cudaStreamWaitEvent
(
default_stream_
,
event_
,
0
),
"Failed to wait event in CUDADeviceContextAllocator"
);
return
allocation
;
}
void
FreeImpl
(
Allocation
*
allocation
)
override
{
delete
allocation
;
}
private:
platform
::
CUDAPlace
place_
;
...
...
@@ -46,6 +119,49 @@ class CUDADeviceContextAllocator : public Allocator {
cudaStream_t
default_stream_
{
nullptr
};
};
/**
* CUDADeviceContextAllocatorPool is a singleton that stores a mapping from
* CUDAPlace(s) to std::shared_ptr<CUDADeviceContextAllocator>. When a
* CUDADeviceContext's compute stream isn't the default stream, it can call
* this class to allocate GPU memory which will be released by a callback
* after stream execution.
*/
// Process-wide table of one CUDADeviceContextAllocator per CUDAPlace,
// populated for every device returned by platform::GetSelectedDevices().
class CUDADeviceContextAllocatorPool {
 public:
  // Returns the single pool instance (function-local static).
  static CUDADeviceContextAllocatorPool &Instance() {
    static CUDADeviceContextAllocatorPool pool;
    return pool;
  }

  // Allocates `size` bytes with the allocator registered for the place of
  // `dev_ctx` and attaches `dev_ctx` to the returned allocation.
  // Enforces that an allocator exists for that place.
  AllocationPtr Alloc(const platform::CUDADeviceContext &dev_ctx,
                      size_t size) {
    // Copy the place out of the temporary returned by GetPlace().
    const auto gpu_place = boost::get<platform::CUDAPlace>(dev_ctx.GetPlace());
    auto entry = allocators_.find(gpu_place);
    PADDLE_ENFORCE_EQ(entry != allocators_.end(), true,
                      "CUDADeviceContextAllocatorPool initialization error");

    AllocationPtr result = entry->second->Allocate(size);
    // The allocators in this pool produce CUDADeviceContextAllocation
    // objects, which need the device context set before they are destroyed.
    auto *ctx_allocation =
        static_cast<CUDADeviceContextAllocation *>(result.get());
    ctx_allocation->SetCUDADeviceContext(&dev_ctx);
    return result;
  }

 private:
  // Builds one allocator per selected device, bound to the stream of the
  // device context that DeviceContextPool holds for that place.
  CUDADeviceContextAllocatorPool() {
    std::vector<int> device_ids = platform::GetSelectedDevices();
    for (int device_id : device_ids) {
      auto place = platform::CUDAPlace(device_id);
      auto compute_stream =
          platform::DeviceContextPool::Instance().GetByPlace(place)->stream();
      auto allocator = std::shared_ptr<CUDADeviceContextAllocator>(
          new CUDADeviceContextAllocator(place, compute_stream));
      allocators_.insert(make_pair(place, allocator));
    }
  }

  // Allocator lookup table keyed by place.
  std::map<platform::CUDAPlace, std::shared_ptr<CUDADeviceContextAllocator>>
      allocators_;
};
}
// namespace allocation
}
// namespace memory
}
// namespace paddle
paddle/fluid/memory/allocation/cuda_device_context_allocator_pool.cc
已删除
100644 → 0
浏览文件 @
ebff68fa
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/fluid/memory/allocation/cuda_device_context_allocator_pool.h"
#include <utility>
#include <vector>
#include "paddle/fluid/memory/allocation/cuda_device_context_allocation.h"
#include "paddle/fluid/memory/allocation/cuda_device_context_allocator.h"
#include "paddle/fluid/platform/enforce.h"
namespace
paddle
{
namespace
memory
{
namespace
allocation
{
// Returns the single pool instance, held in a function-local static.
CUDADeviceContextAllocatorPool &CUDADeviceContextAllocatorPool::Instance() {
  static CUDADeviceContextAllocatorPool pool;
  return pool;
}
// Allocates `size` bytes with the allocator registered for the place of
// `dev_ctx` and attaches `dev_ctx` to the returned allocation.
// Enforces that an allocator exists for that place.
AllocationPtr CUDADeviceContextAllocatorPool::Alloc(
    const platform::CUDADeviceContext &dev_ctx, size_t size) {
  // Copy the place out of the temporary returned by GetPlace().
  const auto gpu_place = boost::get<platform::CUDAPlace>(dev_ctx.GetPlace());
  auto entry = allocators_.find(gpu_place);
  PADDLE_ENFORCE_EQ(entry != allocators_.end(), true,
                    "CUDADeviceContextAllocatorPool initialization error");

  AllocationPtr result = entry->second->Allocate(size);
  // The allocators in this pool produce CUDADeviceContextAllocation objects,
  // which need the device context set before they are destroyed.
  auto *ctx_allocation =
      static_cast<CUDADeviceContextAllocation *>(result.get());
  ctx_allocation->SetCUDADeviceContext(&dev_ctx);
  return result;
}
// Builds one CUDADeviceContextAllocator per device returned by
// platform::GetSelectedDevices(), each bound to the stream of the device
// context that DeviceContextPool holds for that place.
CUDADeviceContextAllocatorPool::CUDADeviceContextAllocatorPool() {
  std::vector<int> device_ids = platform::GetSelectedDevices();
  for (int device_id : device_ids) {
    auto place = platform::CUDAPlace(device_id);
    auto compute_stream =
        platform::DeviceContextPool::Instance().GetByPlace(place)->stream();
    auto allocator = std::shared_ptr<CUDADeviceContextAllocator>(
        new CUDADeviceContextAllocator(place, compute_stream));
    allocators_.insert(make_pair(place, allocator));
  }
}
}
// namespace allocation
}
// namespace memory
}
// namespace paddle
paddle/fluid/memory/allocation/cuda_device_context_allocator_pool.h
已删除
100644 → 0
浏览文件 @
ebff68fa
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <map>
#include <memory>
#include "paddle/fluid/memory/allocation/allocator.h"
#include "paddle/fluid/memory/allocation/cuda_device_context_allocator.h"
#include "paddle/fluid/platform/device_context.h"
#include "paddle/fluid/platform/place.h"
namespace
paddle
{
namespace
memory
{
namespace
allocation
{
/**
* CUDADeviceContextAllocatorPool is a singleton that stores a mapping from
* CUDAPlace(s) to std::shared_ptr<CUDADeviceContextAllocator>. When a
* CUDADeviceContext's compute stream isn't the default stream, it can call
* this class to allocate GPU memory which will be released by a callback
* after stream execution.
*/
// Table of CUDADeviceContextAllocator objects keyed by CUDAPlace, reachable
// only through Instance() because the constructor is private.
class CUDADeviceContextAllocatorPool {
 public:
  // Accessor for the pool object.
  static CUDADeviceContextAllocatorPool &Instance();

  // Allocates `size` bytes on behalf of `dev_ctx`.
  AllocationPtr Alloc(const platform::CUDADeviceContext &dev_ctx,
                      size_t size);

 private:
  CUDADeviceContextAllocatorPool();

  // Allocator lookup table keyed by place.
  std::map<platform::CUDAPlace, std::shared_ptr<CUDADeviceContextAllocator>>
      allocators_;
};
}
// namespace allocation
}
// namespace memory
}
// namespace paddle
paddle/fluid/memory/malloc.cc
浏览文件 @
37f76407
...
...
@@ -17,10 +17,6 @@ limitations under the License. */
#include <vector>
#include "paddle/fluid/memory/allocation/allocator_facade.h"
#include "paddle/fluid/memory/allocation/allocator_strategy.h"
#ifdef PADDLE_WITH_CUDA
#include "paddle/fluid/memory/allocation/cuda_device_context_allocator_pool.h"
#endif
#include "paddle/fluid/platform/device_context.h"
#include "paddle/fluid/platform/place.h"
namespace
paddle
{
...
...
@@ -35,26 +31,5 @@ AllocationPtr Alloc(const platform::Place &place, size_t size) {
return
allocation
::
AllocatorFacade
::
Instance
().
Alloc
(
place
,
size
);
}
// Allocates `size` bytes for the place of `dev_ctx`.
//
// With PADDLE_WITH_CUDA defined, a non-empty request on a GPU place whose
// context uses a different stream than the pooled device context for that
// place is served by CUDADeviceContextAllocatorPool. Every other request is
// forwarded to Alloc(place, size).
AllocationPtr Alloc(const platform::DeviceContext &dev_ctx, size_t size) {
  auto place = dev_ctx.GetPlace();
#ifdef PADDLE_WITH_CUDA
  if (size != 0 && platform::is_gpu_place(place)) {
    auto *pooled_ctx = static_cast<platform::CUDADeviceContext *>(
        platform::DeviceContextPool::Instance().Get(place));
    auto &requested_ctx =
        static_cast<const platform::CUDADeviceContext &>(dev_ctx);
    if (pooled_ctx->stream() != requested_ctx.stream()) {
      return allocation::CUDADeviceContextAllocatorPool::Instance().Alloc(
          requested_ctx, size);
    }
  }
#endif
  return Alloc(place, size);
}
}
// namespace memory
}
// namespace paddle
paddle/fluid/platform/CMakeLists.txt
浏览文件 @
37f76407
...
...
@@ -70,7 +70,7 @@ ENDIF()
# memcpy depends on device_context, here add deps individually for
# avoiding cycle dependencies
cc_library
(
device_context SRCS device_context.cc init.cc DEPS simple_threadpool malloc
${
STREAM_CALLBACK_DEPS
}
cc_library
(
device_context SRCS device_context.cc init.cc DEPS simple_threadpool malloc
xxhash
${
STREAM_CALLBACK_DEPS
}
place eigen3 stringpiece cpu_helper cpu_info framework_proto
${
GPU_CTX_DEPS
}
${
MKLDNN_CTX_DEPS
}
${
dgc_deps
}
)
...
...
paddle/fluid/platform/device_context.cc
浏览文件 @
37f76407
...
...
@@ -18,11 +18,39 @@ limitations under the License. */
#include "paddle/fluid/memory/memory.h"
#ifdef PADDLE_WITH_CUDA
#include "paddle/fluid/framework/rw_lock.h"
#include "paddle/fluid/memory/allocation/cuda_device_context_allocator.h"
#include "paddle/fluid/platform/cuda_device_guard.h"
#endif
#include "glog/logging.h"
namespace
paddle
{
namespace
memory
{
// Allocates `size` bytes for the place of `dev_ctx`.
//
// With PADDLE_WITH_CUDA defined, a non-empty request on a GPU place whose
// context uses a different stream than the pooled device context for that
// place is served by CUDADeviceContextAllocatorPool. Every other request is
// forwarded to Alloc(place, size).
AllocationPtr Alloc(const platform::DeviceContext &dev_ctx, size_t size) {
  auto place = dev_ctx.GetPlace();
#ifdef PADDLE_WITH_CUDA
  if (size != 0 && platform::is_gpu_place(place)) {
    auto *pooled_ctx = static_cast<platform::CUDADeviceContext *>(
        platform::DeviceContextPool::Instance().Get(place));
    auto &requested_ctx =
        static_cast<const platform::CUDADeviceContext &>(dev_ctx);
    if (pooled_ctx->stream() != requested_ctx.stream()) {
      return allocation::CUDADeviceContextAllocatorPool::Instance().Alloc(
          requested_ctx, size);
    }
  }
#endif
  return Alloc(place, size);
}
}
// namespace memory
}
// namespace paddle
namespace
paddle
{
namespace
platform
{
...
...
@@ -174,6 +202,15 @@ class EigenCudaStreamDevice : public Eigen::StreamInterface {
mutable
std
::
unordered_map
<
void
*
,
memory
::
AllocationPtr
>
allocations_
;
};
// Grows the workspace to `required_workspace_bytes` if the current one
// (per WorkspaceSize()) is smaller; otherwise leaves it untouched.
void CudnnWorkspaceHandle::ReallocWorkspace(size_t required_workspace_bytes) {
  if (required_workspace_bytes > WorkspaceSize()) {
    // reset allocation first before re-allocate to save memory
    allocation_.reset();
    allocation_ = memory::Alloc(device_context_, required_workspace_bytes);
  }
}
CUDADeviceContext
::
CUDADeviceContext
(
CUDAPlace
place
)
:
place_
(
place
)
{
CUDADeviceGuard
guard
(
place_
.
device
);
compute_capability_
=
GetCUDAComputeCapability
(
place_
.
device
);
...
...
paddle/fluid/platform/device_context.h
浏览文件 @
37f76407
...
...
@@ -220,14 +220,7 @@ class CudnnWorkspaceHandle {
ResetWorkspace
();
}
// Grows the workspace to `required_workspace_bytes` if the current one
// (per WorkspaceSize()) is smaller; otherwise leaves it untouched.
inline void ReallocWorkspace(size_t required_workspace_bytes) {
  if (required_workspace_bytes > WorkspaceSize()) {
    // reset allocation first before re-allocate to save memory
    allocation_.reset();
    allocation_ = memory::Alloc(device_context_, required_workspace_bytes);
  }
}
void
ReallocWorkspace
(
size_t
required_workspace_bytes
);
inline
void
ResetWorkspace
()
{
allocation_
=
nullptr
;
}
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录