Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
Crayon鑫
Paddle
提交
37f76407
P
Paddle
项目概览
Crayon鑫
/
Paddle
与 Fork 源项目一致
Fork自
PaddlePaddle / Paddle
通知
1
Star
1
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
1
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
1
Issue
1
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
未验证
提交
37f76407
编写于
9月 24, 2019
作者:
Z
Zeng Jinle
提交者:
GitHub
9月 24, 2019
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
fix cuda dev_ctx allocator cmake deps, test=develop (#19953)
上级
ebff68fa
变更
12
隐藏空白更改
内联
并排
Showing
12 changed file
with
159 addition
and
308 deletion
+159
-308
paddle/fluid/memory/CMakeLists.txt
paddle/fluid/memory/CMakeLists.txt
+0
-2
paddle/fluid/memory/allocation/CMakeLists.txt
paddle/fluid/memory/allocation/CMakeLists.txt
+0
-6
paddle/fluid/memory/allocation/cuda_device_context_allocation.cc
...fluid/memory/allocation/cuda_device_context_allocation.cc
+0
-47
paddle/fluid/memory/allocation/cuda_device_context_allocation.h
.../fluid/memory/allocation/cuda_device_context_allocation.h
+0
-42
paddle/fluid/memory/allocation/cuda_device_context_allocator.cc
.../fluid/memory/allocation/cuda_device_context_allocator.cc
+0
-66
paddle/fluid/memory/allocation/cuda_device_context_allocator.h
...e/fluid/memory/allocation/cuda_device_context_allocator.h
+120
-4
paddle/fluid/memory/allocation/cuda_device_context_allocator_pool.cc
...d/memory/allocation/cuda_device_context_allocator_pool.cc
+0
-59
paddle/fluid/memory/allocation/cuda_device_context_allocator_pool.h
...id/memory/allocation/cuda_device_context_allocator_pool.h
+0
-48
paddle/fluid/memory/malloc.cc
paddle/fluid/memory/malloc.cc
+0
-25
paddle/fluid/platform/CMakeLists.txt
paddle/fluid/platform/CMakeLists.txt
+1
-1
paddle/fluid/platform/device_context.cc
paddle/fluid/platform/device_context.cc
+37
-0
paddle/fluid/platform/device_context.h
paddle/fluid/platform/device_context.h
+1
-8
未找到文件。
paddle/fluid/memory/CMakeLists.txt
浏览文件 @
37f76407
...
...
@@ -17,8 +17,6 @@ cc_library(memory
memcpy
)
if
(
WITH_GPU
)
add_dependencies
(
malloc cuda_device_context_allocator_pool
)
target_link_libraries
(
malloc cuda_device_context_allocator_pool
)
nv_test
(
malloc_test
SRCS malloc_test.cu
DEPS device_context malloc
)
...
...
paddle/fluid/memory/allocation/CMakeLists.txt
浏览文件 @
37f76407
...
...
@@ -14,12 +14,6 @@ endif()
if
(
WITH_GPU
)
nv_library
(
cuda_allocator SRCS cuda_allocator.cc DEPS allocator cuda_device_guard
)
nv_library
(
cuda_device_context_allocation SRCS cuda_device_context_allocation.cc
DEPS allocator enforce place
${
MKLDNN_CTX_DEPS
}
)
nv_library
(
cuda_device_context_allocator SRCS cuda_device_context_allocator.cc
DEPS allocator enforce place cuda_device_context_allocation
${
MKLDNN_CTX_DEPS
}
)
nv_library
(
cuda_device_context_allocator_pool SRCS cuda_device_context_allocator_pool.cc
DEPS allocator enforce place cuda_device_context_allocation cuda_device_context_allocator
${
MKLDNN_CTX_DEPS
}
)
endif
()
cc_library
(
retry_allocator SRCS retry_allocator.cc DEPS allocator
)
...
...
paddle/fluid/memory/allocation/cuda_device_context_allocation.cc
已删除
100644 → 0
浏览文件 @
ebff68fa
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/fluid/memory/allocation/cuda_device_context_allocation.h"
#include <utility>
#include "paddle/fluid/platform/enforce.h"
namespace paddle {
namespace memory {
namespace allocation {

// Wrap an existing allocation: expose the same ptr/size/place through the
// Allocation base class while taking ownership of the wrapped object.
CUDADeviceContextAllocation::CUDADeviceContextAllocation(
    AllocationPtr allocation)
    : Allocation(allocation->ptr(), allocation->size(), allocation->place()),
      underlying_allocation_(std::move(allocation)) {}

// Instead of freeing the memory immediately, hand the raw underlying
// allocation to a CUDA stream callback so it is deleted only after all work
// previously enqueued on the stream has finished.
CUDADeviceContextAllocation::~CUDADeviceContextAllocation() {
  PADDLE_ENFORCE_NOT_NULL(
      dev_ctx_, "Didn't set device context for CUDADeviceContextAllocation");
  auto *released_allocation = underlying_allocation_.release();
  VLOG(4) << "Adding callback to delete CUDADeviceContextAllocation at "
          << released_allocation;
  dev_ctx_->AddStreamCallback([released_allocation] {
    VLOG(4) << "Delete CUDADeviceContextAllocation at " << released_allocation;
    AllocationDeleter()(released_allocation);
  });
}

// Record the device context whose stream will drive the deferred release.
// Must be called before this object is destroyed (enforced in the dtor).
void CUDADeviceContextAllocation::SetCUDADeviceContext(
    const platform::CUDADeviceContext *dev_ctx) {
  dev_ctx_ = dev_ctx;
}

}  // namespace allocation
}  // namespace memory
}  // namespace paddle
paddle/fluid/memory/allocation/cuda_device_context_allocation.h
已删除
100644 → 0
浏览文件 @
ebff68fa
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include "paddle/fluid/memory/allocation/allocator.h"
#include "paddle/fluid/platform/device_context.h"
namespace paddle {
namespace memory {
namespace allocation {

/**
 * CUDADeviceContextAllocation is a wrapper of an underlying allocation.
 * It registers a CUDA stream callback for the underlying allocation so that
 * the allocation can be used in a CUDA stream and is deleted from the
 * callback only after the stream's pending work has completed.
 */
class CUDADeviceContextAllocation : public Allocation {
 public:
  explicit CUDADeviceContextAllocation(AllocationPtr allocation);
  ~CUDADeviceContextAllocation();
  // Set the device context that owns the stream on which the deferred
  // release callback will be registered.
  void SetCUDADeviceContext(const platform::CUDADeviceContext *dev_ctx);

 private:
  // Owning pointer to the wrapped allocation; released in the destructor.
  AllocationPtr underlying_allocation_;
  // Non-owning; must be set via SetCUDADeviceContext before destruction.
  const platform::CUDADeviceContext *dev_ctx_{nullptr};
};

}  // namespace allocation
}  // namespace memory
}  // namespace paddle
paddle/fluid/memory/allocation/cuda_device_context_allocator.cc
已删除
100644 → 0
浏览文件 @
ebff68fa
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/fluid/memory/allocation/cuda_device_context_allocator.h"
#include "paddle/fluid/memory/allocation/cuda_device_context_allocation.h"
#include "paddle/fluid/platform/cuda_device_guard.h"
#include "paddle/fluid/platform/enforce.h"
namespace paddle {
namespace memory {
namespace allocation {

// Create the allocator for a specific GPU place. A disable-timing CUDA event
// is created up front and reused for every allocation to order the default
// stream against the allocating stream.
CUDADeviceContextAllocator::CUDADeviceContextAllocator(
    const platform::CUDAPlace place, cudaStream_t default_stream)
    : place_(place), default_stream_(default_stream) {
  platform::CUDADeviceGuard guard(place_.device);
  PADDLE_ENFORCE_CUDA_SUCCESS(
      cudaEventCreate(&event_, cudaEventDisableTiming),
      "Create event failed in CUDADeviceContextAllocator");
}

CUDADeviceContextAllocator::~CUDADeviceContextAllocator() {
  if (event_) {
    platform::CUDADeviceGuard guard(place_.device);
    // NOTE: fixed typos in the error message ("Destory"/"destroctor").
    PADDLE_ENFORCE_CUDA_SUCCESS(
        cudaEventDestroy(event_),
        "Destroy event failed in CUDADeviceContextAllocator destructor");
  }
}

// Allocate GPU memory wrapped in a CUDADeviceContextAllocation, then make the
// default stream wait on an event so the new memory is not touched before
// previously enqueued default-stream work is ordered with it.
Allocation *CUDADeviceContextAllocator::AllocateImpl(size_t size) {
  PADDLE_ENFORCE_NOT_NULL(
      default_stream_,
      "Didn't set default stream for CUDADeviceContextAllocator");
  platform::CUDADeviceGuard guard(place_.device);
  auto allocation =
      new CUDADeviceContextAllocation(memory::Alloc(place_, size));
  // Wait for the event on stream
  PADDLE_ENFORCE_CUDA_SUCCESS(
      cudaEventRecord(event_, default_stream_),
      "Failed to record event in CUDADeviceContextAllocator");
  PADDLE_ENFORCE_CUDA_SUCCESS(
      cudaStreamWaitEvent(default_stream_, event_, 0),
      "Failed to wait event in CUDADeviceContextAllocator");
  return allocation;
}

// The wrapper's destructor performs the actual deferred release.
void CUDADeviceContextAllocator::FreeImpl(Allocation *allocation) {
  delete allocation;
}

}  // namespace allocation
}  // namespace memory
}  // namespace paddle
paddle/fluid/memory/allocation/cuda_device_context_allocator.h
浏览文件 @
37f76407
...
...
@@ -15,15 +15,58 @@
#pragma once
#include <cuda_runtime.h>
#include <map>
#include <memory>
#include <utility>
#include <vector>
#include "paddle/fluid/memory/allocation/allocator.h"
#include "paddle/fluid/platform/cuda_device_guard.h"
#include "paddle/fluid/platform/device_context.h"
#include "paddle/fluid/platform/place.h"
namespace
paddle
{
namespace
platform
{
class
CUDADeviceContext
;
}
// namespace platform
namespace
memory
{
namespace
allocation
{
/**
* CUDADeviceContextAllocation is a wrapper of the underbeneath allocation.
* CUDADeviceContextAllocation adds a CUDA stream callback for the underbeneath
* allocation so that CUDADeviceContextAllocation can be used in a CUDA stream
* which deletes allocation in the callback.
*/
class
CUDADeviceContextAllocation
:
public
Allocation
{
public:
explicit
CUDADeviceContextAllocation
(
AllocationPtr
allocation
)
:
Allocation
(
allocation
->
ptr
(),
allocation
->
size
(),
allocation
->
place
()),
underlying_allocation_
(
std
::
move
(
allocation
))
{}
~
CUDADeviceContextAllocation
()
{
PADDLE_ENFORCE_NOT_NULL
(
dev_ctx_
,
"Didn't set device context for CUDADeviceContextAllocation"
);
auto
*
p_allocation
=
underlying_allocation_
.
release
();
VLOG
(
4
)
<<
"Adding callback to delete CUDADeviceContextAllocation at "
<<
p_allocation
;
dev_ctx_
->
AddStreamCallback
([
p_allocation
]
{
VLOG
(
4
)
<<
"Delete CUDADeviceContextAllocation at "
<<
p_allocation
;
AllocationDeleter
()(
p_allocation
);
});
}
void
SetCUDADeviceContext
(
const
platform
::
CUDADeviceContext
*
dev_ctx
)
{
dev_ctx_
=
dev_ctx
;
}
private:
AllocationPtr
underlying_allocation_
;
const
platform
::
CUDADeviceContext
*
dev_ctx_
{
nullptr
};
};
/**
* CUDADeviceContextAllocator will allocate a CUDADeviceContextAllocation
* after waiting for a self-created event on the default stream. It does so to
...
...
@@ -33,12 +76,42 @@ namespace allocation {
class
CUDADeviceContextAllocator
:
public
Allocator
{
public:
explicit
CUDADeviceContextAllocator
(
platform
::
CUDAPlace
place
,
cudaStream_t
default_stream
);
~
CUDADeviceContextAllocator
();
cudaStream_t
default_stream
)
:
place_
(
place
),
default_stream_
(
default_stream
)
{
platform
::
CUDADeviceGuard
guard
(
place_
.
device
);
PADDLE_ENFORCE_CUDA_SUCCESS
(
cudaEventCreate
(
&
event_
,
cudaEventDisableTiming
),
"Create event failed in CUDADeviceContextAllocator"
);
}
~
CUDADeviceContextAllocator
()
{
if
(
event_
)
{
platform
::
CUDADeviceGuard
guard
(
place_
.
device
);
PADDLE_ENFORCE_CUDA_SUCCESS
(
cudaEventDestroy
(
event_
),
"Destory event failed in CUDADeviceContextAllocator destroctor"
);
}
}
protected:
Allocation
*
AllocateImpl
(
size_t
size
)
override
;
void
FreeImpl
(
Allocation
*
allocation
)
override
;
Allocation
*
AllocateImpl
(
size_t
size
)
override
{
PADDLE_ENFORCE_NOT_NULL
(
default_stream_
,
"Didn't set default stream for CUDADeviceContextAllocator"
);
platform
::
CUDADeviceGuard
guard
(
place_
.
device
);
auto
allocation
=
new
CUDADeviceContextAllocation
(
memory
::
Alloc
(
place_
,
size
));
// Wait for the event on stream
PADDLE_ENFORCE_CUDA_SUCCESS
(
cudaEventRecord
(
event_
,
default_stream_
),
"Failed to record event in CUDADeviceContextAllocator"
);
PADDLE_ENFORCE_CUDA_SUCCESS
(
cudaStreamWaitEvent
(
default_stream_
,
event_
,
0
),
"Failed to wait event in CUDADeviceContextAllocator"
);
return
allocation
;
}
void
FreeImpl
(
Allocation
*
allocation
)
override
{
delete
allocation
;
}
private:
platform
::
CUDAPlace
place_
;
...
...
@@ -46,6 +119,49 @@ class CUDADeviceContextAllocator : public Allocator {
cudaStream_t
default_stream_
{
nullptr
};
};
/**
 * CUDADeviceContextAllocatorPool is a singleton that stores a mapping from
 * CUDAPlace(s) to std::shared_ptr<CUDADeviceContextAllocator>. When a
 * CUDADeviceContext's compute stream isn't the default stream, it can use
 * this class to allocate GPU memory that will be released by a callback
 * after stream execution.
 */
class CUDADeviceContextAllocatorPool {
 public:
  // Meyers-singleton accessor.
  static CUDADeviceContextAllocatorPool &Instance() {
    static CUDADeviceContextAllocatorPool pool;
    return pool;
  }

  // Allocate `size` bytes on dev_ctx's place and bind the resulting wrapper
  // to dev_ctx so its destruction is deferred onto dev_ctx's stream.
  AllocationPtr Alloc(const platform::CUDADeviceContext &dev_ctx,
                      size_t size) {
    auto iter =
        allocators_.find(boost::get<platform::CUDAPlace>(dev_ctx.GetPlace()));
    PADDLE_ENFORCE_EQ(iter != allocators_.end(), true,
                      "CUDADeviceContextAllocatorPool initialization error");
    auto &allocator = iter->second;
    AllocationPtr allocation = allocator->Allocate(size);
    static_cast<CUDADeviceContextAllocation *>(allocation.get())
        ->SetCUDADeviceContext(&dev_ctx);
    return allocation;
  }

 private:
  // Build one allocator per selected device, keyed by its CUDAPlace and
  // bound to that device context's compute stream.
  CUDADeviceContextAllocatorPool() {
    std::vector<int> devices = platform::GetSelectedDevices();
    for (int dev_id : devices) {
      auto place = platform::CUDAPlace(dev_id);
      auto compute_stream =
          platform::DeviceContextPool::Instance().GetByPlace(place)->stream();
      auto allocator = std::shared_ptr<CUDADeviceContextAllocator>(
          new CUDADeviceContextAllocator(place, compute_stream));
      allocators_.insert(make_pair(place, allocator));
    }
  }

  std::map<platform::CUDAPlace, std::shared_ptr<CUDADeviceContextAllocator>>
      allocators_;
};
}
// namespace allocation
}
// namespace memory
}
// namespace paddle
paddle/fluid/memory/allocation/cuda_device_context_allocator_pool.cc
已删除
100644 → 0
浏览文件 @
ebff68fa
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/fluid/memory/allocation/cuda_device_context_allocator_pool.h"
#include <utility>
#include <vector>
#include "paddle/fluid/memory/allocation/cuda_device_context_allocation.h"
#include "paddle/fluid/memory/allocation/cuda_device_context_allocator.h"
#include "paddle/fluid/platform/enforce.h"
namespace paddle {
namespace memory {
namespace allocation {

// Meyers-singleton accessor.
CUDADeviceContextAllocatorPool &CUDADeviceContextAllocatorPool::Instance() {
  static CUDADeviceContextAllocatorPool pool;
  return pool;
}

// Allocate `size` bytes on dev_ctx's place and bind the resulting wrapper to
// dev_ctx so its release is deferred onto dev_ctx's stream.
AllocationPtr CUDADeviceContextAllocatorPool::Alloc(
    const platform::CUDADeviceContext &dev_ctx, size_t size) {
  auto iter =
      allocators_.find(boost::get<platform::CUDAPlace>(dev_ctx.GetPlace()));
  PADDLE_ENFORCE_EQ(iter != allocators_.end(), true,
                    "CUDADeviceContextAllocatorPool initialization error");
  auto &allocator = iter->second;
  AllocationPtr allocation = allocator->Allocate(size);
  static_cast<CUDADeviceContextAllocation *>(allocation.get())
      ->SetCUDADeviceContext(&dev_ctx);
  return allocation;
}

// Build one allocator per selected device, keyed by its CUDAPlace and bound
// to that device context's compute stream.
CUDADeviceContextAllocatorPool::CUDADeviceContextAllocatorPool() {
  std::vector<int> devices = platform::GetSelectedDevices();
  for (int dev_id : devices) {
    auto place = platform::CUDAPlace(dev_id);
    auto compute_stream =
        platform::DeviceContextPool::Instance().GetByPlace(place)->stream();
    auto allocator = std::shared_ptr<CUDADeviceContextAllocator>(
        new CUDADeviceContextAllocator(place, compute_stream));
    allocators_.insert(make_pair(place, allocator));
  }
}

}  // namespace allocation
}  // namespace memory
}  // namespace paddle
paddle/fluid/memory/allocation/cuda_device_context_allocator_pool.h
已删除
100644 → 0
浏览文件 @
ebff68fa
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <map>
#include <memory>
#include "paddle/fluid/memory/allocation/allocator.h"
#include "paddle/fluid/memory/allocation/cuda_device_context_allocator.h"
#include "paddle/fluid/platform/device_context.h"
#include "paddle/fluid/platform/place.h"
namespace paddle {
namespace memory {
namespace allocation {

/**
 * CUDADeviceContextAllocatorPool is a singleton that stores a mapping from
 * CUDAPlace(s) to std::shared_ptr<CUDADeviceContextAllocator>. When a
 * CUDADeviceContext's compute stream isn't the default stream, it can use
 * this class to allocate GPU memory that will be released by a callback
 * after stream execution.
 */
class CUDADeviceContextAllocatorPool {
 public:
  static CUDADeviceContextAllocatorPool &Instance();

  // Allocate `size` bytes for dev_ctx; the returned allocation's release is
  // deferred onto dev_ctx's stream.
  AllocationPtr Alloc(const platform::CUDADeviceContext &dev_ctx, size_t size);

 private:
  // Private: constructed only through Instance(); fills allocators_ with one
  // allocator per selected device.
  CUDADeviceContextAllocatorPool();

  std::map<platform::CUDAPlace, std::shared_ptr<CUDADeviceContextAllocator>>
      allocators_;
};

}  // namespace allocation
}  // namespace memory
}  // namespace paddle
paddle/fluid/memory/malloc.cc
浏览文件 @
37f76407
...
...
@@ -17,10 +17,6 @@ limitations under the License. */
#include <vector>
#include "paddle/fluid/memory/allocation/allocator_facade.h"
#include "paddle/fluid/memory/allocation/allocator_strategy.h"
#ifdef PADDLE_WITH_CUDA
#include "paddle/fluid/memory/allocation/cuda_device_context_allocator_pool.h"
#endif
#include "paddle/fluid/platform/device_context.h"
#include "paddle/fluid/platform/place.h"
namespace
paddle
{
...
...
@@ -35,26 +31,5 @@ AllocationPtr Alloc(const platform::Place &place, size_t size) {
return
allocation
::
AllocatorFacade
::
Instance
().
Alloc
(
place
,
size
);
}
// Allocate for a specific device context. On the GPU path, if the context's
// stream differs from the default stream of its place, route through
// CUDADeviceContextAllocatorPool so the memory is released only after the
// context's stream work completes; otherwise fall back to the plain
// place-based Alloc.
AllocationPtr Alloc(const platform::DeviceContext &dev_ctx, size_t size) {
  auto place = dev_ctx.GetPlace();
#ifdef PADDLE_WITH_CUDA
  // Trivial sizes and non-GPU places never need stream-aware allocation.
  if (size == 0 || !platform::is_gpu_place(place)) {
    return Alloc(place, size);
  }
  auto *default_dev_ctx = static_cast<platform::CUDADeviceContext *>(
      platform::DeviceContextPool::Instance().Get(place));
  auto &desired_dev_ctx =
      static_cast<const platform::CUDADeviceContext &>(dev_ctx);
  if (default_dev_ctx->stream() == desired_dev_ctx.stream()) {
    return Alloc(place, size);
  } else {
    return allocation::CUDADeviceContextAllocatorPool::Instance().Alloc(
        desired_dev_ctx, size);
  }
#else
  return Alloc(place, size);
#endif
}
}
// namespace memory
}
// namespace paddle
paddle/fluid/platform/CMakeLists.txt
浏览文件 @
37f76407
...
...
@@ -70,7 +70,7 @@ ENDIF()
# memcpy depends on device_context, here add deps individually for
# avoiding cycle dependencies
cc_library
(
device_context SRCS device_context.cc init.cc DEPS simple_threadpool malloc
${
STREAM_CALLBACK_DEPS
}
cc_library
(
device_context SRCS device_context.cc init.cc DEPS simple_threadpool malloc
xxhash
${
STREAM_CALLBACK_DEPS
}
place eigen3 stringpiece cpu_helper cpu_info framework_proto
${
GPU_CTX_DEPS
}
${
MKLDNN_CTX_DEPS
}
${
dgc_deps
}
)
...
...
paddle/fluid/platform/device_context.cc
浏览文件 @
37f76407
...
...
@@ -18,11 +18,39 @@ limitations under the License. */
#include "paddle/fluid/memory/memory.h"
#ifdef PADDLE_WITH_CUDA
#include "paddle/fluid/framework/rw_lock.h"
#include "paddle/fluid/memory/allocation/cuda_device_context_allocator.h"
#include "paddle/fluid/platform/cuda_device_guard.h"
#endif
#include "glog/logging.h"
namespace paddle {
namespace memory {

// Allocate for a specific device context. On the GPU path, if the context's
// stream differs from the default stream of its place, route through
// CUDADeviceContextAllocatorPool so the memory is released only after the
// context's stream work completes; otherwise fall back to the plain
// place-based Alloc.
AllocationPtr Alloc(const platform::DeviceContext &dev_ctx, size_t size) {
  auto place = dev_ctx.GetPlace();
#ifdef PADDLE_WITH_CUDA
  // Trivial sizes and non-GPU places never need stream-aware allocation.
  if (size == 0 || !platform::is_gpu_place(place)) {
    return Alloc(place, size);
  }
  auto *default_dev_ctx = static_cast<platform::CUDADeviceContext *>(
      platform::DeviceContextPool::Instance().Get(place));
  auto &desired_dev_ctx =
      static_cast<const platform::CUDADeviceContext &>(dev_ctx);
  if (default_dev_ctx->stream() == desired_dev_ctx.stream()) {
    return Alloc(place, size);
  } else {
    return allocation::CUDADeviceContextAllocatorPool::Instance().Alloc(
        desired_dev_ctx, size);
  }
#else
  return Alloc(place, size);
#endif
}

}  // namespace memory
}  // namespace paddle
namespace
paddle
{
namespace
platform
{
...
...
@@ -174,6 +202,15 @@ class EigenCudaStreamDevice : public Eigen::StreamInterface {
mutable
std
::
unordered_map
<
void
*
,
memory
::
AllocationPtr
>
allocations_
;
};
// Grow the cuDNN workspace to at least `required_workspace_bytes`.
// No-op when the current workspace is already large enough.
void CudnnWorkspaceHandle::ReallocWorkspace(size_t required_workspace_bytes) {
  if (required_workspace_bytes <= WorkspaceSize()) {
    return;
  }
  // reset allocation first before re-allocate to save memory
  allocation_.reset();
  allocation_ = memory::Alloc(device_context_, required_workspace_bytes);
}
CUDADeviceContext
::
CUDADeviceContext
(
CUDAPlace
place
)
:
place_
(
place
)
{
CUDADeviceGuard
guard
(
place_
.
device
);
compute_capability_
=
GetCUDAComputeCapability
(
place_
.
device
);
...
...
paddle/fluid/platform/device_context.h
浏览文件 @
37f76407
...
...
@@ -220,14 +220,7 @@ class CudnnWorkspaceHandle {
ResetWorkspace
();
}
inline
void
ReallocWorkspace
(
size_t
required_workspace_bytes
)
{
if
(
required_workspace_bytes
<=
WorkspaceSize
())
{
return
;
}
// reset allocation first before re-allocate to save memory
allocation_
.
reset
();
allocation_
=
memory
::
Alloc
(
device_context_
,
required_workspace_bytes
);
}
void
ReallocWorkspace
(
size_t
required_workspace_bytes
);
inline
void
ResetWorkspace
()
{
allocation_
=
nullptr
;
}
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录