Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
PaddlePaddle
Paddle
提交
15076c32
P
Paddle
项目概览
PaddlePaddle
/
Paddle
大约 1 年 前同步成功
通知
2298
Star
20931
Fork
5422
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
1423
列表
看板
标记
里程碑
合并请求
543
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
1,423
Issue
1,423
列表
看板
标记
里程碑
合并请求
543
合并请求
543
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
体验新版 GitCode,发现更多精彩内容 >>
提交
15076c32
编写于
10月 02, 2018
作者:
Y
Yu Yang
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
Add comments and polish code style
上级
b4f54d33
变更
19
隐藏空白更改
内联
并排
Showing
19 changed file
with
194 addition
and
28 deletion
+194
-28
paddle/fluid/framework/tensor_util.cc
paddle/fluid/framework/tensor_util.cc
+3
-2
paddle/fluid/memory/allocation/aligned_allocator.cc
paddle/fluid/memory/allocation/aligned_allocator.cc
+5
-0
paddle/fluid/memory/allocation/aligned_allocator.h
paddle/fluid/memory/allocation/aligned_allocator.h
+36
-7
paddle/fluid/memory/allocation/allocation_and_eigen_test.cu
paddle/fluid/memory/allocation/allocation_and_eigen_test.cu
+3
-0
paddle/fluid/memory/allocation/allocator.h
paddle/fluid/memory/allocation/allocator.h
+76
-9
paddle/fluid/memory/allocation/allocator_facade.cc
paddle/fluid/memory/allocation/allocator_facade.cc
+2
-2
paddle/fluid/memory/allocation/allocator_facade.h
paddle/fluid/memory/allocation/allocator_facade.h
+7
-0
paddle/fluid/memory/allocation/auto_increment_allocator.h
paddle/fluid/memory/allocation/auto_increment_allocator.h
+22
-2
paddle/fluid/memory/allocation/conditional_allocator.h
paddle/fluid/memory/allocation/conditional_allocator.h
+16
-0
paddle/fluid/memory/allocation/cpu_allocator.h
paddle/fluid/memory/allocation/cpu_allocator.h
+7
-1
paddle/fluid/memory/allocation/cuda_allocator.h
paddle/fluid/memory/allocation/cuda_allocator.h
+1
-0
paddle/fluid/memory/allocation/locked_allocator.h
paddle/fluid/memory/allocation/locked_allocator.h
+1
-0
paddle/fluid/memory/allocation/naive_managed_allocator.h
paddle/fluid/memory/allocation/naive_managed_allocator.h
+5
-0
paddle/fluid/memory/allocation/pinned_allocator.cc
paddle/fluid/memory/allocation/pinned_allocator.cc
+1
-1
paddle/fluid/memory/allocation/pinned_allocator.h
paddle/fluid/memory/allocation/pinned_allocator.h
+1
-0
paddle/fluid/memory/allocation/zero_size_allocator.h
paddle/fluid/memory/allocation/zero_size_allocator.h
+3
-0
paddle/fluid/operators/detection/generate_proposals_op.cu
paddle/fluid/operators/detection/generate_proposals_op.cu
+2
-1
paddle/fluid/platform/device_context.cc
paddle/fluid/platform/device_context.cc
+2
-2
paddle/fluid/pybind/tensor_py.h
paddle/fluid/pybind/tensor_py.h
+1
-1
未找到文件。
paddle/fluid/framework/tensor_util.cc
浏览文件 @
15076c32
...
...
@@ -15,6 +15,7 @@
#include <algorithm>
#include <limits>
#include <vector>
#include "../memory/allocation/allocator.h"
#include "paddle/fluid/framework/data_type.h"
namespace
paddle
{
...
...
@@ -111,8 +112,8 @@ void TensorCopySync(const Tensor& src, const platform::Place& dst_place,
dst
->
set_layout
(
src
.
layout
());
auto
src_place
=
src
.
place
();
auto
src_ptr
=
src
.
data
<
void
>
();
auto
dst_ptr
=
dst
->
mutable_data
(
dst_place
,
src
.
type
(),
memory
::
Allocator
::
kCommunication
);
auto
dst_ptr
=
dst
->
mutable_data
(
dst_place
,
src
.
type
(),
memory
::
Allocator
::
kCrossDevice
);
auto
size
=
src
.
numel
()
*
SizeOfType
(
src
.
type
());
if
(
platform
::
is_cpu_place
(
src_place
)
&&
platform
::
is_cpu_place
(
dst_place
))
{
memory
::
Copy
(
boost
::
get
<
platform
::
CPUPlace
>
(
dst_place
),
dst_ptr
,
...
...
paddle/fluid/memory/allocation/aligned_allocator.cc
浏览文件 @
15076c32
...
...
@@ -21,6 +21,11 @@ namespace allocation {
ThinAlignedAllocator
::
ThinAlignedAllocator
(
std
::
shared_ptr
<
ManagedAllocator
>
underlyning_allocator
)
:
underlying_allocator_
(
std
::
move
(
underlyning_allocator
))
{}
std
::
shared_ptr
<
Allocation
>
ThinAlignedAllocator
::
AllocateShared
(
size_t
size
,
Allocator
::
Attr
attr
)
{
return
std
::
shared_ptr
<
Allocation
>
(
Allocate
(
size
,
attr
).
release
());
}
}
// namespace allocation
}
// namespace memory
}
// namespace paddle
paddle/fluid/memory/allocation/aligned_allocator.h
浏览文件 @
15076c32
...
...
@@ -20,34 +20,66 @@ namespace paddle {
namespace
memory
{
namespace
allocation
{
// The aligned allocation and allocator will wrap a managed allocator,
// and returns the aligned pointer.
//
// NOTE(yy): For speed reasons, the alignment is a template parameter;
// however, it could be a private member if necessary.
//
// NOTE(yy): kAlignment must be a power of two (2^N). A `static_assert` should be added.
template
<
size_t
kAlignment
>
class
AlignedAllocation
:
public
Allocation
{
public:
AlignedAllocation
(
std
::
unique_ptr
<
Allocation
>&&
underlying_allocation
,
size_t
size
)
:
Allocation
(
AlignedPtr
(
underlying_allocation
->
ptr
()),
size
,
:
Allocation
(
AlignedPtr
(
underlying_allocation
->
ptr
()),
size
+
kAlignment
-
Offset
(
underlying_allocation
->
ptr
()),
underlying_allocation
->
place
()),
underlying_allocation_
(
std
::
move
(
underlying_allocation
))
{}
private:
static
void
*
AlignedPtr
(
void
*
ptr
)
{
auto
ptr_addr
=
reinterpret_cast
<
uintptr_t
>
(
ptr
);
ptr_addr
=
(
ptr_addr
&
~
(
kAlignment
-
1
))
+
kAlignment
;
return
reinterpret_cast
<
void
*>
(
ptr_addr
);
return
reinterpret_cast
<
void
*>
(
reinterpret_cast
<
uintptr_t
>
(
ptr
)
+
Offset
(
ptr
));
}
// Offset to aligned pointer.
// if ptr is already aligned, returns 0.
static
size_t
Offset
(
void
*
ptr
)
{
auto
ptr_addr
=
reinterpret_cast
<
intptr_t
>
(
ptr
);
intptr_t
aligned_addr
=
(
ptr_addr
&
~
(
kAlignment
-
1
));
intptr_t
diff
=
aligned_addr
-
ptr_addr
;
if
(
diff
==
0
)
{
return
0
;
}
else
{
return
kAlignment
+
diff
;
}
}
std
::
unique_ptr
<
Allocation
>
underlying_allocation_
;
};
// Thin aligned allocator is trivial and used to generate a small size binary.
//
// NOTE(yy): This is a trick for writing a template class. The common code is
// extracted into this `thin` class, so if the template class is instantiated
// multiple times, the binary size does not grow too much.
//
// NOTE(yy): This could be over-design. If it harms the readability of the
// code, it could be removed later.
class
ThinAlignedAllocator
:
public
ManagedAllocator
{
public:
explicit
ThinAlignedAllocator
(
std
::
shared_ptr
<
ManagedAllocator
>
underlyning_allocator
);
std
::
shared_ptr
<
Allocation
>
AllocateShared
(
size_t
size
,
Attr
attr
)
override
;
protected:
std
::
shared_ptr
<
ManagedAllocator
>
underlying_allocator_
;
};
// An aligned allocator allocates `size + kAlignment` bytes and adjusts
// the pointer offset.
template
<
size_t
kAlignment
>
class
AlignedAllocator
:
public
ThinAlignedAllocator
{
public:
...
...
@@ -58,9 +90,6 @@ class AlignedAllocator : public ThinAlignedAllocator {
return
std
::
unique_ptr
<
Allocation
>
(
new
AlignedAllocation
<
kAlignment
>
(
std
::
move
(
raw_allocation
),
size
));
}
std
::
shared_ptr
<
Allocation
>
AllocateShared
(
size_t
size
,
Attr
attr
)
override
{
return
std
::
shared_ptr
<
Allocation
>
(
Allocate
(
size
,
attr
).
release
());
}
};
}
// namespace allocation
...
...
paddle/fluid/memory/allocation/allocation_and_eigen_test.cu
浏览文件 @
15076c32
...
...
@@ -18,6 +18,9 @@
#include "paddle/fluid/platform/device_context.h"
#include "paddle/fluid/platform/for_range.h"
#include "unsupported/Eigen/CXX11/Tensor"
// NOTE(yy): This unit test is not important. It is only used for debugging
// and can be removed later.
struct
FillZero
{
public:
float
*
ptr_
;
...
...
paddle/fluid/memory/allocation/allocator.h
浏览文件 @
15076c32
...
...
@@ -12,6 +12,22 @@
// See the License for the specific language governing permissions and
// limitations under the License.
#include <utility>
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <memory>
#include <string>
...
...
@@ -21,15 +37,22 @@ namespace paddle {
namespace
memory
{
namespace
allocation
{
// Exception when `Alloc`/`AllocShared` failed
class
BadAlloc
:
public
std
::
exception
{
public:
explicit
BadAlloc
(
const
std
::
string
&
msg
)
:
msg_
(
msg
)
{}
explicit
BadAlloc
(
std
::
string
msg
)
:
msg_
(
std
::
move
(
msg
)
)
{}
const
char
*
what
()
const
noexcept
override
;
private:
std
::
string
msg_
;
};
// Allocation is the object that holds the actual pointer.
// `Allocation::ptr()` returns the pointer that was allocated.
//
// NOTE: this is the base class of Allocation. Each allocator can use its own
// allocation object.
// NOTE: the `Allocation::ptr()` could be nullptr, if the allocation size is 0
class
Allocation
{
public:
Allocation
(
void
*
ptr
,
size_t
size
,
platform
::
Place
place
)
...
...
@@ -38,8 +61,22 @@ class Allocation {
Allocation
(
const
Allocation
&
o
)
=
delete
;
Allocation
&
operator
=
(
const
Allocation
&
o
)
=
delete
;
// Returns the holding pointer.
// NOTE: For performance consideration, it is better not to make this method
// as a virtual method. If we want to implement a `defragmentation` later,
// we might need to make `ptr_` field as a protected field, and add a virtual
// method like `defragmentation` to change `ptr_`.
void
*
ptr
()
const
{
return
ptr_
;
}
// Returns the size of this memory buffer, i.e., ptr() + size() - 1 is the
// last valid element.
//
// NOTE: Some allocators might allocate more memory than requested, so the
// size could be larger than the request. For example, the AlignedAllocator
// always allocates `size + kAlignment` bytes. The raw pointer might not be
// aligned, so an offset might be added to the raw pointer. The size of this
// allocation will then be `size + kAlignment - offset`.
size_t
size
()
const
{
return
size_
;
}
const
platform
::
Place
&
place
()
const
{
return
place_
;
}
...
...
@@ -52,22 +89,51 @@ class Allocation {
platform
::
Place
place_
;
};
// Base interface class of memory Allocator.
// To allocate a memory, allocator needs two parameters:
// 1. size of bytes.
// 2. Attribute of memory.
// NOTE: the attribute of memory might be ignored if the allocator does not
// care it.
class
Allocator
{
public:
enum
Attr
{
kDefault
=
0
,
kTiny
=
1
,
kFixedHuge
=
2
,
kFluxHuge
=
3
,
kTmp
=
4
,
kCommunication
=
5
,
NumOfAttrs
=
6
kDefault
=
0
,
// Default attribute. Uses the fastest or most stable allocation
// algorithm.
kFixedHuge
=
1
,
// The allocation may not be freed until the program
// ends. e.g., `Parameters` and `Momentum`.
kFluxHuge
=
2
,
// The allocation may be created and freed frequently, and the
// allocation is considerably large, e.g., `activations`
// and gradients.
kScratchpad
=
3
,
// The `Scratchpad` memory is allocated and freed very soon,
// usually within an operator or aux memory.
// Like CUDNN workspace, AUX memory in batch norm, etc.
//
// https://en.wikipedia.org/wiki/Scratchpad_memory
kCrossDevice
=
4
,
// The memory used for cross-device memory copy/communication.
// For example:
// 1. it can use `pinned` memory for CPU-GPU
// communication.
// 2. it can use `registered` memory for RDMA
// communication.
NumOfAttrs
=
5
// The number of all attributes. It is used internally.
};
virtual
~
Allocator
();
// Allocate an allocation. Note that the returned allocation might need to be
// freed manually if the Allocator is an `UnmanagedAllocator`.
virtual
std
::
unique_ptr
<
Allocation
>
Allocate
(
size_t
size
,
Allocator
::
Attr
attr
=
kDefault
)
=
0
;
// True if the `Allocate` is thread safe.
virtual
bool
IsAllocThreadSafe
()
const
;
};
...
...
@@ -82,7 +148,8 @@ class UnmanagedAllocator : public Allocator {
}
};
// The allocation will be managed by smart pointers
// The allocation will be managed by smart pointers. i.e., users do not need
// to free allocation manually.
class
ManagedAllocator
:
public
Allocator
{
public:
virtual
std
::
shared_ptr
<
Allocation
>
AllocateShared
(
...
...
paddle/fluid/memory/allocation/allocator_facade.cc
浏览文件 @
15076c32
...
...
@@ -46,7 +46,7 @@ class CPUManagedAllocator : public ManagedAllocator {
std
::
unique_ptr
<
Allocator
>
(
new
CPUPinnedAllocator
())))
{}
std
::
unique_ptr
<
Allocation
>
Allocate
(
size_t
size
,
Attr
attr
)
override
{
if
(
attr
==
kC
ommunication
)
{
if
(
attr
==
kC
rossDevice
)
{
return
communication_allocator_
->
Allocate
(
size
,
attr
);
}
else
{
return
normal_allocator_
->
Allocate
(
size
,
attr
);
...
...
@@ -54,7 +54,7 @@ class CPUManagedAllocator : public ManagedAllocator {
}
std
::
shared_ptr
<
Allocation
>
AllocateShared
(
size_t
size
,
Attr
attr
)
override
{
if
(
attr
==
kC
ommunication
)
{
if
(
attr
==
kC
rossDevice
)
{
return
communication_allocator_
->
AllocateShared
(
size
,
attr
);
}
else
{
return
normal_allocator_
->
AllocateShared
(
size
,
attr
);
...
...
paddle/fluid/memory/allocation/allocator_facade.h
浏览文件 @
15076c32
...
...
@@ -24,6 +24,10 @@ namespace allocation {
// Allocator Facade is the interface exposed to other modules.
// All the configuration or dirty code under development should
// be hidden behind this facade.
//
// NOTE(yy): This class is a singleton class.
// NOTE(yy): To keep the ABI stable and make compilation faster, the Pimpl
// idiom is used here.
class
AllocatorFacadePrivate
;
class
AllocatorFacade
{
public:
...
...
@@ -33,13 +37,16 @@ class AllocatorFacade {
static
AllocatorFacade
&
Instance
();
// Allocate a shared allocation.
std
::
shared_ptr
<
Allocation
>
AllocShared
(
const
platform
::
Place
&
place
,
size_t
size
,
Allocator
::
Attr
attr
=
Allocator
::
kDefault
);
// Allocate a unique allocation.
std
::
unique_ptr
<
Allocation
>
Alloc
(
const
platform
::
Place
&
place
,
size_t
size
,
Allocator
::
Attr
attr
=
Allocator
::
kDefault
);
// TODO(yy): Allocate a Copy-On-Write allocation?
private:
AllocatorFacade
();
AllocatorFacadePrivate
*
m_
;
...
...
paddle/fluid/memory/allocation/auto_increment_allocator.h
浏览文件 @
15076c32
...
...
@@ -24,12 +24,27 @@ namespace paddle {
namespace
memory
{
namespace
allocation
{
// The AutoIncrementAllocator manages many underlying allocators. If none of
// them can allocate the requested memory, a new allocator is created and
// its `allocate` method is invoked.
//
// NOTE(yy): The AutoIncrementAllocator prefers to allocate memory from
// the latest successful allocator.
//
// NOTE(yy): We may need to release an underlying allocator if it allocates
// nothing. However, that is generally not useful, since it would make
// performance unpredictable.
//
// NOTE(yy): This allocator is only locked when creating new underlying
// allocator. The allocation requests from many threads may be dispatched
// to the same underlying allocator. So the underlying allocator must be
// thread safe.
class
AutoIncrementAllocator
:
public
ManagedAllocator
{
public:
// Creator is the method to create ManagedAllocator
using
AllocatorCreator
=
std
::
function
<
std
::
shared_ptr
<
ManagedAllocator
>
()
>
;
template
<
typename
Creator
>
explicit
AutoIncrementAllocator
(
Creator
&&
creator
)
explicit
AutoIncrementAllocator
(
AllocatorCreator
&&
creator
)
:
creator_
(
std
::
move
(
creator
)),
prev_success_allocator_
{
0
}
{}
std
::
unique_ptr
<
Allocation
>
Allocate
(
size_t
size
,
Attr
attr
)
override
;
std
::
shared_ptr
<
Allocation
>
AllocateShared
(
size_t
size
,
Attr
attr
)
override
;
...
...
@@ -65,6 +80,11 @@ class AutoIncrementAllocator : public ManagedAllocator {
std
::
lock_guard
<
std
::
mutex
>
guard
(
mtx_
);
underlying_allocators_
.
emplace_back
(
creator_
());
prev_success_allocator_
=
underlying_allocators_
.
size
()
-
1
;
PADDLE_ENFORCE
(
underlying_allocators_
[
prev_success_allocator_
]
->
IsAllocThreadSafe
(),
"the underlying allocator must be thread safe. This is a program "
"bug."
);
return
callback
(
*
underlying_allocators_
[
prev_success_allocator_
]);
}
}
...
...
paddle/fluid/memory/allocation/conditional_allocator.h
浏览文件 @
15076c32
...
...
@@ -22,6 +22,22 @@ namespace paddle {
namespace
memory
{
namespace
allocation
{
// A composite allocator that dispatches each allocation request according to
// the registered conditions.
//
// For example:
//
// auto* cond_allocator = new ConditionalAllocator();
// cond_allocator->AddAllocator([](size_t size, Attr attr){
// // if size > 10
// return size > 10;
// }, allocator_a).AddAllocator([](size_t size, Attr attr){
// // elif attr is kDefault
// return attr == kDefault;
// }, allocator_b).AddAllocator([](size_t size, Attr attr){
// // else
// return true;
// }, allocator_c);
class
ConditionalAllocator
:
public
ManagedAllocator
{
public:
ConditionalAllocator
()
=
default
;
...
...
paddle/fluid/memory/allocation/cpu_allocator.h
浏览文件 @
15076c32
...
...
@@ -18,7 +18,13 @@
namespace
paddle
{
namespace
memory
{
namespace
allocation
{
// CPU system allocator and allocation.
//
// NOTE(yy): Should we just use `malloc` here, since there is an
// aligned_allocator?
//
// NOTE(yy): There is no need to use `BestFitAllocator` on CPU. We can import
// an open-source allocator into Paddle.
class
CPUAllocation
:
public
Allocation
{
public:
CPUAllocation
(
void
*
ptr
,
size_t
size
)
...
...
paddle/fluid/memory/allocation/cuda_allocator.h
浏览文件 @
15076c32
...
...
@@ -20,6 +20,7 @@ namespace paddle {
namespace
memory
{
namespace
allocation
{
// CUDA System allocator and allocation.
// Just a flag type.
class
CUDAAllocation
:
public
Allocation
{
public:
...
...
paddle/fluid/memory/allocation/locked_allocator.h
浏览文件 @
15076c32
...
...
@@ -20,6 +20,7 @@ namespace paddle {
namespace
memory
{
namespace
allocation
{
// An allocator that makes the underlying allocator thread safe.
class
LockedAllocator
:
public
UnmanagedAllocator
{
public:
explicit
LockedAllocator
(
std
::
unique_ptr
<
Allocator
>&&
underlying_allocator
);
...
...
paddle/fluid/memory/allocation/naive_managed_allocator.h
浏览文件 @
15076c32
...
...
@@ -20,6 +20,11 @@ namespace paddle {
namespace
memory
{
namespace
allocation
{
// An allocator to wrap an UnmanagedAllocator and make the allocation managed
// by C++ smart ptr.
//
// NOTE: if the NaiveManagedAllocator is destroyed before
// NaiveManagedAllocations, the allocation will never be released.
class
NaiveManagedAllocator
;
class
NaiveManagedAllocation
:
public
Allocation
{
public:
...
...
paddle/fluid/memory/allocation/pinned_allocator.cc
浏览文件 @
15076c32
...
...
@@ -23,7 +23,7 @@ namespace allocation {
std
::
unique_ptr
<
Allocation
>
CPUPinnedAllocator
::
Allocate
(
size_t
size
,
Allocator
::
Attr
attr
)
{
PADDLE_ENFORCE_EQ
(
attr
,
kC
ommunication
,
attr
,
kC
rossDevice
,
"CPUPinnedAllocator should be used for Cross-Device Communication"
);
void
*
ptr
;
...
...
paddle/fluid/memory/allocation/pinned_allocator.h
浏览文件 @
15076c32
...
...
@@ -19,6 +19,7 @@ namespace paddle {
namespace
memory
{
namespace
allocation
{
// Allocator uses `cudaMallocHost`
class
CPUPinnedAllocation
:
public
Allocation
{
public:
CPUPinnedAllocation
(
void
*
ptr
,
size_t
size
)
...
...
paddle/fluid/memory/allocation/zero_size_allocator.h
浏览文件 @
15076c32
...
...
@@ -22,6 +22,9 @@ namespace paddle {
namespace
memory
{
namespace
allocation
{
// The allocator that handles requests whose size is zero. It always
// returns an allocation even if the requested size is zero; however,
// allocation.ptr() is nullptr.
class
ZeroSizeAllocation
:
public
Allocation
{
public:
explicit
ZeroSizeAllocation
(
const
platform
::
Place
&
p
)
...
...
paddle/fluid/operators/detection/generate_proposals_op.cu
浏览文件 @
15076c32
...
...
@@ -12,6 +12,7 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include <paddle/fluid/memory/allocation/allocator.h>
#include <stdio.h>
#include <string>
#include <vector>
...
...
@@ -70,7 +71,7 @@ static void SortDescending(const platform::CUDADeviceContext &ctx,
// Allocate temporary storage
auto
place
=
boost
::
get
<
platform
::
CUDAPlace
>
(
ctx
.
GetPlace
());
auto
d_temp_storage
=
memory
::
Alloc
(
place
,
temp_storage_bytes
,
memory
::
Allocator
::
k
Tmp
);
memory
::
Alloc
(
place
,
temp_storage_bytes
,
memory
::
Allocator
::
k
Scratchpad
);
// Run sorting operation
cub
::
DeviceRadixSort
::
SortPairsDescending
<
T
,
int
>
(
...
...
paddle/fluid/platform/device_context.cc
浏览文件 @
15076c32
...
...
@@ -112,8 +112,8 @@ class EigenCudaStreamDevice : public Eigen::StreamInterface {
}
void
*
allocate
(
size_t
num_bytes
)
const
override
{
auto
buf
=
paddle
::
memory
::
Alloc
(
place_
,
num_bytes
,
memory
::
Allocator
::
kTiny
);
auto
buf
=
paddle
::
memory
::
Alloc
(
place_
,
num_bytes
,
memory
::
Allocator
::
kScratchpad
);
void
*
retv
=
buf
->
ptr
();
allocations_
[
buf
->
ptr
()]
=
std
::
move
(
buf
);
return
retv
;
...
...
paddle/fluid/pybind/tensor_py.h
浏览文件 @
15076c32
...
...
@@ -64,7 +64,7 @@ struct CastToPyBufferImpl<true, I, ARGS...> {
auto
*
src_ptr
=
static_cast
<
const
void
*>
(
tensor
.
data
<
CUR_TYPE
>
());
auto
*
dst_ptr
=
static_cast
<
void
*>
(
dst_tensor
.
mutable_data
<
CUR_TYPE
>
(
tensor
.
dims
(),
platform
::
CPUPlace
(),
memory
::
Allocator
::
kC
ommunication
));
memory
::
Allocator
::
kC
rossDevice
));
paddle
::
platform
::
GpuMemcpySync
(
dst_ptr
,
src_ptr
,
sizeof
(
CUR_TYPE
)
*
tensor
.
numel
(),
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录