Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
机器未来
Paddle
提交
15076c32
P
Paddle
项目概览
机器未来
/
Paddle
与 Fork 源项目一致
Fork自
PaddlePaddle / Paddle
通知
1
Star
1
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
1
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
1
Issue
1
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
15076c32
编写于
10月 02, 2018
作者:
Y
Yu Yang
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
Add comments and polish code style
上级
b4f54d33
变更
19
隐藏空白更改
内联
并排
Showing
19 changed file
with
194 addition
and
28 deletion
+194
-28
paddle/fluid/framework/tensor_util.cc
paddle/fluid/framework/tensor_util.cc
+3
-2
paddle/fluid/memory/allocation/aligned_allocator.cc
paddle/fluid/memory/allocation/aligned_allocator.cc
+5
-0
paddle/fluid/memory/allocation/aligned_allocator.h
paddle/fluid/memory/allocation/aligned_allocator.h
+36
-7
paddle/fluid/memory/allocation/allocation_and_eigen_test.cu
paddle/fluid/memory/allocation/allocation_and_eigen_test.cu
+3
-0
paddle/fluid/memory/allocation/allocator.h
paddle/fluid/memory/allocation/allocator.h
+76
-9
paddle/fluid/memory/allocation/allocator_facade.cc
paddle/fluid/memory/allocation/allocator_facade.cc
+2
-2
paddle/fluid/memory/allocation/allocator_facade.h
paddle/fluid/memory/allocation/allocator_facade.h
+7
-0
paddle/fluid/memory/allocation/auto_increment_allocator.h
paddle/fluid/memory/allocation/auto_increment_allocator.h
+22
-2
paddle/fluid/memory/allocation/conditional_allocator.h
paddle/fluid/memory/allocation/conditional_allocator.h
+16
-0
paddle/fluid/memory/allocation/cpu_allocator.h
paddle/fluid/memory/allocation/cpu_allocator.h
+7
-1
paddle/fluid/memory/allocation/cuda_allocator.h
paddle/fluid/memory/allocation/cuda_allocator.h
+1
-0
paddle/fluid/memory/allocation/locked_allocator.h
paddle/fluid/memory/allocation/locked_allocator.h
+1
-0
paddle/fluid/memory/allocation/naive_managed_allocator.h
paddle/fluid/memory/allocation/naive_managed_allocator.h
+5
-0
paddle/fluid/memory/allocation/pinned_allocator.cc
paddle/fluid/memory/allocation/pinned_allocator.cc
+1
-1
paddle/fluid/memory/allocation/pinned_allocator.h
paddle/fluid/memory/allocation/pinned_allocator.h
+1
-0
paddle/fluid/memory/allocation/zero_size_allocator.h
paddle/fluid/memory/allocation/zero_size_allocator.h
+3
-0
paddle/fluid/operators/detection/generate_proposals_op.cu
paddle/fluid/operators/detection/generate_proposals_op.cu
+2
-1
paddle/fluid/platform/device_context.cc
paddle/fluid/platform/device_context.cc
+2
-2
paddle/fluid/pybind/tensor_py.h
paddle/fluid/pybind/tensor_py.h
+1
-1
未找到文件。
paddle/fluid/framework/tensor_util.cc
浏览文件 @
15076c32
...
...
@@ -15,6 +15,7 @@
#include <algorithm>
#include <limits>
#include <vector>
#include "../memory/allocation/allocator.h"
#include "paddle/fluid/framework/data_type.h"
namespace
paddle
{
...
...
@@ -111,8 +112,8 @@ void TensorCopySync(const Tensor& src, const platform::Place& dst_place,
dst
->
set_layout
(
src
.
layout
());
auto
src_place
=
src
.
place
();
auto
src_ptr
=
src
.
data
<
void
>
();
auto
dst_ptr
=
dst
->
mutable_data
(
dst_place
,
src
.
type
(),
memory
::
Allocator
::
kCommunication
);
auto
dst_ptr
=
dst
->
mutable_data
(
dst_place
,
src
.
type
(),
memory
::
Allocator
::
kCrossDevice
);
auto
size
=
src
.
numel
()
*
SizeOfType
(
src
.
type
());
if
(
platform
::
is_cpu_place
(
src_place
)
&&
platform
::
is_cpu_place
(
dst_place
))
{
memory
::
Copy
(
boost
::
get
<
platform
::
CPUPlace
>
(
dst_place
),
dst_ptr
,
...
...
paddle/fluid/memory/allocation/aligned_allocator.cc
浏览文件 @
15076c32
...
...
@@ -21,6 +21,11 @@ namespace allocation {
ThinAlignedAllocator
::
ThinAlignedAllocator
(
std
::
shared_ptr
<
ManagedAllocator
>
underlyning_allocator
)
:
underlying_allocator_
(
std
::
move
(
underlyning_allocator
))
{}
std
::
shared_ptr
<
Allocation
>
ThinAlignedAllocator
::
AllocateShared
(
size_t
size
,
Allocator
::
Attr
attr
)
{
return
std
::
shared_ptr
<
Allocation
>
(
Allocate
(
size
,
attr
).
release
());
}
}
// namespace allocation
}
// namespace memory
}
// namespace paddle
paddle/fluid/memory/allocation/aligned_allocator.h
浏览文件 @
15076c32
...
...
@@ -20,34 +20,66 @@ namespace paddle {
namespace
memory
{
namespace
allocation
{
// The aligned allocation and allocator wrap a managed allocator and return
// an aligned pointer.
//
// NOTE(yy): For speed reasons, I just use a template parameter to get the
// alignment; however, it can be a private member if necessary.
//
// NOTE(yy): kAlignment must be 2^N. A `static_assert` should be added.
template
<
size_t
kAlignment
>
class
AlignedAllocation
:
public
Allocation
{
public:
AlignedAllocation
(
std
::
unique_ptr
<
Allocation
>&&
underlying_allocation
,
size_t
size
)
:
Allocation
(
AlignedPtr
(
underlying_allocation
->
ptr
()),
size
,
:
Allocation
(
AlignedPtr
(
underlying_allocation
->
ptr
()),
size
+
kAlignment
-
Offset
(
underlying_allocation
->
ptr
()),
underlying_allocation
->
place
()),
underlying_allocation_
(
std
::
move
(
underlying_allocation
))
{}
private:
static
void
*
AlignedPtr
(
void
*
ptr
)
{
auto
ptr_addr
=
reinterpret_cast
<
uintptr_t
>
(
ptr
);
ptr_addr
=
(
ptr_addr
&
~
(
kAlignment
-
1
))
+
kAlignment
;
return
reinterpret_cast
<
void
*>
(
ptr_addr
);
return
reinterpret_cast
<
void
*>
(
reinterpret_cast
<
uintptr_t
>
(
ptr
)
+
Offset
(
ptr
));
}
// Offset from ptr to the next aligned pointer.
// If ptr is already aligned, returns 0.
static
size_t
Offset
(
void
*
ptr
)
{
auto
ptr_addr
=
reinterpret_cast
<
intptr_t
>
(
ptr
);
intptr_t
aligned_addr
=
(
ptr_addr
&
~
(
kAlignment
-
1
));
intptr_t
diff
=
aligned_addr
-
ptr_addr
;
if
(
diff
==
0
)
{
return
0
;
}
else
{
return
kAlignment
+
diff
;
}
}
std
::
unique_ptr
<
Allocation
>
underlying_allocation_
;
};
// Thin aligned allocator is trivial and is used to generate a small binary.
//
// NOTE(yy): This is a trick for writing a template class. This class extracts
// the common code into a `thin` class, so if there are multiple
// instantiations of the template class, the binary size will not grow too
// much.
//
// NOTE(yy): This could be over-design. If it harms the readability of the
// code, it could be removed later.
class
ThinAlignedAllocator
:
public
ManagedAllocator
{
public:
explicit
ThinAlignedAllocator
(
std
::
shared_ptr
<
ManagedAllocator
>
underlyning_allocator
);
std
::
shared_ptr
<
Allocation
>
AllocateShared
(
size_t
size
,
Attr
attr
)
override
;
protected:
std
::
shared_ptr
<
ManagedAllocator
>
underlying_allocator_
;
};
// An aligned allocator will allocate `size+kAlignment` allocation and adjust
// the pointer offset.
template
<
size_t
kAlignment
>
class
AlignedAllocator
:
public
ThinAlignedAllocator
{
public:
...
...
@@ -58,9 +90,6 @@ class AlignedAllocator : public ThinAlignedAllocator {
return
std
::
unique_ptr
<
Allocation
>
(
new
AlignedAllocation
<
kAlignment
>
(
std
::
move
(
raw_allocation
),
size
));
}
std
::
shared_ptr
<
Allocation
>
AllocateShared
(
size_t
size
,
Attr
attr
)
override
{
return
std
::
shared_ptr
<
Allocation
>
(
Allocate
(
size
,
attr
).
release
());
}
};
}
// namespace allocation
...
...
paddle/fluid/memory/allocation/allocation_and_eigen_test.cu
浏览文件 @
15076c32
...
...
@@ -18,6 +18,9 @@
#include "paddle/fluid/platform/device_context.h"
#include "paddle/fluid/platform/for_range.h"
#include "unsupported/Eigen/CXX11/Tensor"
// NOTE(yy): This unit test is not important. It is just used for debugging.
// It can be removed later.
struct
FillZero
{
public:
float
*
ptr_
;
...
...
paddle/fluid/memory/allocation/allocator.h
浏览文件 @
15076c32
...
...
@@ -12,6 +12,22 @@
// See the License for the specific language governing permissions and
// limitations under the License.
#include <utility>
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <memory>
#include <string>
...
...
@@ -21,15 +37,22 @@ namespace paddle {
namespace
memory
{
namespace
allocation
{
// Exception when `Alloc`/`AllocShared` failed
class
BadAlloc
:
public
std
::
exception
{
public:
explicit
BadAlloc
(
const
std
::
string
&
msg
)
:
msg_
(
msg
)
{}
explicit
BadAlloc
(
std
::
string
msg
)
:
msg_
(
std
::
move
(
msg
)
)
{}
const
char
*
what
()
const
noexcept
override
;
private:
std
::
string
msg_
;
};
// Allocation is the object holding the actual pointer.
// `Allocation::ptr()` returns the pointer that was allocated.
//
// NOTE: this is the base class of Allocation. Each allocator can use its own
// allocation object.
// NOTE: `Allocation::ptr()` could be nullptr if the allocation size is 0.
class
Allocation
{
public:
Allocation
(
void
*
ptr
,
size_t
size
,
platform
::
Place
place
)
...
...
@@ -38,8 +61,22 @@ class Allocation {
Allocation
(
const
Allocation
&
o
)
=
delete
;
Allocation
&
operator
=
(
const
Allocation
&
o
)
=
delete
;
// Returns the holding pointer.
// NOTE: For performance consideration, it is better not to make this method
// as a virtual method. If we want to implement a `defragmentation` later,
// we might need to make `ptr_` field as a protected field, and add a virtual
// method like `defragmentation` to change `ptr_`.
void
*
ptr
()
const
{
return
ptr_
;
}
// Returns the size of this memory buffer, i.e., ptr() + size() - 1 is the
// last valid element.
//
// NOTE: Some allocators might allocate more memory than requested, so the
// size could be larger than the request. For example, the AlignedAllocator
// will always allocate memory as size + kAlignment. The raw pointer might
// not be aligned, so an offset might be added to the raw pointer. The size
// of this allocation will be `size + kAlignment - offset`.
size_t
size
()
const
{
return
size_
;
}
const
platform
::
Place
&
place
()
const
{
return
place_
;
}
...
...
@@ -52,22 +89,51 @@ class Allocation {
platform
::
Place
place_
;
};
// Base interface class of memory Allocator.
// To allocate a memory, allocator needs two parameters:
// 1. size of bytes.
// 2. Attribute of memory.
// NOTE: the attribute of memory might be ignored if the allocator does not
// care it.
class
Allocator
{
public:
enum
Attr
{
kDefault
=
0
,
kTiny
=
1
,
kFixedHuge
=
2
,
kFluxHuge
=
3
,
kTmp
=
4
,
kCommunication
=
5
,
NumOfAttrs
=
6
kDefault
=
0
,
// Default attribute. Uses the fastest or most stable allocation
// algorithm.
kFixedHuge
=
1
,
// The allocation may not be freed until the program
// ends. e.g., `Parameters` and `Momentum`.
kFluxHuge
=
2
,
// The allocation may be created and freed frequently, and the
// allocation is considerably huge. Like `activations`
// and gradients.
kScratchpad
=
3
,
// The `Scratchpad` memory is allocated and freed very soon,
// usually within an operator or aux memory.
// Like CUDNN workspace, AUX memory in batch norm, etc.
//
// https://en.wikipedia.org/wiki/Scratchpad_memory
kCrossDevice
=
4
,
// The memory used for cross-device memory copy/communication.
// For example:
// 1. it can use `pinned` memory for CPU-GPU
// communication.
// 2. it can use `registered` memory for RDMA
// communication.
NumOfAttrs
=
5
// The number of all attributes. It is used internally.
};
virtual
~
Allocator
();
// Allocate an allocation. Note the returned allocation might need to be freed
// manually if the Allocator is an `UnmanagedAllocator`.
virtual
std
::
unique_ptr
<
Allocation
>
Allocate
(
size_t
size
,
Allocator
::
Attr
attr
=
kDefault
)
=
0
;
// True if the `Allocate` is thread safe.
virtual
bool
IsAllocThreadSafe
()
const
;
};
...
...
@@ -82,7 +148,8 @@ class UnmanagedAllocator : public Allocator {
}
};
// The allocation will be managed by smart pointers
// The allocation will be managed by smart pointers. i.e., users do not need
// to free allocation manually.
class
ManagedAllocator
:
public
Allocator
{
public:
virtual
std
::
shared_ptr
<
Allocation
>
AllocateShared
(
...
...
paddle/fluid/memory/allocation/allocator_facade.cc
浏览文件 @
15076c32
...
...
@@ -46,7 +46,7 @@ class CPUManagedAllocator : public ManagedAllocator {
std
::
unique_ptr
<
Allocator
>
(
new
CPUPinnedAllocator
())))
{}
std
::
unique_ptr
<
Allocation
>
Allocate
(
size_t
size
,
Attr
attr
)
override
{
if
(
attr
==
kC
ommunication
)
{
if
(
attr
==
kC
rossDevice
)
{
return
communication_allocator_
->
Allocate
(
size
,
attr
);
}
else
{
return
normal_allocator_
->
Allocate
(
size
,
attr
);
...
...
@@ -54,7 +54,7 @@ class CPUManagedAllocator : public ManagedAllocator {
}
std
::
shared_ptr
<
Allocation
>
AllocateShared
(
size_t
size
,
Attr
attr
)
override
{
if
(
attr
==
kC
ommunication
)
{
if
(
attr
==
kC
rossDevice
)
{
return
communication_allocator_
->
AllocateShared
(
size
,
attr
);
}
else
{
return
normal_allocator_
->
AllocateShared
(
size
,
attr
);
...
...
paddle/fluid/memory/allocation/allocator_facade.h
浏览文件 @
15076c32
...
...
@@ -24,6 +24,10 @@ namespace allocation {
// Allocator Facade is the interface exposed to other modules.
// All the configuration or dirty code under development should
// be hidden behind this facade.
//
// NOTE(yy): This class is a singleton class.
// NOTE(yy): To create a stable ABI and make compilation faster, we use
// the Pimpl trick here.
class
AllocatorFacadePrivate
;
class
AllocatorFacade
{
public:
...
...
@@ -33,13 +37,16 @@ class AllocatorFacade {
static
AllocatorFacade
&
Instance
();
// Allocate a shared allocation.
std
::
shared_ptr
<
Allocation
>
AllocShared
(
const
platform
::
Place
&
place
,
size_t
size
,
Allocator
::
Attr
attr
=
Allocator
::
kDefault
);
// Allocate a unique allocation.
std
::
unique_ptr
<
Allocation
>
Alloc
(
const
platform
::
Place
&
place
,
size_t
size
,
Allocator
::
Attr
attr
=
Allocator
::
kDefault
);
// TODO(yy): Allocate a Copy-On-Write allocation?
private:
AllocatorFacade
();
AllocatorFacadePrivate
*
m_
;
...
...
paddle/fluid/memory/allocation/auto_increment_allocator.h
浏览文件 @
15076c32
...
...
@@ -24,12 +24,27 @@ namespace paddle {
namespace
memory
{
namespace
allocation
{
// The AutoIncrementAllocator manages many underlying allocators. If none of
// them can allocate the requested memory, a new allocator will be created
// and its `allocate` method will be invoked.
//
// NOTE(yy): The AutoIncrementAllocator will prefer to allocate memory from
// the latest successful allocator.
//
// NOTE(yy): We may need to release an underlying allocator if it allocates
// nothing. However, that is generally not useful, since it would make
// performance nondeterministic.
//
// NOTE(yy): This allocator is only locked when creating a new underlying
// allocator. The allocation requests from many threads may be dispatched
// to the same underlying allocator, so the underlying allocator must be
// thread safe.
class
AutoIncrementAllocator
:
public
ManagedAllocator
{
public:
// Creator is the method to create ManagedAllocator
using
AllocatorCreator
=
std
::
function
<
std
::
shared_ptr
<
ManagedAllocator
>
()
>
;
template
<
typename
Creator
>
explicit
AutoIncrementAllocator
(
Creator
&&
creator
)
explicit
AutoIncrementAllocator
(
AllocatorCreator
&&
creator
)
:
creator_
(
std
::
move
(
creator
)),
prev_success_allocator_
{
0
}
{}
std
::
unique_ptr
<
Allocation
>
Allocate
(
size_t
size
,
Attr
attr
)
override
;
std
::
shared_ptr
<
Allocation
>
AllocateShared
(
size_t
size
,
Attr
attr
)
override
;
...
...
@@ -65,6 +80,11 @@ class AutoIncrementAllocator : public ManagedAllocator {
std
::
lock_guard
<
std
::
mutex
>
guard
(
mtx_
);
underlying_allocators_
.
emplace_back
(
creator_
());
prev_success_allocator_
=
underlying_allocators_
.
size
()
-
1
;
PADDLE_ENFORCE
(
underlying_allocators_
[
prev_success_allocator_
]
->
IsAllocThreadSafe
(),
"the underlying allocator must be thread safe. This is a program "
"bug."
);
return
callback
(
*
underlying_allocators_
[
prev_success_allocator_
]);
}
}
...
...
paddle/fluid/memory/allocation/conditional_allocator.h
浏览文件 @
15076c32
...
...
@@ -22,6 +22,22 @@ namespace paddle {
namespace
memory
{
namespace
allocation
{
// A composite allocator that dispatches each allocation request according
// to registered conditions.
//
// For example:
//
// auto* cond_allocator = new ConditionalAllocator();
// cond_allocator->AddAllocator([](size_t size, Attr attr){
// // if size > 10
// return size > 10;
// }, allocator_a).AddAllocator([](size_t size, Attr attr){
// // elif attr is kDefault
// return attr == kDefault;
// }, allocator_b).AddAllocator([](size_t size, Attr attr){
// // else
// return true;
// }, allocator_c);
class
ConditionalAllocator
:
public
ManagedAllocator
{
public:
ConditionalAllocator
()
=
default
;
...
...
paddle/fluid/memory/allocation/cpu_allocator.h
浏览文件 @
15076c32
...
...
@@ -18,7 +18,13 @@
namespace
paddle
{
namespace
memory
{
namespace
allocation
{
// CPU system allocator and allocation.
//
// NOTE(yy): Should we just use `malloc` here, since there is an
// aligned_allocator?
//
// NOTE(yy): There is no need to use `BestFitAllocator` on CPU. We can import
// an open-source allocator into Paddle.
class
CPUAllocation
:
public
Allocation
{
public:
CPUAllocation
(
void
*
ptr
,
size_t
size
)
...
...
paddle/fluid/memory/allocation/cuda_allocator.h
浏览文件 @
15076c32
...
...
@@ -20,6 +20,7 @@ namespace paddle {
namespace
memory
{
namespace
allocation
{
// CUDA System allocator and allocation.
// Just a flag type.
class
CUDAAllocation
:
public
Allocation
{
public:
...
...
paddle/fluid/memory/allocation/locked_allocator.h
浏览文件 @
15076c32
...
...
@@ -20,6 +20,7 @@ namespace paddle {
namespace
memory
{
namespace
allocation
{
// An allocator that makes the underlying allocator thread safe.
class
LockedAllocator
:
public
UnmanagedAllocator
{
public:
explicit
LockedAllocator
(
std
::
unique_ptr
<
Allocator
>&&
underlying_allocator
);
...
...
paddle/fluid/memory/allocation/naive_managed_allocator.h
浏览文件 @
15076c32
...
...
@@ -20,6 +20,11 @@ namespace paddle {
namespace
memory
{
namespace
allocation
{
// An allocator to wrap an UnmanagedAllocator and make the allocation managed
// by C++ smart ptr.
//
// NOTE: if the NaiveManagedAllocator is destroyed before
// NaiveManagedAllocations, the allocation will never be released.
class
NaiveManagedAllocator
;
class
NaiveManagedAllocation
:
public
Allocation
{
public:
...
...
paddle/fluid/memory/allocation/pinned_allocator.cc
浏览文件 @
15076c32
...
...
@@ -23,7 +23,7 @@ namespace allocation {
std
::
unique_ptr
<
Allocation
>
CPUPinnedAllocator
::
Allocate
(
size_t
size
,
Allocator
::
Attr
attr
)
{
PADDLE_ENFORCE_EQ
(
attr
,
kC
ommunication
,
attr
,
kC
rossDevice
,
"CPUPinnedAllocator should be used for Cross-Device Communication"
);
void
*
ptr
;
...
...
paddle/fluid/memory/allocation/pinned_allocator.h
浏览文件 @
15076c32
...
...
@@ -19,6 +19,7 @@ namespace paddle {
namespace
memory
{
namespace
allocation
{
// Allocator uses `cudaMallocHost`
class
CPUPinnedAllocation
:
public
Allocation
{
public:
CPUPinnedAllocation
(
void
*
ptr
,
size_t
size
)
...
...
paddle/fluid/memory/allocation/zero_size_allocator.h
浏览文件 @
15076c32
...
...
@@ -22,6 +22,9 @@ namespace paddle {
namespace
memory
{
namespace
allocation
{
// The allocator handles requests whose size is zero. The allocator will
// always return an allocation even if the requested size is zero. However,
// the allocation.ptr() is nullptr.
class
ZeroSizeAllocation
:
public
Allocation
{
public:
explicit
ZeroSizeAllocation
(
const
platform
::
Place
&
p
)
...
...
paddle/fluid/operators/detection/generate_proposals_op.cu
浏览文件 @
15076c32
...
...
@@ -12,6 +12,7 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include <paddle/fluid/memory/allocation/allocator.h>
#include <stdio.h>
#include <string>
#include <vector>
...
...
@@ -70,7 +71,7 @@ static void SortDescending(const platform::CUDADeviceContext &ctx,
// Allocate temporary storage
auto
place
=
boost
::
get
<
platform
::
CUDAPlace
>
(
ctx
.
GetPlace
());
auto
d_temp_storage
=
memory
::
Alloc
(
place
,
temp_storage_bytes
,
memory
::
Allocator
::
k
Tmp
);
memory
::
Alloc
(
place
,
temp_storage_bytes
,
memory
::
Allocator
::
k
Scratchpad
);
// Run sorting operation
cub
::
DeviceRadixSort
::
SortPairsDescending
<
T
,
int
>
(
...
...
paddle/fluid/platform/device_context.cc
浏览文件 @
15076c32
...
...
@@ -112,8 +112,8 @@ class EigenCudaStreamDevice : public Eigen::StreamInterface {
}
void
*
allocate
(
size_t
num_bytes
)
const
override
{
auto
buf
=
paddle
::
memory
::
Alloc
(
place_
,
num_bytes
,
memory
::
Allocator
::
kTiny
);
auto
buf
=
paddle
::
memory
::
Alloc
(
place_
,
num_bytes
,
memory
::
Allocator
::
kScratchpad
);
void
*
retv
=
buf
->
ptr
();
allocations_
[
buf
->
ptr
()]
=
std
::
move
(
buf
);
return
retv
;
...
...
paddle/fluid/pybind/tensor_py.h
浏览文件 @
15076c32
...
...
@@ -64,7 +64,7 @@ struct CastToPyBufferImpl<true, I, ARGS...> {
auto
*
src_ptr
=
static_cast
<
const
void
*>
(
tensor
.
data
<
CUR_TYPE
>
());
auto
*
dst_ptr
=
static_cast
<
void
*>
(
dst_tensor
.
mutable_data
<
CUR_TYPE
>
(
tensor
.
dims
(),
platform
::
CPUPlace
(),
memory
::
Allocator
::
kC
ommunication
));
memory
::
Allocator
::
kC
rossDevice
));
paddle
::
platform
::
GpuMemcpySync
(
dst_ptr
,
src_ptr
,
sizeof
(
CUR_TYPE
)
*
tensor
.
numel
(),
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录