PaddlePaddle / PaddleDetection · Commit 1cfc8b6e
Commit 1cfc8b6e
Authored Jul 26, 2017 by Yi Wang; committed via GitHub on Jul 26, 2017

Merge pull request #3056 from gangliao/cpu_mem

ENH: Refine Tensor and Add CopyFrom

Parents: 55115ac6, 1c68f119
Showing 7 changed files with 324 additions and 128 deletions (+324 −128).
paddle/framework/CMakeLists.txt        +1    −1
paddle/framework/detail/tensor-inl.h   +160  −0
paddle/framework/tensor.cc             +1    −1
paddle/framework/tensor.h              +88   −102
paddle/framework/tensor_test.cc        +70   −20
paddle/memory/memory.h                 +3    −3
paddle/platform/device_context.h       +1    −1
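At a glance, the core API change is that Tensor::CopyFrom now takes a device context rather than a destination Place, so copies are routed through paddle::memory::Copy (and a CUDA stream in the GPU case). A minimal usage sketch, mirroring the updated tensor_test.cc below and assuming a CPU-only build; the variable names are illustrative, not part of the commit:

// Sketch only (not part of the diff); follows the updated unit test below.
paddle::framework::Tensor src;
paddle::framework::Tensor dst;
int* src_ptr = src.mutable_data<int>(paddle::framework::make_ddim({3, 3}),
                                     paddle::platform::CPUPlace());
paddle::platform::CPUDeviceContext cpu_ctx;   // the new parameter type
dst.CopyFrom<int>(src, cpu_ctx);              // previously: dst.CopyFrom<int>(src, CPUPlace())
const int* dst_ptr = dst.data<int>();         // same contents, different buffer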
paddle/framework/CMakeLists.txt
@@ -3,7 +3,7 @@ cc_library(ddim SRCS ddim.cc DEPS eigen3)
 cc_test(ddim_test SRCS ddim_test.cc DEPS ddim)
 nv_test(dim_test SRCS dim_test.cu DEPS ddim)
-cc_library(tensor SRCS tensor.cc DEPS ddim place paddle_memory)
+cc_library(tensor SRCS tensor.cc DEPS ddim place paddle_memory device_context)
 cc_test(tensor_test SRCS tensor_test.cc DEPS tensor)
 cc_test(eigen_test SRCS eigen_test.cc DEPS tensor)
paddle/framework/detail/tensor-inl.h
new file (mode 100644)

/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */

#pragma once
#include "paddle/memory/memcpy.h"

namespace paddle {
namespace framework {

template <typename T>
inline void Tensor::check_memory_size() const {
  PADDLE_ENFORCE(holder_ != nullptr,
                 "Tenosr holds no memory. Call Tensor::mutable_data first.");
  PADDLE_ENFORCE(holder_->size() >= product(dims_) * sizeof(T) + offset_,
                 "Tensor's dims_ is out of bound. Call Tensor::mutable_data "
                 "first to re-allocate memory.");
}

template <typename T>
inline const T* Tensor::data() const {
  check_memory_size<T>();
  return reinterpret_cast<const T*>(
      reinterpret_cast<uintptr_t>(holder_->ptr()) + offset_);
}

template <typename T>
inline T* Tensor::data() {
  check_memory_size<T>();
  return reinterpret_cast<T*>(reinterpret_cast<uintptr_t>(holder_->ptr()) +
                              offset_);
}

template <typename T>
inline T* Tensor::mutable_data(DDim dims, platform::Place place) {
  static_assert(std::is_pod<T>::value, "T must be POD");
  Resize(dims);
  return mutable_data<T>(place);
}

template <typename T>
inline T* Tensor::mutable_data(platform::Place place) {
  static_assert(std::is_pod<T>::value, "T must be POD");
  PADDLE_ENFORCE(product(dims_) > 0,
                 "Tensor's numel must be larger than zero to call "
                 "Tensor::mutable_data. Call Tensor::set_dim first.");
  /* some versions of boost::variant don't have operator!= */
  size_t size = product(dims_) * sizeof(T);
  if (holder_ == nullptr || !(holder_->place() == place) ||
      holder_->size() < size + offset_) {
    if (platform::is_cpu_place(place)) {
      holder_.reset(new PlaceholderImpl<T, platform::CPUPlace>(
          boost::get<platform::CPUPlace>(place), size));
    }
#ifndef PADDLE_ONLY_CPU
    else if (platform::is_gpu_place(place)) {
      holder_.reset(new PlaceholderImpl<T, platform::GPUPlace>(
          boost::get<platform::GPUPlace>(place), size));
    }
#endif
    offset_ = 0;
  }
  return reinterpret_cast<T*>(reinterpret_cast<uintptr_t>(holder_->ptr()) +
                              offset_);
}

template <typename T>
inline void Tensor::ShareDataWith(const Tensor& src) {
  src.check_memory_size<T>();
  *this = src;
}

template <typename T>
inline void Tensor::CopyFrom(const Tensor& src,
                             const platform::CPUDeviceContext& ctx) {
  src.check_memory_size<T>();
  Resize(src.dims());

  auto src_place = src.holder_->place();
  auto src_ptr = static_cast<const void*>(src.data<T>());

  auto dst_place = ctx.GetPlace();
  auto dst_ptr = static_cast<void*>(mutable_data<T>(dst_place));

  auto size = product(src.dims_) * sizeof(T);

  if (platform::is_cpu_place(src_place)) {
    memory::Copy(boost::get<platform::CPUPlace>(dst_place), dst_ptr,
                 boost::get<platform::CPUPlace>(src_place), src_ptr, size);
  }
#ifndef PADDLE_ONLY_CPU
  else if (platform::is_gpu_place(src_place)) {
    memory::Copy(boost::get<platform::CPUPlace>(dst_place), dst_ptr,
                 boost::get<platform::GPUPlace>(src_place), src_ptr, size, 0);
  }
#endif
}

#ifndef PADDLE_ONLY_CPU
template <typename T>
inline void Tensor::CopyFrom(const Tensor& src,
                             const platform::CUDADeviceContext& ctx) {
  src.check_memory_size<T>();
  Resize(src.dims());

  auto src_place = src.holder_->place();
  auto src_ptr = static_cast<const void*>(src.data<T>());

  auto dst_place = ctx.GetPlace();
  auto dst_ptr = static_cast<void*>(mutable_data<T>(dst_place));

  auto size = product(src.dims_) * sizeof(T);

  if (platform::is_cpu_place(src_place)) {
    memory::Copy(boost::get<platform::GPUPlace>(dst_place), dst_ptr,
                 boost::get<platform::CPUPlace>(src_place), src_ptr, size,
                 ctx.stream());
  } else if (platform::is_gpu_place(src_place)) {
    memory::Copy(boost::get<platform::GPUPlace>(dst_place), dst_ptr,
                 boost::get<platform::GPUPlace>(src_place), src_ptr, size,
                 ctx.stream());
  }
}
#endif

template <typename T>
inline Tensor Tensor::Slice(const int& begin_idx, const int& end_idx) const {
  check_memory_size<T>();
  PADDLE_ENFORCE(begin_idx >= 0, "Slice begin index is less than zero.");
  PADDLE_ENFORCE(end_idx <= dims_[0], "Slice end index is out of bound.");
  PADDLE_ENFORCE(begin_idx < end_idx,
                 "Begin index must be less than end index.");
  PADDLE_ENFORCE(dims_[0] != 1, "Can not slice a tensor with dims_[0] = 1.");
  int base = product(dims_) / dims_[0];
  Tensor dst;
  dst.holder_ = holder_;
  DDim dst_dims = dims_;
  dst_dims[0] = end_idx - begin_idx;
  dst.Resize(dst_dims);
  dst.offset_ = offset_ + begin_idx * base * sizeof(T);
  return dst;
}

inline void Tensor::Resize(const DDim& dims) { dims_ = dims; }

inline const DDim& Tensor::dims() const { return dims_; }

}  // namespace framework
}  // namespace paddle
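A quick worked example of the Slice() arithmetic defined above, consistent with the slice-address check in tensor_test.cc; this is an illustrative sketch, not code from the commit:

// For a 3x3 int tensor: base = product({3, 3}) / dims_[0] = 3 elements per row.
Tensor t;
t.mutable_data<int>(make_ddim({3, 3}), platform::CPUPlace());
Tensor row1 = t.Slice<int>(1, 2);  // dims become {1, 3}; no data is copied
// row1 shares t's holder_ and only shifts the offset:
//   row1.offset_ = 0 + begin_idx(1) * base(3) * sizeof(int) = 12 bytes,
// so row1.data<int>() points 12 bytes past t.data<int>().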
paddle/framework/tensor.cc
@@ -12,7 +12,7 @@
 See the License for the specific language governing permissions and
 limitations under the License. */

-#include <paddle/framework/tensor.h>
+#include "paddle/framework/tensor.h"

 namespace paddle {
 namespace framework {}
paddle/framework/tensor.h
@@ -20,6 +20,7 @@ limitations under the License. */
 #include <typeindex>
 #include "paddle/framework/ddim.h"
 #include "paddle/memory/memory.h"
+#include "paddle/platform/device_context.h"
 #include "paddle/platform/enforce.h"
 #include "paddle/platform/place.h"
 #include "unsupported/Eigen/CXX11/Tensor"

@@ -31,9 +32,11 @@ template <bool less, size_t i, typename... args>
 struct CastToPyBufferImpl;
 }  // namespace details
 }  // namespace pybind

 namespace framework {

 class Tensor {
  public:
+  template <bool less, size_t i, typename... args>
+  friend struct paddle::pybind::details::CastToPyBufferImpl;

@@ -46,106 +49,84 @@ class Tensor {
  public:
   Tensor() : offset_(0) {}

-  template <typename T>
-  const T* data() const {
-    EnforceSufficientMemory<T>();
-    return reinterpret_cast<const T*>(
-        reinterpret_cast<uintptr_t>(holder_->ptr()) + offset_);
-  }
-
-  template <typename T>
-  T* data() {
-    EnforceSufficientMemory<T>();
-    return reinterpret_cast<T*>(reinterpret_cast<uintptr_t>(holder_->ptr()) +
-                                offset_);
-  }
-
-  template <typename T,  // must be POD types
-            typename std::enable_if<std::is_pod<T>::value>::type* = nullptr>
-  T* mutable_data(DDim dims, platform::Place place) {
-    Resize(dims);
-    return mutable_data<T>(place);
-  }
-
-  template <typename T,  // must be POD types
-            typename std::enable_if<std::is_pod<T>::value>::type* = nullptr>
-  T* mutable_data(platform::Place place) {
-    PADDLE_ENFORCE(product(dims_) > 0,
-                   "Tensor's numel must be larger than zero to call "
-                   "Tensor::mutable_data. Call Tensor::set_dim first.");
-    if (holder_ == nullptr ||
-        !(holder_->place() ==
-          place) /* some versions of boost::variant don't have operator!= */
-        || holder_->size() < product(dims_) * sizeof(T) + offset_) {
-      if (platform::is_cpu_place(place)) {
-        holder_.reset(new PlaceholderImpl<T, platform::CPUPlace>(
-            boost::get<platform::CPUPlace>(place), product(dims_) * sizeof(T)));
-      } else if (platform::is_gpu_place(place)) {
-#ifdef PADDLE_ONLY_CPU
-        PADDLE_THROW("'GPUPlace' is not supported in CPU only device.");
-#else
-        holder_.reset(new PlaceholderImpl<T, platform::GPUPlace>(
-            boost::get<platform::GPUPlace>(place), product(dims_) * sizeof(T)));
-#endif
-      } else {
-        PADDLE_THROW("Unknown 'place'.");
-      }
-      offset_ = 0;
-    }
-    return reinterpret_cast<T*>(reinterpret_cast<uintptr_t>(holder_->ptr()) +
-                                offset_);
-  }
-
-  template <typename T>
-  void ShareDataWith(const Tensor& src) {
-    src.EnforceSufficientMemory<T>();
-    *this = src;
-  }
-
-  template <typename T>
-  void CopyFrom(const Tensor& src, platform::Place dst_place) {
-    PADDLE_ENFORCE(platform::is_cpu_place(src.holder_->place()) &&
-                       platform::is_cpu_place(dst_place),
-                   "Tensor::CopyFrom only support CPU now.");
-    src.EnforceSufficientMemory<T>();
-    size_t size = product(src.dims_) * sizeof(T);
-    Resize(src.dims());
-    const void* src_ptr = static_cast<const void*>(src.data<T>());
-    void* dst_ptr = static_cast<void*>(mutable_data<T>(dst_place));
-    memcpy(dst_ptr, src_ptr, size);
-  }
-
-  template <typename T>
-  Tensor Slice(const int& begin_idx, const int& end_idx) const {
-    EnforceSufficientMemory<T>();
-    PADDLE_ENFORCE(begin_idx >= 0, "Slice begin index is less than zero.");
-    PADDLE_ENFORCE(end_idx <= dims_[0], "Slice end index is out of bound.");
-    PADDLE_ENFORCE(begin_idx < end_idx,
-                   "Begin index must be less than end index.");
-    PADDLE_ENFORCE(dims_[0] != 1, "Can not slice a tensor with dims_[0] = 1.");
-    int base = product(dims_) / dims_[0];
-    Tensor dst;
-    dst.holder_ = holder_;
-    DDim dst_dims = dims_;
-    dst_dims[0] = end_idx - begin_idx;
-    dst.Resize(dst_dims);
-    dst.offset_ = offset_ + begin_idx * base * sizeof(T);
-    return dst;
-  }
-
-  void Resize(const DDim& dims) { dims_ = dims; }
-
-  const DDim& dims() const { return dims_; }
+  /*! Return a pointer to mutable memory block. */
+  template <typename T>
+  inline T* data();
+
+  /*! Return a pointer to constant memory block. */
+  template <typename T>
+  inline const T* data() const;
+
+  /**
+   * @brief   Return a pointer to mutable memory block.
+   * @note    If not exist, then allocation.
+   */
+  template <typename T>
+  inline T* mutable_data(platform::Place place);
+
+  /**
+   * @brief     Return a pointer to mutable memory block.
+   *
+   * @param[in] dims    The dimensions of the memory block.
+   * @param[in] place   The place of the memory block.
+   *
+   * @note      If not exist, then allocation.
+   */
+  template <typename T>
+  inline T* mutable_data(DDim dims, platform::Place place);
+
+  /*! Return the dimensions of the memory block. */
+  inline const DDim& dims() const;
+
+  /*! Resize the dimensions of the memory block. */
+  inline void Resize(const DDim& dims);
+
+  /*! The internal of two tensors share the same memory block. */
+  template <typename T>
+  inline void ShareDataWith(const Tensor& src);
+
+  /**
+   * @brief   Copy the content of external tensor to a new place.
+   *
+   * @param[in] src   The external tensor.
+   * @param[in] ctx   The device context contains place where to store.
+   *
+   * @note    CopyFrom supports CPU <-> GPU, GPU <-> GPU.
+   */
+  template <typename T>
+  inline void CopyFrom(const Tensor& src, const platform::CPUDeviceContext& ctx);
+
+#ifndef PADDLE_ONLY_CPU
+  template <typename T>
+  inline void CopyFrom(const Tensor& src,
+                       const platform::CUDADeviceContext& ctx);
+#endif
+
+  /**
+   * @brief   Return the slice of the tensor.
+   *
+   * @param[in] begin_idx   The begin index of the slice.
+   * @param[in] end_idx     The end index of the slice.
+   */
+  template <typename T>
+  inline Tensor Slice(const int& begin_idx, const int& end_idx) const;

  private:
-  // Placeholder hides type T, so it doesn't appear as a template
-  // parameter of Variable.
+  template <typename T>
+  inline void check_memory_size() const;
+
+ private:
+  /**
+   * @note    Placeholder hides type T, so it doesn't appear as a template
+   *          parameter of Variable.
+   */
   struct Placeholder {
     virtual ~Placeholder() {}
     virtual void* ptr() const = 0;
-    virtual platform::Place place() const = 0;
     virtual size_t size() const = 0;
     virtual std::type_index type() const = 0;
+    virtual platform::Place place() const = 0;
   };

   template <typename T, typename PlaceType>

@@ -156,33 +137,38 @@ class Tensor {
           place_(place),
           size_(size) {}

-    virtual void* ptr() const { return static_cast<void*>(ptr_.get()); }
     virtual size_t size() const { return size_; }
-    virtual paddle::platform::Place place() const { return place_; }
+    virtual platform::Place place() const { return place_; }
+    virtual void* ptr() const { return static_cast<void*>(ptr_.get()); }
     virtual std::type_index type() const { return std::type_index(typeid(T)); }

+    /*! the pointer of memory block. */
     std::unique_ptr<T, memory::PODDeleter<T, PlaceType>> ptr_;
-    platform::Place place_;  // record the place of ptr_.
-    size_t size_;            // size of the memory block.
+
+    /*! the place of memory block. */
+    platform::Place place_;
+
+    /*! the size of memory block. */
+    size_t size_;
   };

-  template <typename T>
-  inline void EnforceSufficientMemory() const {
-    PADDLE_ENFORCE(holder_ != nullptr,
-                   "Tenosr holds no memory. Call Tensor::mutable_data first.");
-    PADDLE_ENFORCE(holder_->size() >= product(dims_) * sizeof(T) + offset_,
-                   "Tensor's dims_ is out of bound. Call Tensor::mutable_data "
-                   "first to re-allocate memory.");
-  }
-
-  std::shared_ptr<Placeholder> holder_;  // holds the memory block if allocated.
+  /*! holds the memory block if allocated. */
+  std::shared_ptr<Placeholder> holder_;
+
+  /*! points to dimensions of memory block. */
   DDim dims_;
-  // A PlaceHolder may be shared by more than one tensor. Some of them may be
-  // slices of the others. So the offset_ is introduced here to indicate the
-  // byte offset between PlaceHolder::ptr_ and where tensor's data really
-  // begins.
+
+  /**
+   * @brief   A PlaceHolder may be shared by more than one tensor.
+   *
+   * @note    Some of them may be slices of the others. So the offset_
+   *          is introduced here to indicate the byte offset between
+   *          PlaceHolder::ptr_ and where the tensor data really begins.
+   */
   size_t offset_;
 };

 }  // namespace framework
 }  // namespace paddle
+
+#include "paddle/framework/detail/tensor-inl.h"
paddle/framework/tensor_test.cc
@@ -72,7 +72,8 @@ TEST(Tensor, MutableData) {
     p2 = src_tensor.mutable_data<float>(make_ddim({2, 2}), CPUPlace());
     EXPECT_EQ(p1, p2);
   }

-#ifdef __CUDACC__
+#ifndef PADDLE_ONLY_CPU
   {
     Tensor src_tensor;
     float* p1 = nullptr;

@@ -123,7 +124,7 @@ TEST(Tensor, ShareDataWith) {
     ASSERT_EQ(src_tensor.data<int>(), dst_tensor.data<int>());
   }

-#ifdef __CUDACC__
+#ifndef PADDLE_ONLY_CPU
   {
     Tensor src_tensor;
     Tensor dst_tensor;

@@ -160,7 +161,7 @@ TEST(Tensor, Slice) {
     EXPECT_EQ(src_data_address + 3 * 4 * 1 * sizeof(int), slice_data_address);
   }

-#ifdef __CUDACC__
+#ifndef PADDLE_ONLY_CPU
   {
     Tensor src_tensor;
     src_tensor.mutable_data<double>(make_ddim({6, 9}), GPUPlace());

@@ -188,25 +189,74 @@ TEST(Tensor, Slice) {
 TEST(Tensor, CopyFrom) {
   using namespace paddle::framework;
   using namespace paddle::platform;
-
-  Tensor src_tensor;
-  int* src_ptr = src_tensor.mutable_data<int>(make_ddim({3, 3}), CPUPlace());
-  int arr[9] = {1, 2, 3, 4, 5, 6, 7, 8, 9};
-  memcpy(src_ptr, arr, 9 * sizeof(int));
-  Tensor dst_tensor;
-  dst_tensor.CopyFrom<int>(src_tensor, CPUPlace());
-
-  const int* dst_ptr = dst_tensor.data<int>();
-  ASSERT_NE(src_ptr, dst_ptr);
-  for (size_t i = 0; i < 9; ++i) {
-    EXPECT_EQ(src_ptr[i], dst_ptr[i]);
-  }
-
-  Tensor slice_tensor = src_tensor.Slice<int>(1, 2);
-  dst_tensor.CopyFrom<int>(slice_tensor, CPUPlace());
-  const int* slice_ptr = slice_tensor.data<int>();
-  dst_ptr = dst_tensor.data<int>();
-  ASSERT_NE(dst_ptr, slice_ptr);
-  for (size_t i = 0; i < 3; ++i) {
-    EXPECT_EQ(dst_ptr[i], slice_ptr[i]);
-  }
+  {
+    Tensor src_tensor;
+    Tensor dst_tensor;
+
+    int* src_ptr = src_tensor.mutable_data<int>(make_ddim({3, 3}), CPUPlace());
+
+    int arr[9] = {1, 2, 3, 4, 5, 6, 7, 8, 9};
+    memcpy(src_ptr, arr, 9 * sizeof(int));
+
+    auto* cpu_ctx = new paddle::platform::CPUDeviceContext();
+    dst_tensor.CopyFrom<int>(src_tensor, *cpu_ctx);
+
+    const int* dst_ptr = dst_tensor.data<int>();
+    ASSERT_NE(src_ptr, dst_ptr);
+    for (size_t i = 0; i < 9; ++i) {
+      EXPECT_EQ(src_ptr[i], dst_ptr[i]);
+    }
+
+    Tensor slice_tensor = src_tensor.Slice<int>(1, 2);
+    dst_tensor.CopyFrom<int>(slice_tensor, *cpu_ctx);
+    const int* slice_ptr = slice_tensor.data<int>();
+    dst_ptr = dst_tensor.data<int>();
+    ASSERT_NE(dst_ptr, slice_ptr);
+    for (size_t i = 0; i < 3; ++i) {
+      EXPECT_EQ(dst_ptr[i], slice_ptr[i]);
+    }
+  }
+#ifndef PADDLE_ONLY_CPU
+  {
+    Tensor src_tensor;
+    Tensor gpu_tensor;
+    Tensor dst_tensor;
+
+    int* src_ptr = src_tensor.mutable_data<int>(make_ddim({3, 3}), CPUPlace());
+
+    int arr[9] = {1, 2, 3, 4, 5, 6, 7, 8, 9};
+    memcpy(src_ptr, arr, 9 * sizeof(int));
+
+    // CPU Tensor to GPU Tensor
+    auto gpu_ctx = new paddle::platform::CUDADeviceContext(0);
+    gpu_tensor.CopyFrom<int>(src_tensor, *gpu_ctx);
+
+    // GPU Tensor to CPU Tensor
+    auto cpu_ctx = new paddle::platform::CPUDeviceContext();
+    dst_tensor.CopyFrom<int>(gpu_tensor, *cpu_ctx);
+
+    // Compare Tensors
+    const int* dst_ptr = dst_tensor.data<int>();
+    ASSERT_NE(src_ptr, dst_ptr);
+    for (size_t i = 0; i < 9; ++i) {
+      EXPECT_EQ(src_ptr[i], dst_ptr[i]);
+    }
+
+    Tensor slice_tensor = src_tensor.Slice<int>(1, 2);
+
+    // CPU Slice Tensor to GPU Tensor
+    gpu_tensor.CopyFrom<int>(slice_tensor, *gpu_ctx);
+
+    // GPU Tensor to CPU Tensor
+    dst_tensor.CopyFrom<int>(gpu_tensor, *cpu_ctx);
+
+    // Compare Slice Tensors
+    const int* slice_ptr = slice_tensor.data<int>();
+    dst_ptr = dst_tensor.data<int>();
+    ASSERT_NE(dst_ptr, slice_ptr);
+    for (size_t i = 0; i < 3; ++i) {
+      EXPECT_EQ(dst_ptr[i], slice_ptr[i]);
+    }
+  }
+#endif
 }
paddle/memory/memory.h
@@ -29,10 +29,10 @@ void Free(Place, void*);
 template <typename Place>
 size_t Used(Place);

-template <typename T, /* must be POD types */
-          typename Place /* platform::GPUPlace or platform::CPUPlace */,
-          typename std::enable_if<std::is_pod<T>::value>::type* = nullptr>
+template <typename T, typename Place>
 class PODDeleter {
+  static_assert(std::is_pod<T>::value, "T must be POD");
+
  public:
   PODDeleter(Place place) : place_(place) {}
   void operator()(T* ptr) { Free(place_, static_cast<void*>(ptr)); }
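For context, PODDeleter exists so that a raw buffer obtained from memory::Alloc can be owned by a std::unique_ptr and handed back to memory::Free with the right Place on destruction; tensor.h uses it as the deleter of PlaceholderImpl::ptr_. A hedged sketch of that pattern (buffer size and names are illustrative; assumes <memory> and the paddle memory/place headers are included):

// Sketch: tie an Alloc()'d buffer to a unique_ptr via PODDeleter, as PlaceholderImpl does.
paddle::platform::CPUPlace place;
size_t n = 64;  // number of floats, illustrative
std::unique_ptr<float, paddle::memory::PODDeleter<float, paddle::platform::CPUPlace>> buf(
    static_cast<float*>(paddle::memory::Alloc(place, n * sizeof(float))),
    paddle::memory::PODDeleter<float, paddle::platform::CPUPlace>(place));
// When buf goes out of scope, PODDeleter::operator() calls Free(place, ptr).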
paddle/platform/device_context.h
@@ -87,7 +87,7 @@ class CUDADeviceContext : public DeviceContext {
                    "cudaStreamSynchronize failed");
   }

-  cudaStream_t stream() { return stream_; }
+  cudaStream_t stream() const { return stream_; }

   Eigen::GpuDevice* eigen_device() const { return eigen_device_.get(); }
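Making stream() a const member is what lets the new Tensor::CopyFrom overload take the context by const reference and still fetch the stream; a reduced sketch of the call pattern (the helper name is illustrative, not part of the commit):

// tensor-inl.h calls ctx.stream() through a const reference, roughly:
void copy_on_stream(const paddle::platform::CUDADeviceContext& ctx) {
  cudaStream_t s = ctx.stream();  // compiles only because stream() is const
  // ... pass `s` as the last argument of memory::Copy, as CopyFrom does ...
}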