Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
PaddlePaddle
Paddle
提交
a8109cf0
P
Paddle
项目概览
PaddlePaddle
/
Paddle
大约 1 年 前同步成功
通知
2299
Star
20931
Fork
5422
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
1423
列表
看板
标记
里程碑
合并请求
543
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
1,423
Issue
1,423
列表
看板
标记
里程碑
合并请求
543
合并请求
543
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
a8109cf0
编写于
12月 28, 2017
作者:
S
sweetsky0901
浏览文件
操作
浏览文件
下载
差异文件
Merge branch 'develop' of
https://github.com/PaddlePaddle/Paddle
into detection_output
上级
95aec835
18311767
变更
12
隐藏空白更改
内联
并排
Showing
12 changed file
with
327 addition
and
155 deletion
+327
-155
paddle/framework/CMakeLists.txt
paddle/framework/CMakeLists.txt
+1
-1
paddle/framework/lod_tensor.cc
paddle/framework/lod_tensor.cc
+19
-93
paddle/framework/lod_tensor_test.cc
paddle/framework/lod_tensor_test.cc
+14
-0
paddle/framework/selected_rows.cc
paddle/framework/selected_rows.cc
+54
-1
paddle/framework/selected_rows.h
paddle/framework/selected_rows.h
+9
-0
paddle/framework/selected_rows_test.cc
paddle/framework/selected_rows_test.cc
+14
-0
paddle/framework/tensor_test.cc
paddle/framework/tensor_test.cc
+60
-54
paddle/framework/tensor_util.h
paddle/framework/tensor_util.h
+100
-0
paddle/framework/tensor_util_test.cc
paddle/framework/tensor_util_test.cc
+50
-0
paddle/operators/load_op.cc
paddle/operators/load_op.cc
+1
-1
paddle/operators/math/CMakeLists.txt
paddle/operators/math/CMakeLists.txt
+4
-4
paddle/platform/for_range.h
paddle/platform/for_range.h
+1
-1
未找到文件。
paddle/framework/CMakeLists.txt
浏览文件 @
a8109cf0
...
...
@@ -5,7 +5,7 @@ cc_library(ddim SRCS ddim.cc DEPS eigen3)
cc_test
(
ddim_test SRCS ddim_test.cc DEPS ddim
)
nv_test
(
dim_test SRCS dim_test.cu DEPS ddim
)
cc_library
(
tensor SRCS tensor.cc DEPS ddim place paddle_memory device_context
)
cc_library
(
tensor SRCS tensor.cc DEPS ddim place paddle_memory device_context
framework_proto
)
cc_test
(
tensor_test SRCS tensor_test.cc DEPS tensor
)
cc_test
(
tensor_util_test SRCS tensor_util_test.cc DEPS tensor
)
...
...
paddle/framework/lod_tensor.cc
浏览文件 @
a8109cf0
...
...
@@ -189,62 +189,16 @@ void AppendLoD(LoD *lod, const LoD &lod_length) {
void
SerializeToStream
(
std
::
ostream
&
os
,
const
LoDTensor
&
tensor
,
const
platform
::
DeviceContext
&
dev_ctx
)
{
// TODO(typhoonzero): serialize to ostream
{
// the 1st field, uint32_t version
{
// the 1st field, uint32_t version for LoDTensor
constexpr
uint32_t
version
=
0
;
os
.
write
(
reinterpret_cast
<
const
char
*>
(
&
version
),
sizeof
(
version
));
}
{
// the 2nd field, tensor description
// int32_t size
// void* protobuf message
proto
::
TensorDesc
desc
;
desc
.
set_data_type
(
framework
::
ToDataType
(
tensor
.
type
()));
auto
dims
=
framework
::
vectorize
(
tensor
.
dims
());
auto
*
pb_dims
=
desc
.
mutable_dims
();
pb_dims
->
Resize
(
static_cast
<
int
>
(
dims
.
size
()),
0
);
std
::
copy
(
dims
.
begin
(),
dims
.
end
(),
pb_dims
->
begin
());
int32_t
size
=
desc
.
ByteSize
();
os
.
write
(
reinterpret_cast
<
const
char
*>
(
&
size
),
sizeof
(
size
));
auto
out
=
desc
.
SerializeAsString
();
os
.
write
(
out
.
data
(),
size
);
}
{
// the 3rd field, tensor data
uint64_t
size
=
tensor
.
memory_size
();
auto
*
data_ptr
=
tensor
.
data
<
void
>
();
PADDLE_ENFORCE
(
size
<
std
::
numeric_limits
<
std
::
streamsize
>::
max
(),
"Index overflow when writing tensor"
);
if
(
platform
::
is_gpu_place
(
tensor
.
place
()))
{
#ifdef PADDLE_WITH_CUDA
constexpr
size_t
kBufSize
=
1024
*
1024
*
64
;
// 64MB
std
::
unique_ptr
<
char
[]
>
buf
(
new
char
[
kBufSize
]);
auto
&
gpu_dev_ctx
=
static_cast
<
const
platform
::
CUDADeviceContext
&>
(
dev_ctx
);
platform
::
CPUPlace
cpu
;
uintptr_t
data
=
reinterpret_cast
<
uintptr_t
>
(
data_ptr
);
while
(
size
!=
0
)
{
size_t
size_to_write
=
std
::
min
(
kBufSize
,
static_cast
<
size_t
>
(
size
));
memory
::
Copy
(
cpu
,
buf
.
get
(),
boost
::
get
<
platform
::
CUDAPlace
>
(
tensor
.
place
()),
reinterpret_cast
<
const
void
*>
(
data
),
size_to_write
,
gpu_dev_ctx
.
stream
());
gpu_dev_ctx
.
Wait
();
os
.
write
(
buf
.
get
(),
size_to_write
);
data
+=
size_to_write
;
size
-=
size_to_write
;
}
#else
PADDLE_THROW
(
"Unexpected branch"
);
#endif
}
else
{
os
.
write
(
static_cast
<
const
char
*>
(
data_ptr
),
static_cast
<
std
::
streamsize
>
(
size
));
}
}
{
// the 4th field, lod information
// uint64_t lod_level
// uint64_t lod_level_1 size in byte.
// int* lod_level_1 data
// ...
{
// the 2nd field, LoD information
// uint64_t lod_level
// uint64_t lod_level_1 size in byte.
// int* lod_level_1 data
// ...
auto
lod
=
tensor
.
lod
();
uint64_t
size
=
lod
.
size
();
os
.
write
(
reinterpret_cast
<
const
char
*>
(
&
size
),
sizeof
(
size
));
...
...
@@ -256,49 +210,19 @@ void SerializeToStream(std::ostream &os, const LoDTensor &tensor,
static_cast
<
std
::
streamsize
>
(
size
));
}
}
// the 3rd field, Tensor
SerializeToStream
(
os
,
static_cast
<
Tensor
>
(
tensor
),
dev_ctx
);
}
void
DeserializeFromStream
(
std
::
istream
&
is
,
LoDTensor
*
tensor
)
{
uint32_t
version
;
is
.
read
(
reinterpret_cast
<
char
*>
(
&
version
),
sizeof
(
version
));
PADDLE_ENFORCE_EQ
(
version
,
0U
,
"Only version 0 is supported"
);
proto
::
TensorDesc
desc
;
{
// int32_t size
// proto buffer
int32_t
size
;
is
.
read
(
reinterpret_cast
<
char
*>
(
&
size
),
sizeof
(
size
));
std
::
unique_ptr
<
char
[]
>
buf
(
new
char
[
size
]);
is
.
read
(
reinterpret_cast
<
char
*>
(
buf
.
get
()),
size
);
PADDLE_ENFORCE
(
desc
.
ParseFromArray
(
buf
.
get
(),
size
),
"Cannot parse tensor desc"
);
}
{
// read tensor
std
::
vector
<
int64_t
>
dims
;
dims
.
reserve
(
static_cast
<
size_t
>
(
desc
.
dims
().
size
()));
std
::
copy
(
desc
.
dims
().
begin
(),
desc
.
dims
().
end
(),
std
::
back_inserter
(
dims
));
tensor
->
Resize
(
framework
::
make_ddim
(
dims
));
void
*
buf
;
platform
::
Place
cpu
=
platform
::
CPUPlace
();
switch
(
desc
.
data_type
())
{
case
proto
::
FP32
:
buf
=
tensor
->
mutable_data
<
float
>
(
cpu
);
break
;
case
proto
::
FP64
:
buf
=
tensor
->
mutable_data
<
double
>
(
cpu
);
break
;
case
proto
::
INT32
:
buf
=
tensor
->
mutable_data
<
int
>
(
cpu
);
break
;
case
proto
::
INT64
:
buf
=
tensor
->
mutable_data
<
int64_t
>
(
cpu
);
break
;
default:
PADDLE_THROW
(
"DataType %d not supported"
,
desc
.
data_type
());
}
is
.
read
(
static_cast
<
char
*>
(
buf
),
tensor
->
memory_size
());
}
{
// read lod
{
// the 1st field, uint32_t version for LoDTensor
uint32_t
version
;
is
.
read
(
reinterpret_cast
<
char
*>
(
&
version
),
sizeof
(
version
));
PADDLE_ENFORCE_EQ
(
version
,
0U
,
"Only version 0 is supported"
);
}
{
// the 2nd field, LoD information
uint64_t
lod_level
;
is
.
read
(
reinterpret_cast
<
char
*>
(
&
lod_level
),
sizeof
(
lod_level
));
auto
&
lod
=
*
tensor
->
mutable_lod
();
...
...
@@ -312,6 +236,8 @@ void DeserializeFromStream(std::istream &is, LoDTensor *tensor) {
lod
[
i
]
=
tmp
;
}
}
// the 3rd field, Tensor
DeserializeFromStream
(
is
,
static_cast
<
Tensor
*>
(
tensor
));
}
}
// namespace framework
...
...
paddle/framework/lod_tensor_test.cc
浏览文件 @
a8109cf0
...
...
@@ -126,6 +126,20 @@ TEST_F(LoDTensorTester, ShrinkInLevel) {
EXPECT_NE
(
t1
.
data
<
float
>
(),
lod_tensor_
.
data
<
float
>
());
}
// Round-trips the fixture's lod_tensor_ through an in-memory string stream and
// compares the restored payload and LoD against the source.
TEST_F(LoDTensorTester, SerializeAndDeserialize) {
  platform::CPUDeviceContext cpu_ctx((platform::CPUPlace()));

  // Serialize into a string-backed stream.
  std::ostringstream serialized;
  SerializeToStream(serialized, lod_tensor_, cpu_ctx);

  // Read the same bytes back into a fresh tensor.
  LoDTensor restored;
  std::istringstream input(serialized.str());
  DeserializeFromStream(input, &restored);

  // Each restored element is expected to equal its own index.
  float* restored_data = restored.mutable_data<float>(platform::CPUPlace());
  for (int idx = 0; idx < kLodTensorSize; ++idx) {
    EXPECT_EQ(restored_data[idx], idx);
  }

  // The level-of-detail information must survive the round trip as well.
  EXPECT_EQ(restored.lod(), lod_tensor_.lod());
}
TEST
(
LodExpand
,
test
)
{
LoD
lod
{{
0
,
2
}};
LoDTensor
tensor
;
...
...
paddle/framework/selected_rows.cc
浏览文件 @
a8109cf0
...
...
@@ -12,5 +12,58 @@ limitations under the License. */
#include "paddle/framework/selected_rows.h"
namespace
paddle
{
namespace
framework
{}
// namespace framework
namespace
framework
{
/*
 * Writes `selected_rows` to `os` as four consecutive binary fields:
 *   1. uint32_t format version (always 0),
 *   2. uint64_t element count of rows(), followed by each element of rows(),
 *   3. int64_t height,
 *   4. value(), written by another SerializeToStream overload.
 * Scalars are written as their raw in-memory bytes. `dev_ctx` is only
 * forwarded to the overload that writes value().
 */
void SerializeToStream(std::ostream& os, const SelectedRows& selected_rows,
                       const platform::DeviceContext& dev_ctx) {
  // Field 1: format version.
  constexpr uint32_t kVersion = 0;
  os.write(reinterpret_cast<const char*>(&kVersion), sizeof(kVersion));

  // Field 2: row count, then the rows one element at a time.
  const auto& row_ids = selected_rows.rows();
  uint64_t row_count = row_ids.size();
  os.write(reinterpret_cast<const char*>(&row_count), sizeof(row_count));
  for (uint64_t idx = 0; idx != row_count; ++idx) {
    os.write(reinterpret_cast<const char*>(&row_ids[idx]),
             sizeof(row_ids[idx]));
  }

  // Field 3: height of the SelectedRows.
  int64_t total_height = selected_rows.height();
  os.write(reinterpret_cast<const char*>(&total_height),
           sizeof(total_height));

  // Field 4: the tensor data.
  SerializeToStream(os, selected_rows.value(), dev_ctx);
}
/*
 * Reads a SelectedRows object from `is`, expecting the four binary fields that
 * SerializeToStream(std::ostream&, const SelectedRows&, ...) writes:
 *   1. uint32_t format version (must be 0, otherwise enforcement fails),
 *   2. uint64_t row count followed by that many int64_t-sized row entries,
 *   3. int64_t height,
 *   4. the value tensor, read by another DeserializeFromStream overload.
 * The rows, height and value of `*selected_rows` are overwritten in place.
 */
void DeserializeFromStream(std::istream& is, SelectedRows* selected_rows) {
  // Bind by reference: a by-value copy here would receive the deserialized
  // data while the tensor owned by `selected_rows` stayed untouched.
  auto& tensor = *selected_rows->mutable_value();
  {
    // the 1st field, uint32_t version for SelectedRows
    uint32_t version;
    is.read(reinterpret_cast<char*>(&version), sizeof(version));
    PADDLE_ENFORCE_EQ(version, 0U, "Only version 0 is supported");
  }
  {
    // the 2nd field, rows information
    uint64_t size;
    is.read(reinterpret_cast<char*>(&size), sizeof(size));
    auto& rows = *selected_rows->mutable_rows();
    rows.resize(size);
    for (uint64_t i = 0; i < size; ++i) {
      is.read(reinterpret_cast<char*>(&rows[i]), sizeof(int64_t));
    }
  }
  {
    // the 3rd field, the height of the SelectedRows
    int64_t height;
    is.read(reinterpret_cast<char*>(&height), sizeof(int64_t));
    selected_rows->set_height(height);
  }
  // the 4th field, tensor which contains the data
  DeserializeFromStream(is, &tensor);
}
}
// namespace framework
}
// namespace paddle
paddle/framework/selected_rows.h
浏览文件 @
a8109cf0
...
...
@@ -59,5 +59,14 @@ class SelectedRows {
int64_t
height_
;
};
/*
* Serialize/Deserialize SelectedRows to/from a std::ostream/std::istream.
* You can pass an ofstream or ostringstream to serialize to a file
* or to an in-memory string. GPU tensors will be copied to CPU.
*/
void
SerializeToStream
(
std
::
ostream
&
os
,
const
SelectedRows
&
selected_rows
,
const
platform
::
DeviceContext
&
dev_ctx
);
void
DeserializeFromStream
(
std
::
istream
&
is
,
SelectedRows
*
selected_rows
);
}
// namespace framework
}
// namespace paddle
paddle/framework/selected_rows_test.cc
浏览文件 @
a8109cf0
...
...
@@ -43,5 +43,19 @@ TEST_F(SelectedRowsTester, complete_dims) {
ASSERT_EQ
(
selected_rows_
->
GetCompleteDims
(),
make_ddim
({
10
,
100
}));
}
// Round-trips the fixture's SelectedRows through an in-memory string stream
// and checks that rows and height are restored.
TEST_F(SelectedRowsTester, SerializeAndDeseralize) {
  platform::CPUDeviceContext cpu_ctx(place_);

  // Serialize into a string-backed stream.
  std::ostringstream serialized;
  SerializeToStream(serialized, *selected_rows_, cpu_ctx);

  // Read the same bytes back into a fresh object.
  SelectedRows restored;
  std::istringstream input(serialized.str());
  DeserializeFromStream(input, &restored);

  ASSERT_EQ(selected_rows_->rows(), restored.rows());
  ASSERT_EQ(selected_rows_->height(), restored.height());
}
}
// namespace framework
}
// namespace paddle
paddle/framework/tensor_test.cc
浏览文件 @
a8109cf0
...
...
@@ -15,12 +15,13 @@
#include <gtest/gtest.h>
#include <string>
namespace
framework
=
paddle
::
framework
;
namespace
platform
=
paddle
::
platform
;
TEST
(
Tensor
,
Dims
)
{
using
namespace
paddle
::
framework
;
using
namespace
paddle
::
platform
;
Tensor
tt
;
framework
::
Tensor
tt
;
tt
.
Resize
({
2
,
3
,
4
});
DDim
dims
=
tt
.
dims
();
framework
::
DDim
dims
=
tt
.
dims
();
ASSERT_EQ
(
arity
(
dims
),
3
);
for
(
int
i
=
0
;
i
<
3
;
++
i
)
{
EXPECT_EQ
(
i
+
2
,
dims
[
i
]);
...
...
@@ -28,12 +29,12 @@ TEST(Tensor, Dims) {
}
TEST
(
Tensor
,
DataAssert
)
{
paddle
::
framework
::
Tensor
src_tensor
;
framework
::
Tensor
src_tensor
;
bool
caught
=
false
;
try
{
src_tensor
.
data
<
double
>
();
}
catch
(
p
addle
::
p
latform
::
EnforceNotMet
err
)
{
}
catch
(
platform
::
EnforceNotMet
err
)
{
caught
=
true
;
std
::
string
msg
=
"holder_ should not be null
\n
Tensor holds no memory. Call "
...
...
@@ -50,61 +51,65 @@ TEST(Tensor, DataAssert) {
because Memory::Alloc() and Memory::Free() have not been ready.
*/
TEST
(
Tensor
,
MutableData
)
{
using
namespace
paddle
::
framework
;
using
namespace
paddle
::
platform
;
{
Tensor
src_tensor
;
framework
::
Tensor
src_tensor
;
float
*
p1
=
nullptr
;
float
*
p2
=
nullptr
;
// initialization
p1
=
src_tensor
.
mutable_data
<
float
>
(
make_ddim
({
1
,
2
,
3
}),
CPUPlace
());
p1
=
src_tensor
.
mutable_data
<
float
>
(
framework
::
make_ddim
({
1
,
2
,
3
}),
platform
::
CPUPlace
());
EXPECT_NE
(
p1
,
nullptr
);
// set src_tensor a new dim with large size
// momery is supposed to be re-allocated
p2
=
src_tensor
.
mutable_data
<
float
>
(
make_ddim
({
3
,
4
}),
CPUPlace
());
p2
=
src_tensor
.
mutable_data
<
float
>
(
framework
::
make_ddim
({
3
,
4
}),
platform
::
CPUPlace
());
EXPECT_NE
(
p2
,
nullptr
);
EXPECT_NE
(
p1
,
p2
);
// set src_tensor a new dim with same size
// momery block is supposed to be unchanged
p1
=
src_tensor
.
mutable_data
<
float
>
(
make_ddim
({
2
,
2
,
3
}),
CPUPlace
());
p1
=
src_tensor
.
mutable_data
<
float
>
(
framework
::
make_ddim
({
2
,
2
,
3
}),
platform
::
CPUPlace
());
EXPECT_EQ
(
p1
,
p2
);
// set src_tensor a new dim with smaller size
// momery block is supposed to be unchanged
p2
=
src_tensor
.
mutable_data
<
float
>
(
make_ddim
({
2
,
2
}),
CPUPlace
());
p2
=
src_tensor
.
mutable_data
<
float
>
(
framework
::
make_ddim
({
2
,
2
}),
platform
::
CPUPlace
());
EXPECT_EQ
(
p1
,
p2
);
}
#ifdef PADDLE_WITH_CUDA
{
Tensor
src_tensor
;
framework
::
Tensor
src_tensor
;
float
*
p1
=
nullptr
;
float
*
p2
=
nullptr
;
// initialization
p1
=
src_tensor
.
mutable_data
<
float
>
(
make_ddim
({
1
,
2
,
3
}),
CUDAPlace
());
p1
=
src_tensor
.
mutable_data
<
float
>
(
framework
::
make_ddim
({
1
,
2
,
3
}),
platform
::
CUDAPlace
());
EXPECT_NE
(
p1
,
nullptr
);
// set src_tensor a new dim with large size
// momery is supposed to be re-allocated
p2
=
src_tensor
.
mutable_data
<
float
>
(
make_ddim
({
3
,
4
}),
CUDAPlace
());
p2
=
src_tensor
.
mutable_data
<
float
>
(
framework
::
make_ddim
({
3
,
4
}),
platform
::
CUDAPlace
());
EXPECT_NE
(
p2
,
nullptr
);
EXPECT_NE
(
p1
,
p2
);
// set src_tensor a new dim with same size
// momery block is supposed to be unchanged
p1
=
src_tensor
.
mutable_data
<
float
>
(
make_ddim
({
2
,
2
,
3
}),
CUDAPlace
());
p1
=
src_tensor
.
mutable_data
<
float
>
(
framework
::
make_ddim
({
2
,
2
,
3
}),
platform
::
CUDAPlace
());
EXPECT_EQ
(
p1
,
p2
);
// set src_tensor a new dim with smaller size
// momery block is supposed to be unchanged
p2
=
src_tensor
.
mutable_data
<
float
>
(
make_ddim
({
2
,
2
}),
CUDAPlace
());
p2
=
src_tensor
.
mutable_data
<
float
>
(
framework
::
make_ddim
({
2
,
2
}),
platform
::
CUDAPlace
());
EXPECT_EQ
(
p1
,
p2
);
}
#endif
}
TEST
(
Tensor
,
ShareDataWith
)
{
using
namespace
paddle
::
framework
;
using
namespace
paddle
::
platform
;
{
Tensor
src_tensor
;
Tensor
dst_tensor
;
framework
::
Tensor
src_tensor
;
framework
::
Tensor
dst_tensor
;
// Try to share data form uninitialized tensor
bool
caught
=
false
;
try
{
...
...
@@ -121,16 +126,18 @@ TEST(Tensor, ShareDataWith) {
}
ASSERT_TRUE
(
caught
);
src_tensor
.
mutable_data
<
int
>
(
make_ddim
({
2
,
3
,
4
}),
CPUPlace
());
src_tensor
.
mutable_data
<
int
>
(
framework
::
make_ddim
({
2
,
3
,
4
}),
platform
::
CPUPlace
());
dst_tensor
.
ShareDataWith
(
src_tensor
);
ASSERT_EQ
(
src_tensor
.
data
<
int
>
(),
dst_tensor
.
data
<
int
>
());
}
#ifdef PADDLE_WITH_CUDA
{
Tensor
src_tensor
;
Tensor
dst_tensor
;
src_tensor
.
mutable_data
<
int
>
(
make_ddim
({
2
,
3
,
4
}),
CUDAPlace
());
framework
::
Tensor
src_tensor
;
framework
::
Tensor
dst_tensor
;
src_tensor
.
mutable_data
<
int
>
(
framework
::
make_ddim
({
2
,
3
,
4
}),
platform
::
CUDAPlace
());
dst_tensor
.
ShareDataWith
(
src_tensor
);
ASSERT_EQ
(
src_tensor
.
data
<
int
>
(),
dst_tensor
.
data
<
int
>
());
}
...
...
@@ -138,13 +145,12 @@ TEST(Tensor, ShareDataWith) {
}
TEST
(
Tensor
,
Slice
)
{
using
namespace
paddle
::
framework
;
using
namespace
paddle
::
platform
;
{
Tensor
src_tensor
;
src_tensor
.
mutable_data
<
int
>
(
make_ddim
({
5
,
3
,
4
}),
CPUPlace
());
Tensor
slice_tensor
=
src_tensor
.
Slice
(
1
,
3
);
DDim
slice_dims
=
slice_tensor
.
dims
();
framework
::
Tensor
src_tensor
;
src_tensor
.
mutable_data
<
int
>
(
framework
::
make_ddim
({
5
,
3
,
4
}),
platform
::
CPUPlace
());
framework
::
Tensor
slice_tensor
=
src_tensor
.
Slice
(
1
,
3
);
framework
::
DDim
slice_dims
=
slice_tensor
.
dims
();
ASSERT_EQ
(
arity
(
slice_dims
),
3
);
EXPECT_EQ
(
slice_dims
[
0
],
2
);
EXPECT_EQ
(
slice_dims
[
1
],
3
);
...
...
@@ -153,11 +159,12 @@ TEST(Tensor, Slice) {
uintptr_t
src_data_address
=
reinterpret_cast
<
uintptr_t
>
(
src_tensor
.
data
<
int
>
());
uintptr_t
src_mutable_data_address
=
reinterpret_cast
<
uintptr_t
>
(
src_tensor
.
mutable_data
<
int
>
(
src_tensor
.
dims
(),
CPUPlace
()));
src_tensor
.
mutable_data
<
int
>
(
src_tensor
.
dims
(),
platform
::
CPUPlace
()));
uintptr_t
slice_data_address
=
reinterpret_cast
<
uintptr_t
>
(
slice_tensor
.
data
<
int
>
());
uintptr_t
slice_mutable_data_address
=
reinterpret_cast
<
uintptr_t
>
(
slice_tensor
.
mutable_data
<
int
>
(
slice_tensor
.
dims
(),
CPUPlace
()));
uintptr_t
slice_mutable_data_address
=
reinterpret_cast
<
uintptr_t
>
(
slice_tensor
.
mutable_data
<
int
>
(
slice_tensor
.
dims
(),
platform
::
CPUPlace
()));
EXPECT_EQ
(
src_data_address
,
src_mutable_data_address
);
EXPECT_EQ
(
slice_data_address
,
slice_mutable_data_address
);
EXPECT_EQ
(
src_data_address
+
3
*
4
*
1
*
sizeof
(
int
),
slice_data_address
);
...
...
@@ -165,22 +172,25 @@ TEST(Tensor, Slice) {
#ifdef PADDLE_WITH_CUDA
{
Tensor
src_tensor
;
src_tensor
.
mutable_data
<
double
>
(
make_ddim
({
6
,
9
}),
CUDAPlace
());
Tensor
slice_tensor
=
src_tensor
.
Slice
(
2
,
6
);
DDim
slice_dims
=
slice_tensor
.
dims
();
framework
::
Tensor
src_tensor
;
src_tensor
.
mutable_data
<
double
>
(
framework
::
make_ddim
({
6
,
9
}),
platform
::
CUDAPlace
());
framework
::
Tensor
slice_tensor
=
src_tensor
.
Slice
(
2
,
6
);
framework
::
DDim
slice_dims
=
slice_tensor
.
dims
();
ASSERT_EQ
(
arity
(
slice_dims
),
2
);
EXPECT_EQ
(
slice_dims
[
0
],
4
);
EXPECT_EQ
(
slice_dims
[
1
],
9
);
uintptr_t
src_data_address
=
reinterpret_cast
<
uintptr_t
>
(
src_tensor
.
data
<
double
>
());
uintptr_t
src_mutable_data_address
=
reinterpret_cast
<
uintptr_t
>
(
src_tensor
.
mutable_data
<
double
>
(
src_tensor
.
dims
(),
CUDAPlace
()));
uintptr_t
src_mutable_data_address
=
reinterpret_cast
<
uintptr_t
>
(
src_tensor
.
mutable_data
<
double
>
(
src_tensor
.
dims
(),
platform
::
CUDAPlace
()));
uintptr_t
slice_data_address
=
reinterpret_cast
<
uintptr_t
>
(
slice_tensor
.
data
<
double
>
());
uintptr_t
slice_mutable_data_address
=
reinterpret_cast
<
uintptr_t
>
(
slice_tensor
.
mutable_data
<
double
>
(
slice_tensor
.
dims
(),
CUDAPlace
()));
uintptr_t
slice_mutable_data_address
=
reinterpret_cast
<
uintptr_t
>
(
slice_tensor
.
mutable_data
<
double
>
(
slice_tensor
.
dims
(),
platform
::
CUDAPlace
()));
EXPECT_EQ
(
src_data_address
,
src_mutable_data_address
);
EXPECT_EQ
(
slice_data_address
,
slice_mutable_data_address
);
EXPECT_EQ
(
src_data_address
+
9
*
2
*
sizeof
(
double
),
slice_data_address
);
...
...
@@ -189,23 +199,19 @@ TEST(Tensor, Slice) {
}
TEST
(
Tensor
,
ReshapeToMatrix
)
{
using
namespace
paddle
::
framework
;
using
namespace
paddle
::
platform
;
Tensor
src
;
int
*
src_ptr
=
src
.
mutable_data
<
int
>
({
2
,
3
,
4
,
9
},
CPUPlace
());
framework
::
Tensor
src
;
int
*
src_ptr
=
src
.
mutable_data
<
int
>
({
2
,
3
,
4
,
9
},
platform
::
CPUPlace
());
for
(
int
i
=
0
;
i
<
2
*
3
*
4
*
9
;
++
i
)
{
src_ptr
[
i
]
=
i
;
}
Tensor
res
=
ReshapeToMatrix
(
src
,
2
);
framework
::
Tensor
res
=
framework
::
ReshapeToMatrix
(
src
,
2
);
ASSERT_EQ
(
res
.
dims
()[
0
],
2
*
3
);
ASSERT_EQ
(
res
.
dims
()[
1
],
4
*
9
);
}
TEST
(
Tensor
,
Layout
)
{
using
namespace
paddle
::
framework
;
using
namespace
paddle
::
platform
;
Tensor
src
;
ASSERT_EQ
(
src
.
layout
(),
DataLayout
::
kNHWC
);
src
.
set_layout
(
DataLayout
::
kAnyLayout
);
ASSERT_EQ
(
src
.
layout
(),
DataLayout
::
kAnyLayout
);
framework
::
Tensor
src
;
ASSERT_EQ
(
src
.
layout
(),
framework
::
DataLayout
::
kNHWC
);
src
.
set_layout
(
framework
::
DataLayout
::
kAnyLayout
);
ASSERT_EQ
(
src
.
layout
(),
framework
::
DataLayout
::
kAnyLayout
);
}
paddle/framework/tensor_util.h
浏览文件 @
a8109cf0
...
...
@@ -13,6 +13,8 @@ See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include "paddle/framework/data_type.h"
#include "paddle/framework/framework.pb.h"
#include "paddle/framework/tensor.h"
namespace
paddle
{
...
...
@@ -205,5 +207,103 @@ inline void CopyToVector(const Tensor& src, std::vector<T>* dst) {
src_ptr
,
size
);
}
/*
 * Writes `tensor` to `os` as three consecutive binary fields:
 *   1. uint32_t format version (always 0),
 *   2. int32_t byte size of a proto::TensorDesc, followed by the serialized
 *      message (it carries the data type and the dims),
 *   3. tensor.memory_size() bytes of raw tensor data.
 * A tensor on a GPU place is copied to the host in chunks of at most 64MB
 * before being written; `dev_ctx` is used only on that path, where it is cast
 * to platform::CUDADeviceContext. Without PADDLE_WITH_CUDA the GPU path throws
 * "Unexpected branch".
 */
inline void SerializeToStream(std::ostream& os, const Tensor& tensor,
                              const platform::DeviceContext& dev_ctx) {
  // Field 1: format version.
  constexpr uint32_t kVersion = 0;
  os.write(reinterpret_cast<const char*>(&kVersion), sizeof(kVersion));

  // Field 2: tensor description -- int32_t size, then the protobuf message.
  {
    proto::TensorDesc tensor_desc;
    tensor_desc.set_data_type(framework::ToDataType(tensor.type()));
    auto shape = framework::vectorize(tensor.dims());
    auto* desc_dims = tensor_desc.mutable_dims();
    desc_dims->Resize(static_cast<int>(shape.size()), 0);
    std::copy(shape.begin(), shape.end(), desc_dims->begin());
    int32_t desc_bytes = tensor_desc.ByteSize();
    os.write(reinterpret_cast<const char*>(&desc_bytes), sizeof(desc_bytes));
    auto serialized_desc = tensor_desc.SerializeAsString();
    os.write(serialized_desc.data(), desc_bytes);
  }

  // Field 3: tensor data.
  uint64_t remaining = tensor.memory_size();
  auto* data_ptr = tensor.data<void>();
  PADDLE_ENFORCE(remaining < std::numeric_limits<std::streamsize>::max(),
                 "Index overflow when writing tensor");

  // Host-resident data can be written straight from the tensor's buffer.
  if (!platform::is_gpu_place(tensor.place())) {
    os.write(static_cast<const char*>(data_ptr),
             static_cast<std::streamsize>(remaining));
    return;
  }

#ifdef PADDLE_WITH_CUDA
  // Device-resident data is staged through a host buffer chunk by chunk.
  constexpr size_t kBufSize = 1024 * 1024 * 64;  // 64MB
  std::unique_ptr<char[]> host_buf(new char[kBufSize]);
  auto& gpu_dev_ctx = static_cast<const platform::CUDADeviceContext&>(dev_ctx);
  platform::CPUPlace cpu;
  uintptr_t device_addr = reinterpret_cast<uintptr_t>(data_ptr);
  while (remaining != 0) {
    size_t chunk = std::min(kBufSize, static_cast<size_t>(remaining));
    memory::Copy(cpu, host_buf.get(),
                 boost::get<platform::CUDAPlace>(tensor.place()),
                 reinterpret_cast<const void*>(device_addr), chunk,
                 gpu_dev_ctx.stream());
    // Wait for the copy to finish before handing the buffer to the stream.
    gpu_dev_ctx.Wait();
    os.write(host_buf.get(), chunk);
    device_addr += chunk;
    remaining -= chunk;
  }
#else
  PADDLE_THROW("Unexpected branch");
#endif
}
/*
 * Reads a tensor from `is`, expecting the three binary fields written by
 * SerializeToStream(std::ostream&, const Tensor&, ...):
 *   1. uint32_t format version (must be 0),
 *   2. int32_t byte size of a proto::TensorDesc, followed by the message,
 *   3. the raw tensor bytes.
 * `*tensor` is resized to the dims in the descriptor and its data is allocated
 * on platform::CPUPlace with the element type named by the descriptor (FP32,
 * FP64, INT32 or INT64; anything else throws).
 *
 * Enforcement fails if any field cannot be read completely, if the descriptor
 * size is negative, or if the descriptor cannot be parsed.
 */
inline void DeserializeFromStream(std::istream& is, Tensor* tensor) {
  // Field 1: format version. Initialised so a failed read never leaves an
  // indeterminate value behind.
  uint32_t version = 0;
  is.read(reinterpret_cast<char*>(&version), sizeof(version));
  PADDLE_ENFORCE(!is.fail(), "Cannot read tensor version");
  PADDLE_ENFORCE_EQ(version, 0U, "Only version 0 is supported");

  proto::TensorDesc desc;
  {
    // Field 2: int32_t size, then the proto buffer.
    int32_t size = 0;
    is.read(reinterpret_cast<char*>(&size), sizeof(size));
    // The size comes from the stream; reject it before using it to allocate.
    PADDLE_ENFORCE(!is.fail() && size >= 0, "Invalid tensor desc size");
    std::unique_ptr<char[]> buf(new char[size]);
    is.read(buf.get(), size);
    PADDLE_ENFORCE(!is.fail(), "Cannot read tensor desc");
    PADDLE_ENFORCE(desc.ParseFromArray(buf.get(), size),
                   "Cannot parse tensor desc");
  }
  {
    // Field 3: tensor data.
    std::vector<int64_t> dims;
    dims.reserve(static_cast<size_t>(desc.dims().size()));
    std::copy(desc.dims().begin(), desc.dims().end(),
              std::back_inserter(dims));
    tensor->Resize(framework::make_ddim(dims));

    void* buf = nullptr;
    platform::Place cpu = platform::CPUPlace();
    // TODO(Yancey1989): use VisiterDataType instead of DataType switch
    switch (desc.data_type()) {
      case proto::FP32:
        buf = tensor->mutable_data<float>(cpu);
        break;
      case proto::FP64:
        buf = tensor->mutable_data<double>(cpu);
        break;
      case proto::INT32:
        buf = tensor->mutable_data<int>(cpu);
        break;
      case proto::INT64:
        buf = tensor->mutable_data<int64_t>(cpu);
        break;
      default:
        PADDLE_THROW("DataType %d not supported", desc.data_type());
    }
    is.read(static_cast<char*>(buf), tensor->memory_size());
    // A short read would otherwise leave the tail of the tensor unfilled.
    PADDLE_ENFORCE(!is.fail(), "Cannot read tensor data");
  }
}
}
// namespace framework
}
// namespace paddle
paddle/framework/tensor_util_test.cc
浏览文件 @
a8109cf0
...
...
@@ -230,5 +230,55 @@ TEST(CopyToVector, Tensor) {
#endif
}
// Round-trips a 2x3 int tensor through an in-memory string stream, once from
// a CPU tensor and (when built with PADDLE_WITH_CUDA) once from a GPU copy,
// and checks every element of the restored tensor.
TEST(Tensor, SerializeAndDeserialize) {
  framework::Tensor src_tensor;
  int array[6] = {1, 2, 3, 4, 5, 6};
  src_tensor.Resize({2, 3});
  int* src_ptr = src_tensor.mutable_data<int>(platform::CPUPlace());
  for (int i = 0; i < 6; ++i) {
    src_ptr[i] = array[i];
  }
  {
    framework::Tensor dst_tensor;
    // Stack-allocated place: nothing to leak when an ASSERT returns early.
    platform::CPUPlace place;
    platform::CPUDeviceContext cpu_ctx(place);
    std::ostringstream oss;
    SerializeToStream(oss, src_tensor, cpu_ctx);

    std::istringstream iss(oss.str());
    DeserializeFromStream(iss, &dst_tensor);
    int* dst_ptr = dst_tensor.mutable_data<int>(platform::CPUPlace());
    // Check all six elements, including the last one.
    for (int i = 0; i < 6; ++i) {
      ASSERT_EQ(dst_ptr[i], array[i]);
    }
  }
#ifdef PADDLE_WITH_CUDA
  {
    Tensor gpu_tensor;
    gpu_tensor.Resize({2, 3});
    Tensor dst_tensor;

    // Stack-allocated place: nothing to leak when an ASSERT returns early.
    platform::CUDAPlace gpu_place;
    platform::CUDADeviceContext gpu_ctx(gpu_place);

    CopyFrom(src_tensor, gpu_place, gpu_ctx, &gpu_tensor);

    std::ostringstream oss;
    SerializeToStream(oss, gpu_tensor, gpu_ctx);

    std::istringstream iss(oss.str());
    DeserializeFromStream(iss, &dst_tensor);

    int* dst_ptr = dst_tensor.mutable_data<int>(platform::CPUPlace());
    for (int i = 0; i < 6; ++i) {
      ASSERT_EQ(dst_ptr[i], array[i]);
    }
  }
#endif
}
}
// namespace framework
}
// namespace paddle
paddle/operators/load_op.cc
浏览文件 @
a8109cf0
...
...
@@ -38,7 +38,7 @@ class LoadOp : public framework::OperatorBase {
out_var_name
);
auto
*
tensor
=
out_var
->
GetMutable
<
framework
::
LoDTensor
>
();
framework
::
DeserializeFromStream
(
fin
,
tensor
);
DeserializeFromStream
(
fin
,
tensor
);
platform
::
DeviceContextPool
&
pool
=
platform
::
DeviceContextPool
::
Instance
();
auto
&
dev_ctx
=
*
pool
.
Get
(
place
);
...
...
paddle/operators/math/CMakeLists.txt
浏览文件 @
a8109cf0
...
...
@@ -9,9 +9,9 @@ if(WITH_GPU)
nv_library
(
cross_entropy SRCS cross_entropy.cc cross_entropy.cu DEPS device_context
)
nv_library
(
pooling SRCS pooling.cc pooling.cu DEPS device_context
)
nv_library
(
sequence_pooling SRCS sequence_pooling.cc sequence_pooling.cu DEPS device_context math_function
)
nv_library
(
vol2col SRCS vol2col.cc vol2col.cu DEPS device_context
)
nv_library
(
vol2col SRCS vol2col.cc vol2col.cu DEPS device_context
tensor
)
nv_library
(
context_project SRCS context_project.cc context_project.cu DEPS device_context math_function
)
nv_library
(
sequence2batch SRCS sequence2batch.cc sequence2batch.cu DEPS device_context
)
nv_library
(
sequence2batch SRCS sequence2batch.cc sequence2batch.cu DEPS device_context
tensor
)
nv_library
(
lstm_compute SRCS lstm_compute.cc lstm_compute.cu DEPS device_context activation_functions
)
nv_library
(
maxouting SRCS maxouting.cc maxouting.cu DEPS device_context
)
nv_library
(
unpooling SRCS unpooling.cc unpooling.cu DEPS device_context
)
...
...
@@ -23,9 +23,9 @@ else()
cc_library
(
cross_entropy SRCS cross_entropy.cc DEPS device_context
)
cc_library
(
pooling SRCS pooling.cc DEPS device_context
)
cc_library
(
sequence_pooling SRCS sequence_pooling.cc DEPS device_context math_function
)
cc_library
(
vol2col SRCS vol2col.cc DEPS device_context
)
cc_library
(
vol2col SRCS vol2col.cc DEPS device_context
tensor
)
cc_library
(
context_project SRCS context_project.cc DEPS device_context math_function
)
cc_library
(
sequence2batch SRCS sequence2batch.cc DEPS device_context
)
cc_library
(
sequence2batch SRCS sequence2batch.cc DEPS device_context
tensor
)
cc_library
(
lstm_compute SRCS lstm_compute.cc DEPS device_context activation_functions
)
cc_library
(
maxouting SRCS maxouting.cc DEPS device_context
)
cc_library
(
unpooling SRCS unpooling.cc DEPS device_context
)
...
...
paddle/platform/for_range.h
浏览文件 @
a8109cf0
...
...
@@ -62,7 +62,7 @@ struct ForRange<CUDADeviceContext> {
template
<
typename
Function
>
inline
void
operator
()(
Function
func
)
const
{
constexpr
size_
t
num_threads
=
1024
;
constexpr
in
t
num_threads
=
1024
;
int
block_size
=
limit_
<=
num_threads
?
limit_
:
num_threads
;
int
grid_size
=
(
limit_
+
num_threads
-
1
)
/
num_threads
;
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录