Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
机器未来
Paddle
提交
a8109cf0
P
Paddle
项目概览
机器未来
/
Paddle
与 Fork 源项目一致
Fork自
PaddlePaddle / Paddle
通知
1
Star
1
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
1
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
1
Issue
1
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
a8109cf0
编写于
12月 28, 2017
作者:
S
sweetsky0901
浏览文件
操作
浏览文件
下载
差异文件
Merge branch 'develop' of
https://github.com/PaddlePaddle/Paddle
into detection_output
上级
95aec835
18311767
变更
12
隐藏空白更改
内联
并排
Showing
12 changed file
with
327 addition
and
155 deletion
+327
-155
paddle/framework/CMakeLists.txt
paddle/framework/CMakeLists.txt
+1
-1
paddle/framework/lod_tensor.cc
paddle/framework/lod_tensor.cc
+19
-93
paddle/framework/lod_tensor_test.cc
paddle/framework/lod_tensor_test.cc
+14
-0
paddle/framework/selected_rows.cc
paddle/framework/selected_rows.cc
+54
-1
paddle/framework/selected_rows.h
paddle/framework/selected_rows.h
+9
-0
paddle/framework/selected_rows_test.cc
paddle/framework/selected_rows_test.cc
+14
-0
paddle/framework/tensor_test.cc
paddle/framework/tensor_test.cc
+60
-54
paddle/framework/tensor_util.h
paddle/framework/tensor_util.h
+100
-0
paddle/framework/tensor_util_test.cc
paddle/framework/tensor_util_test.cc
+50
-0
paddle/operators/load_op.cc
paddle/operators/load_op.cc
+1
-1
paddle/operators/math/CMakeLists.txt
paddle/operators/math/CMakeLists.txt
+4
-4
paddle/platform/for_range.h
paddle/platform/for_range.h
+1
-1
未找到文件。
paddle/framework/CMakeLists.txt
浏览文件 @
a8109cf0
...
...
@@ -5,7 +5,7 @@ cc_library(ddim SRCS ddim.cc DEPS eigen3)
cc_test
(
ddim_test SRCS ddim_test.cc DEPS ddim
)
nv_test
(
dim_test SRCS dim_test.cu DEPS ddim
)
cc_library
(
tensor SRCS tensor.cc DEPS ddim place paddle_memory device_context
)
cc_library
(
tensor SRCS tensor.cc DEPS ddim place paddle_memory device_context
framework_proto
)
cc_test
(
tensor_test SRCS tensor_test.cc DEPS tensor
)
cc_test
(
tensor_util_test SRCS tensor_util_test.cc DEPS tensor
)
...
...
paddle/framework/lod_tensor.cc
浏览文件 @
a8109cf0
...
...
@@ -189,62 +189,16 @@ void AppendLoD(LoD *lod, const LoD &lod_length) {
void
SerializeToStream
(
std
::
ostream
&
os
,
const
LoDTensor
&
tensor
,
const
platform
::
DeviceContext
&
dev_ctx
)
{
// TODO(typhoonzero): serialize to ostream
{
// the 1st field, uint32_t version
{
// the 1st field, uint32_t version for LoDTensor
constexpr
uint32_t
version
=
0
;
os
.
write
(
reinterpret_cast
<
const
char
*>
(
&
version
),
sizeof
(
version
));
}
{
// the 2nd field, tensor description
// int32_t size
// void* protobuf message
proto
::
TensorDesc
desc
;
desc
.
set_data_type
(
framework
::
ToDataType
(
tensor
.
type
()));
auto
dims
=
framework
::
vectorize
(
tensor
.
dims
());
auto
*
pb_dims
=
desc
.
mutable_dims
();
pb_dims
->
Resize
(
static_cast
<
int
>
(
dims
.
size
()),
0
);
std
::
copy
(
dims
.
begin
(),
dims
.
end
(),
pb_dims
->
begin
());
int32_t
size
=
desc
.
ByteSize
();
os
.
write
(
reinterpret_cast
<
const
char
*>
(
&
size
),
sizeof
(
size
));
auto
out
=
desc
.
SerializeAsString
();
os
.
write
(
out
.
data
(),
size
);
}
{
// the 3rd field, tensor data
uint64_t
size
=
tensor
.
memory_size
();
auto
*
data_ptr
=
tensor
.
data
<
void
>
();
PADDLE_ENFORCE
(
size
<
std
::
numeric_limits
<
std
::
streamsize
>::
max
(),
"Index overflow when writing tensor"
);
if
(
platform
::
is_gpu_place
(
tensor
.
place
()))
{
#ifdef PADDLE_WITH_CUDA
constexpr
size_t
kBufSize
=
1024
*
1024
*
64
;
// 64MB
std
::
unique_ptr
<
char
[]
>
buf
(
new
char
[
kBufSize
]);
auto
&
gpu_dev_ctx
=
static_cast
<
const
platform
::
CUDADeviceContext
&>
(
dev_ctx
);
platform
::
CPUPlace
cpu
;
uintptr_t
data
=
reinterpret_cast
<
uintptr_t
>
(
data_ptr
);
while
(
size
!=
0
)
{
size_t
size_to_write
=
std
::
min
(
kBufSize
,
static_cast
<
size_t
>
(
size
));
memory
::
Copy
(
cpu
,
buf
.
get
(),
boost
::
get
<
platform
::
CUDAPlace
>
(
tensor
.
place
()),
reinterpret_cast
<
const
void
*>
(
data
),
size_to_write
,
gpu_dev_ctx
.
stream
());
gpu_dev_ctx
.
Wait
();
os
.
write
(
buf
.
get
(),
size_to_write
);
data
+=
size_to_write
;
size
-=
size_to_write
;
}
#else
PADDLE_THROW
(
"Unexpected branch"
);
#endif
}
else
{
os
.
write
(
static_cast
<
const
char
*>
(
data_ptr
),
static_cast
<
std
::
streamsize
>
(
size
));
}
}
{
// the 4th field, lod information
// uint64_t lod_level
// uint64_t lod_level_1 size in byte.
// int* lod_level_1 data
// ...
{
// the 2st field, LoD information
// uint64_t lod_level
// uint64_t lod_level_1 size in byte.
// int* lod_level_1 data
// ...
auto
lod
=
tensor
.
lod
();
uint64_t
size
=
lod
.
size
();
os
.
write
(
reinterpret_cast
<
const
char
*>
(
&
size
),
sizeof
(
size
));
...
...
@@ -256,49 +210,19 @@ void SerializeToStream(std::ostream &os, const LoDTensor &tensor,
static_cast
<
std
::
streamsize
>
(
size
));
}
}
// the 3st field, Tensor
SerializeToStream
(
os
,
static_cast
<
Tensor
>
(
tensor
),
dev_ctx
);
}
void
DeserializeFromStream
(
std
::
istream
&
is
,
LoDTensor
*
tensor
)
{
uint32_t
version
;
is
.
read
(
reinterpret_cast
<
char
*>
(
&
version
),
sizeof
(
version
));
PADDLE_ENFORCE_EQ
(
version
,
0U
,
"Only version 0 is supported"
);
proto
::
TensorDesc
desc
;
{
// int32_t size
// proto buffer
int32_t
size
;
is
.
read
(
reinterpret_cast
<
char
*>
(
&
size
),
sizeof
(
size
));
std
::
unique_ptr
<
char
[]
>
buf
(
new
char
[
size
]);
is
.
read
(
reinterpret_cast
<
char
*>
(
buf
.
get
()),
size
);
PADDLE_ENFORCE
(
desc
.
ParseFromArray
(
buf
.
get
(),
size
),
"Cannot parse tensor desc"
);
}
{
// read tensor
std
::
vector
<
int64_t
>
dims
;
dims
.
reserve
(
static_cast
<
size_t
>
(
desc
.
dims
().
size
()));
std
::
copy
(
desc
.
dims
().
begin
(),
desc
.
dims
().
end
(),
std
::
back_inserter
(
dims
));
tensor
->
Resize
(
framework
::
make_ddim
(
dims
));
void
*
buf
;
platform
::
Place
cpu
=
platform
::
CPUPlace
();
switch
(
desc
.
data_type
())
{
case
proto
::
FP32
:
buf
=
tensor
->
mutable_data
<
float
>
(
cpu
);
break
;
case
proto
::
FP64
:
buf
=
tensor
->
mutable_data
<
double
>
(
cpu
);
break
;
case
proto
::
INT32
:
buf
=
tensor
->
mutable_data
<
int
>
(
cpu
);
break
;
case
proto
::
INT64
:
buf
=
tensor
->
mutable_data
<
int64_t
>
(
cpu
);
break
;
default:
PADDLE_THROW
(
"DataType %d not supported"
,
desc
.
data_type
());
}
is
.
read
(
static_cast
<
char
*>
(
buf
),
tensor
->
memory_size
());
}
{
// read lod
{
// the 1st field, unit32_t version for SelectedRows
uint32_t
version
;
is
.
read
(
reinterpret_cast
<
char
*>
(
&
version
),
sizeof
(
version
));
PADDLE_ENFORCE_EQ
(
version
,
0U
,
"Only version 0 is supported"
);
}
{
// the 2st field, LoD information
uint64_t
lod_level
;
is
.
read
(
reinterpret_cast
<
char
*>
(
&
lod_level
),
sizeof
(
lod_level
));
auto
&
lod
=
*
tensor
->
mutable_lod
();
...
...
@@ -312,6 +236,8 @@ void DeserializeFromStream(std::istream &is, LoDTensor *tensor) {
lod
[
i
]
=
tmp
;
}
}
// the 3st filed, Tensor
DeserializeFromStream
(
is
,
static_cast
<
Tensor
*>
(
tensor
));
}
}
// namespace framework
...
...
paddle/framework/lod_tensor_test.cc
浏览文件 @
a8109cf0
...
...
@@ -126,6 +126,20 @@ TEST_F(LoDTensorTester, ShrinkInLevel) {
EXPECT_NE
(
t1
.
data
<
float
>
(),
lod_tensor_
.
data
<
float
>
());
}
TEST_F
(
LoDTensorTester
,
SerializeAndDeserialize
)
{
LoDTensor
dst_tensor
;
platform
::
CPUDeviceContext
cpu_ctx
((
platform
::
CPUPlace
()));
std
::
ostringstream
oss
;
SerializeToStream
(
oss
,
lod_tensor_
,
cpu_ctx
);
std
::
istringstream
iss
(
oss
.
str
());
DeserializeFromStream
(
iss
,
&
dst_tensor
);
float
*
dst_ptr
=
dst_tensor
.
mutable_data
<
float
>
(
platform
::
CPUPlace
());
for
(
int
i
=
0
;
i
<
kLodTensorSize
;
++
i
)
{
EXPECT_EQ
(
dst_ptr
[
i
],
i
);
}
EXPECT_EQ
(
dst_tensor
.
lod
(),
lod_tensor_
.
lod
());
}
TEST
(
LodExpand
,
test
)
{
LoD
lod
{{
0
,
2
}};
LoDTensor
tensor
;
...
...
paddle/framework/selected_rows.cc
浏览文件 @
a8109cf0
...
...
@@ -12,5 +12,58 @@ limitations under the License. */
#include "paddle/framework/selected_rows.h"
namespace
paddle
{
namespace
framework
{}
// namespace framework
namespace
framework
{
void
SerializeToStream
(
std
::
ostream
&
os
,
const
SelectedRows
&
selected_rows
,
const
platform
::
DeviceContext
&
dev_ctx
)
{
{
// the 1st field, uint32_t version
constexpr
uint32_t
version
=
0
;
os
.
write
(
reinterpret_cast
<
const
char
*>
(
&
version
),
sizeof
(
version
));
}
{
// the 2st field, rows information
auto
&
rows
=
selected_rows
.
rows
();
uint64_t
size
=
rows
.
size
();
os
.
write
(
reinterpret_cast
<
const
char
*>
(
&
size
),
sizeof
(
size
));
for
(
uint64_t
i
=
0
;
i
<
size
;
++
i
)
{
os
.
write
(
reinterpret_cast
<
const
char
*>
(
&
rows
[
i
]),
sizeof
(
rows
[
i
]));
}
}
{
// the 3st field, the height of SelectedRows
int64_t
height
=
selected_rows
.
height
();
os
.
write
(
reinterpret_cast
<
const
char
*>
(
&
height
),
sizeof
(
height
));
}
// the 4st field, Tensor data
SerializeToStream
(
os
,
selected_rows
.
value
(),
dev_ctx
);
}
void
DeserializeFromStream
(
std
::
istream
&
is
,
SelectedRows
*
selected_rows
)
{
auto
tensor
=
*
selected_rows
->
mutable_value
();
{
// the 1st field, unit32_t version for SelectedRows
uint32_t
version
;
is
.
read
(
reinterpret_cast
<
char
*>
(
&
version
),
sizeof
(
version
));
PADDLE_ENFORCE_EQ
(
version
,
0U
,
"Only version 0 is supported"
);
}
{
// the 2st field, rows information
uint64_t
size
;
is
.
read
(
reinterpret_cast
<
char
*>
(
&
size
),
sizeof
(
size
));
auto
&
rows
=
*
selected_rows
->
mutable_rows
();
rows
.
resize
(
size
);
for
(
uint64_t
i
=
0
;
i
<
size
;
++
i
)
{
is
.
read
(
reinterpret_cast
<
char
*>
(
&
rows
[
i
]),
sizeof
(
int64_t
));
}
}
{
// the 3st field, the height of the SelectedRows
int64_t
height
;
is
.
read
(
reinterpret_cast
<
char
*>
(
&
height
),
sizeof
(
int64_t
));
selected_rows
->
set_height
(
height
);
}
// the 4st field, tensor which contains the data
DeserializeFromStream
(
is
,
&
tensor
);
}
}
// namespace framework
}
// namespace paddle
paddle/framework/selected_rows.h
浏览文件 @
a8109cf0
...
...
@@ -59,5 +59,14 @@ class SelectedRows {
int64_t
height_
;
};
/*
* Serialize/Desiralize SelectedRows to std::ostream
* You can pass ofstream or ostringstream to serilize to file
* or to a in memory string. GPU tensor will be copied to CPU.
*/
void
SerializeToStream
(
std
::
ostream
&
os
,
const
SelectedRows
&
selected_rows
,
const
platform
::
DeviceContext
&
dev_ctx
);
void
DeserializeFromStream
(
std
::
istream
&
is
,
SelectedRows
*
selected_rows
);
}
// namespace framework
}
// namespace paddle
paddle/framework/selected_rows_test.cc
浏览文件 @
a8109cf0
...
...
@@ -43,5 +43,19 @@ TEST_F(SelectedRowsTester, complete_dims) {
ASSERT_EQ
(
selected_rows_
->
GetCompleteDims
(),
make_ddim
({
10
,
100
}));
}
TEST_F
(
SelectedRowsTester
,
SerializeAndDeseralize
)
{
SelectedRows
dst_tensor
;
platform
::
CPUDeviceContext
cpu_ctx
(
place_
);
std
::
ostringstream
oss
;
SerializeToStream
(
oss
,
*
selected_rows_
,
cpu_ctx
);
std
::
istringstream
iss
(
oss
.
str
());
DeserializeFromStream
(
iss
,
&
dst_tensor
);
ASSERT_EQ
(
selected_rows_
->
rows
(),
dst_tensor
.
rows
());
ASSERT_EQ
(
selected_rows_
->
height
(),
dst_tensor
.
height
());
}
}
// namespace framework
}
// namespace paddle
paddle/framework/tensor_test.cc
浏览文件 @
a8109cf0
...
...
@@ -15,12 +15,13 @@
#include <gtest/gtest.h>
#include <string>
namespace
framework
=
paddle
::
framework
;
namespace
platform
=
paddle
::
platform
;
TEST
(
Tensor
,
Dims
)
{
using
namespace
paddle
::
framework
;
using
namespace
paddle
::
platform
;
Tensor
tt
;
framework
::
Tensor
tt
;
tt
.
Resize
({
2
,
3
,
4
});
DDim
dims
=
tt
.
dims
();
framework
::
DDim
dims
=
tt
.
dims
();
ASSERT_EQ
(
arity
(
dims
),
3
);
for
(
int
i
=
0
;
i
<
3
;
++
i
)
{
EXPECT_EQ
(
i
+
2
,
dims
[
i
]);
...
...
@@ -28,12 +29,12 @@ TEST(Tensor, Dims) {
}
TEST
(
Tensor
,
DataAssert
)
{
paddle
::
framework
::
Tensor
src_tensor
;
framework
::
Tensor
src_tensor
;
bool
caught
=
false
;
try
{
src_tensor
.
data
<
double
>
();
}
catch
(
p
addle
::
p
latform
::
EnforceNotMet
err
)
{
}
catch
(
platform
::
EnforceNotMet
err
)
{
caught
=
true
;
std
::
string
msg
=
"holder_ should not be null
\n
Tensor holds no memory. Call "
...
...
@@ -50,61 +51,65 @@ TEST(Tensor, DataAssert) {
because Memory::Alloc() and Memory::Free() have not been ready.
*/
TEST
(
Tensor
,
MutableData
)
{
using
namespace
paddle
::
framework
;
using
namespace
paddle
::
platform
;
{
Tensor
src_tensor
;
framework
::
Tensor
src_tensor
;
float
*
p1
=
nullptr
;
float
*
p2
=
nullptr
;
// initialization
p1
=
src_tensor
.
mutable_data
<
float
>
(
make_ddim
({
1
,
2
,
3
}),
CPUPlace
());
p1
=
src_tensor
.
mutable_data
<
float
>
(
framework
::
make_ddim
({
1
,
2
,
3
}),
platform
::
CPUPlace
());
EXPECT_NE
(
p1
,
nullptr
);
// set src_tensor a new dim with large size
// momery is supposed to be re-allocated
p2
=
src_tensor
.
mutable_data
<
float
>
(
make_ddim
({
3
,
4
}),
CPUPlace
());
p2
=
src_tensor
.
mutable_data
<
float
>
(
framework
::
make_ddim
({
3
,
4
}),
platform
::
CPUPlace
());
EXPECT_NE
(
p2
,
nullptr
);
EXPECT_NE
(
p1
,
p2
);
// set src_tensor a new dim with same size
// momery block is supposed to be unchanged
p1
=
src_tensor
.
mutable_data
<
float
>
(
make_ddim
({
2
,
2
,
3
}),
CPUPlace
());
p1
=
src_tensor
.
mutable_data
<
float
>
(
framework
::
make_ddim
({
2
,
2
,
3
}),
platform
::
CPUPlace
());
EXPECT_EQ
(
p1
,
p2
);
// set src_tensor a new dim with smaller size
// momery block is supposed to be unchanged
p2
=
src_tensor
.
mutable_data
<
float
>
(
make_ddim
({
2
,
2
}),
CPUPlace
());
p2
=
src_tensor
.
mutable_data
<
float
>
(
framework
::
make_ddim
({
2
,
2
}),
platform
::
CPUPlace
());
EXPECT_EQ
(
p1
,
p2
);
}
#ifdef PADDLE_WITH_CUDA
{
Tensor
src_tensor
;
framework
::
Tensor
src_tensor
;
float
*
p1
=
nullptr
;
float
*
p2
=
nullptr
;
// initialization
p1
=
src_tensor
.
mutable_data
<
float
>
(
make_ddim
({
1
,
2
,
3
}),
CUDAPlace
());
p1
=
src_tensor
.
mutable_data
<
float
>
(
framework
::
make_ddim
({
1
,
2
,
3
}),
platform
::
CUDAPlace
());
EXPECT_NE
(
p1
,
nullptr
);
// set src_tensor a new dim with large size
// momery is supposed to be re-allocated
p2
=
src_tensor
.
mutable_data
<
float
>
(
make_ddim
({
3
,
4
}),
CUDAPlace
());
p2
=
src_tensor
.
mutable_data
<
float
>
(
framework
::
make_ddim
({
3
,
4
}),
platform
::
CUDAPlace
());
EXPECT_NE
(
p2
,
nullptr
);
EXPECT_NE
(
p1
,
p2
);
// set src_tensor a new dim with same size
// momery block is supposed to be unchanged
p1
=
src_tensor
.
mutable_data
<
float
>
(
make_ddim
({
2
,
2
,
3
}),
CUDAPlace
());
p1
=
src_tensor
.
mutable_data
<
float
>
(
framework
::
make_ddim
({
2
,
2
,
3
}),
platform
::
CUDAPlace
());
EXPECT_EQ
(
p1
,
p2
);
// set src_tensor a new dim with smaller size
// momery block is supposed to be unchanged
p2
=
src_tensor
.
mutable_data
<
float
>
(
make_ddim
({
2
,
2
}),
CUDAPlace
());
p2
=
src_tensor
.
mutable_data
<
float
>
(
framework
::
make_ddim
({
2
,
2
}),
platform
::
CUDAPlace
());
EXPECT_EQ
(
p1
,
p2
);
}
#endif
}
TEST
(
Tensor
,
ShareDataWith
)
{
using
namespace
paddle
::
framework
;
using
namespace
paddle
::
platform
;
{
Tensor
src_tensor
;
Tensor
dst_tensor
;
framework
::
Tensor
src_tensor
;
framework
::
Tensor
dst_tensor
;
// Try to share data form uninitialized tensor
bool
caught
=
false
;
try
{
...
...
@@ -121,16 +126,18 @@ TEST(Tensor, ShareDataWith) {
}
ASSERT_TRUE
(
caught
);
src_tensor
.
mutable_data
<
int
>
(
make_ddim
({
2
,
3
,
4
}),
CPUPlace
());
src_tensor
.
mutable_data
<
int
>
(
framework
::
make_ddim
({
2
,
3
,
4
}),
platform
::
CPUPlace
());
dst_tensor
.
ShareDataWith
(
src_tensor
);
ASSERT_EQ
(
src_tensor
.
data
<
int
>
(),
dst_tensor
.
data
<
int
>
());
}
#ifdef PADDLE_WITH_CUDA
{
Tensor
src_tensor
;
Tensor
dst_tensor
;
src_tensor
.
mutable_data
<
int
>
(
make_ddim
({
2
,
3
,
4
}),
CUDAPlace
());
framework
::
Tensor
src_tensor
;
framework
::
Tensor
dst_tensor
;
src_tensor
.
mutable_data
<
int
>
(
framework
::
make_ddim
({
2
,
3
,
4
}),
platform
::
CUDAPlace
());
dst_tensor
.
ShareDataWith
(
src_tensor
);
ASSERT_EQ
(
src_tensor
.
data
<
int
>
(),
dst_tensor
.
data
<
int
>
());
}
...
...
@@ -138,13 +145,12 @@ TEST(Tensor, ShareDataWith) {
}
TEST
(
Tensor
,
Slice
)
{
using
namespace
paddle
::
framework
;
using
namespace
paddle
::
platform
;
{
Tensor
src_tensor
;
src_tensor
.
mutable_data
<
int
>
(
make_ddim
({
5
,
3
,
4
}),
CPUPlace
());
Tensor
slice_tensor
=
src_tensor
.
Slice
(
1
,
3
);
DDim
slice_dims
=
slice_tensor
.
dims
();
framework
::
Tensor
src_tensor
;
src_tensor
.
mutable_data
<
int
>
(
framework
::
make_ddim
({
5
,
3
,
4
}),
platform
::
CPUPlace
());
framework
::
Tensor
slice_tensor
=
src_tensor
.
Slice
(
1
,
3
);
framework
::
DDim
slice_dims
=
slice_tensor
.
dims
();
ASSERT_EQ
(
arity
(
slice_dims
),
3
);
EXPECT_EQ
(
slice_dims
[
0
],
2
);
EXPECT_EQ
(
slice_dims
[
1
],
3
);
...
...
@@ -153,11 +159,12 @@ TEST(Tensor, Slice) {
uintptr_t
src_data_address
=
reinterpret_cast
<
uintptr_t
>
(
src_tensor
.
data
<
int
>
());
uintptr_t
src_mutable_data_address
=
reinterpret_cast
<
uintptr_t
>
(
src_tensor
.
mutable_data
<
int
>
(
src_tensor
.
dims
(),
CPUPlace
()));
src_tensor
.
mutable_data
<
int
>
(
src_tensor
.
dims
(),
platform
::
CPUPlace
()));
uintptr_t
slice_data_address
=
reinterpret_cast
<
uintptr_t
>
(
slice_tensor
.
data
<
int
>
());
uintptr_t
slice_mutable_data_address
=
reinterpret_cast
<
uintptr_t
>
(
slice_tensor
.
mutable_data
<
int
>
(
slice_tensor
.
dims
(),
CPUPlace
()));
uintptr_t
slice_mutable_data_address
=
reinterpret_cast
<
uintptr_t
>
(
slice_tensor
.
mutable_data
<
int
>
(
slice_tensor
.
dims
(),
platform
::
CPUPlace
()));
EXPECT_EQ
(
src_data_address
,
src_mutable_data_address
);
EXPECT_EQ
(
slice_data_address
,
slice_mutable_data_address
);
EXPECT_EQ
(
src_data_address
+
3
*
4
*
1
*
sizeof
(
int
),
slice_data_address
);
...
...
@@ -165,22 +172,25 @@ TEST(Tensor, Slice) {
#ifdef PADDLE_WITH_CUDA
{
Tensor
src_tensor
;
src_tensor
.
mutable_data
<
double
>
(
make_ddim
({
6
,
9
}),
CUDAPlace
());
Tensor
slice_tensor
=
src_tensor
.
Slice
(
2
,
6
);
DDim
slice_dims
=
slice_tensor
.
dims
();
framework
::
Tensor
src_tensor
;
src_tensor
.
mutable_data
<
double
>
(
framework
::
make_ddim
({
6
,
9
}),
platform
::
CUDAPlace
());
framework
::
Tensor
slice_tensor
=
src_tensor
.
Slice
(
2
,
6
);
framework
::
DDim
slice_dims
=
slice_tensor
.
dims
();
ASSERT_EQ
(
arity
(
slice_dims
),
2
);
EXPECT_EQ
(
slice_dims
[
0
],
4
);
EXPECT_EQ
(
slice_dims
[
1
],
9
);
uintptr_t
src_data_address
=
reinterpret_cast
<
uintptr_t
>
(
src_tensor
.
data
<
double
>
());
uintptr_t
src_mutable_data_address
=
reinterpret_cast
<
uintptr_t
>
(
src_tensor
.
mutable_data
<
double
>
(
src_tensor
.
dims
(),
CUDAPlace
()));
uintptr_t
src_mutable_data_address
=
reinterpret_cast
<
uintptr_t
>
(
src_tensor
.
mutable_data
<
double
>
(
src_tensor
.
dims
(),
platform
::
CUDAPlace
()));
uintptr_t
slice_data_address
=
reinterpret_cast
<
uintptr_t
>
(
slice_tensor
.
data
<
double
>
());
uintptr_t
slice_mutable_data_address
=
reinterpret_cast
<
uintptr_t
>
(
slice_tensor
.
mutable_data
<
double
>
(
slice_tensor
.
dims
(),
CUDAPlace
()));
uintptr_t
slice_mutable_data_address
=
reinterpret_cast
<
uintptr_t
>
(
slice_tensor
.
mutable_data
<
double
>
(
slice_tensor
.
dims
(),
platform
::
CUDAPlace
()));
EXPECT_EQ
(
src_data_address
,
src_mutable_data_address
);
EXPECT_EQ
(
slice_data_address
,
slice_mutable_data_address
);
EXPECT_EQ
(
src_data_address
+
9
*
2
*
sizeof
(
double
),
slice_data_address
);
...
...
@@ -189,23 +199,19 @@ TEST(Tensor, Slice) {
}
TEST
(
Tensor
,
ReshapeToMatrix
)
{
using
namespace
paddle
::
framework
;
using
namespace
paddle
::
platform
;
Tensor
src
;
int
*
src_ptr
=
src
.
mutable_data
<
int
>
({
2
,
3
,
4
,
9
},
CPUPlace
());
framework
::
Tensor
src
;
int
*
src_ptr
=
src
.
mutable_data
<
int
>
({
2
,
3
,
4
,
9
},
platform
::
CPUPlace
());
for
(
int
i
=
0
;
i
<
2
*
3
*
4
*
9
;
++
i
)
{
src_ptr
[
i
]
=
i
;
}
Tensor
res
=
ReshapeToMatrix
(
src
,
2
);
framework
::
Tensor
res
=
framework
::
ReshapeToMatrix
(
src
,
2
);
ASSERT_EQ
(
res
.
dims
()[
0
],
2
*
3
);
ASSERT_EQ
(
res
.
dims
()[
1
],
4
*
9
);
}
TEST
(
Tensor
,
Layout
)
{
using
namespace
paddle
::
framework
;
using
namespace
paddle
::
platform
;
Tensor
src
;
ASSERT_EQ
(
src
.
layout
(),
DataLayout
::
kNHWC
);
src
.
set_layout
(
DataLayout
::
kAnyLayout
);
ASSERT_EQ
(
src
.
layout
(),
DataLayout
::
kAnyLayout
);
framework
::
Tensor
src
;
ASSERT_EQ
(
src
.
layout
(),
framework
::
DataLayout
::
kNHWC
);
src
.
set_layout
(
framework
::
DataLayout
::
kAnyLayout
);
ASSERT_EQ
(
src
.
layout
(),
framework
::
DataLayout
::
kAnyLayout
);
}
paddle/framework/tensor_util.h
浏览文件 @
a8109cf0
...
...
@@ -13,6 +13,8 @@ See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include "paddle/framework/data_type.h"
#include "paddle/framework/framework.pb.h"
#include "paddle/framework/tensor.h"
namespace
paddle
{
...
...
@@ -205,5 +207,103 @@ inline void CopyToVector(const Tensor& src, std::vector<T>* dst) {
src_ptr
,
size
);
}
inline
void
SerializeToStream
(
std
::
ostream
&
os
,
const
Tensor
&
tensor
,
const
platform
::
DeviceContext
&
dev_ctx
)
{
// TODO(typhoonzero): serialize to ostream
{
// the 1st field, uint32_t version
constexpr
uint32_t
version
=
0
;
os
.
write
(
reinterpret_cast
<
const
char
*>
(
&
version
),
sizeof
(
version
));
}
{
// the 2nd field, tensor description
// int32_t size
// void* protobuf message
proto
::
TensorDesc
desc
;
desc
.
set_data_type
(
framework
::
ToDataType
(
tensor
.
type
()));
auto
dims
=
framework
::
vectorize
(
tensor
.
dims
());
auto
*
pb_dims
=
desc
.
mutable_dims
();
pb_dims
->
Resize
(
static_cast
<
int
>
(
dims
.
size
()),
0
);
std
::
copy
(
dims
.
begin
(),
dims
.
end
(),
pb_dims
->
begin
());
int32_t
size
=
desc
.
ByteSize
();
os
.
write
(
reinterpret_cast
<
const
char
*>
(
&
size
),
sizeof
(
size
));
auto
out
=
desc
.
SerializeAsString
();
os
.
write
(
out
.
data
(),
size
);
}
{
// the 3rd field, tensor data
uint64_t
size
=
tensor
.
memory_size
();
auto
*
data_ptr
=
tensor
.
data
<
void
>
();
PADDLE_ENFORCE
(
size
<
std
::
numeric_limits
<
std
::
streamsize
>::
max
(),
"Index overflow when writing tensor"
);
if
(
platform
::
is_gpu_place
(
tensor
.
place
()))
{
#ifdef PADDLE_WITH_CUDA
constexpr
size_t
kBufSize
=
1024
*
1024
*
64
;
// 64MB
std
::
unique_ptr
<
char
[]
>
buf
(
new
char
[
kBufSize
]);
auto
&
gpu_dev_ctx
=
static_cast
<
const
platform
::
CUDADeviceContext
&>
(
dev_ctx
);
platform
::
CPUPlace
cpu
;
uintptr_t
data
=
reinterpret_cast
<
uintptr_t
>
(
data_ptr
);
while
(
size
!=
0
)
{
size_t
size_to_write
=
std
::
min
(
kBufSize
,
static_cast
<
size_t
>
(
size
));
memory
::
Copy
(
cpu
,
buf
.
get
(),
boost
::
get
<
platform
::
CUDAPlace
>
(
tensor
.
place
()),
reinterpret_cast
<
const
void
*>
(
data
),
size_to_write
,
gpu_dev_ctx
.
stream
());
gpu_dev_ctx
.
Wait
();
os
.
write
(
buf
.
get
(),
size_to_write
);
data
+=
size_to_write
;
size
-=
size_to_write
;
}
#else
PADDLE_THROW
(
"Unexpected branch"
);
#endif
}
else
{
os
.
write
(
static_cast
<
const
char
*>
(
data_ptr
),
static_cast
<
std
::
streamsize
>
(
size
));
}
}
}
inline
void
DeserializeFromStream
(
std
::
istream
&
is
,
Tensor
*
tensor
)
{
uint32_t
version
;
is
.
read
(
reinterpret_cast
<
char
*>
(
&
version
),
sizeof
(
version
));
PADDLE_ENFORCE_EQ
(
version
,
0U
,
"Only version 0 is supported"
);
proto
::
TensorDesc
desc
;
{
// int32_t size
// proto buffer
int32_t
size
;
is
.
read
(
reinterpret_cast
<
char
*>
(
&
size
),
sizeof
(
size
));
std
::
unique_ptr
<
char
[]
>
buf
(
new
char
[
size
]);
is
.
read
(
reinterpret_cast
<
char
*>
(
buf
.
get
()),
size
);
PADDLE_ENFORCE
(
desc
.
ParseFromArray
(
buf
.
get
(),
size
),
"Cannot parse tensor desc"
);
}
{
// read tensor
std
::
vector
<
int64_t
>
dims
;
dims
.
reserve
(
static_cast
<
size_t
>
(
desc
.
dims
().
size
()));
std
::
copy
(
desc
.
dims
().
begin
(),
desc
.
dims
().
end
(),
std
::
back_inserter
(
dims
));
tensor
->
Resize
(
framework
::
make_ddim
(
dims
));
void
*
buf
;
platform
::
Place
cpu
=
platform
::
CPUPlace
();
// TODO(Yancey1989): use VisiterDataType instead of DataType switch
switch
(
desc
.
data_type
())
{
case
proto
::
FP32
:
buf
=
tensor
->
mutable_data
<
float
>
(
cpu
);
break
;
case
proto
::
FP64
:
buf
=
tensor
->
mutable_data
<
double
>
(
cpu
);
break
;
case
proto
::
INT32
:
buf
=
tensor
->
mutable_data
<
int
>
(
cpu
);
break
;
case
proto
::
INT64
:
buf
=
tensor
->
mutable_data
<
int64_t
>
(
cpu
);
break
;
default:
PADDLE_THROW
(
"DataType %d not supported"
,
desc
.
data_type
());
}
is
.
read
(
static_cast
<
char
*>
(
buf
),
tensor
->
memory_size
());
}
}
}
// namespace framework
}
// namespace paddle
paddle/framework/tensor_util_test.cc
浏览文件 @
a8109cf0
...
...
@@ -230,5 +230,55 @@ TEST(CopyToVector, Tensor) {
#endif
}
TEST
(
Tensor
,
SerializeAndDeserialize
)
{
framework
::
Tensor
src_tensor
;
int
array
[
6
]
=
{
1
,
2
,
3
,
4
,
5
,
6
};
src_tensor
.
Resize
({
2
,
3
});
int
*
src_ptr
=
src_tensor
.
mutable_data
<
int
>
(
platform
::
CPUPlace
());
for
(
int
i
=
0
;
i
<
6
;
++
i
)
{
src_ptr
[
i
]
=
array
[
i
];
}
{
framework
::
Tensor
dst_tensor
;
auto
place
=
new
platform
::
CPUPlace
();
platform
::
CPUDeviceContext
cpu_ctx
(
*
place
);
std
::
ostringstream
oss
;
SerializeToStream
(
oss
,
src_tensor
,
cpu_ctx
);
std
::
istringstream
iss
(
oss
.
str
());
DeserializeFromStream
(
iss
,
&
dst_tensor
);
int
*
dst_ptr
=
dst_tensor
.
mutable_data
<
int
>
(
platform
::
CPUPlace
());
for
(
int
i
=
0
;
i
<
5
;
++
i
)
{
ASSERT_EQ
(
dst_ptr
[
i
],
array
[
i
]);
}
delete
place
;
}
#ifdef PADDLE_WITH_CUDA
{
Tensor
gpu_tensor
;
gpu_tensor
.
Resize
({
2
,
3
});
Tensor
dst_tensor
;
auto
gpu_place
=
new
platform
::
CUDAPlace
();
platform
::
CUDADeviceContext
gpu_ctx
(
*
gpu_place
);
CopyFrom
(
src_tensor
,
*
gpu_place
,
gpu_ctx
,
&
gpu_tensor
);
std
::
ostringstream
oss
;
SerializeToStream
(
oss
,
gpu_tensor
,
gpu_ctx
);
std
::
istringstream
iss
(
oss
.
str
());
DeserializeFromStream
(
iss
,
&
dst_tensor
);
int
*
dst_ptr
=
dst_tensor
.
mutable_data
<
int
>
(
platform
::
CPUPlace
());
for
(
int
i
=
0
;
i
<
6
;
++
i
)
{
ASSERT_EQ
(
dst_ptr
[
i
],
array
[
i
]);
}
delete
gpu_place
;
}
#endif
}
}
// namespace framework
}
// namespace paddle
paddle/operators/load_op.cc
浏览文件 @
a8109cf0
...
...
@@ -38,7 +38,7 @@ class LoadOp : public framework::OperatorBase {
out_var_name
);
auto
*
tensor
=
out_var
->
GetMutable
<
framework
::
LoDTensor
>
();
framework
::
DeserializeFromStream
(
fin
,
tensor
);
DeserializeFromStream
(
fin
,
tensor
);
platform
::
DeviceContextPool
&
pool
=
platform
::
DeviceContextPool
::
Instance
();
auto
&
dev_ctx
=
*
pool
.
Get
(
place
);
...
...
paddle/operators/math/CMakeLists.txt
浏览文件 @
a8109cf0
...
...
@@ -9,9 +9,9 @@ if(WITH_GPU)
nv_library
(
cross_entropy SRCS cross_entropy.cc cross_entropy.cu DEPS device_context
)
nv_library
(
pooling SRCS pooling.cc pooling.cu DEPS device_context
)
nv_library
(
sequence_pooling SRCS sequence_pooling.cc sequence_pooling.cu DEPS device_context math_function
)
nv_library
(
vol2col SRCS vol2col.cc vol2col.cu DEPS device_context
)
nv_library
(
vol2col SRCS vol2col.cc vol2col.cu DEPS device_context
tensor
)
nv_library
(
context_project SRCS context_project.cc context_project.cu DEPS device_context math_function
)
nv_library
(
sequence2batch SRCS sequence2batch.cc sequence2batch.cu DEPS device_context
)
nv_library
(
sequence2batch SRCS sequence2batch.cc sequence2batch.cu DEPS device_context
tensor
)
nv_library
(
lstm_compute SRCS lstm_compute.cc lstm_compute.cu DEPS device_context activation_functions
)
nv_library
(
maxouting SRCS maxouting.cc maxouting.cu DEPS device_context
)
nv_library
(
unpooling SRCS unpooling.cc unpooling.cu DEPS device_context
)
...
...
@@ -23,9 +23,9 @@ else()
cc_library
(
cross_entropy SRCS cross_entropy.cc DEPS device_context
)
cc_library
(
pooling SRCS pooling.cc DEPS device_context
)
cc_library
(
sequence_pooling SRCS sequence_pooling.cc DEPS device_context math_function
)
cc_library
(
vol2col SRCS vol2col.cc DEPS device_context
)
cc_library
(
vol2col SRCS vol2col.cc DEPS device_context
tensor
)
cc_library
(
context_project SRCS context_project.cc DEPS device_context math_function
)
cc_library
(
sequence2batch SRCS sequence2batch.cc DEPS device_context
)
cc_library
(
sequence2batch SRCS sequence2batch.cc DEPS device_context
tensor
)
cc_library
(
lstm_compute SRCS lstm_compute.cc DEPS device_context activation_functions
)
cc_library
(
maxouting SRCS maxouting.cc DEPS device_context
)
cc_library
(
unpooling SRCS unpooling.cc DEPS device_context
)
...
...
paddle/platform/for_range.h
浏览文件 @
a8109cf0
...
...
@@ -62,7 +62,7 @@ struct ForRange<CUDADeviceContext> {
template
<
typename
Function
>
inline
void
operator
()(
Function
func
)
const
{
constexpr
size_
t
num_threads
=
1024
;
constexpr
in
t
num_threads
=
1024
;
int
block_size
=
limit_
<=
num_threads
?
limit_
:
num_threads
;
int
grid_size
=
(
limit_
+
num_threads
-
1
)
/
num_threads
;
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录