Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
机器未来
Paddle
提交
a009272e
P
Paddle
项目概览
机器未来
/
Paddle
与 Fork 源项目一致
Fork自
PaddlePaddle / Paddle
通知
1
Star
1
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
1
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
1
Issue
1
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
未验证
提交
a009272e
编写于
6月 20, 2018
作者:
Y
Yan Chunwei
提交者:
GitHub
6月 20, 2018
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
inference/unify output buffer management (#11569)
上级
5f0c780a
变更
7
隐藏空白更改
内联
并排
Showing
7 changed file
with
121 addition
and
51 deletion
+121
-51
paddle/contrib/inference/demo/simple_on_word2vec.cc
paddle/contrib/inference/demo/simple_on_word2vec.cc
+9
-13
paddle/contrib/inference/paddle_inference_api.cc
paddle/contrib/inference/paddle_inference_api.cc
+50
-0
paddle/contrib/inference/paddle_inference_api.h
paddle/contrib/inference/paddle_inference_api.h
+33
-5
paddle/contrib/inference/paddle_inference_api_anakin_engine.cc
...e/contrib/inference/paddle_inference_api_anakin_engine.cc
+5
-2
paddle/contrib/inference/paddle_inference_api_anakin_engine_tester.cc
...ib/inference/paddle_inference_api_anakin_engine_tester.cc
+7
-9
paddle/contrib/inference/paddle_inference_api_impl.cc
paddle/contrib/inference/paddle_inference_api_impl.cc
+7
-6
paddle/contrib/inference/test_paddle_inference_api_impl.cc
paddle/contrib/inference/test_paddle_inference_api_impl.cc
+10
-16
未找到文件。
paddle/contrib/inference/demo/simple_on_word2vec.cc
浏览文件 @
a009272e
...
...
@@ -40,10 +40,9 @@ void Main(bool use_gpu) {
//# 2. Prepare input.
int64_t
data
[
4
]
=
{
1
,
2
,
3
,
4
};
PaddleBuf
buf
{.
data
=
data
,
.
length
=
sizeof
(
data
)};
PaddleTensor
tensor
{.
name
=
""
,
.
shape
=
std
::
vector
<
int
>
({
4
,
1
}),
.
data
=
buf
,
.
data
=
PaddleBuf
(
data
,
sizeof
(
data
))
,
.
dtype
=
PaddleDType
::
INT64
};
// For simplicity, we set all the slots with the same data.
...
...
@@ -55,14 +54,12 @@ void Main(bool use_gpu) {
//# 4. Get output.
ASSERT_EQ
(
outputs
.
size
(),
1UL
);
LOG
(
INFO
)
<<
"output buffer size: "
<<
outputs
.
front
().
data
.
length
;
const
size_t
num_elements
=
outputs
.
front
().
data
.
length
/
sizeof
(
float
);
LOG
(
INFO
)
<<
"output buffer size: "
<<
outputs
.
front
().
data
.
length
()
;
const
size_t
num_elements
=
outputs
.
front
().
data
.
length
()
/
sizeof
(
float
);
// The outputs' buffers are in CPU memory.
for
(
size_t
i
=
0
;
i
<
std
::
min
(
5UL
,
num_elements
);
i
++
)
{
LOG
(
INFO
)
<<
static_cast
<
float
*>
(
outputs
.
front
().
data
.
data
)[
i
];
LOG
(
INFO
)
<<
static_cast
<
float
*>
(
outputs
.
front
().
data
.
data
()
)[
i
];
}
// TODO(Superjomn): this should be freed automatically
free
(
outputs
[
0
].
data
.
data
);
}
}
...
...
@@ -86,10 +83,9 @@ void MainThreads(int num_threads, bool use_gpu) {
for
(
int
batch_id
=
0
;
batch_id
<
num_batches
;
++
batch_id
)
{
// 2. Dummy Input Data
int64_t
data
[
4
]
=
{
1
,
2
,
3
,
4
};
PaddleBuf
buf
{.
data
=
data
,
.
length
=
sizeof
(
data
)};
PaddleTensor
tensor
{.
name
=
""
,
.
shape
=
std
::
vector
<
int
>
({
4
,
1
}),
.
data
=
buf
,
.
data
=
PaddleBuf
(
data
,
sizeof
(
data
))
,
.
dtype
=
PaddleDType
::
INT64
};
std
::
vector
<
PaddleTensor
>
inputs
(
4
,
tensor
);
std
::
vector
<
PaddleTensor
>
outputs
;
...
...
@@ -99,13 +95,13 @@ void MainThreads(int num_threads, bool use_gpu) {
// 4. Get output.
ASSERT_EQ
(
outputs
.
size
(),
1UL
);
LOG
(
INFO
)
<<
"TID: "
<<
tid
<<
", "
<<
"output buffer size: "
<<
outputs
.
front
().
data
.
length
;
const
size_t
num_elements
=
outputs
.
front
().
data
.
length
/
sizeof
(
float
);
<<
"output buffer size: "
<<
outputs
.
front
().
data
.
length
();
const
size_t
num_elements
=
outputs
.
front
().
data
.
length
()
/
sizeof
(
float
);
// The outputs' buffers are in CPU memory.
for
(
size_t
i
=
0
;
i
<
std
::
min
(
5UL
,
num_elements
);
i
++
)
{
LOG
(
INFO
)
<<
static_cast
<
float
*>
(
outputs
.
front
().
data
.
data
)[
i
];
LOG
(
INFO
)
<<
static_cast
<
float
*>
(
outputs
.
front
().
data
.
data
()
)[
i
];
}
free
(
outputs
[
0
].
data
.
data
);
}
});
}
...
...
paddle/contrib/inference/paddle_inference_api.cc
浏览文件 @
a009272e
...
...
@@ -13,3 +13,53 @@ See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/contrib/inference/paddle_inference_api.h"

#include <cstring>
namespace
paddle
{
PaddleBuf
::
PaddleBuf
(
PaddleBuf
&&
other
)
:
data_
(
other
.
data_
),
length_
(
other
.
length_
),
memory_owned_
(
other
.
memory_owned_
)
{
other
.
memory_owned_
=
false
;
other
.
data_
=
nullptr
;
other
.
length_
=
0
;
}
// Copy constructor: the members start from their in-class defaults and the
// actual copy is delegated to the copy-assignment operator.
PaddleBuf::PaddleBuf(const PaddleBuf& other) { this->operator=(other); }
// Copy assignment.
//
// - If `other` only references external memory, this buffer becomes another
//   non-owning view of the same bytes (shallow copy).
// - If `other` owns its memory, the bytes are deep-copied into a fresh
//   allocation owned by this buffer, so the two objects never share (and never
//   double-free) one allocation, even in builds where assert is compiled out.
//
// Memory currently owned by this buffer is released before it is overwritten.
// Self-assignment is a no-op. Returns *this.
PaddleBuf& PaddleBuf::operator=(const PaddleBuf& other) {
  if (this == &other) return *this;

  if (!other.memory_owned_) {
    // Free() does nothing unless this buffer owns non-null memory.
    Free();
    data_ = other.data_;
    length_ = other.length_;
    memory_owned_ = false;
    return *this;
  }

  // Allocate and fill the copy first so that a failed allocation leaves this
  // buffer untouched.
  char* copy = nullptr;
  size_t copy_length = 0;
  if (other.data_ != nullptr && other.length_ > 0) {
    copy = new char[other.length_];
    std::memcpy(copy, other.data_, other.length_);
    copy_length = other.length_;
  }

  Free();
  data_ = copy;
  length_ = copy_length;
  memory_owned_ = true;
  return *this;
}
// Re-allocates the owned buffer so that it holds exactly `length` bytes.
//
// The previous contents are discarded, not copied. Asking for the current
// length is a no-op. Only a buffer that owns its memory may be resized; this
// is checked with assert.
void PaddleBuf::Resize(size_t length) {
  if (length_ == length) return;
  // Only the owned memory can be reset, the external memory can't be changed.
  assert(memory_owned_);
  Free();
  // A zero-byte request stores a null pointer instead of the non-null result
  // of `new char[0]`: an owned non-null pointer paired with length_ == 0
  // would trip assert(length_ > 0) in Free().
  data_ = (length > 0) ? new char[length] : nullptr;
  length_ = length;
  memory_owned_ = true;
}
// Re-points this buffer at caller-provided memory of `length` bytes.
// Any memory the buffer currently owns is released first; afterwards the
// buffer is marked as non-owning, so Free() will not release `data`.
void PaddleBuf::Reset(void* data, size_t length) {
  Free();
  data_ = data;
  length_ = length;
  memory_owned_ = false;
}
// Releases the memory if, and only if, this buffer owns it and the pointer is
// non-null, then resets the buffer to the empty state (null data, zero
// length). Does nothing for non-owning or already-empty buffers.
void PaddleBuf::Free() {
  if (memory_owned_ && data_) {
    assert(length_ > 0);
    // Owned memory is allocated with `new char[n]`, so it must be released
    // with the array form of delete; scalar `delete` here is undefined
    // behaviour.
    delete[] static_cast<char*>(data_);
    data_ = nullptr;
    length_ = 0;
  }
}
}
// namespace paddle
\ No newline at end of file
paddle/contrib/inference/paddle_inference_api.h
浏览文件 @
a009272e
...
...
@@ -21,6 +21,7 @@ limitations under the License. */
#pragma once
#include <cassert>
#include <memory>
#include <string>
#include <vector>
...
...
@@ -32,12 +33,38 @@ enum PaddleDType {
INT64
,
};
struct
PaddleBuf
{
void
*
data
;
// pointer to the data memory.
size_t
length
;
// number of memory bytes.
// A byte buffer that either owns heap memory it allocated itself or merely
// references memory provided by the caller. The `memory_owned_` flag records
// which of the two applies; owned memory is released by Free(), which the
// destructor calls.
class PaddleBuf {
 public:
  PaddleBuf() = default;
  PaddleBuf(PaddleBuf&& other);
  // Copy only available when memory is managed externally.
  explicit PaddleBuf(const PaddleBuf&);
  PaddleBuf& operator=(const PaddleBuf&);
  // Move assignment: releases whatever this buffer owns, takes over `other`'s
  // pointer, length and ownership flag, and leaves `other` empty and
  // non-owning. Without this overload, assigning from a temporary or from
  // std::move(x) would fall back to the copy-assignment operator.
  PaddleBuf& operator=(PaddleBuf&& other) noexcept {
    if (this != &other) {
      Free();
      data_ = other.data_;
      length_ = other.length_;
      memory_owned_ = other.memory_owned_;
      other.data_ = nullptr;
      other.length_ = 0;
      other.memory_owned_ = false;
    }
    return *this;
  }
  // Do not own the memory.
  PaddleBuf(void* data, size_t length)
      : data_(data), length_(length), memory_owned_{false} {}
  // Own memory.
  PaddleBuf(size_t length)
      : data_(new char[length]), length_(length), memory_owned_(true) {}
  // Resize to `length` bytes.
  void Resize(size_t length);
  // Reset to external memory.
  void Reset(void* data, size_t length);
  // True when the buffer holds zero bytes.
  bool empty() const { return length_ == 0; }
  // Pointer to the first byte (nullptr for a default-constructed buffer).
  void* data() const { return data_; }
  // Number of bytes in the buffer.
  size_t length() const { return length_; }

  ~PaddleBuf() { Free(); }

 private:
  // Releases the memory when it is owned and non-null.
  void Free();
  void* data_{nullptr};  // pointer to the data memory.
  size_t length_{0};     // number of memory bytes.
  bool memory_owned_{true};
};
struct
PaddleTensor
{
PaddleTensor
()
=
default
;
std
::
string
name
;
// variable name.
std
::
vector
<
int
>
shape
;
// TODO(Superjomn) for LoD support, add a vector<vector<int>> field if needed.
...
...
@@ -67,8 +94,9 @@ class PaddlePredictor {
// Predict a record.
// The caller should be responsible for allocating and releasing the memory of
// `inputs`. `inputs` should be alive until Run returns. caller should be
// responsible for releasing the memory of `output_data`.
// `inputs`. `inputs` should be available until Run returns. Caller should be
// responsible for the output tensor's buffer, either allocated or passed from
// outside.
virtual
bool
Run
(
const
std
::
vector
<
PaddleTensor
>&
inputs
,
std
::
vector
<
PaddleTensor
>*
output_data
)
=
0
;
...
...
paddle/contrib/inference/paddle_inference_api_anakin_engine.cc
浏览文件 @
a009272e
...
...
@@ -48,7 +48,7 @@ bool PaddleInferenceAnakinPredictor::Run(
auto
d_tensor_in_p
=
executor_
.
get_in
(
input
.
name
);
float
*
d_data_p
=
d_tensor_in_p
->
mutable_data
();
if
(
cudaMemcpy
(
d_data_p
,
static_cast
<
float
*>
(
input
.
data
.
data
),
static_cast
<
float
*>
(
input
.
data
.
data
()
),
d_tensor_in_p
->
valid_size
()
*
sizeof
(
float
),
cudaMemcpyHostToDevice
)
!=
0
)
{
LOG
(
ERROR
)
<<
"copy data from CPU to GPU error"
;
...
...
@@ -65,8 +65,11 @@ bool PaddleInferenceAnakinPredictor::Run(
for
(
auto
&
output
:
*
output_data
)
{
auto
*
tensor
=
executor_
.
get_out
(
output
.
name
);
output
.
shape
=
tensor
->
shape
();
if
(
output
.
data
.
length
()
<
tensor
->
valid_size
()
*
sizeof
(
float
))
{
output
.
data
.
Resize
(
tensor
->
valid_size
()
*
sizeof
(
float
));
}
// Copy data from GPU -> CPU
if
(
cudaMemcpy
(
output
.
data
.
data
,
if
(
cudaMemcpy
(
output
.
data
.
data
()
,
tensor
->
mutable_data
(),
tensor
->
valid_size
()
*
sizeof
(
float
),
cudaMemcpyDeviceToHost
)
!=
0
)
{
...
...
paddle/contrib/inference/paddle_inference_api_anakin_engine_tester.cc
浏览文件 @
a009272e
...
...
@@ -37,28 +37,26 @@ TEST(inference, anakin) {
float
data
[
1
*
3
*
224
*
224
]
=
{
1.0
f
};
PaddleBuf
buf
{.
data
=
data
,
.
length
=
sizeof
(
data
)};
PaddleTensor
tensor
{.
name
=
"input_0"
,
.
shape
=
std
::
vector
<
int
>
({
1
,
3
,
224
,
224
}),
.
data
=
buf
,
.
data
=
PaddleBuf
(
data
,
sizeof
(
data
))
,
.
dtype
=
PaddleDType
::
FLOAT32
};
// For simplicity, we set all the slots with the same data.
std
::
vector
<
PaddleTensor
>
paddle_tensor_feeds
(
1
,
tensor
);
std
::
vector
<
PaddleTensor
>
paddle_tensor_feeds
;
paddle_tensor_feeds
.
emplace_back
(
std
::
move
(
tensor
));
float
data_out
[
1000
];
PaddleBuf
buf_out
{.
data
=
data_out
,
.
length
=
sizeof
(
data
)};
PaddleTensor
tensor_out
{.
name
=
"prob_out"
,
.
shape
=
std
::
vector
<
int
>
({
1000
,
1
}),
.
data
=
buf_out
,
.
data
=
PaddleBuf
()
,
.
dtype
=
PaddleDType
::
FLOAT32
};
std
::
vector
<
PaddleTensor
>
outputs
(
1
,
tensor_out
);
std
::
vector
<
PaddleTensor
>
outputs
;
outputs
.
emplace_back
(
std
::
move
(
tensor_out
));
ASSERT_TRUE
(
predictor
->
Run
(
paddle_tensor_feeds
,
&
outputs
));
float
*
data_o
=
static_cast
<
float
*>
(
outputs
[
0
].
data
.
data
);
float
*
data_o
=
static_cast
<
float
*>
(
outputs
[
0
].
data
.
data
()
);
for
(
size_t
j
=
0
;
j
<
1000
;
++
j
)
{
LOG
(
INFO
)
<<
"output["
<<
j
<<
"]: "
<<
data_o
[
j
];
}
...
...
paddle/contrib/inference/paddle_inference_api_impl.cc
浏览文件 @
a009272e
...
...
@@ -178,8 +178,8 @@ bool NativePaddlePredictor::SetFeed(const std::vector<PaddleTensor> &inputs,
// TODO(panyx0718): Init LoDTensor from existing memcpy to save a copy.
std
::
memcpy
(
static_cast
<
void
*>
(
input_ptr
),
inputs
[
i
].
data
.
data
,
inputs
[
i
].
data
.
length
);
inputs
[
i
].
data
.
data
()
,
inputs
[
i
].
data
.
length
()
);
feeds
->
push_back
(
input
);
}
return
true
;
...
...
@@ -241,10 +241,11 @@ bool NativePaddlePredictor::GetFetch(
}
outputs
->
at
(
i
).
shape
=
shape
;
outputs
->
at
(
i
).
data
.
length
=
sizeof
(
float
)
*
data
.
size
();
outputs
->
at
(
i
).
data
.
data
=
malloc
(
outputs
->
at
(
i
).
data
.
length
);
std
::
memcpy
(
outputs
->
at
(
i
).
data
.
data
,
data
.
data
(),
outputs
->
at
(
i
).
data
.
length
);
auto
&
buffer
=
outputs
->
at
(
i
).
data
;
if
(
buffer
.
empty
()
||
buffer
.
length
()
<
sizeof
(
float
)
*
data
.
size
())
{
buffer
.
Resize
(
sizeof
(
float
)
*
data
.
size
());
}
std
::
memcpy
(
buffer
.
data
(),
data
.
data
(),
buffer
.
length
());
outputs
->
at
(
i
).
dtype
=
PaddleDType
::
FLOAT32
;
// TODO(panyx0718): support other types? fill tensor name? avoid a copy.
}
...
...
paddle/contrib/inference/test_paddle_inference_api_impl.cc
浏览文件 @
a009272e
...
...
@@ -27,13 +27,12 @@ namespace paddle {
PaddleTensor
LodTensorToPaddleTensor
(
framework
::
LoDTensor
*
t
)
{
PaddleTensor
pt
;
pt
.
data
.
data
=
t
->
data
<
void
>
();
if
(
t
->
type
()
==
typeid
(
int64_t
))
{
pt
.
data
.
length
=
t
->
numel
()
*
sizeof
(
int64_t
);
pt
.
data
.
Reset
(
t
->
data
<
void
>
(),
t
->
numel
()
*
sizeof
(
int64_t
)
);
pt
.
dtype
=
PaddleDType
::
INT64
;
}
else
if
(
t
->
type
()
==
typeid
(
float
))
{
pt
.
data
.
length
=
t
->
numel
()
*
sizeof
(
float
);
pt
.
data
.
Reset
(
t
->
data
<
void
>
(),
t
->
numel
()
*
sizeof
(
float
)
);
pt
.
dtype
=
PaddleDType
::
FLOAT32
;
}
else
{
LOG
(
FATAL
)
<<
"unsupported type."
;
...
...
@@ -79,8 +78,8 @@ void MainWord2Vec(bool use_gpu) {
std
::
vector
<
PaddleTensor
>
outputs
;
ASSERT_TRUE
(
predictor
->
Run
(
paddle_tensor_feeds
,
&
outputs
));
ASSERT_EQ
(
outputs
.
size
(),
1UL
);
size_t
len
=
outputs
[
0
].
data
.
length
;
float
*
data
=
static_cast
<
float
*>
(
outputs
[
0
].
data
.
data
);
size_t
len
=
outputs
[
0
].
data
.
length
()
;
float
*
data
=
static_cast
<
float
*>
(
outputs
[
0
].
data
.
data
()
);
for
(
size_t
j
=
0
;
j
<
len
/
sizeof
(
float
);
++
j
)
{
ASSERT_LT
(
data
[
j
],
1.0
);
ASSERT_GT
(
data
[
j
],
-
1.0
);
...
...
@@ -103,8 +102,6 @@ void MainWord2Vec(bool use_gpu) {
EXPECT_LT
(
lod_data
[
i
]
-
data
[
i
],
1e-3
);
EXPECT_GT
(
lod_data
[
i
]
-
data
[
i
],
-
1e-3
);
}
free
(
outputs
[
0
].
data
.
data
);
}
void
MainImageClassification
(
bool
use_gpu
)
{
...
...
@@ -143,13 +140,12 @@ void MainImageClassification(bool use_gpu) {
std
::
vector
<
PaddleTensor
>
outputs
;
ASSERT_TRUE
(
predictor
->
Run
(
paddle_tensor_feeds
,
&
outputs
));
ASSERT_EQ
(
outputs
.
size
(),
1UL
);
size_t
len
=
outputs
[
0
].
data
.
length
;
float
*
data
=
static_cast
<
float
*>
(
outputs
[
0
].
data
.
data
);
size_t
len
=
outputs
[
0
].
data
.
length
()
;
float
*
data
=
static_cast
<
float
*>
(
outputs
[
0
].
data
.
data
()
);
float
*
lod_data
=
output1
.
data
<
float
>
();
for
(
size_t
j
=
0
;
j
<
len
/
sizeof
(
float
);
++
j
)
{
EXPECT_NEAR
(
lod_data
[
j
],
data
[
j
],
1e-3
);
}
free
(
data
);
}
void
MainThreadsWord2Vec
(
bool
use_gpu
)
{
...
...
@@ -192,8 +188,8 @@ void MainThreadsWord2Vec(bool use_gpu) {
// check outputs range
ASSERT_EQ
(
local_outputs
.
size
(),
1UL
);
const
size_t
len
=
local_outputs
[
0
].
data
.
length
;
float
*
data
=
static_cast
<
float
*>
(
local_outputs
[
0
].
data
.
data
);
const
size_t
len
=
local_outputs
[
0
].
data
.
length
()
;
float
*
data
=
static_cast
<
float
*>
(
local_outputs
[
0
].
data
.
data
()
);
for
(
size_t
j
=
0
;
j
<
len
/
sizeof
(
float
);
++
j
)
{
ASSERT_LT
(
data
[
j
],
1.0
);
ASSERT_GT
(
data
[
j
],
-
1.0
);
...
...
@@ -205,7 +201,6 @@ void MainThreadsWord2Vec(bool use_gpu) {
for
(
int
i
=
0
;
i
<
refs
[
tid
].
numel
();
++
i
)
{
EXPECT_NEAR
(
ref_data
[
i
],
data
[
i
],
1e-3
);
}
free
(
data
);
});
}
for
(
int
i
=
0
;
i
<
num_jobs
;
++
i
)
{
...
...
@@ -251,14 +246,13 @@ void MainThreadsImageClassification(bool use_gpu) {
// check outputs correctness
ASSERT_EQ
(
local_outputs
.
size
(),
1UL
);
const
size_t
len
=
local_outputs
[
0
].
data
.
length
;
float
*
data
=
static_cast
<
float
*>
(
local_outputs
[
0
].
data
.
data
);
const
size_t
len
=
local_outputs
[
0
].
data
.
length
()
;
float
*
data
=
static_cast
<
float
*>
(
local_outputs
[
0
].
data
.
data
()
);
float
*
ref_data
=
refs
[
tid
].
data
<
float
>
();
EXPECT_EQ
(
refs
[
tid
].
numel
(),
len
/
sizeof
(
float
));
for
(
int
i
=
0
;
i
<
refs
[
tid
].
numel
();
++
i
)
{
EXPECT_NEAR
(
ref_data
[
i
],
data
[
i
],
1e-3
);
}
free
(
data
);
});
}
for
(
int
i
=
0
;
i
<
num_jobs
;
++
i
)
{
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录