Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
机器未来
Paddle
提交
c6dcffc6
P
Paddle
项目概览
机器未来
/
Paddle
与 Fork 源项目一致
Fork自
PaddlePaddle / Paddle
通知
1
Star
1
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
1
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
1
Issue
1
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
c6dcffc6
编写于
10月 24, 2018
作者:
D
dzhwinter
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
lb. add debug output
上级
607080e8
变更
9
隐藏空白更改
内联
并排
Showing
9 changed file
with
626 addition
and
127 deletion
+626
-127
paddle/fluid/framework/executor.cc
paddle/fluid/framework/executor.cc
+101
-1
paddle/fluid/inference/api/demo_ci/CMakeLists.txt
paddle/fluid/inference/api/demo_ci/CMakeLists.txt
+18
-3
paddle/fluid/inference/api/demo_ci/inference_icnet.cc
paddle/fluid/inference/api/demo_ci/inference_icnet.cc
+127
-122
paddle/fluid/inference/api/demo_ci/inference_icnet.h
paddle/fluid/inference/api/demo_ci/inference_icnet.h
+21
-0
paddle/fluid/inference/api/demo_ci/real_data_icnet_tester.cc
paddle/fluid/inference/api/demo_ci/real_data_icnet_tester.cc
+123
-0
paddle/fluid/inference/api/demo_ci/test.cc
paddle/fluid/inference/api/demo_ci/test.cc
+99
-0
paddle/fluid/inference/api/demo_ci/thread_icnet_test.cc
paddle/fluid/inference/api/demo_ci/thread_icnet_test.cc
+105
-0
paddle/fluid/operators/batch_norm_op.cu.cc
paddle/fluid/operators/batch_norm_op.cu.cc
+21
-0
paddle/fluid/operators/load_combine_op.cc
paddle/fluid/operators/load_combine_op.cc
+11
-1
未找到文件。
paddle/fluid/framework/executor.cc
浏览文件 @
c6dcffc6
...
...
@@ -333,9 +333,49 @@ std::vector<std::shared_ptr<ExecutorPrepareContext>> Executor::Prepare(
return
result
;
}
// void CheckResult(const std::string op_type, ExecutorPrepareContext* ctx, Scope* local_scope) {
// VLOG(3) << "before checking result";
// auto& dev_ctx = *platform::DeviceContextPool::Instance().Get(place_);
// std::vector<std::string> outputs;
// auto& block = ctx->prog_.Block(0);
// bool found = false;
// framework::OpDesc* myop = nullptr;
// for(auto& op : block.AllOps()) {
// if(op->Type() == "load_combine" || op->Type() == "fetch" || op->Type() == "feed") return;
// if (op->Type() == op_type) {
// found = true;
// myop = op;
// break;
// }
// }
// }
// if(!found) {
// VLOG(3) << "not found op!";
// return;
// }
// auto* op = myop;
// VLOG(3) << "start op output" << op->Type();
// for(auto var_name: op->OutputArgumentNames()) {
// auto* var = local_scope->Var(var_name);
// auto* var_desc = block.FindVar(var_name);
// if (var_desc->Persistable()) continue;
// auto* tensor = var->GetMutable<framework::LoDTensor>();
// framework::Tensor check;
// VLOG(3) << "before tensor copy";
// framework::TensorCopy(*tensor, platform::CPUPlace(), dev_ctx, &check);
// VLOG(3) << "after tensor copy";
// float sum = .0;
// for(size_t i=0; i < check.numel(); ++i) {
// sum += check.data<float>()[i];
// }
// VLOG(3) << "op " << op->Type() << " output var " << var_name << " sum " << sum;
// VLOG(3) << "after checking result";
// }
void
Executor
::
RunPreparedContext
(
ExecutorPrepareContext
*
ctx
,
Scope
*
scope
,
bool
create_local_scope
,
bool
create_vars
,
bool
keep_kids
)
{
VLOG
(
3
)
<<
"RunPreparedContext inside"
;
Scope
*
local_scope
=
scope
;
if
(
create_vars
)
{
if
(
create_local_scope
)
{
...
...
@@ -346,13 +386,73 @@ void Executor::RunPreparedContext(ExecutorPrepareContext* ctx, Scope* scope,
for
(
auto
&
op
:
ctx
->
ops_
)
{
op
->
Run
(
*
local_scope
,
place_
);
// CheckResult(op->Type(), ctx, local_scope);
if
(
FLAGS_benchmark
)
{
VLOG
(
2
)
<<
"Memory used after operator "
+
op
->
Type
()
+
" running: "
<<
memory
::
memory_usage
(
place_
);
}
}
platform
::
DeviceContextPool
::
Instance
().
Get
(
place_
)
->
Wait
();
VLOG
(
3
)
<<
"start checking"
;
auto
&
dev_ctx
=
*
platform
::
DeviceContextPool
::
Instance
().
Get
(
place_
);
std
::
vector
<
std
::
string
>
outputs
;
auto
&
block
=
ctx
->
prog_
.
Block
(
0
);
for
(
auto
&
op
:
block
.
AllOps
())
{
if
(
op
->
Type
()
==
"load_combine"
||
op
->
Type
()
==
"fetch"
||
op
->
Type
()
==
"feed"
)
continue
;
// for(auto& real_op : ctx->ops_) {
// if(real_op->Type() == op->Type()) {
// VLOG(3) << real_op->Type() << " " <<place_ << " " << real_op->DebugStringEx(local_scope);
// }
// }
//VLOG(3) << "start op output" << op->Type();
for
(
auto
var_name
:
op
->
InputArgumentNames
())
{
auto
*
var
=
local_scope
->
Var
(
var_name
);
auto
*
var_desc
=
block
.
FindVar
(
var_name
);
if
(
var_desc
->
Persistable
())
continue
;
auto
*
tensor
=
var
->
GetMutable
<
framework
::
LoDTensor
>
();
framework
::
Tensor
check
;
VLOG
(
3
)
<<
"before tensor copy"
;
framework
::
TensorCopy
(
*
tensor
,
platform
::
CPUPlace
(),
dev_ctx
,
&
check
);
VLOG
(
3
)
<<
"after tensor copy"
;
float
sum
=
.0
;
for
(
size_t
i
=
0
;
i
<
check
.
numel
();
++
i
)
{
sum
+=
check
.
data
<
float
>
()[
i
];
}
VLOG
(
3
)
<<
"op "
<<
op
->
Type
()
<<
" input var "
<<
var_name
<<
" sum "
<<
sum
;
}
VLOG
(
3
)
<<
"op "
<<
op
->
Type
()
<<
"input finished"
;
for
(
auto
var_name
:
op
->
OutputArgumentNames
())
{
auto
*
var
=
local_scope
->
Var
(
var_name
);
auto
*
var_desc
=
block
.
FindVar
(
var_name
);
if
(
var_desc
->
Persistable
())
continue
;
auto
*
tensor
=
var
->
GetMutable
<
framework
::
LoDTensor
>
();
framework
::
Tensor
check
;
VLOG
(
3
)
<<
"before tensor copy"
;
if
(
op
->
Type
()
==
"batch_norm"
&&
platform
::
is_gpu_place
(
place_
))
{
VLOG
(
3
)
<<
"op "
<<
op
->
Type
()
<<
" output var "
<<
var_name
<<
" "
<<
tensor
->
numel
();
tensor
->
mutable_data
<
float
>
(
place_
);
framework
::
TensorCopy
(
*
tensor
,
platform
::
CPUPlace
(),
dev_ctx
,
&
check
);
}
else
{
framework
::
TensorCopy
(
*
tensor
,
platform
::
CPUPlace
(),
dev_ctx
,
&
check
);
}
VLOG
(
3
)
<<
"after tensor copy"
;
float
sum
=
.0
;
for
(
size_t
i
=
0
;
i
<
check
.
numel
();
++
i
)
{
sum
+=
check
.
data
<
float
>
()[
i
];
}
VLOG
(
3
)
<<
"op "
<<
op
->
Type
()
<<
" output var "
<<
var_name
<<
" sum "
<<
sum
;
}
}
VLOG
(
3
)
<<
"after checking result"
;
if
(
local_scope
!=
scope
)
{
scope
->
DeleteScope
(
local_scope
);
}
else
{
...
...
paddle/fluid/inference/api/demo_ci/CMakeLists.txt
浏览文件 @
c6dcffc6
...
...
@@ -46,7 +46,7 @@ if(WITH_GPU)
endif
(
NOT WIN32
)
endif
()
include_directories
(
"
D
:/Paddle/"
)
include_directories
(
"
E
:/Paddle/"
)
include_directories
(
"
${
PADDLE_LIB
}
"
)
include_directories
(
"
${
PADDLE_LIB
}
/third_party/install/protobuf/include"
)
include_directories
(
"
${
PADDLE_LIB
}
/third_party/install/glog/include"
)
...
...
@@ -72,7 +72,12 @@ link_directories("${PADDLE_LIB}/third_party/install/gflags/lib")
link_directories
(
"
${
PADDLE_LIB
}
/paddle/fluid/inference"
)
# add_executable(${DEMO_NAME} ${DEMO_NAME}.cc)
add_library
(
${
DEMO_NAME
}
${
DEMO_NAME
}
.cc
)
# add_library(${DEMO_NAME} ${DEMO_NAME}.cc)
add_library
(
${
DEMO_NAME
}
SHARED
${
DEMO_NAME
}
.cc
)
add_executable
(
real_data_icnet_tester real_data_icnet_tester.cc
)
add_executable
(
test test.cc
)
add_executable
(
thread_icnet_test thread_icnet_test.cc
)
if
(
WITH_MKL
)
include_directories
(
"
${
PADDLE_LIB
}
/third_party/install/mklml/include"
)
set
(
MATH_LIB
${
PADDLE_LIB
}
/third_party/install/mklml/lib/libmklml_intel
${
CMAKE_SHARED_LIBRARY_SUFFIX
}
...
...
@@ -89,7 +94,11 @@ endif()
# Note: libpaddle_inference_api.so/a must put before libpaddle_fluid.so/a
if
(
WITH_STATIC_LIB
)
set
(
DEPS
${
PADDLE_LIB
}
/paddle/fluid/inference/libpaddle_fluid
${
CMAKE_STATIC_LIBRARY_SUFFIX
}
)
# ${PADDLE_LIB}/paddle/fluid/inference/libpaddle_fluid${CMAKE_STATIC_LIBRARY_SUFFIX}
D:/Paddle/bazel-dll/fluid_install_dir/paddle/fluid/inference/libpaddle_fluid
${
CMAKE_STATIC_LIBRARY_SUFFIX
}
# E:/Paddle/build/paddle/fluid/inference/api/Release/libpaddle_inference_api${CMAKE_STATIC_LIBRARY_SUFFIX}
D:/Paddle/bazel-dll/paddle/fluid/inference/api/Release/libpaddle_inference_api
${
CMAKE_STATIC_LIBRARY_SUFFIX
}
)
else
()
set
(
DEPS
${
PADDLE_LIB
}
/paddle/fluid/inference/libpaddle_fluid
${
CMAKE_SHARED_LIBRARY_SUFFIX
}
)
...
...
@@ -121,3 +130,9 @@ if(WITH_GPU)
endif
()
target_link_libraries
(
${
DEMO_NAME
}
${
DEPS
}
)
target_link_libraries
(
test
${
DEMO_NAME
}
)
target_link_libraries
(
thread_icnet_test
${
DEPS
}
)
target_link_libraries
(
real_data_icnet_tester
${
DEPS
}
)
target_compile_definitions
(
${
DEMO_NAME
}
PRIVATE
"API_DEFINITION"
)
paddle/fluid/inference/api/demo_ci/inference_icnet.cc
浏览文件 @
c6dcffc6
...
...
@@ -19,139 +19,144 @@
#include <algorithm>
#include <vector>
#include <string>
#include <memory>
#include "paddle/fluid/inference/api/paddle_inference_api.h"
#include "inference_icnet.h"
namespace
paddle
{
std
::
string
DIRNAME
=
"./infer_model"
;
std
::
string
DATA
=
"./test-image.txt"
;
const
int
C
=
3
;
// image channel
const
int
H
=
449
;
// image height
const
int
W
=
581
;
// image width
// 数据格式
// "<space splitted floats as data>\t<space splitted ints as shape"
// 1. 存储为float32格式。
// 2. 必须减去均值。 CHW三个通道为 mean = 112.15, 109.41, 185.42
struct
Record
{
std
::
vector
<
float
>
data
;
std
::
vector
<
int32_t
>
shape
;
using
namespace
paddle
;
class
Predictor
{
private:
std
::
unique_ptr
<
PaddlePredictor
>
predictor
;
struct
Record
{
std
::
vector
<
float
>
data
;
std
::
vector
<
int32_t
>
shape
;
};
const
int
C
=
3
;
// image channel
const
int
H
=
449
;
// image height
const
int
W
=
581
;
// image width
using
Time
=
decltype
(
std
::
chrono
::
high_resolution_clock
::
now
());
Time
time
()
{
return
std
::
chrono
::
high_resolution_clock
::
now
();
};
double
time_diff
(
Time
t1
,
Time
t2
)
{
typedef
std
::
chrono
::
microseconds
ms
;
auto
diff
=
t2
-
t1
;
ms
counter
=
std
::
chrono
::
duration_cast
<
ms
>
(
diff
);
return
counter
.
count
()
/
1000.0
;
}
static
void
split
(
const
std
::
string
&
str
,
char
sep
,
std
::
vector
<
std
::
string
>*
pieces
)
{
pieces
->
clear
();
if
(
str
.
empty
())
{
return
;
}
size_t
pos
=
0
;
size_t
next
=
str
.
find
(
sep
,
pos
);
while
(
next
!=
std
::
string
::
npos
)
{
pieces
->
push_back
(
str
.
substr
(
pos
,
next
-
pos
));
pos
=
next
+
1
;
next
=
str
.
find
(
sep
,
pos
);
}
if
(
!
str
.
substr
(
pos
).
empty
())
{
pieces
->
push_back
(
str
.
substr
(
pos
));
}
}
Record
ProcessALine
(
const
std
::
string
&
line
)
{
std
::
vector
<
std
::
string
>
columns
;
split
(
line
,
'\t'
,
&
columns
);
Record
record
;
std
::
vector
<
std
::
string
>
data_strs
;
split
(
columns
[
0
],
' '
,
&
data_strs
);
for
(
auto
&
d
:
data_strs
)
{
record
.
data
.
push_back
(
std
::
stof
(
d
));
}
std
::
vector
<
std
::
string
>
shape_strs
;
split
(
columns
[
1
],
' '
,
&
shape_strs
);
for
(
auto
&
s
:
shape_strs
)
{
record
.
shape
.
push_back
(
std
::
stoi
(
s
));
}
return
record
;
}
public:
Predictor
(
const
char
*
prog_file
,
const
char
*
param_file
,
const
float
fraction_of_gpu_memory
,
const
bool
use_gpu
,
const
int
device
)
{
NativeConfig
config
;
config
.
prog_file
=
prog_file
;
config
.
param_file
=
param_file
;
config
.
fraction_of_gpu_memory
=
fraction_of_gpu_memory
;
config
.
use_gpu
=
use_gpu
;
config
.
device
=
device
;
predictor
=
CreatePaddlePredictor
<
NativeConfig
>
(
config
);
}
void
predict
(
float
*
input
,
const
int
channel
,
const
int
height
,
const
int
width
,
int64_t
**
output
,
int
*
output_length
,
int
batch_size
)
{
std
::
vector
<
float
>
data
;
int
intput_length
=
channel
*
height
*
width
*
batch_size
;
for
(
int
i
=
0
;
i
<
intput_length
;
i
++
)
{
data
.
push_back
(
*
((
float
*
)
input
+
i
));
}
// initialize the input data
PaddleTensor
tensor
;
tensor
.
shape
=
std
::
vector
<
int
>
({
batch_size
,
channel
,
height
,
width
});
tensor
.
data
.
Resize
(
sizeof
(
float
)
*
batch_size
*
channel
*
height
*
width
);
std
::
copy
(
data
.
begin
(),
data
.
end
(),
static_cast
<
float
*>
(
tensor
.
data
.
data
()));
tensor
.
dtype
=
PaddleDType
::
FLOAT32
;
std
::
vector
<
PaddleTensor
>
paddle_tensor_feeds
(
1
,
tensor
);
// initialize the output data
PaddleTensor
tensor_out
;
std
::
vector
<
PaddleTensor
>
outputs
(
1
,
tensor_out
);
predictor
->
Run
(
paddle_tensor_feeds
,
&
outputs
,
batch_size
);
*
output_length
=
(
int
)
outputs
[
0
].
data
.
length
();
std
::
memcpy
(
static_cast
<
void
*>
(
*
output
),
outputs
[
0
].
data
.
data
(),
outputs
[
0
].
data
.
length
());
int64_t
sum_out
=
0
;
for
(
int
i
=
0
;
i
<
outputs
[
0
].
data
.
length
()
/
sizeof
(
int64_t
);
++
i
)
{
int64_t
item
=
static_cast
<
int64_t
*>
(
outputs
[
0
].
data
.
data
())[
i
];
sum_out
+=
item
;
if
(
item
!=
0
)
{
std
::
cout
<<
item
<<
std
::
endl
;
}
}
std
::
cout
<<
"sum_out"
<<
sum_out
<<
std
::
endl
;
}
};
NativeConfig
GetConfig
()
{
NativeConfig
config
;
config
.
prog_file
=
DIRNAME
+
"/__model__"
;
config
.
param_file
=
DIRNAME
+
"/__params__"
;
config
.
fraction_of_gpu_memory
=
0.0
;
config
.
use_gpu
=
true
;
config
.
device
=
0
;
return
config
;
}
using
Time
=
decltype
(
std
::
chrono
::
high_resolution_clock
::
now
());
Time
time
()
{
return
std
::
chrono
::
high_resolution_clock
::
now
();
};
double
time_diff
(
Time
t1
,
Time
t2
)
{
typedef
std
::
chrono
::
microseconds
ms
;
auto
diff
=
t2
-
t1
;
ms
counter
=
std
::
chrono
::
duration_cast
<
ms
>
(
diff
);
return
counter
.
count
()
/
1000.0
;
API_REFERENCE
void
*
init_predictor
(
const
char
*
prog_file
,
const
char
*
param_file
,
const
float
fraction_of_gpu_memory
,
const
bool
use_gpu
,
const
int
device
)
{
return
new
Predictor
(
prog_file
,
param_file
,
fraction_of_gpu_memory
,
use_gpu
,
device
);
}
static
void
split
(
const
std
::
string
&
str
,
char
sep
,
std
::
vector
<
std
::
string
>*
pieces
)
{
pieces
->
clear
();
if
(
str
.
empty
())
{
return
;
}
size_t
pos
=
0
;
size_t
next
=
str
.
find
(
sep
,
pos
);
while
(
next
!=
std
::
string
::
npos
)
{
pieces
->
push_back
(
str
.
substr
(
pos
,
next
-
pos
));
pos
=
next
+
1
;
next
=
str
.
find
(
sep
,
pos
);
}
if
(
!
str
.
substr
(
pos
).
empty
())
{
pieces
->
push_back
(
str
.
substr
(
pos
));
}
API_REFERENCE
void
predict
(
void
*
handle
,
float
*
input
,
const
int
channel
,
const
int
height
,
const
int
width
,
int64_t
**
output
,
int
*
output_length
,
int
batch_size
)
{
assert
(
handle
!=
nullptr
);
((
Predictor
*
)
handle
)
->
predict
(
input
,
channel
,
height
,
width
,
output
,
output_length
,
batch_size
);
}
Record
ProcessALine
(
const
std
::
string
&
line
)
{
std
::
vector
<
std
::
string
>
columns
;
split
(
line
,
'\t'
,
&
columns
);
Record
record
;
std
::
vector
<
std
::
string
>
data_strs
;
split
(
columns
[
0
],
' '
,
&
data_strs
);
for
(
auto
&
d
:
data_strs
)
{
record
.
data
.
push_back
(
std
::
stof
(
d
));
}
std
::
vector
<
std
::
string
>
shape_strs
;
split
(
columns
[
1
],
' '
,
&
shape_strs
);
for
(
auto
&
s
:
shape_strs
)
{
record
.
shape
.
push_back
(
std
::
stoi
(
s
));
}
return
record
;
API_REFERENCE
void
destory_predictor
(
void
*
handle
)
{
if
(
handle
)
{
delete
handle
;
handle
=
nullptr
;
}
}
void
test_naive
(
int
batch_size
){
NativeConfig
config
=
GetConfig
();
auto
predictor
=
CreatePaddlePredictor
<
NativeConfig
>
(
config
);
int
height
=
H
;
int
width
=
W
;
int
channel
=
C
;
int
num_sum
=
height
*
width
*
channel
*
batch_size
;
// 1. use fake data
std
::
vector
<
float
>
data
;
for
(
int
i
=
0
;
i
<
num_sum
;
i
++
)
{
data
.
push_back
(
0.0
);
}
PaddleTensor
tensor
;
tensor
.
shape
=
std
::
vector
<
int
>
({
batch_size
,
channel
,
height
,
width
});
tensor
.
data
.
Resize
(
sizeof
(
float
)
*
batch_size
*
channel
*
height
*
width
);
std
::
copy
(
data
.
begin
(),
data
.
end
(),
static_cast
<
float
*>
(
tensor
.
data
.
data
()));
tensor
.
dtype
=
PaddleDType
::
FLOAT32
;
// 2. read data from file
// std::string line;
// std::ifstream file(DATA);
// std::getline(file, line);
// auto record = ProcessALine(line);
// file.close();
// PaddleTensor tensor;
// tensor.shape = record.shape;
// tensor.data =
// PaddleBuf(record.data.data(), record.data.size() * sizeof(float));
std
::
vector
<
PaddleTensor
>
paddle_tensor_feeds
(
1
,
tensor
);
PaddleTensor
tensor_out
;
std
::
vector
<
PaddleTensor
>
outputs
(
1
,
tensor_out
);
predictor
->
Run
(
paddle_tensor_feeds
,
&
outputs
,
batch_size
);
auto
time1
=
time
();
for
(
size_t
i
=
0
;
i
<
2
;
i
++
)
{
std
::
cout
<<
"Pass "
<<
i
<<
"predict"
;
predictor
->
Run
(
paddle_tensor_feeds
,
&
outputs
,
batch_size
);
}
auto
time2
=
time
();
std
::
ofstream
ofresult
(
"naive_test_result.txt"
,
std
::
ios
::
app
);
std
::
cout
<<
"batch: "
<<
batch_size
<<
" predict cost: "
<<
time_diff
(
time1
,
time2
)
/
100.0
<<
"ms"
<<
std
::
endl
;
std
::
cout
<<
outputs
.
size
()
<<
std
::
endl
;
}
}
// namespace paddle
int
main
(
int
argc
,
char
**
argv
)
{
paddle
::
test_naive
(
1
<<
0
);
return
0
;
}
\ No newline at end of file
paddle/fluid/inference/api/demo_ci/inference_icnet.h
0 → 100644
浏览文件 @
c6dcffc6
#ifdef _WIN32
#ifdef inference_icnet_EXPORTS
#define API_REFERENCE extern "C" __declspec(dllexport)
#else
#define API_REFERENCE extern "C" __declspec(dllimport)
#endif
#else
#define API_REFERENCE
#endif
//API_REFERENCE void * init_predictor();
//API_REFERENCE void destory_predictor(void *handle);
//API_REFERENCE void predict(void *handle, int n);
API_REFERENCE
void
*
init_predictor
(
const
char
*
prog_file
,
const
char
*
param_file
,
const
float
fraction_of_gpu_memory
,
const
bool
use_gpu
,
const
int
device
);
API_REFERENCE
void
predict
(
void
*
handle
,
float
*
input
,
const
int
channel
,
const
int
height
,
const
int
width
,
int64_t
**
output
,
int
*
output_length
,
int
batch_size
);
API_REFERENCE
void
destory_predictor
(
void
*
handle
);
paddle/fluid/inference/api/demo_ci/real_data_icnet_tester.cc
0 → 100644
浏览文件 @
c6dcffc6
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#define GOOGLE_GLOG_DLL_DECL
#include <gflags/gflags.h>
#include <glog/logging.h>
#include <chrono>
#include <fstream>
#include <iostream>
#include "paddle/fluid/inference/api/paddle_inference_api.h"
namespace
paddle
{
// DEFINE_string(dirname, "./lb",
// "Directory of the inference model.");
NativeConfig
GetConfig
()
{
NativeConfig
config
;
// config.model_dir = FLAGS_dirname;
config
.
prog_file
=
"lb/__model__"
;
config
.
param_file
=
"lb/__params__"
;
config
.
fraction_of_gpu_memory
=
0.8
;
config
.
use_gpu
=
true
;
config
.
device
=
0
;
return
config
;
}
using
Time
=
decltype
(
std
::
chrono
::
high_resolution_clock
::
now
());
Time
time
()
{
return
std
::
chrono
::
high_resolution_clock
::
now
();
};
double
time_diff
(
Time
t1
,
Time
t2
)
{
typedef
std
::
chrono
::
microseconds
ms
;
auto
diff
=
t2
-
t1
;
ms
counter
=
std
::
chrono
::
duration_cast
<
ms
>
(
diff
);
return
counter
.
count
()
/
1000.0
;
}
void
test_naive
(
int
batch_size
){
NativeConfig
config
=
GetConfig
();
auto
predictor
=
CreatePaddlePredictor
<
NativeConfig
>
(
config
);
int
height
=
449
;
int
width
=
581
;
// =============read file list =============
std
::
ifstream
infile
(
"new_file.list"
);
std
::
string
temp_s
;
std
::
vector
<
std
::
string
>
all_files
;
while
(
!
infile
.
eof
())
{
infile
>>
temp_s
;
all_files
.
push_back
(
temp_s
);
}
// size_t file_num = all_files.size();
infile
.
close
();
// =============read file list =============
for
(
size_t
f_k
=
0
;
f_k
<
1
;
f_k
++
)
{
std
::
ifstream
in_img
(
all_files
[
f_k
]);
std
::
cout
<<
all_files
[
f_k
]
<<
std
::
endl
;
float
temp_v
;
float
sum_n
=
0.0
;
std
::
vector
<
float
>
data
;
while
(
!
in_img
.
eof
())
{
in_img
>>
temp_v
;
data
.
push_back
(
float
(
temp_v
));
// std::cout << temp_v << " ";
sum_n
+=
temp_v
;
}
in_img
.
close
();
std
::
cout
<<
"sum: "
<<
sum_n
<<
std
::
endl
;
PaddleTensor
tensor
;
tensor
.
shape
=
std
::
vector
<
int
>
({
batch_size
,
3
,
height
,
width
});
tensor
.
data
.
Resize
(
sizeof
(
float
)
*
batch_size
*
3
*
height
*
width
);
std
::
copy
(
data
.
begin
(),
data
.
end
(),
static_cast
<
float
*>
(
tensor
.
data
.
data
()));
tensor
.
dtype
=
PaddleDType
::
FLOAT32
;
std
::
vector
<
PaddleTensor
>
paddle_tensor_feeds
(
1
,
tensor
);
PaddleTensor
tensor_out
;
std
::
vector
<
PaddleTensor
>
outputs
(
1
,
tensor_out
);
predictor
->
Run
(
paddle_tensor_feeds
,
&
outputs
,
batch_size
);
std
::
cout
<<
"start predict123:"
<<
std
::
endl
;
auto
time1
=
time
();
for
(
size_t
i
=
0
;
i
<
1
;
i
++
)
{
predictor
->
Run
(
paddle_tensor_feeds
,
&
outputs
,
batch_size
);
}
auto
time2
=
time
();
std
::
ofstream
ofresult
(
"naive_test_result.txt"
,
std
::
ios
::
app
);
std
::
cout
<<
"batch: "
<<
batch_size
<<
" predict cost: "
<<
time_diff
(
time1
,
time2
)
/
1000.0
<<
"ms"
<<
std
::
endl
;
std
::
cout
<<
outputs
.
size
()
<<
std
::
endl
;
int64_t
*
data_o
=
static_cast
<
int64_t
*>
(
outputs
[
0
].
data
.
data
());
int64_t
sum_out
=
0
;
for
(
size_t
j
=
0
;
j
<
outputs
[
0
].
data
.
length
()
/
sizeof
(
int64_t
);
++
j
)
{
ofresult
<<
std
::
to_string
(
data_o
[
j
])
<<
" "
;
sum_out
+=
data_o
[
j
];
}
std
::
cout
<<
"sum_out "
<<
sum_out
<<
std
::
endl
;
ofresult
<<
std
::
endl
;
ofresult
.
close
();
}
}
}
// namespace paddle
int
main
(
int
argc
,
char
**
argv
)
{
// google::ParseCommandLineFlags(&argc, &argv, true);
paddle
::
test_naive
(
1
<<
0
);
return
0
;
}
paddle/fluid/inference/api/demo_ci/test.cc
0 → 100644
浏览文件 @
c6dcffc6
#include<windows.h>
#include <fstream>
#include "inference_icnet.h"
#include <thread>
#include <vector>
#include <string>
#include <iostream>
#include <sstream>
using
namespace
std
;
template
<
class
Type
>
Type
stringToNum
(
const
string
&
str
)
{
istringstream
iss
(
str
);
Type
num
;
iss
>>
num
;
return
num
;
}
void
test_imgs
()
{
void
*
h
=
init_predictor
(
"./lb/__model__"
,
"./lb/__params__"
,
0.3
f
,
true
,
0
);
std
::
ifstream
infile
(
"new_file.list"
);
std
::
ofstream
ofs
(
"./1.png.output.txt"
);
std
::
string
temp_s
;
std
::
vector
<
std
::
string
>
all_files
;
while
(
!
infile
.
eof
())
{
infile
>>
temp_s
;
all_files
.
push_back
(
temp_s
);
}
// size_t file_num = all_files.size();
infile
.
close
();
// =============read file list =============
for
(
size_t
f_k
=
0
;
f_k
<
1
;
f_k
++
)
{
// std::string path = "D:\\Paddle\\paddle\\fluid\\inference\\api\\demo_ci\\build\\Release\\";
// std::ifstream in_img(path + all_files[f_k]);
std
::
string
mypath
=
"D:
\\
Paddle
\\
paddle
\\
fluid
\\
inference
\\
api
\\
demo_ci
\\
build
\\
Release
\\
1.png.txt"
;
std
::
cout
<<
"file"
<<
mypath
<<
std
::
endl
;
std
::
ifstream
in_img
(
mypath
);
//std::cout << path + all_files[f_k] << std::endl;
double
temp_v
;
const
int
size
=
3
*
449
*
581
*
1
;
float
*
data
=
new
float
[
size
];
std
::
string
value
;
if
(
!
in_img
.
is_open
())
{
cout
<<
"open failed"
<<
endl
;
}
double
sum_input
=
.0
;
for
(
auto
i
=
0
;
i
<
size
;
i
++
)
{
getline
(
in_img
,
value
,
'\n'
);
double
v
=
stringToNum
<
double
>
(
value
);
data
[
i
]
=
static_cast
<
float
>
(
v
);
sum_input
+=
v
;
}
std
::
cout
<<
"sum_input"
<<
sum_input
<<
std
::
endl
;
in_img
.
close
();
const
int
SIZE
=
449
*
581
*
1
;
int64_t
*
p
=
new
int64_t
[
SIZE
]();
int
out_size
=
0
;
//memset(p, 0, size);
predict
(
h
,
data
,
3
,
449
,
581
,
&
p
,
&
out_size
,
1
);
std
::
cout
<<
"out_size = "
<<
out_size
<<
std
::
endl
;
double
out_sum
=
.0
;
for
(
auto
i
=
0
;
i
<
out_size
/
sizeof
(
int64_t
);
i
++
)
{
out_sum
+=
p
[
i
];
ofs
<<
p
[
i
]
<<
" "
;
}
ofs
.
close
();
std
::
cout
<<
"inferece out sum"
<<
out_sum
<<
std
::
endl
;
delete
p
;
}
destory_predictor
(
h
);
}
int
main
(
int
argc
,
char
**
argv
)
{
//if (true) {
// std::thread t1(func, init_predictor("./infer_model/__model__", "./infer_model/__params__", 0.1f, true, 0));
// std::thread t2(func, init_predictor("./infer_model/__model__", "./infer_model/__params__", 0.1f, true, 0));
// //std::thread t3(func, init_predictor("./infer_model/__model__", "./infer_model/__params__", 0.1f, true, 0));
// //std::thread t4(func, init_predictor("./infer_model/__model__", "./infer_model/__params__", 0.1f, true, 0));
// t1.join();
// t2.join();
// //t3.join();
// //t4.join();
// //Sleep(1);
//}
test_imgs
();
return
0
;
}
paddle/fluid/inference/api/demo_ci/thread_icnet_test.cc
0 → 100644
浏览文件 @
c6dcffc6
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#define GOOGLE_GLOG_DLL_DECL
#include <gflags/gflags.h>
#include <glog/logging.h>
//#include <gtest/gtest.h>
#include <chrono>
#include <fstream>
#include <iostream>
#include "paddle/fluid/inference/api/paddle_inference_api.h"
#include <thread> // NOLINT
#define ASSERT_TRUE(x) x
#define ASSERT_EQ(x, y) assert(x == y)
namespace
paddle
{
// DEFINE_string(dirname, "./LB_icnet_model",
// "Directory of the inference model.");
NativeConfig
GetConfig
()
{
NativeConfig
config
;
config
.
prog_file
=
"./dzh_lb/__model__"
;
config
.
param_file
=
"./dzh_lb/__params__"
;
config
.
fraction_of_gpu_memory
=
0.08
;
config
.
use_gpu
=
true
;
config
.
device
=
0
;
return
config
;
}
using
Time
=
decltype
(
std
::
chrono
::
high_resolution_clock
::
now
());
Time
time
()
{
return
std
::
chrono
::
high_resolution_clock
::
now
();
};
double
time_diff
(
Time
t1
,
Time
t2
)
{
typedef
std
::
chrono
::
microseconds
ms
;
auto
diff
=
t2
-
t1
;
ms
counter
=
std
::
chrono
::
duration_cast
<
ms
>
(
diff
);
return
counter
.
count
()
/
1000.0
;
}
void
test_naive
(
int
batch_size
,
std
::
string
model_path
){
PaddlePredictor
*
pres
[
2
];
NativeConfig
config
=
GetConfig
();
// config.model_dir = model_path;
auto
predictor0
=
CreatePaddlePredictor
<
NativeConfig
>
(
config
);
auto
predictor1
=
CreatePaddlePredictor
<
NativeConfig
>
(
config
);
pres
[
0
]
=
predictor0
.
get
();
pres
[
1
]
=
predictor1
.
get
();
int
height
=
449
;
int
width
=
581
;
std
::
vector
<
float
>
data
;
for
(
int
i
=
0
;
i
<
3
*
height
*
width
;
i
++
)
{
data
.
push_back
(
0
);
}
PaddleTensor
tensor
;
tensor
.
shape
=
std
::
vector
<
int
>
({
batch_size
,
3
,
height
,
width
});
tensor
.
data
.
Resize
(
sizeof
(
float
)
*
batch_size
*
3
*
height
*
width
);
std
::
copy
(
data
.
begin
(),
data
.
end
(),
static_cast
<
float
*>
(
tensor
.
data
.
data
()));
tensor
.
dtype
=
PaddleDType
::
FLOAT32
;
std
::
vector
<
PaddleTensor
>
paddle_tensor_feeds
(
1
,
tensor
);
constexpr
int
num_jobs
=
5
;
// each job run 1 batch
std
::
vector
<
std
::
thread
>
threads
;
for
(
int
tid
=
0
;
tid
<
num_jobs
;
++
tid
)
{
threads
.
emplace_back
([
&
,
tid
]()
{
auto
predictor
=
pres
[
tid
];
std
::
vector
<
PaddleTensor
>
local_outputs
;
for
(
size_t
i
=
0
;
i
<
1000
;
i
++
)
{
ASSERT_TRUE
(
predictor
->
Run
(
paddle_tensor_feeds
,
&
local_outputs
));
std
::
cout
<<
"run: "
<<
tid
<<
std
::
endl
;
}
ASSERT_EQ
(
local_outputs
.
size
(),
1UL
);
});
}
for
(
int
i
=
0
;
i
<
num_jobs
;
++
i
)
{
threads
[
i
].
join
();
}
}
//TEST(alexnet, naive) {
// test_naive(1 << 0, "./trt_models/vgg19");
//}
}
// namespace paddle
int
main
(
int
argc
,
char
**
argv
)
{
paddle
::
test_naive
(
1
<<
0
,
""
);
}
paddle/fluid/operators/batch_norm_op.cu.cc
浏览文件 @
c6dcffc6
...
...
@@ -141,6 +141,27 @@ class BatchNormKernel<platform::CUDADeviceContext, T>
bias
->
template
data
<
BatchNormParamType
<
T
>
>
(),
est_mean
->
template
data
<
BatchNormParamType
<
T
>
>
(),
est_var
->
template
data
<
BatchNormParamType
<
T
>
>
(),
epsilon
));
VLOG
(
3
)
<<
"before tensor copy"
;
Tensor
mean_
,
var_
,
x_
,
y_
;
framework
::
TensorCopy
(
*
est_mean
,
platform
::
CPUPlace
(),
dev_ctx
,
&
mean_
);
framework
::
TensorCopy
(
*
est_var
,
platform
::
CPUPlace
(),
dev_ctx
,
&
var_
);
framework
::
TensorCopy
(
*
x
,
platform
::
CPUPlace
(),
dev_ctx
,
&
x_
);
framework
::
TensorCopy
(
*
y
,
platform
::
CPUPlace
(),
dev_ctx
,
&
y_
);
VLOG
(
3
)
<<
"after tensor copy"
;
auto
check_tensor
=
[
&
](
const
Tensor
&
check
)
{
float
sum
=
.0
;
for
(
size_t
i
=
0
;
i
<
check
.
numel
();
++
i
)
{
sum
+=
check
.
data
<
float
>
()[
i
];
}
return
sum
;
};
VLOG
(
3
)
<<
"BatchNormKernel"
;
VLOG
(
3
)
<<
"mean"
<<
check_tensor
(
mean_
);
VLOG
(
3
)
<<
"var"
<<
check_tensor
(
var_
);
VLOG
(
3
)
<<
"x"
<<
check_tensor
(
x_
);
VLOG
(
3
)
<<
"y"
<<
check_tensor
(
y_
);
}
else
{
// Run training mode.
// obtain running mean and running inv var, and see if we need to
...
...
paddle/fluid/operators/load_combine_op.cc
浏览文件 @
c6dcffc6
...
...
@@ -12,6 +12,7 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include <fstream>
#include <vector>
#include "paddle/fluid/framework/data_type_transform.h"
#include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/platform/device_context.h"
...
...
@@ -34,6 +35,7 @@ class LoadCombineOp : public framework::OperatorBase {
auto
load_as_fp16
=
Attr
<
bool
>
(
"load_as_fp16"
);
std
::
ifstream
fin
(
filename
,
std
::
ios_base
::
in
|
std
::
ios_base
::
binary
);
//std::ifstream fin(filename, std::ios_base::in);
PADDLE_ENFORCE
(
!
fin
.
bad
(),
"Cannot open file %s for load_combine op"
,
filename
);
...
...
@@ -46,7 +48,7 @@ class LoadCombineOp : public framework::OperatorBase {
auto
&
dev_ctx
=
*
pool
.
Get
(
place
);
for
(
size_t
i
=
0
;
i
<
out_var_names
.
size
();
i
++
)
{
VLOG
(
3
)
<<
"load "
<<
out_var_names
[
i
];
VLOG
(
3
)
<<
"load
variable
"
<<
out_var_names
[
i
];
auto
*
out_var
=
scope
.
FindVar
(
out_var_names
[
i
]);
PADDLE_ENFORCE
(
out_var
!=
nullptr
,
"Output variable %s cannot be found"
,
...
...
@@ -61,6 +63,13 @@ class LoadCombineOp : public framework::OperatorBase {
// Get data from fin to tensor
DeserializeFromStream
(
fin
,
tensor
,
dev_ctx
);
VLOG
(
3
)
<<
"after deserialization"
;
framework
::
Tensor
check
;
framework
::
TensorCopy
(
*
tensor
,
platform
::
CPUPlace
(),
dev_ctx
,
&
check
);
float
sum
=
.0
;
for
(
size_t
i
=
0
;
i
<
check
.
numel
();
++
i
)
{
sum
+=
check
.
data
<
float
>
()[
i
];
}
VLOG
(
3
)
<<
"sum result"
<<
sum
;
auto
in_dtype
=
framework
::
ToDataType
(
tensor
->
type
());
auto
out_dtype
=
load_as_fp16
?
framework
::
proto
::
VarType
::
FP16
:
in_dtype
;
...
...
@@ -80,6 +89,7 @@ class LoadCombineOp : public framework::OperatorBase {
tensor
=
out_var
->
GetMutable
<
framework
::
LoDTensor
>
();
tensor
->
set_lod
(
fp16_tensor
.
lod
());
tensor
->
ShareDataWith
(
fp16_tensor
);
}
VLOG
(
3
)
<<
"load "
<<
out_var_names
[
i
]
<<
" finished"
;
}
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录