Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
机器未来
Paddle
提交
c6dcffc6
P
Paddle
项目概览
机器未来
/
Paddle
与 Fork 源项目一致
Fork自
PaddlePaddle / Paddle
通知
1
Star
1
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
1
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
1
Issue
1
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
c6dcffc6
编写于
10月 24, 2018
作者:
D
dzhwinter
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
lb. add debug output
上级
607080e8
变更
9
隐藏空白更改
内联
并排
Showing
9 changed file
with
626 addition
and
127 deletion
+626
-127
paddle/fluid/framework/executor.cc
paddle/fluid/framework/executor.cc
+101
-1
paddle/fluid/inference/api/demo_ci/CMakeLists.txt
paddle/fluid/inference/api/demo_ci/CMakeLists.txt
+18
-3
paddle/fluid/inference/api/demo_ci/inference_icnet.cc
paddle/fluid/inference/api/demo_ci/inference_icnet.cc
+127
-122
paddle/fluid/inference/api/demo_ci/inference_icnet.h
paddle/fluid/inference/api/demo_ci/inference_icnet.h
+21
-0
paddle/fluid/inference/api/demo_ci/real_data_icnet_tester.cc
paddle/fluid/inference/api/demo_ci/real_data_icnet_tester.cc
+123
-0
paddle/fluid/inference/api/demo_ci/test.cc
paddle/fluid/inference/api/demo_ci/test.cc
+99
-0
paddle/fluid/inference/api/demo_ci/thread_icnet_test.cc
paddle/fluid/inference/api/demo_ci/thread_icnet_test.cc
+105
-0
paddle/fluid/operators/batch_norm_op.cu.cc
paddle/fluid/operators/batch_norm_op.cu.cc
+21
-0
paddle/fluid/operators/load_combine_op.cc
paddle/fluid/operators/load_combine_op.cc
+11
-1
未找到文件。
paddle/fluid/framework/executor.cc
浏览文件 @
c6dcffc6
...
@@ -333,9 +333,49 @@ std::vector<std::shared_ptr<ExecutorPrepareContext>> Executor::Prepare(
...
@@ -333,9 +333,49 @@ std::vector<std::shared_ptr<ExecutorPrepareContext>> Executor::Prepare(
return
result
;
return
result
;
}
}
// void CheckResult(const std::string op_type, ExecutorPrepareContext* ctx, Scope* local_scope) {
// VLOG(3) << "before checking result";
// auto& dev_ctx = *platform::DeviceContextPool::Instance().Get(place_);
// std::vector<std::string> outputs;
// auto& block = ctx->prog_.Block(0);
// bool found = false;
// framework::OpDesc* myop = nullptr;
// for(auto& op : block.AllOps()) {
// if(op->Type() == "load_combine" || op->Type() == "fetch" || op->Type() == "feed") return;
// if (op->Type() == op_type) {
// found = true;
// myop = op;
// break;
// }
// }
// }
// if(!found) {
// VLOG(3) << "not found op!";
// return;
// }
// auto* op = myop;
// VLOG(3) << "start op output" << op->Type();
// for(auto var_name: op->OutputArgumentNames()) {
// auto* var = local_scope->Var(var_name);
// auto* var_desc = block.FindVar(var_name);
// if (var_desc->Persistable()) continue;
// auto* tensor = var->GetMutable<framework::LoDTensor>();
// framework::Tensor check;
// VLOG(3) << "before tensor copy";
// framework::TensorCopy(*tensor, platform::CPUPlace(), dev_ctx, &check);
// VLOG(3) << "after tensor copy";
// float sum = .0;
// for(size_t i=0; i < check.numel(); ++i) {
// sum += check.data<float>()[i];
// }
// VLOG(3) << "op " << op->Type() << " output var " << var_name << " sum " << sum;
// VLOG(3) << "after checking result";
// }
void
Executor
::
RunPreparedContext
(
ExecutorPrepareContext
*
ctx
,
Scope
*
scope
,
void
Executor
::
RunPreparedContext
(
ExecutorPrepareContext
*
ctx
,
Scope
*
scope
,
bool
create_local_scope
,
bool
create_vars
,
bool
create_local_scope
,
bool
create_vars
,
bool
keep_kids
)
{
bool
keep_kids
)
{
VLOG
(
3
)
<<
"RunPreparedContext inside"
;
Scope
*
local_scope
=
scope
;
Scope
*
local_scope
=
scope
;
if
(
create_vars
)
{
if
(
create_vars
)
{
if
(
create_local_scope
)
{
if
(
create_local_scope
)
{
...
@@ -346,13 +386,73 @@ void Executor::RunPreparedContext(ExecutorPrepareContext* ctx, Scope* scope,
...
@@ -346,13 +386,73 @@ void Executor::RunPreparedContext(ExecutorPrepareContext* ctx, Scope* scope,
for
(
auto
&
op
:
ctx
->
ops_
)
{
for
(
auto
&
op
:
ctx
->
ops_
)
{
op
->
Run
(
*
local_scope
,
place_
);
op
->
Run
(
*
local_scope
,
place_
);
// CheckResult(op->Type(), ctx, local_scope);
if
(
FLAGS_benchmark
)
{
if
(
FLAGS_benchmark
)
{
VLOG
(
2
)
<<
"Memory used after operator "
+
op
->
Type
()
+
" running: "
VLOG
(
2
)
<<
"Memory used after operator "
+
op
->
Type
()
+
" running: "
<<
memory
::
memory_usage
(
place_
);
<<
memory
::
memory_usage
(
place_
);
}
}
}
}
platform
::
DeviceContextPool
::
Instance
().
Get
(
place_
)
->
Wait
();
platform
::
DeviceContextPool
::
Instance
().
Get
(
place_
)
->
Wait
();
VLOG
(
3
)
<<
"start checking"
;
auto
&
dev_ctx
=
*
platform
::
DeviceContextPool
::
Instance
().
Get
(
place_
);
std
::
vector
<
std
::
string
>
outputs
;
auto
&
block
=
ctx
->
prog_
.
Block
(
0
);
for
(
auto
&
op
:
block
.
AllOps
())
{
if
(
op
->
Type
()
==
"load_combine"
||
op
->
Type
()
==
"fetch"
||
op
->
Type
()
==
"feed"
)
continue
;
// for(auto& real_op : ctx->ops_) {
// if(real_op->Type() == op->Type()) {
// VLOG(3) << real_op->Type() << " " <<place_ << " " << real_op->DebugStringEx(local_scope);
// }
// }
//VLOG(3) << "start op output" << op->Type();
for
(
auto
var_name
:
op
->
InputArgumentNames
())
{
auto
*
var
=
local_scope
->
Var
(
var_name
);
auto
*
var_desc
=
block
.
FindVar
(
var_name
);
if
(
var_desc
->
Persistable
())
continue
;
auto
*
tensor
=
var
->
GetMutable
<
framework
::
LoDTensor
>
();
framework
::
Tensor
check
;
VLOG
(
3
)
<<
"before tensor copy"
;
framework
::
TensorCopy
(
*
tensor
,
platform
::
CPUPlace
(),
dev_ctx
,
&
check
);
VLOG
(
3
)
<<
"after tensor copy"
;
float
sum
=
.0
;
for
(
size_t
i
=
0
;
i
<
check
.
numel
();
++
i
)
{
sum
+=
check
.
data
<
float
>
()[
i
];
}
VLOG
(
3
)
<<
"op "
<<
op
->
Type
()
<<
" input var "
<<
var_name
<<
" sum "
<<
sum
;
}
VLOG
(
3
)
<<
"op "
<<
op
->
Type
()
<<
"input finished"
;
for
(
auto
var_name
:
op
->
OutputArgumentNames
())
{
auto
*
var
=
local_scope
->
Var
(
var_name
);
auto
*
var_desc
=
block
.
FindVar
(
var_name
);
if
(
var_desc
->
Persistable
())
continue
;
auto
*
tensor
=
var
->
GetMutable
<
framework
::
LoDTensor
>
();
framework
::
Tensor
check
;
VLOG
(
3
)
<<
"before tensor copy"
;
if
(
op
->
Type
()
==
"batch_norm"
&&
platform
::
is_gpu_place
(
place_
))
{
VLOG
(
3
)
<<
"op "
<<
op
->
Type
()
<<
" output var "
<<
var_name
<<
" "
<<
tensor
->
numel
();
tensor
->
mutable_data
<
float
>
(
place_
);
framework
::
TensorCopy
(
*
tensor
,
platform
::
CPUPlace
(),
dev_ctx
,
&
check
);
}
else
{
framework
::
TensorCopy
(
*
tensor
,
platform
::
CPUPlace
(),
dev_ctx
,
&
check
);
}
VLOG
(
3
)
<<
"after tensor copy"
;
float
sum
=
.0
;
for
(
size_t
i
=
0
;
i
<
check
.
numel
();
++
i
)
{
sum
+=
check
.
data
<
float
>
()[
i
];
}
VLOG
(
3
)
<<
"op "
<<
op
->
Type
()
<<
" output var "
<<
var_name
<<
" sum "
<<
sum
;
}
}
VLOG
(
3
)
<<
"after checking result"
;
if
(
local_scope
!=
scope
)
{
if
(
local_scope
!=
scope
)
{
scope
->
DeleteScope
(
local_scope
);
scope
->
DeleteScope
(
local_scope
);
}
else
{
}
else
{
...
...
paddle/fluid/inference/api/demo_ci/CMakeLists.txt
浏览文件 @
c6dcffc6
...
@@ -46,7 +46,7 @@ if(WITH_GPU)
...
@@ -46,7 +46,7 @@ if(WITH_GPU)
endif
(
NOT WIN32
)
endif
(
NOT WIN32
)
endif
()
endif
()
include_directories
(
"
D
:/Paddle/"
)
include_directories
(
"
E
:/Paddle/"
)
include_directories
(
"
${
PADDLE_LIB
}
"
)
include_directories
(
"
${
PADDLE_LIB
}
"
)
include_directories
(
"
${
PADDLE_LIB
}
/third_party/install/protobuf/include"
)
include_directories
(
"
${
PADDLE_LIB
}
/third_party/install/protobuf/include"
)
include_directories
(
"
${
PADDLE_LIB
}
/third_party/install/glog/include"
)
include_directories
(
"
${
PADDLE_LIB
}
/third_party/install/glog/include"
)
...
@@ -72,7 +72,12 @@ link_directories("${PADDLE_LIB}/third_party/install/gflags/lib")
...
@@ -72,7 +72,12 @@ link_directories("${PADDLE_LIB}/third_party/install/gflags/lib")
link_directories
(
"
${
PADDLE_LIB
}
/paddle/fluid/inference"
)
link_directories
(
"
${
PADDLE_LIB
}
/paddle/fluid/inference"
)
# add_executable(${DEMO_NAME} ${DEMO_NAME}.cc)
# add_executable(${DEMO_NAME} ${DEMO_NAME}.cc)
add_library
(
${
DEMO_NAME
}
${
DEMO_NAME
}
.cc
)
# add_library(${DEMO_NAME} ${DEMO_NAME}.cc)
add_library
(
${
DEMO_NAME
}
SHARED
${
DEMO_NAME
}
.cc
)
add_executable
(
real_data_icnet_tester real_data_icnet_tester.cc
)
add_executable
(
test test.cc
)
add_executable
(
thread_icnet_test thread_icnet_test.cc
)
if
(
WITH_MKL
)
if
(
WITH_MKL
)
include_directories
(
"
${
PADDLE_LIB
}
/third_party/install/mklml/include"
)
include_directories
(
"
${
PADDLE_LIB
}
/third_party/install/mklml/include"
)
set
(
MATH_LIB
${
PADDLE_LIB
}
/third_party/install/mklml/lib/libmklml_intel
${
CMAKE_SHARED_LIBRARY_SUFFIX
}
set
(
MATH_LIB
${
PADDLE_LIB
}
/third_party/install/mklml/lib/libmklml_intel
${
CMAKE_SHARED_LIBRARY_SUFFIX
}
...
@@ -89,7 +94,11 @@ endif()
...
@@ -89,7 +94,11 @@ endif()
# Note: libpaddle_inference_api.so/a must put before libpaddle_fluid.so/a
# Note: libpaddle_inference_api.so/a must put before libpaddle_fluid.so/a
if
(
WITH_STATIC_LIB
)
if
(
WITH_STATIC_LIB
)
set
(
DEPS
set
(
DEPS
${
PADDLE_LIB
}
/paddle/fluid/inference/libpaddle_fluid
${
CMAKE_STATIC_LIBRARY_SUFFIX
}
)
# ${PADDLE_LIB}/paddle/fluid/inference/libpaddle_fluid${CMAKE_STATIC_LIBRARY_SUFFIX}
D:/Paddle/bazel-dll/fluid_install_dir/paddle/fluid/inference/libpaddle_fluid
${
CMAKE_STATIC_LIBRARY_SUFFIX
}
# E:/Paddle/build/paddle/fluid/inference/api/Release/libpaddle_inference_api${CMAKE_STATIC_LIBRARY_SUFFIX}
D:/Paddle/bazel-dll/paddle/fluid/inference/api/Release/libpaddle_inference_api
${
CMAKE_STATIC_LIBRARY_SUFFIX
}
)
else
()
else
()
set
(
DEPS
set
(
DEPS
${
PADDLE_LIB
}
/paddle/fluid/inference/libpaddle_fluid
${
CMAKE_SHARED_LIBRARY_SUFFIX
}
)
${
PADDLE_LIB
}
/paddle/fluid/inference/libpaddle_fluid
${
CMAKE_SHARED_LIBRARY_SUFFIX
}
)
...
@@ -121,3 +130,9 @@ if(WITH_GPU)
...
@@ -121,3 +130,9 @@ if(WITH_GPU)
endif
()
endif
()
target_link_libraries
(
${
DEMO_NAME
}
${
DEPS
}
)
target_link_libraries
(
${
DEMO_NAME
}
${
DEPS
}
)
target_link_libraries
(
test
${
DEMO_NAME
}
)
target_link_libraries
(
thread_icnet_test
${
DEPS
}
)
target_link_libraries
(
real_data_icnet_tester
${
DEPS
}
)
target_compile_definitions
(
${
DEMO_NAME
}
PRIVATE
"API_DEFINITION"
)
paddle/fluid/inference/api/demo_ci/inference_icnet.cc
浏览文件 @
c6dcffc6
...
@@ -19,139 +19,144 @@
...
@@ -19,139 +19,144 @@
#include <algorithm>
#include <algorithm>
#include <vector>
#include <vector>
#include <string>
#include <string>
#include <memory>
#include "paddle/fluid/inference/api/paddle_inference_api.h"
#include "paddle/fluid/inference/api/paddle_inference_api.h"
#include "inference_icnet.h"
namespace
paddle
{
std
::
string
DIRNAME
=
"./infer_model"
;
std
::
string
DATA
=
"./test-image.txt"
;
const
int
C
=
3
;
// image channel
const
int
H
=
449
;
// image height
const
int
W
=
581
;
// image width
// 数据格式
// 数据格式
// "<space splitted floats as data>\t<space splitted ints as shape"
// "<space splitted floats as data>\t<space splitted ints as shape"
// 1. 存储为float32格式。
// 1. 存储为float32格式。
// 2. 必须减去均值。 CHW三个通道为 mean = 112.15, 109.41, 185.42
// 2. 必须减去均值。 CHW三个通道为 mean = 112.15, 109.41, 185.42
using
namespace
paddle
;
struct
Record
{
class
Predictor
{
std
::
vector
<
float
>
data
;
private:
std
::
vector
<
int32_t
>
shape
;
std
::
unique_ptr
<
PaddlePredictor
>
predictor
;
struct
Record
{
std
::
vector
<
float
>
data
;
std
::
vector
<
int32_t
>
shape
;
};
const
int
C
=
3
;
// image channel
const
int
H
=
449
;
// image height
const
int
W
=
581
;
// image width
using
Time
=
decltype
(
std
::
chrono
::
high_resolution_clock
::
now
());
Time
time
()
{
return
std
::
chrono
::
high_resolution_clock
::
now
();
};
double
time_diff
(
Time
t1
,
Time
t2
)
{
typedef
std
::
chrono
::
microseconds
ms
;
auto
diff
=
t2
-
t1
;
ms
counter
=
std
::
chrono
::
duration_cast
<
ms
>
(
diff
);
return
counter
.
count
()
/
1000.0
;
}
static
void
split
(
const
std
::
string
&
str
,
char
sep
,
std
::
vector
<
std
::
string
>*
pieces
)
{
pieces
->
clear
();
if
(
str
.
empty
())
{
return
;
}
size_t
pos
=
0
;
size_t
next
=
str
.
find
(
sep
,
pos
);
while
(
next
!=
std
::
string
::
npos
)
{
pieces
->
push_back
(
str
.
substr
(
pos
,
next
-
pos
));
pos
=
next
+
1
;
next
=
str
.
find
(
sep
,
pos
);
}
if
(
!
str
.
substr
(
pos
).
empty
())
{
pieces
->
push_back
(
str
.
substr
(
pos
));
}
}
Record
ProcessALine
(
const
std
::
string
&
line
)
{
std
::
vector
<
std
::
string
>
columns
;
split
(
line
,
'\t'
,
&
columns
);
Record
record
;
std
::
vector
<
std
::
string
>
data_strs
;
split
(
columns
[
0
],
' '
,
&
data_strs
);
for
(
auto
&
d
:
data_strs
)
{
record
.
data
.
push_back
(
std
::
stof
(
d
));
}
std
::
vector
<
std
::
string
>
shape_strs
;
split
(
columns
[
1
],
' '
,
&
shape_strs
);
for
(
auto
&
s
:
shape_strs
)
{
record
.
shape
.
push_back
(
std
::
stoi
(
s
));
}
return
record
;
}
public:
Predictor
(
const
char
*
prog_file
,
const
char
*
param_file
,
const
float
fraction_of_gpu_memory
,
const
bool
use_gpu
,
const
int
device
)
{
NativeConfig
config
;
config
.
prog_file
=
prog_file
;
config
.
param_file
=
param_file
;
config
.
fraction_of_gpu_memory
=
fraction_of_gpu_memory
;
config
.
use_gpu
=
use_gpu
;
config
.
device
=
device
;
predictor
=
CreatePaddlePredictor
<
NativeConfig
>
(
config
);
}
void
predict
(
float
*
input
,
const
int
channel
,
const
int
height
,
const
int
width
,
int64_t
**
output
,
int
*
output_length
,
int
batch_size
)
{
std
::
vector
<
float
>
data
;
int
intput_length
=
channel
*
height
*
width
*
batch_size
;
for
(
int
i
=
0
;
i
<
intput_length
;
i
++
)
{
data
.
push_back
(
*
((
float
*
)
input
+
i
));
}
// initialize the input data
PaddleTensor
tensor
;
tensor
.
shape
=
std
::
vector
<
int
>
({
batch_size
,
channel
,
height
,
width
});
tensor
.
data
.
Resize
(
sizeof
(
float
)
*
batch_size
*
channel
*
height
*
width
);
std
::
copy
(
data
.
begin
(),
data
.
end
(),
static_cast
<
float
*>
(
tensor
.
data
.
data
()));
tensor
.
dtype
=
PaddleDType
::
FLOAT32
;
std
::
vector
<
PaddleTensor
>
paddle_tensor_feeds
(
1
,
tensor
);
// initialize the output data
PaddleTensor
tensor_out
;
std
::
vector
<
PaddleTensor
>
outputs
(
1
,
tensor_out
);
predictor
->
Run
(
paddle_tensor_feeds
,
&
outputs
,
batch_size
);
*
output_length
=
(
int
)
outputs
[
0
].
data
.
length
();
std
::
memcpy
(
static_cast
<
void
*>
(
*
output
),
outputs
[
0
].
data
.
data
(),
outputs
[
0
].
data
.
length
());
int64_t
sum_out
=
0
;
for
(
int
i
=
0
;
i
<
outputs
[
0
].
data
.
length
()
/
sizeof
(
int64_t
);
++
i
)
{
int64_t
item
=
static_cast
<
int64_t
*>
(
outputs
[
0
].
data
.
data
())[
i
];
sum_out
+=
item
;
if
(
item
!=
0
)
{
std
::
cout
<<
item
<<
std
::
endl
;
}
}
std
::
cout
<<
"sum_out"
<<
sum_out
<<
std
::
endl
;
}
};
};
NativeConfig
GetConfig
()
{
API_REFERENCE
void
*
init_predictor
(
const
char
*
prog_file
,
NativeConfig
config
;
const
char
*
param_file
,
const
float
fraction_of_gpu_memory
,
config
.
prog_file
=
DIRNAME
+
"/__model__"
;
const
bool
use_gpu
,
const
int
device
)
{
config
.
param_file
=
DIRNAME
+
"/__params__"
;
return
new
Predictor
(
prog_file
,
param_file
,
fraction_of_gpu_memory
,
use_gpu
,
device
);
config
.
fraction_of_gpu_memory
=
0.0
;
config
.
use_gpu
=
true
;
config
.
device
=
0
;
return
config
;
}
using
Time
=
decltype
(
std
::
chrono
::
high_resolution_clock
::
now
());
Time
time
()
{
return
std
::
chrono
::
high_resolution_clock
::
now
();
};
double
time_diff
(
Time
t1
,
Time
t2
)
{
typedef
std
::
chrono
::
microseconds
ms
;
auto
diff
=
t2
-
t1
;
ms
counter
=
std
::
chrono
::
duration_cast
<
ms
>
(
diff
);
return
counter
.
count
()
/
1000.0
;
}
}
static
void
split
(
const
std
::
string
&
str
,
char
sep
,
API_REFERENCE
void
predict
(
void
*
handle
,
float
*
input
,
const
int
channel
,
const
int
height
,
const
int
width
,
std
::
vector
<
std
::
string
>*
pieces
)
{
int64_t
**
output
,
int
*
output_length
,
int
batch_size
)
{
pieces
->
clear
();
assert
(
handle
!=
nullptr
);
if
(
str
.
empty
())
{
((
Predictor
*
)
handle
)
->
predict
(
input
,
channel
,
height
,
width
,
output
,
output_length
,
batch_size
);
return
;
}
size_t
pos
=
0
;
size_t
next
=
str
.
find
(
sep
,
pos
);
while
(
next
!=
std
::
string
::
npos
)
{
pieces
->
push_back
(
str
.
substr
(
pos
,
next
-
pos
));
pos
=
next
+
1
;
next
=
str
.
find
(
sep
,
pos
);
}
if
(
!
str
.
substr
(
pos
).
empty
())
{
pieces
->
push_back
(
str
.
substr
(
pos
));
}
}
}
Record
ProcessALine
(
const
std
::
string
&
line
)
{
API_REFERENCE
void
destory_predictor
(
void
*
handle
)
{
std
::
vector
<
std
::
string
>
columns
;
if
(
handle
)
{
split
(
line
,
'\t'
,
&
columns
);
delete
handle
;
handle
=
nullptr
;
Record
record
;
}
std
::
vector
<
std
::
string
>
data_strs
;
split
(
columns
[
0
],
' '
,
&
data_strs
);
for
(
auto
&
d
:
data_strs
)
{
record
.
data
.
push_back
(
std
::
stof
(
d
));
}
std
::
vector
<
std
::
string
>
shape_strs
;
split
(
columns
[
1
],
' '
,
&
shape_strs
);
for
(
auto
&
s
:
shape_strs
)
{
record
.
shape
.
push_back
(
std
::
stoi
(
s
));
}
return
record
;
}
}
void
test_naive
(
int
batch_size
){
NativeConfig
config
=
GetConfig
();
auto
predictor
=
CreatePaddlePredictor
<
NativeConfig
>
(
config
);
int
height
=
H
;
int
width
=
W
;
int
channel
=
C
;
int
num_sum
=
height
*
width
*
channel
*
batch_size
;
// 1. use fake data
std
::
vector
<
float
>
data
;
for
(
int
i
=
0
;
i
<
num_sum
;
i
++
)
{
data
.
push_back
(
0.0
);
}
PaddleTensor
tensor
;
tensor
.
shape
=
std
::
vector
<
int
>
({
batch_size
,
channel
,
height
,
width
});
tensor
.
data
.
Resize
(
sizeof
(
float
)
*
batch_size
*
channel
*
height
*
width
);
std
::
copy
(
data
.
begin
(),
data
.
end
(),
static_cast
<
float
*>
(
tensor
.
data
.
data
()));
tensor
.
dtype
=
PaddleDType
::
FLOAT32
;
// 2. read data from file
// std::string line;
// std::ifstream file(DATA);
// std::getline(file, line);
// auto record = ProcessALine(line);
// file.close();
// PaddleTensor tensor;
// tensor.shape = record.shape;
// tensor.data =
// PaddleBuf(record.data.data(), record.data.size() * sizeof(float));
std
::
vector
<
PaddleTensor
>
paddle_tensor_feeds
(
1
,
tensor
);
PaddleTensor
tensor_out
;
std
::
vector
<
PaddleTensor
>
outputs
(
1
,
tensor_out
);
predictor
->
Run
(
paddle_tensor_feeds
,
&
outputs
,
batch_size
);
auto
time1
=
time
();
for
(
size_t
i
=
0
;
i
<
2
;
i
++
)
{
std
::
cout
<<
"Pass "
<<
i
<<
"predict"
;
predictor
->
Run
(
paddle_tensor_feeds
,
&
outputs
,
batch_size
);
}
auto
time2
=
time
();
std
::
ofstream
ofresult
(
"naive_test_result.txt"
,
std
::
ios
::
app
);
std
::
cout
<<
"batch: "
<<
batch_size
<<
" predict cost: "
<<
time_diff
(
time1
,
time2
)
/
100.0
<<
"ms"
<<
std
::
endl
;
std
::
cout
<<
outputs
.
size
()
<<
std
::
endl
;
}
}
// namespace paddle
int
main
(
int
argc
,
char
**
argv
)
{
paddle
::
test_naive
(
1
<<
0
);
return
0
;
}
\ No newline at end of file
paddle/fluid/inference/api/demo_ci/inference_icnet.h
0 → 100644
浏览文件 @
c6dcffc6
#ifdef _WIN32
#ifdef inference_icnet_EXPORTS
#define API_REFERENCE extern "C" __declspec(dllexport)
#else
#define API_REFERENCE extern "C" __declspec(dllimport)
#endif
#else
#define API_REFERENCE
#endif
//API_REFERENCE void * init_predictor();
//API_REFERENCE void destory_predictor(void *handle);
//API_REFERENCE void predict(void *handle, int n);
// Creates a predictor and returns it as an opaque handle.
//
// prog_file / param_file   paths of the model program file and the combined
//                          parameter file.
// fraction_of_gpu_memory   forwarded to the predictor configuration.
// use_gpu                  forwarded to the predictor configuration.
// device                   forwarded to the predictor configuration.
//
// The returned handle is what predict() and destory_predictor() expect as
// their `handle` argument.
API_REFERENCE
void
*
init_predictor
(
const
char
*
prog_file
,
const
char
*
param_file
,
const
float
fraction_of_gpu_memory
,
const
bool
use_gpu
,
const
int
device
);
// Runs one inference on `input` using the predictor behind `handle`.
//
// handle         handle obtained from init_predictor(); must not be null.
// input          batch_size * channel * height * width floats.
// channel, height, width
//                dimensions of one input image.
// output         *output must point to a caller-allocated buffer; the result
//                bytes are copied into it.
//                NOTE(review): no capacity is passed, so the caller has to
//                size the buffer for the model's output - confirm the
//                expected size with the model owner.
// output_length  receives the size of the result in bytes.
// batch_size     number of images contained in `input`.
API_REFERENCE
void
predict
(
void
*
handle
,
float
*
input
,
const
int
channel
,
const
int
height
,
const
int
width
,
int64_t
**
output
,
int
*
output_length
,
int
batch_size
);
// Releases a predictor previously created by init_predictor().
// A null `handle` is accepted and ignored.
API_REFERENCE
void
destory_predictor
(
void
*
handle
);
paddle/fluid/inference/api/demo_ci/real_data_icnet_tester.cc
0 → 100644
浏览文件 @
c6dcffc6
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#define GOOGLE_GLOG_DLL_DECL
#include <gflags/gflags.h>
#include <glog/logging.h>
#include <chrono>
#include <fstream>
#include <iostream>
#include "paddle/fluid/inference/api/paddle_inference_api.h"
namespace
paddle
{
// DEFINE_string(dirname, "./lb",
// "Directory of the inference model.");
// Builds the predictor configuration used by this tester: program and
// parameter files under lb/, GPU enabled on device 0, and a GPU memory
// fraction of 0.8.
NativeConfig GetConfig() {
  NativeConfig native_cfg;
  // native_cfg.model_dir = FLAGS_dirname;
  native_cfg.device = 0;
  native_cfg.use_gpu = true;
  native_cfg.fraction_of_gpu_memory = 0.8;
  native_cfg.param_file = "lb/__params__";
  native_cfg.prog_file = "lb/__model__";
  return native_cfg;
}
// Time point type produced by the high-resolution clock.
using Time = decltype(std::chrono::high_resolution_clock::now());

// Returns the current reading of the high-resolution clock.
Time time() {
  const Time now = std::chrono::high_resolution_clock::now();
  return now;
}

// Returns the interval from t1 to t2 in milliseconds. The interval is first
// truncated to whole microseconds, so the result has microsecond resolution.
double time_diff(Time t1, Time t2) {
  using Microseconds = std::chrono::microseconds;
  const Microseconds elapsed =
      std::chrono::duration_cast<Microseconds>(t2 - t1);
  const double elapsed_us = static_cast<double>(elapsed.count());
  return elapsed_us / 1000.0;
}
void
test_naive
(
int
batch_size
){
NativeConfig
config
=
GetConfig
();
auto
predictor
=
CreatePaddlePredictor
<
NativeConfig
>
(
config
);
int
height
=
449
;
int
width
=
581
;
// =============read file list =============
std
::
ifstream
infile
(
"new_file.list"
);
std
::
string
temp_s
;
std
::
vector
<
std
::
string
>
all_files
;
while
(
!
infile
.
eof
())
{
infile
>>
temp_s
;
all_files
.
push_back
(
temp_s
);
}
// size_t file_num = all_files.size();
infile
.
close
();
// =============read file list =============
for
(
size_t
f_k
=
0
;
f_k
<
1
;
f_k
++
)
{
std
::
ifstream
in_img
(
all_files
[
f_k
]);
std
::
cout
<<
all_files
[
f_k
]
<<
std
::
endl
;
float
temp_v
;
float
sum_n
=
0.0
;
std
::
vector
<
float
>
data
;
while
(
!
in_img
.
eof
())
{
in_img
>>
temp_v
;
data
.
push_back
(
float
(
temp_v
));
// std::cout << temp_v << " ";
sum_n
+=
temp_v
;
}
in_img
.
close
();
std
::
cout
<<
"sum: "
<<
sum_n
<<
std
::
endl
;
PaddleTensor
tensor
;
tensor
.
shape
=
std
::
vector
<
int
>
({
batch_size
,
3
,
height
,
width
});
tensor
.
data
.
Resize
(
sizeof
(
float
)
*
batch_size
*
3
*
height
*
width
);
std
::
copy
(
data
.
begin
(),
data
.
end
(),
static_cast
<
float
*>
(
tensor
.
data
.
data
()));
tensor
.
dtype
=
PaddleDType
::
FLOAT32
;
std
::
vector
<
PaddleTensor
>
paddle_tensor_feeds
(
1
,
tensor
);
PaddleTensor
tensor_out
;
std
::
vector
<
PaddleTensor
>
outputs
(
1
,
tensor_out
);
predictor
->
Run
(
paddle_tensor_feeds
,
&
outputs
,
batch_size
);
std
::
cout
<<
"start predict123:"
<<
std
::
endl
;
auto
time1
=
time
();
for
(
size_t
i
=
0
;
i
<
1
;
i
++
)
{
predictor
->
Run
(
paddle_tensor_feeds
,
&
outputs
,
batch_size
);
}
auto
time2
=
time
();
std
::
ofstream
ofresult
(
"naive_test_result.txt"
,
std
::
ios
::
app
);
std
::
cout
<<
"batch: "
<<
batch_size
<<
" predict cost: "
<<
time_diff
(
time1
,
time2
)
/
1000.0
<<
"ms"
<<
std
::
endl
;
std
::
cout
<<
outputs
.
size
()
<<
std
::
endl
;
int64_t
*
data_o
=
static_cast
<
int64_t
*>
(
outputs
[
0
].
data
.
data
());
int64_t
sum_out
=
0
;
for
(
size_t
j
=
0
;
j
<
outputs
[
0
].
data
.
length
()
/
sizeof
(
int64_t
);
++
j
)
{
ofresult
<<
std
::
to_string
(
data_o
[
j
])
<<
" "
;
sum_out
+=
data_o
[
j
];
}
std
::
cout
<<
"sum_out "
<<
sum_out
<<
std
::
endl
;
ofresult
<<
std
::
endl
;
ofresult
.
close
();
}
}
}
// namespace paddle
// Entry point: runs the single-image test with a batch size of one.
int main(int argc, char** argv) {
  // google::ParseCommandLineFlags(&argc, &argv, true);
  const int batch_size = 1 << 0;
  paddle::test_naive(batch_size);
  return 0;
}
paddle/fluid/inference/api/demo_ci/test.cc
0 → 100644
浏览文件 @
c6dcffc6
#include<windows.h>
#include <fstream>
#include "inference_icnet.h"
#include <thread>
#include <vector>
#include <string>
#include <iostream>
#include <sstream>
using
namespace
std
;
// Parses the leading part of `str` as a value of type `Type` using stream
// extraction and returns it.
//
// Returns a value-initialized `Type` (0 for arithmetic types) when nothing
// can be parsed. `num` is value-initialized because an empty or
// whitespace-only string makes the extraction bail out before it writes to
// the target, which previously returned an indeterminate value.
template <class Type>
Type stringToNum(const std::string& str) {
  std::istringstream iss(str);
  Type num{};
  iss >> num;
  return num;
}
// Loads one image (one float per line) from a fixed path, runs it through
// the predictor created by init_predictor(), writes the int64 output values
// to "./1.png.output.txt" and prints input/output checksums.
//
// Buffers are held in std::vector so they are released on every path (the
// raw new[] buffers were leaked or freed with scalar delete).
void test_imgs() {
  void* h = init_predictor("./lb/__model__", "./lb/__params__", 0.3f, true, 0);

  std::ifstream infile("new_file.list");
  std::ofstream ofs("./1.png.output.txt");
  std::string temp_s;
  std::vector<std::string> all_files;
  // Test the extraction itself instead of eof(): an unopened stream never
  // reaches eof, so the old loop would never terminate.
  while (infile >> temp_s) {
    all_files.push_back(temp_s);
  }
  // size_t file_num = all_files.size();
  infile.close();
  // =============read file list =============

  for (size_t f_k = 0; f_k < 1; f_k++) {
    // std::string path = "D:\\Paddle\\paddle\\fluid\\inference\\api\\demo_ci\\build\\Release\\";
    // std::ifstream in_img(path + all_files[f_k]);
    std::string mypath = "D:\\Paddle\\paddle\\fluid\\inference\\api\\demo_ci\\build\\Release\\1.png.txt";
    std::cout << "file" << mypath << std::endl;
    std::ifstream in_img(mypath);
    // std::cout << path + all_files[f_k] << std::endl;
    if (!in_img.is_open()) {
      std::cout << "open failed" << std::endl;
      // Nothing to predict on; fall through to the predictor cleanup.
      break;
    }

    const int size = 3 * 449 * 581 * 1;
    std::vector<float> data(size);  // zero-filled
    std::string value;
    double sum_input = .0;
    for (int i = 0; i < size; i++) {
      // A failed getline leaves `value` untouched, which would repeat the
      // last number for the rest of the buffer; stop and keep zeros instead.
      if (!std::getline(in_img, value, '\n')) {
        break;
      }
      double v = stringToNum<double>(value);
      data[i] = static_cast<float>(v);
      sum_input += v;
    }
    std::cout << "sum_input" << sum_input << std::endl;
    in_img.close();

    const int SIZE = 449 * 581 * 1;
    std::vector<int64_t> out(SIZE, 0);
    int64_t* p = out.data();
    int out_size = 0;
    // NOTE(review): predict() receives no capacity for `p`; the buffer is
    // sized for one 449x581 int64 map - confirm this matches the model.
    predict(h, data.data(), 3, 449, 581, &p, &out_size, 1);
    std::cout << "out_size = " << out_size << std::endl;

    // out_size is a byte count; convert to an element count once, unsigned.
    const size_t num_out =
        out_size > 0 ? static_cast<size_t>(out_size) / sizeof(int64_t) : 0;
    double out_sum = .0;
    for (size_t i = 0; i < num_out; i++) {
      out_sum += p[i];
      ofs << p[i] << " ";
    }
    ofs.close();

    std::cout << "inferece out sum" << out_sum << std::endl;
  }

  destory_predictor(h);
}
// Entry point: runs the single-image test once.
int main(int argc, char** argv) {
  // Disabled experiment kept for reference: run several predictors, each in
  // its own std::thread.
  // if (true) {
  //   std::thread t1(func, init_predictor("./infer_model/__model__", "./infer_model/__params__", 0.1f, true, 0));
  //   std::thread t2(func, init_predictor("./infer_model/__model__", "./infer_model/__params__", 0.1f, true, 0));
  //   // std::thread t3(func, init_predictor("./infer_model/__model__", "./infer_model/__params__", 0.1f, true, 0));
  //   // std::thread t4(func, init_predictor("./infer_model/__model__", "./infer_model/__params__", 0.1f, true, 0));
  //   t1.join();
  //   t2.join();
  //   // t3.join();
  //   // t4.join();
  //   // Sleep(1);
  // }
  test_imgs();
  return 0;
}
paddle/fluid/inference/api/demo_ci/thread_icnet_test.cc
0 → 100644
浏览文件 @
c6dcffc6
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#define GOOGLE_GLOG_DLL_DECL
#include <gflags/gflags.h>
#include <glog/logging.h>
//#include <gtest/gtest.h>
#include <chrono>
#include <fstream>
#include <iostream>
#include "paddle/fluid/inference/api/paddle_inference_api.h"
#include <thread> // NOLINT
#define ASSERT_TRUE(x) x
#define ASSERT_EQ(x, y) assert(x == y)
namespace
paddle
{
// DEFINE_string(dirname, "./LB_icnet_model",
// "Directory of the inference model.");
// Builds the predictor configuration used by the threaded test: program and
// parameter files under ./dzh_lb/, GPU enabled on device 0, and a GPU memory
// fraction of 0.08.
NativeConfig GetConfig() {
  NativeConfig native_cfg;
  native_cfg.device = 0;
  native_cfg.use_gpu = true;
  native_cfg.fraction_of_gpu_memory = 0.08;
  native_cfg.param_file = "./dzh_lb/__params__";
  native_cfg.prog_file = "./dzh_lb/__model__";
  return native_cfg;
}
// Time point type produced by the high-resolution clock.
using Time = decltype(std::chrono::high_resolution_clock::now());

// Returns the current reading of the high-resolution clock.
Time time() {
  const Time now = std::chrono::high_resolution_clock::now();
  return now;
}

// Returns the interval from t1 to t2 in milliseconds. The interval is first
// truncated to whole microseconds, so the result has microsecond resolution.
double time_diff(Time t1, Time t2) {
  using Microseconds = std::chrono::microseconds;
  const Microseconds elapsed =
      std::chrono::duration_cast<Microseconds>(t2 - t1);
  const double elapsed_us = static_cast<double>(elapsed.count());
  return elapsed_us / 1000.0;
}
void
test_naive
(
int
batch_size
,
std
::
string
model_path
){
PaddlePredictor
*
pres
[
2
];
NativeConfig
config
=
GetConfig
();
// config.model_dir = model_path;
auto
predictor0
=
CreatePaddlePredictor
<
NativeConfig
>
(
config
);
auto
predictor1
=
CreatePaddlePredictor
<
NativeConfig
>
(
config
);
pres
[
0
]
=
predictor0
.
get
();
pres
[
1
]
=
predictor1
.
get
();
int
height
=
449
;
int
width
=
581
;
std
::
vector
<
float
>
data
;
for
(
int
i
=
0
;
i
<
3
*
height
*
width
;
i
++
)
{
data
.
push_back
(
0
);
}
PaddleTensor
tensor
;
tensor
.
shape
=
std
::
vector
<
int
>
({
batch_size
,
3
,
height
,
width
});
tensor
.
data
.
Resize
(
sizeof
(
float
)
*
batch_size
*
3
*
height
*
width
);
std
::
copy
(
data
.
begin
(),
data
.
end
(),
static_cast
<
float
*>
(
tensor
.
data
.
data
()));
tensor
.
dtype
=
PaddleDType
::
FLOAT32
;
std
::
vector
<
PaddleTensor
>
paddle_tensor_feeds
(
1
,
tensor
);
constexpr
int
num_jobs
=
5
;
// each job run 1 batch
std
::
vector
<
std
::
thread
>
threads
;
for
(
int
tid
=
0
;
tid
<
num_jobs
;
++
tid
)
{
threads
.
emplace_back
([
&
,
tid
]()
{
auto
predictor
=
pres
[
tid
];
std
::
vector
<
PaddleTensor
>
local_outputs
;
for
(
size_t
i
=
0
;
i
<
1000
;
i
++
)
{
ASSERT_TRUE
(
predictor
->
Run
(
paddle_tensor_feeds
,
&
local_outputs
));
std
::
cout
<<
"run: "
<<
tid
<<
std
::
endl
;
}
ASSERT_EQ
(
local_outputs
.
size
(),
1UL
);
});
}
for
(
int
i
=
0
;
i
<
num_jobs
;
++
i
)
{
threads
[
i
].
join
();
}
}
//TEST(alexnet, naive) {
// test_naive(1 << 0, "./trt_models/vgg19");
//}
}
// namespace paddle
// Entry point: runs the threaded test with a batch size of one and an empty
// model path.
int main(int argc, char** argv) {
  const int batch_size = 1 << 0;
  paddle::test_naive(batch_size, "");
  return 0;
}
paddle/fluid/operators/batch_norm_op.cu.cc
浏览文件 @
c6dcffc6
...
@@ -141,6 +141,27 @@ class BatchNormKernel<platform::CUDADeviceContext, T>
...
@@ -141,6 +141,27 @@ class BatchNormKernel<platform::CUDADeviceContext, T>
bias
->
template
data
<
BatchNormParamType
<
T
>
>
(),
bias
->
template
data
<
BatchNormParamType
<
T
>
>
(),
est_mean
->
template
data
<
BatchNormParamType
<
T
>
>
(),
est_mean
->
template
data
<
BatchNormParamType
<
T
>
>
(),
est_var
->
template
data
<
BatchNormParamType
<
T
>
>
(),
epsilon
));
est_var
->
template
data
<
BatchNormParamType
<
T
>
>
(),
epsilon
));
VLOG
(
3
)
<<
"before tensor copy"
;
Tensor
mean_
,
var_
,
x_
,
y_
;
framework
::
TensorCopy
(
*
est_mean
,
platform
::
CPUPlace
(),
dev_ctx
,
&
mean_
);
framework
::
TensorCopy
(
*
est_var
,
platform
::
CPUPlace
(),
dev_ctx
,
&
var_
);
framework
::
TensorCopy
(
*
x
,
platform
::
CPUPlace
(),
dev_ctx
,
&
x_
);
framework
::
TensorCopy
(
*
y
,
platform
::
CPUPlace
(),
dev_ctx
,
&
y_
);
VLOG
(
3
)
<<
"after tensor copy"
;
auto
check_tensor
=
[
&
](
const
Tensor
&
check
)
{
float
sum
=
.0
;
for
(
size_t
i
=
0
;
i
<
check
.
numel
();
++
i
)
{
sum
+=
check
.
data
<
float
>
()[
i
];
}
return
sum
;
};
VLOG
(
3
)
<<
"BatchNormKernel"
;
VLOG
(
3
)
<<
"mean"
<<
check_tensor
(
mean_
);
VLOG
(
3
)
<<
"var"
<<
check_tensor
(
var_
);
VLOG
(
3
)
<<
"x"
<<
check_tensor
(
x_
);
VLOG
(
3
)
<<
"y"
<<
check_tensor
(
y_
);
}
else
{
}
else
{
// Run training mode.
// Run training mode.
// obtain running mean and running inv var, and see if we need to
// obtain running mean and running inv var, and see if we need to
...
...
paddle/fluid/operators/load_combine_op.cc
浏览文件 @
c6dcffc6
...
@@ -12,6 +12,7 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
...
@@ -12,6 +12,7 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
See the License for the specific language governing permissions and
limitations under the License. */
limitations under the License. */
#include <fstream>
#include <fstream>
#include <vector>
#include "paddle/fluid/framework/data_type_transform.h"
#include "paddle/fluid/framework/data_type_transform.h"
#include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/platform/device_context.h"
#include "paddle/fluid/platform/device_context.h"
...
@@ -34,6 +35,7 @@ class LoadCombineOp : public framework::OperatorBase {
...
@@ -34,6 +35,7 @@ class LoadCombineOp : public framework::OperatorBase {
auto
load_as_fp16
=
Attr
<
bool
>
(
"load_as_fp16"
);
auto
load_as_fp16
=
Attr
<
bool
>
(
"load_as_fp16"
);
std
::
ifstream
fin
(
filename
,
std
::
ios_base
::
in
|
std
::
ios_base
::
binary
);
std
::
ifstream
fin
(
filename
,
std
::
ios_base
::
in
|
std
::
ios_base
::
binary
);
//std::ifstream fin(filename, std::ios_base::in);
PADDLE_ENFORCE
(
!
fin
.
bad
(),
PADDLE_ENFORCE
(
!
fin
.
bad
(),
"Cannot open file %s for load_combine op"
,
filename
);
"Cannot open file %s for load_combine op"
,
filename
);
...
@@ -46,7 +48,7 @@ class LoadCombineOp : public framework::OperatorBase {
...
@@ -46,7 +48,7 @@ class LoadCombineOp : public framework::OperatorBase {
auto
&
dev_ctx
=
*
pool
.
Get
(
place
);
auto
&
dev_ctx
=
*
pool
.
Get
(
place
);
for
(
size_t
i
=
0
;
i
<
out_var_names
.
size
();
i
++
)
{
for
(
size_t
i
=
0
;
i
<
out_var_names
.
size
();
i
++
)
{
VLOG
(
3
)
<<
"load "
<<
out_var_names
[
i
];
VLOG
(
3
)
<<
"load
variable
"
<<
out_var_names
[
i
];
auto
*
out_var
=
scope
.
FindVar
(
out_var_names
[
i
]);
auto
*
out_var
=
scope
.
FindVar
(
out_var_names
[
i
]);
PADDLE_ENFORCE
(
out_var
!=
nullptr
,
"Output variable %s cannot be found"
,
PADDLE_ENFORCE
(
out_var
!=
nullptr
,
"Output variable %s cannot be found"
,
...
@@ -61,6 +63,13 @@ class LoadCombineOp : public framework::OperatorBase {
...
@@ -61,6 +63,13 @@ class LoadCombineOp : public framework::OperatorBase {
// Get data from fin to tensor
// Get data from fin to tensor
DeserializeFromStream
(
fin
,
tensor
,
dev_ctx
);
DeserializeFromStream
(
fin
,
tensor
,
dev_ctx
);
VLOG
(
3
)
<<
"after deserialization"
;
VLOG
(
3
)
<<
"after deserialization"
;
framework
::
Tensor
check
;
framework
::
TensorCopy
(
*
tensor
,
platform
::
CPUPlace
(),
dev_ctx
,
&
check
);
float
sum
=
.0
;
for
(
size_t
i
=
0
;
i
<
check
.
numel
();
++
i
)
{
sum
+=
check
.
data
<
float
>
()[
i
];
}
VLOG
(
3
)
<<
"sum result"
<<
sum
;
auto
in_dtype
=
framework
::
ToDataType
(
tensor
->
type
());
auto
in_dtype
=
framework
::
ToDataType
(
tensor
->
type
());
auto
out_dtype
=
auto
out_dtype
=
load_as_fp16
?
framework
::
proto
::
VarType
::
FP16
:
in_dtype
;
load_as_fp16
?
framework
::
proto
::
VarType
::
FP16
:
in_dtype
;
...
@@ -80,6 +89,7 @@ class LoadCombineOp : public framework::OperatorBase {
...
@@ -80,6 +89,7 @@ class LoadCombineOp : public framework::OperatorBase {
tensor
=
out_var
->
GetMutable
<
framework
::
LoDTensor
>
();
tensor
=
out_var
->
GetMutable
<
framework
::
LoDTensor
>
();
tensor
->
set_lod
(
fp16_tensor
.
lod
());
tensor
->
set_lod
(
fp16_tensor
.
lod
());
tensor
->
ShareDataWith
(
fp16_tensor
);
tensor
->
ShareDataWith
(
fp16_tensor
);
}
}
VLOG
(
3
)
<<
"load "
<<
out_var_names
[
i
]
<<
" finished"
;
VLOG
(
3
)
<<
"load "
<<
out_var_names
[
i
]
<<
" finished"
;
}
}
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录