Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
PaddlePaddle
Paddle
提交
eb2f7ed2
P
Paddle
项目概览
PaddlePaddle
/
Paddle
1 年多 前同步成功
通知
2302
Star
20931
Fork
5422
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
1423
列表
看板
标记
里程碑
合并请求
543
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
1,423
Issue
1,423
列表
看板
标记
里程碑
合并请求
543
合并请求
543
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
eb2f7ed2
编写于
11月 02, 2018
作者:
D
dzhwinter
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
refine tests. test=develop
上级
0a180584
变更
11
隐藏空白更改
内联
并排
Showing
11 changed file
with
128 addition
and
563 deletion
+128
-563
cmake/external/threadpool.cmake
cmake/external/threadpool.cmake
+0
-1
paddle/fluid/framework/data_type.h
paddle/fluid/framework/data_type.h
+0
-41
paddle/fluid/framework/executor.cc
paddle/fluid/framework/executor.cc
+0
-1
paddle/fluid/framework/ir/attention_lstm_fuse_pass.cc
paddle/fluid/framework/ir/attention_lstm_fuse_pass.cc
+10
-11
paddle/fluid/inference/api/api_impl.cc
paddle/fluid/inference/api/api_impl.cc
+0
-2
paddle/fluid/inference/api/demo_ci/CMakeLists.txt
paddle/fluid/inference/api/demo_ci/CMakeLists.txt
+28
-39
paddle/fluid/inference/api/demo_ci/inference_icnet.cc
paddle/fluid/inference/api/demo_ci/inference_icnet.cc
+78
-141
paddle/fluid/inference/api/demo_ci/inference_icnet.h
paddle/fluid/inference/api/demo_ci/inference_icnet.h
+0
-21
paddle/fluid/inference/api/demo_ci/real_data_icnet_tester.cc
paddle/fluid/inference/api/demo_ci/real_data_icnet_tester.cc
+0
-125
paddle/fluid/inference/api/demo_ci/thread_icnet_test.cc
paddle/fluid/inference/api/demo_ci/thread_icnet_test.cc
+0
-146
paddle/fluid/operators/conv_op.cc
paddle/fluid/operators/conv_op.cc
+12
-35
未找到文件。
cmake/external/threadpool.cmake
浏览文件 @
eb2f7ed2
...
...
@@ -3,7 +3,6 @@ INCLUDE(ExternalProject)
SET
(
THREADPOOL_SOURCE_DIR
${
THIRD_PARTY_PATH
}
/threadpool
)
SET
(
THREADPOOL_INCLUDE_DIR
${
THREADPOOL_SOURCE_DIR
}
/src/extern_threadpool
)
INCLUDE_DIRECTORIES
(
${
THREADPOOL_INCLUDE_DIR
}
)
message
(
"Debug"
${
THREADPOOL_INCLUDE_DIR
}
)
ExternalProject_Add
(
extern_threadpool
...
...
paddle/fluid/framework/data_type.h
浏览文件 @
eb2f7ed2
...
...
@@ -25,7 +25,6 @@ namespace framework {
extern
proto
::
VarType
::
Type
ToDataType
(
std
::
type_index
type
);
extern
std
::
type_index
ToTypeIndex
(
proto
::
VarType
::
Type
type
);
#if !defined(_MSC_VER)
template
<
typename
Visitor
>
inline
void
VisitDataType
(
proto
::
VarType
::
Type
type
,
Visitor
visitor
)
{
switch
(
type
)
{
...
...
@@ -60,46 +59,6 @@ inline void VisitDataType(proto::VarType::Type type, Visitor visitor) {
PADDLE_THROW
(
"Not supported %d"
,
type
);
}
}
#else
// MSVC-only variant of VisitDataType (the MSVC compiler does not implement
// two-phase name lookup correctly).
//
// Translates the runtime `type` tag into the corresponding C++ element type
// and calls `visitor.apply<T>()` with it. A tag that is not listed below is
// reported through PADDLE_THROW.
template <typename Visitor>
inline void VisitDataType(proto::VarType::Type type, Visitor visitor) {
  switch (type) {
    case proto::VarType::FP16:
      visitor.template apply<platform::float16>();
      return;
    case proto::VarType::FP32:
      visitor.template apply<float>();
      return;
    case proto::VarType::FP64:
      visitor.template apply<double>();
      return;
    case proto::VarType::INT32:
      visitor.template apply<int>();
      return;
    case proto::VarType::INT64:
      visitor.template apply<int64_t>();
      return;
    case proto::VarType::BOOL:
      visitor.template apply<bool>();
      return;
    case proto::VarType::UINT8:
      visitor.template apply<uint8_t>();
      return;
    case proto::VarType::INT16:
      visitor.template apply<int16_t>();
      return;
    default:
      // Unknown tag: nothing to dispatch to.
      PADDLE_THROW("Not supported %d", type);
  }
}
// Erases both the constness and the static type of `t`, yielding the same
// address as an untyped, mutable pointer.
template <typename InT>
void* AnyCast(const InT* t) {
  InT* without_const = const_cast<InT*>(t);
  return static_cast<void*>(without_const);
}
#endif // _WIN32
extern
std
::
string
DataTypeToString
(
const
proto
::
VarType
::
Type
type
);
extern
size_t
SizeOfType
(
std
::
type_index
type
);
...
...
paddle/fluid/framework/executor.cc
浏览文件 @
eb2f7ed2
...
...
@@ -337,7 +337,6 @@ std::unique_ptr<ExecutorPrepareContext> Executor::Prepare(
new
ExecutorPrepareContext
(
program
,
block_id
));
PADDLE_ENFORCE_LT
(
static_cast
<
size_t
>
(
block_id
),
program
.
Size
());
auto
&
block
=
program
.
Block
(
block_id
);
int
counter
=
0
;
for
(
auto
&
op_desc
:
block
.
AllOps
())
{
ctx
->
ops_
.
push_back
(
OpRegistry
::
CreateOp
(
*
op_desc
));
}
...
...
paddle/fluid/framework/ir/attention_lstm_fuse_pass.cc
浏览文件 @
eb2f7ed2
...
...
@@ -11,10 +11,9 @@
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include <string>
#include <string>
#include "paddle/fluid/framework/ir/attention_lstm_fuse_pass.h"
#include <string>
#include "paddle/fluid/framework/ir/graph_pattern_detector.h"
#include "paddle/fluid/framework/ir/graph_viz_pass.h"
#include "paddle/fluid/framework/lod_tensor.h"
...
...
@@ -212,12 +211,12 @@ void PrepareLSTMWeight(const LoDTensor& W_forget_w0,
VLOG
(
3
)
<<
"LSTMWeight resized to "
<<
out
->
dims
();
float
*
out_data
=
out
->
mutable_data
<
float
>
(
platform
::
CPUPlace
());
std
::
array
<
const
float
*
,
4
>
tensors
=
{
W_forget_w0
.
data
<
float
>
(),
W_input_w0
.
data
<
float
>
(),
W_output_w0
.
data
<
float
>
(),
W_cell_w0
.
data
<
float
>
()}
;
std
::
array
<
const
float
*
,
4
>
tensors1
=
{
W_forget_w1
.
data
<
float
>
(),
W_input_w1
.
data
<
float
>
(),
W_output_w1
.
data
<
float
>
(),
W_cell_w1
.
data
<
float
>
()}
;
std
::
array
<
const
float
*
,
4
>
tensors
(
{{
W_forget_w0
.
data
<
float
>
(),
W_input_w0
.
data
<
float
>
(),
W_output_w0
.
data
<
float
>
(),
W_cell_w0
.
data
<
float
>
()}})
;
std
::
array
<
const
float
*
,
4
>
tensors1
(
{{
W_forget_w1
.
data
<
float
>
(),
W_input_w1
.
data
<
float
>
(),
W_output_w1
.
data
<
float
>
(),
W_cell_w1
.
data
<
float
>
()}})
;
for
(
int
row
=
0
;
row
<
D
;
row
++
)
{
for
(
int
col
=
0
;
col
<
4
;
col
++
)
{
...
...
@@ -239,9 +238,9 @@ void PrepareLSTMWeight(const LoDTensor& W_forget_w0,
void
PrepareLSTMBias
(
const
LoDTensor
&
B_forget
,
const
LoDTensor
&
B_input
,
const
LoDTensor
&
B_output
,
const
LoDTensor
&
B_cell
,
LoDTensor
*
out
)
{
std
::
array
<
const
float
*
,
4
>
tensors
=
{
B_forget
.
data
<
float
>
(),
B_input
.
data
<
float
>
(),
B_output
.
data
<
float
>
(),
B_cell
.
data
<
float
>
()}
;
std
::
array
<
const
float
*
,
4
>
tensors
(
{{
B_forget
.
data
<
float
>
(),
B_input
.
data
<
float
>
(),
B_output
.
data
<
float
>
(),
B_cell
.
data
<
float
>
()}})
;
PADDLE_ENFORCE_EQ
(
B_forget
.
dims
().
size
(),
1
);
int
D
=
B_forget
.
dims
()[
0
];
...
...
paddle/fluid/inference/api/api_impl.cc
浏览文件 @
eb2f7ed2
...
...
@@ -94,8 +94,6 @@ bool NativePaddlePredictor::Init(
// All parameters are saved in a single file.
// The file names should be consistent with that used
// in Python API `fluid.io.save_inference_model`.
auto
exe
=
executor_
.
get
();
auto
sc
=
scope_
.
get
();
inference_program_
=
paddle
::
inference
::
Load
(
executor_
.
get
(),
scope_
.
get
(),
config_
.
prog_file
,
config_
.
param_file
);
...
...
paddle/fluid/inference/api/demo_ci/CMakeLists.txt
浏览文件 @
eb2f7ed2
...
...
@@ -6,13 +6,13 @@ option(WITH_STATIC_LIB "Compile demo with static/shared library, default use sta
option
(
USE_TENSORRT
"Compile demo with TensorRT."
OFF
)
macro
(
safe_set_static_flag
)
foreach
(
flag_var
CMAKE_CXX_FLAGS CMAKE_CXX_FLAGS_DEBUG CMAKE_CXX_FLAGS_RELEASE
CMAKE_CXX_FLAGS_MINSIZEREL CMAKE_CXX_FLAGS_RELWITHDEBINFO
)
if
(
${
flag_var
}
MATCHES
"/MD"
)
string
(
REGEX REPLACE
"/MD"
"/MT"
${
flag_var
}
"
${${
flag_var
}}
"
)
endif
(
${
flag_var
}
MATCHES
"/MD"
)
endforeach
(
flag_var
)
foreach
(
flag_var
CMAKE_CXX_FLAGS CMAKE_CXX_FLAGS_DEBUG CMAKE_CXX_FLAGS_RELEASE
CMAKE_CXX_FLAGS_MINSIZEREL CMAKE_CXX_FLAGS_RELWITHDEBINFO
)
if
(
${
flag_var
}
MATCHES
"/MD"
)
string
(
REGEX REPLACE
"/MD"
"/MT"
${
flag_var
}
"
${${
flag_var
}}
"
)
endif
(
${
flag_var
}
MATCHES
"/MD"
)
endforeach
(
flag_var
)
endmacro
()
if
(
WIN32
)
...
...
@@ -42,7 +42,7 @@ if(WITH_GPU) # default gpu path
set
(
CUDA_LIB
"/usr/local/cuda/lib64/"
CACHE STRING
"CUDA Library"
)
else
()
if
(
CUDA_LIB STREQUAL
""
)
set
(
CUDA_LIB
"C:
\\
Program
\
Files
\\
NVIDIA GPU Computing Toolkit
\\
CUDA
\\
v8.0
\\
lib
\\
x64"
)
set
(
CUDA_LIB
"C:
\\
Program
\
Files
\\
NVIDIA GPU Computing Toolkit
\\
CUDA
\\
v8.0
\\
lib
\\
x64"
)
endif
()
endif
(
NOT WIN32
)
endif
()
...
...
@@ -53,9 +53,9 @@ include_directories("${PADDLE_LIB}/third_party/install/glog/include")
include_directories
(
"
${
PADDLE_LIB
}
/third_party/install/gflags/include"
)
include_directories
(
"
${
PADDLE_LIB
}
/third_party/install/xxhash/include"
)
if
(
NOT WIN32
)
include_directories
(
"
${
PADDLE_LIB
}
/third_party/install/snappy/include"
)
include_directories
(
"
${
PADDLE_LIB
}
/third_party/install/snappystream/include"
)
include_directories
(
"
${
PADDLE_LIB
}
/third_party/install/zlib/include"
)
include_directories
(
"
${
PADDLE_LIB
}
/third_party/install/snappy/include"
)
include_directories
(
"
${
PADDLE_LIB
}
/third_party/install/snappystream/include"
)
include_directories
(
"
${
PADDLE_LIB
}
/third_party/install/zlib/include"
)
endif
(
NOT WIN32
)
include_directories
(
"
${
PADDLE_LIB
}
/third_party/boost"
)
...
...
@@ -63,15 +63,15 @@ include_directories("${PADDLE_LIB}/third_party/eigen3")
if
(
NOT WIN32
)
if
(
USE_TENSORRT AND WITH_GPU
)
include_directories
(
"
${
TENSORRT_INCLUDE_DIR
}
"
)
link_directories
(
"
${
TENSORRT_LIB_DIR
}
"
)
include_directories
(
"
${
TENSORRT_INCLUDE_DIR
}
"
)
link_directories
(
"
${
TENSORRT_LIB_DIR
}
"
)
endif
()
endif
(
NOT WIN32
)
if
(
NOT WIN32
)
link_directories
(
"
${
PADDLE_LIB
}
/third_party/install/snappy/lib"
)
link_directories
(
"
${
PADDLE_LIB
}
/third_party/install/snappystream/lib"
)
link_directories
(
"
${
PADDLE_LIB
}
/third_party/install/zlib/lib"
)
link_directories
(
"
${
PADDLE_LIB
}
/third_party/install/snappy/lib"
)
link_directories
(
"
${
PADDLE_LIB
}
/third_party/install/snappystream/lib"
)
link_directories
(
"
${
PADDLE_LIB
}
/third_party/install/zlib/lib"
)
endif
(
NOT WIN32
)
link_directories
(
"
${
PADDLE_LIB
}
/third_party/install/protobuf/lib"
)
...
...
@@ -80,18 +80,12 @@ link_directories("${PADDLE_LIB}/third_party/install/gflags/lib")
link_directories
(
"
${
PADDLE_LIB
}
/third_party/install/xxhash/lib"
)
link_directories
(
"
${
PADDLE_LIB
}
/paddle/lib"
)
# add_executable(${DEMO_NAME} ${DEMO_NAME}.cc)
# add_library(${DEMO_NAME} ${DEMO_NAME}.cc)
add_executable
(
real_data_icnet_tester real_data_icnet_tester.cc
)
# add_library(${DEMO_NAME} SHARED ${DEMO_NAME}.cc)
# add_executable(test test.cc)
add_executable
(
thread_icnet_test thread_icnet_test.cc
)
add_executable
(
${
DEMO_NAME
}
${
DEMO_NAME
}
.cc
)
if
(
WITH_MKL
)
include_directories
(
"
${
PADDLE_LIB
}
/third_party/install/mklml/include"
)
set
(
MATH_LIB
${
PADDLE_LIB
}
/third_party/install/mklml/lib/libmklml_intel
${
CMAKE_SHARED_LIBRARY_SUFFIX
}
${
PADDLE_LIB
}
/third_party/install/mklml/lib/libiomp5
${
CMAKE_SHARED_LIBRARY_SUFFIX
}
)
${
PADDLE_LIB
}
/third_party/install/mklml/lib/libiomp5
${
CMAKE_SHARED_LIBRARY_SUFFIX
}
)
set
(
MKLDNN_PATH
"
${
PADDLE_LIB
}
/third_party/install/mkldnn"
)
if
(
EXISTS
${
MKLDNN_PATH
}
)
include_directories
(
"
${
MKLDNN_PATH
}
/include"
)
...
...
@@ -104,25 +98,25 @@ endif()
# Note: libpaddle_inference_api.so/a must put before libpaddle_fluid.so/a
if
(
WITH_STATIC_LIB
)
set
(
DEPS
${
PADDLE_LIB
}
/paddle/fluid/inference
/libpaddle_fluid
${
CMAKE_STATIC_LIBRARY_SUFFIX
}
)
${
PADDLE_LIB
}
/paddle/lib
/libpaddle_fluid
${
CMAKE_STATIC_LIBRARY_SUFFIX
}
)
else
()
set
(
DEPS
${
PADDLE_LIB
}
/paddle/fluid/inference
/libpaddle_fluid
${
CMAKE_SHARED_LIBRARY_SUFFIX
}
)
${
PADDLE_LIB
}
/paddle/lib
/libpaddle_fluid
${
CMAKE_SHARED_LIBRARY_SUFFIX
}
)
endif
()
if
(
NOT WIN32
)
set
(
EXTERNAL_LIB
"-lrt -ldl -lpthread"
)
set
(
DEPS
${
DEPS
}
set
(
EXTERNAL_LIB
"-lrt -ldl -lpthread"
)
set
(
DEPS
${
DEPS
}
${
MATH_LIB
}
${
MKLDNN_LIB
}
glog gflags protobuf snappystream snappy z xxhash
${
EXTERNAL_LIB
}
)
else
()
set
(
DEPS
${
DEPS
}
set
(
DEPS
${
DEPS
}
${
MATH_LIB
}
${
MKLDNN_LIB
}
${
CMAKE_STATIC_LIBRARY_PREFIX
}
glog
${
CMAKE_STATIC_LIBRARY_PREFIX
}
gflags
${
CMAKE_STATIC_LIBRARY_PREFIX
}
protobuf
${
EXTERNAL_LIB
}
)
# NOTE(dzhwinter) shlwapi is
deprecated.
set
(
DEPS
${
DEPS
}
libcmt shlwapi
)
# NOTE(dzhwinter) shlwapi will be
deprecated.
set
(
DEPS
${
DEPS
}
libcmt shlwapi
)
endif
(
NOT WIN32
)
if
(
WITH_GPU
)
...
...
@@ -134,14 +128,9 @@ if(WITH_GPU)
set
(
DEPS
${
DEPS
}
${
CUDA_LIB
}
/libcudart
${
CMAKE_SHARED_LIBRARY_SUFFIX
}
)
else
()
set
(
DEPS
${
DEPS
}
${
CUDA_LIB
}
/cudart
${
CMAKE_STATIC_LIBRARY_SUFFIX
}
)
set
(
DEPS
${
DEPS
}
${
CUDA_LIB
}
/cublas
${
CMAKE_STATIC_LIBRARY_SUFFIX
}
)
set
(
DEPS
${
DEPS
}
${
CUDA_LIB
}
/cudnn
${
CMAKE_STATIC_LIBRARY_SUFFIX
}
)
set
(
DEPS
${
DEPS
}
${
CUDA_LIB
}
/cublas
${
CMAKE_STATIC_LIBRARY_SUFFIX
}
)
set
(
DEPS
${
DEPS
}
${
CUDA_LIB
}
/cudnn
${
CMAKE_STATIC_LIBRARY_SUFFIX
}
)
endif
()
endif
()
target_link_libraries
(
real_data_icnet_tester
${
DEPS
}
)
# target_link_libraries(${DEMO_NAME} ${DEPS})
# target_link_libraries(test ${DEMO_NAME} )
target_link_libraries
(
thread_icnet_test
${
DEPS
}
)
# target_compile_definitions(${DEMO_NAME} PRIVATE "API_DEFINITION")
target_link_libraries
(
${
DEMO_NAME
}
${
DEPS
}
)
paddle/fluid/inference/api/demo_ci/inference_icnet.cc
浏览文件 @
eb2f7ed2
...
...
@@ -11,152 +11,89 @@
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include <
cassert
>
#include <
chrono
>
#include <
iostream>
#define GOOGLE_GLOG_DLL_DECL
#include <
gflags/gflags.h
>
#include <
glog/logging.h
>
#include <
chrono> // NOLINT
#include <fstream>
#include <algorithm>
#include <vector>
#include <string>
#include <memory>
#include "paddle/fluid/inference/api/paddle_inference_api.h"
#include "inference_icnet.h"
// 数据格式
// "<space splitted floats as data>\t<space splitted ints as shape"
// 1. 存储为float32格式。
// 2. 必须减去均值。 CHW三个通道为 mean = 112.15, 109.41, 185.42
using
namespace
paddle
;
class
Predictor
{
private:
std
::
unique_ptr
<
PaddlePredictor
>
predictor
;
struct
Record
{
std
::
vector
<
float
>
data
;
std
::
vector
<
int32_t
>
shape
;
};
const
int
C
=
3
;
// image channel
const
int
H
=
449
;
// image height
const
int
W
=
581
;
// image width
using
Time
=
decltype
(
std
::
chrono
::
high_resolution_clock
::
now
());
Time
time
()
{
return
std
::
chrono
::
high_resolution_clock
::
now
();
};
double
time_diff
(
Time
t1
,
Time
t2
)
{
typedef
std
::
chrono
::
microseconds
ms
;
auto
diff
=
t2
-
t1
;
ms
counter
=
std
::
chrono
::
duration_cast
<
ms
>
(
diff
);
return
counter
.
count
()
/
1000.0
;
}
static
void
split
(
const
std
::
string
&
str
,
char
sep
,
std
::
vector
<
std
::
string
>*
pieces
)
{
pieces
->
clear
();
if
(
str
.
empty
())
{
return
;
}
size_t
pos
=
0
;
size_t
next
=
str
.
find
(
sep
,
pos
);
while
(
next
!=
std
::
string
::
npos
)
{
pieces
->
push_back
(
str
.
substr
(
pos
,
next
-
pos
));
pos
=
next
+
1
;
next
=
str
.
find
(
sep
,
pos
);
}
if
(
!
str
.
substr
(
pos
).
empty
())
{
pieces
->
push_back
(
str
.
substr
(
pos
));
}
}
Record
ProcessALine
(
const
std
::
string
&
line
)
{
std
::
vector
<
std
::
string
>
columns
;
split
(
line
,
'\t'
,
&
columns
);
Record
record
;
std
::
vector
<
std
::
string
>
data_strs
;
split
(
columns
[
0
],
' '
,
&
data_strs
);
for
(
auto
&
d
:
data_strs
)
{
record
.
data
.
push_back
(
std
::
stof
(
d
));
}
std
::
vector
<
std
::
string
>
shape_strs
;
split
(
columns
[
1
],
' '
,
&
shape_strs
);
for
(
auto
&
s
:
shape_strs
)
{
record
.
shape
.
push_back
(
std
::
stoi
(
s
));
}
return
record
;
}
public:
Predictor
(
const
char
*
prog_file
,
const
char
*
param_file
,
const
float
fraction_of_gpu_memory
,
const
bool
use_gpu
,
const
int
device
)
{
NativeConfig
config
;
config
.
prog_file
=
prog_file
;
config
.
param_file
=
param_file
;
config
.
fraction_of_gpu_memory
=
fraction_of_gpu_memory
;
config
.
use_gpu
=
use_gpu
;
config
.
device
=
device
;
predictor
=
CreatePaddlePredictor
<
NativeConfig
>
(
config
);
}
void
predict
(
float
*
input
,
const
int
channel
,
const
int
height
,
const
int
width
,
int64_t
**
output
,
int
*
output_length
,
int
batch_size
)
{
std
::
vector
<
float
>
data
;
int
intput_length
=
channel
*
height
*
width
*
batch_size
;
for
(
int
i
=
0
;
i
<
intput_length
;
i
++
)
{
data
.
push_back
(
*
((
float
*
)
input
+
i
));
}
// initialize the input data
PaddleTensor
tensor
;
tensor
.
shape
=
std
::
vector
<
int
>
({
batch_size
,
channel
,
height
,
width
});
tensor
.
data
.
Resize
(
sizeof
(
float
)
*
batch_size
*
channel
*
height
*
width
);
std
::
copy
(
data
.
begin
(),
data
.
end
(),
static_cast
<
float
*>
(
tensor
.
data
.
data
()));
tensor
.
dtype
=
PaddleDType
::
FLOAT32
;
std
::
vector
<
PaddleTensor
>
paddle_tensor_feeds
(
1
,
tensor
);
// initialize the output data
PaddleTensor
tensor_out
;
std
::
vector
<
PaddleTensor
>
outputs
(
1
,
tensor_out
);
predictor
->
Run
(
paddle_tensor_feeds
,
&
outputs
,
batch_size
);
*
output_length
=
(
int
)
outputs
[
0
].
data
.
length
();
std
::
memcpy
(
static_cast
<
void
*>
(
*
output
),
outputs
[
0
].
data
.
data
(),
outputs
[
0
].
data
.
length
());
int64_t
sum_out
=
0
;
for
(
int
i
=
0
;
i
<
outputs
[
0
].
data
.
length
()
/
sizeof
(
int64_t
);
++
i
)
{
int64_t
item
=
static_cast
<
int64_t
*>
(
outputs
[
0
].
data
.
data
())[
i
];
sum_out
+=
item
;
if
(
item
!=
0
)
{
std
::
cout
<<
item
<<
std
::
endl
;
}
}
#include <iostream>
#include <thread> // NOLINT
#include <utility>
#include "paddle/fluid/inference/paddle_inference_api.h"
namespace
paddle
{
NativeConfig
GetConfig
()
{
NativeConfig
config
;
config
.
prog_file
=
"hs_lb_without_bn_cudnn/__model__"
;
config
.
param_file
=
"hs_lb_without_bn_cudnn/__params__"
;
config
.
fraction_of_gpu_memory
=
0.0
;
config
.
use_gpu
=
true
;
config
.
device
=
0
;
return
config
;
}
std
::
cout
<<
"sum_out"
<<
sum_out
<<
std
::
endl
;
}
};
using
Time
=
decltype
(
std
::
chrono
::
high_resolution_clock
::
now
());
Time
TimeNow
()
{
return
std
::
chrono
::
high_resolution_clock
::
now
();
}
double
TimeDiff
(
Time
t1
,
Time
t2
)
{
typedef
std
::
chrono
::
microseconds
ms
;
auto
diff
=
t2
-
t1
;
ms
counter
=
std
::
chrono
::
duration_cast
<
ms
>
(
diff
);
return
counter
.
count
()
/
1000.0
;
}
API_REFERENCE
void
*
init_predictor
(
const
char
*
prog_file
,
const
char
*
param_file
,
const
float
fraction_of_gpu_memory
,
const
bool
use_gpu
,
const
int
device
)
{
return
new
Predictor
(
prog_file
,
param_file
,
fraction_of_gpu_memory
,
use_gpu
,
device
);
std
::
vector
<
PaddleTensor
>
PrepareData
()
{
int
height
=
449
;
int
width
=
581
;
std
::
vector
<
float
>
data
;
for
(
int
i
=
0
;
i
<
3
*
height
*
width
;
++
i
)
{
data
.
push_back
(
0.0
);
}
PaddleTensor
tensor
;
tensor
.
shape
=
std
::
vector
<
int
>
({
batch_size
,
3
,
height
,
width
});
tensor
.
data
.
Resize
(
sizeof
(
float
)
*
batch_size
*
3
*
height
*
width
);
std
::
copy
(
data
.
begin
(),
data
.
end
(),
static_cast
<
float
*>
(
tensor
.
data
.
data
()));
tensor
.
dtype
=
PaddleDType
::
FLOAT32
;
std
::
vector
<
PaddleTensor
>
paddle_tensor_feeds
(
1
,
tensor
);
return
std
::
move
(
paddle_tensor_feeds
);
}
API_REFERENCE
void
predict
(
void
*
handle
,
float
*
input
,
const
int
channel
,
const
int
height
,
const
int
width
,
int64_t
**
output
,
int
*
output_length
,
int
batch_size
)
{
assert
(
handle
!=
nullptr
);
((
Predictor
*
)
handle
)
->
predict
(
input
,
channel
,
height
,
width
,
output
,
output_length
,
batch_size
);
void
TestNaive
(
int
batch_size
,
int
thread_num
)
{
NativeConfig
config
=
GetConfig
();
int
num_jobs
=
thread_num
;
// parallel jobs.
constexpr
int
epoches
=
10
;
// each job run epoches.
std
::
vector
<
std
::
thread
>
threads
;
std
::
vector
<
std
::
unique_ptr
<
PaddlePredictor
>>
predictors
;
for
(
int
tid
=
0
;
tid
<
num_jobs
;
++
tid
)
{
auto
&
pred
=
CreatePaddlePredictor
<
NativeConfig
>
(
config
);
predictors
.
emplace_back
(
std
::
move
(
pred
));
}
auto
time1
=
TimeNow
();
for
(
int
tid
=
0
;
tid
<
num_jobs
;
++
tid
)
{
threads
.
emplace_back
([
&
,
tid
]()
{
auto
&
predictor
=
predictors
[
tid
];
PaddleTensor
tensor_out
;
std
::
vector
<
PaddleTensor
>
outputs
(
1
,
tensor_out
);
for
(
size_t
i
=
0
;
i
<
epoches
;
i
++
)
{
ASSERT_TRUE
(
predictor
->
Run
(
paddle_tensor_feeds
,
&
outputs
));
VLOG
(
3
)
<<
"tid : "
<<
tid
<<
" run: "
<<
i
<<
"finished"
;
ASSERT_EQ
(
outputs
.
size
(),
1UL
);
}
});
}
for
(
int
i
=
0
;
i
<
num_jobs
;
++
i
)
{
threads
[
i
].
join
();
}
auto
time2
=
TimeNow
();
VLOG
(
3
)
<<
"Thread num "
<<
thread_num
<<
"total time cost"
<<
(
time2
-
time1
);
}
}
// namespace paddle
API_REFERENCE
void
destory_predictor
(
void
*
handle
)
{
if
(
handle
)
{
delete
handle
;
handle
=
nullptr
;
}
int
main
(
int
argc
,
char
**
argv
)
{
paddle
::
TestNaive
(
1
,
1
);
// single thread.
paddle
::
TestNaive
(
1
,
5
);
// 5 threads.
return
0
;
}
paddle/fluid/inference/api/demo_ci/inference_icnet.h
已删除
100644 → 0
浏览文件 @
0a180584
#ifndef PADDLE_FLUID_INFERENCE_API_DEMO_CI_INFERENCE_ICNET_H_
#define PADDLE_FLUID_INFERENCE_API_DEMO_CI_INFERENCE_ICNET_H_

#include <cstdint>  // int64_t is used in the predict() signature below.

// API_REFERENCE marks the exported entry points of the ICNet demo library.
// On Windows the functions get C linkage and are exported when the library
// itself is being built (inference_icnet_EXPORTS defined) and imported
// otherwise. On other platforms the macro expands to nothing.
#ifdef _WIN32
#ifdef inference_icnet_EXPORTS
#define API_REFERENCE extern "C" __declspec(dllexport)
#else
#define API_REFERENCE extern "C" __declspec(dllimport)
#endif
#else
#define API_REFERENCE
#endif

// Creates a predictor and returns it as an opaque handle.
//
//   prog_file               path of the serialized program (model) file
//   param_file              path of the combined parameter file
//   fraction_of_gpu_memory  forwarded to the predictor configuration
//   use_gpu                 forwarded to the predictor configuration
//   device                  forwarded to the predictor configuration
//
// The handle must eventually be passed to destory_predictor().
API_REFERENCE void* init_predictor(const char* prog_file,
                                   const char* param_file,
                                   const float fraction_of_gpu_memory,
                                   const bool use_gpu, const int device);

// Runs one inference on `handle`.
//
//   handle         handle obtained from init_predictor(); must not be null
//   input          batch_size * channel * height * width float values
//   output         *output receives the raw output tensor bytes
//                  NOTE(review): the buffer appears to be caller-allocated --
//                  confirm the required capacity against the implementation.
//   output_length  receives the length of the output data
//                  NOTE(review): looks like a byte count, not an element
//                  count -- confirm against the implementation.
//   batch_size     number of samples contained in `input`
API_REFERENCE void predict(void* handle, float* input, const int channel,
                           const int height, const int width,
                           int64_t** output, int* output_length,
                           int batch_size);

// Releases a handle created by init_predictor(). The misspelled name ("destory")
// is part of the exported interface and is kept so existing callers still link.
API_REFERENCE void destory_predictor(void* handle);

#endif  // PADDLE_FLUID_INFERENCE_API_DEMO_CI_INFERENCE_ICNET_H_
paddle/fluid/inference/api/demo_ci/real_data_icnet_tester.cc
已删除
100644 → 0
浏览文件 @
0a180584
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#define GOOGLE_GLOG_DLL_DECL
#include <gflags/gflags.h>
#include <glog/logging.h>
#include <chrono>
#include <fstream>
#include <iostream>
#include "paddle/fluid/inference/paddle_inference_api.h"
namespace
paddle
{
// Builds the predictor configuration used by this tester: the combined
// program/parameter files under hs_lb_without_bn_cudnn/, with GPU execution
// enabled on device 0.
NativeConfig GetConfig() {
  NativeConfig cfg;

  // Execution placement.
  cfg.use_gpu = true;
  cfg.device = 0;
  cfg.fraction_of_gpu_memory = 0.0;

  // Model files. Earlier alternatives kept for reference:
  //   cfg.model_dir  = FLAGS_dirname;
  //   cfg.prog_file  = "hs_lb_without_bn_cuda/__model__";
  //   cfg.param_file = "hs_lb_without_bn_cuda/__params__";
  cfg.prog_file = "hs_lb_without_bn_cudnn/__model__";
  cfg.param_file = "hs_lb_without_bn_cudnn/__params__";

  return cfg;
}
// Time point type of the high-resolution clock.
using Time = std::chrono::high_resolution_clock::time_point;

// Returns the current reading of the high-resolution clock.
Time time() {
  const Time current = std::chrono::high_resolution_clock::now();
  return current;
}

// Returns the interval from t1 to t2 in milliseconds. The interval is first
// truncated to whole microseconds, so the result has microsecond resolution.
double time_diff(Time t1, Time t2) {
  using std::chrono::duration_cast;
  using std::chrono::microseconds;

  const microseconds elapsed = duration_cast<microseconds>(t2 - t1);
  return elapsed.count() / 1000.0;
}
void
test_naive
(
int
batch_size
)
{
NativeConfig
config
=
GetConfig
();
auto
predictor
=
CreatePaddlePredictor
<
NativeConfig
>
(
config
);
int
height
=
449
;
int
width
=
581
;
// =============read file list =============
std
::
ifstream
infile
(
"new_file.list"
);
std
::
string
temp_s
;
std
::
vector
<
std
::
string
>
all_files
;
while
(
!
infile
.
eof
())
{
infile
>>
temp_s
;
all_files
.
push_back
(
temp_s
);
}
// size_t file_num = all_files.size();
infile
.
close
();
// =============read file list =============
for
(
size_t
f_k
=
0
;
f_k
<
1
;
f_k
++
)
{
std
::
ifstream
in_img
(
all_files
[
f_k
]);
std
::
cout
<<
all_files
[
f_k
]
<<
std
::
endl
;
float
temp_v
;
float
sum_n
=
0.0
;
std
::
vector
<
float
>
data
;
while
(
!
in_img
.
eof
())
{
in_img
>>
temp_v
;
data
.
push_back
(
float
(
temp_v
));
// std::cout << temp_v << " ";
sum_n
+=
temp_v
;
}
in_img
.
close
();
std
::
cout
<<
"sum: "
<<
sum_n
<<
std
::
endl
;
PaddleTensor
tensor
;
tensor
.
shape
=
std
::
vector
<
int
>
({
batch_size
,
3
,
height
,
width
});
tensor
.
data
.
Resize
(
sizeof
(
float
)
*
batch_size
*
3
*
height
*
width
);
std
::
copy
(
data
.
begin
(),
data
.
end
(),
static_cast
<
float
*>
(
tensor
.
data
.
data
()));
tensor
.
dtype
=
PaddleDType
::
FLOAT32
;
std
::
vector
<
PaddleTensor
>
paddle_tensor_feeds
(
1
,
tensor
);
PaddleTensor
tensor_out
;
std
::
vector
<
PaddleTensor
>
outputs
(
1
,
tensor_out
);
// predictor->Run(paddle_tensor_feeds, &outputs, batch_size);
std
::
cout
<<
"start predict123:"
<<
std
::
endl
;
auto
time1
=
time
();
int
steps
=
100
;
for
(
size_t
i
=
0
;
i
<
steps
;
i
++
)
{
if
(
i
==
5
)
time1
=
time
();
predictor
->
Run
(
paddle_tensor_feeds
,
&
outputs
,
batch_size
);
}
auto
time2
=
time
();
std
::
ofstream
ofresult
(
"naive_test_result.txt"
,
std
::
ios
::
app
);
std
::
cout
<<
"batch: "
<<
batch_size
<<
" predict cost: "
<<
time_diff
(
time1
,
time2
)
/
steps
<<
"ms"
<<
std
::
endl
;
std
::
cout
<<
outputs
.
size
()
<<
std
::
endl
;
int64_t
*
data_o
=
static_cast
<
int64_t
*>
(
outputs
[
0
].
data
.
data
());
int64_t
sum_out
=
0
;
for
(
size_t
j
=
0
;
j
<
outputs
[
0
].
data
.
length
()
/
sizeof
(
int64_t
);
++
j
)
{
ofresult
<<
std
::
to_string
(
data_o
[
j
])
<<
" "
;
sum_out
+=
data_o
[
j
];
}
std
::
cout
<<
"sum_out "
<<
sum_out
<<
std
::
endl
;
ofresult
<<
std
::
endl
;
ofresult
.
close
();
}
}
}
// namespace paddle
// Entry point: runs the single-threaded benchmark once with a batch of one.
// Command-line flag parsing is intentionally disabled:
//   google::ParseCommandLineFlags(&argc, &argv, true);
int main(int argc, char** argv) {
  const int batch_size = 1 << 0;
  paddle::test_naive(batch_size);
  return 0;
}
paddle/fluid/inference/api/demo_ci/thread_icnet_test.cc
已删除
100644 → 0
浏览文件 @
0a180584
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#define GOOGLE_GLOG_DLL_DECL
#include <gflags/gflags.h>
#include <glog/logging.h>
//#include <gtest/gtest.h>
#include <chrono>
#include <fstream>
#include <iostream>
#include <thread> // NOLINT
#include <utility>
#include "paddle/fluid/inference/api/paddle_inference_api.h"
// Minimal stand-ins for the gtest assertion macros.
//
// ASSERT_TRUE only evaluates its argument; the result is NOT checked. This
// keeps the wrapped call executing even when assert() is compiled out.
#define ASSERT_TRUE(x) (x)
// Both arguments are parenthesized so that operands containing operators of
// lower precedence than `==` (e.g. `a & b`) are compared as written.
#define ASSERT_EQ(x, y) assert((x) == (y))
// DEFINE_string(dirname, "./LB_icnet_model",
// "Directory of the inference model.");
namespace
paddle
{
// Builds the predictor configuration shared by all worker threads: the
// combined program/parameter files under ./hs_lb_without_bn_cuda/, with GPU
// execution enabled on device 0.
NativeConfig GetConfig() {
  NativeConfig cfg;

  // Execution placement.
  cfg.use_gpu = true;
  cfg.device = 0;
  cfg.fraction_of_gpu_memory = 0.0;

  // Model files.
  cfg.prog_file = "./hs_lb_without_bn_cuda/__model__";
  cfg.param_file = "./hs_lb_without_bn_cuda/__params__";

  return cfg;
}
// Time point type of the high-resolution clock.
using Time = std::chrono::high_resolution_clock::time_point;

// Samples the high-resolution clock.
Time time() {
  typedef std::chrono::high_resolution_clock Clock;
  return Clock::now();
}

// Milliseconds elapsed between t1 and t2 (negative when t2 precedes t1).
// The interval is truncated to whole microseconds before the conversion.
double time_diff(Time t1, Time t2) {
  const auto whole_micros =
      std::chrono::duration_cast<std::chrono::microseconds>(t2 - t1).count();
  return whole_micros / 1000.0;
}
void
test_naive
(
int
batch_size
,
std
::
string
model_path
)
{
NativeConfig
config
=
GetConfig
();
int
height
=
449
;
int
width
=
581
;
std
::
vector
<
float
>
data
;
for
(
int
i
=
0
;
i
<
3
*
height
*
width
;
++
i
)
{
data
.
push_back
(
0.0
);
}
// read data
// std::ifstream infile("new_file.list");
// std::string temp_s;
// std::vector<std::string> all_files;
// while (!infile.eof()) {
// infile >> temp_s;
// all_files.push_back(temp_s);
// }
// // size_t file_num = all_files.size();
// infile.close();
// // =============read file list =============
// for (size_t f_k = 0; f_k < 1; f_k++) {
// std::ifstream in_img(all_files[f_k]);
// std::cout << all_files[f_k] << std::endl;
// float temp_v;
// float sum_n = 0.0;
// std::vector<float> data;
// while (!in_img.eof()) {
// in_img >> temp_v;
// data.push_back(float(temp_v));
// sum_n += temp_v;
// }
// in_img.close();
// std::cout << "sum: " << sum_n << std::endl;
PaddleTensor
tensor
;
tensor
.
shape
=
std
::
vector
<
int
>
({
batch_size
,
3
,
height
,
width
});
tensor
.
data
.
Resize
(
sizeof
(
float
)
*
batch_size
*
3
*
height
*
width
);
std
::
copy
(
data
.
begin
(),
data
.
end
(),
static_cast
<
float
*>
(
tensor
.
data
.
data
()));
tensor
.
dtype
=
PaddleDType
::
FLOAT32
;
std
::
vector
<
PaddleTensor
>
paddle_tensor_feeds
(
1
,
tensor
);
constexpr
int
num_jobs
=
5
;
// each job run 1 batch
std
::
vector
<
std
::
thread
>
threads
;
// using PtrPred = std::vector<std::unique_ptr<PaddlePredictor>>;
std
::
vector
<
std
::
unique_ptr
<
PaddlePredictor
>>
predictors
;
for
(
int
tid
=
0
;
tid
<
num_jobs
;
++
tid
)
{
auto
&
pred
=
CreatePaddlePredictor
<
NativeConfig
>
(
config
);
predictors
.
emplace_back
(
std
::
move
(
pred
));
}
using
namespace
std
::
chrono_literals
;
// std::this_thread::sleep_for(std::chrono::seconds(20));
std
::
cout
<<
"before start predict"
;
int
epoches
=
100000
;
for
(
int
tid
=
0
;
tid
<
num_jobs
;
++
tid
)
{
threads
.
emplace_back
([
&
,
tid
]()
{
// auto predictor = CreatePaddlePredictor<NativeConfig>(config);
auto
&
predictor
=
predictors
[
tid
];
// auto& predictor = predictors[tid];
// auto predictor = preds[tid];
// std::this_thread::sleep_for(std::chrono::seconds(20));
PaddleTensor
tensor_out
;
std
::
vector
<
PaddleTensor
>
outputs
(
1
,
tensor_out
);
for
(
size_t
i
=
0
;
i
<
epoches
;
i
++
)
{
ASSERT_TRUE
(
predictor
->
Run
(
paddle_tensor_feeds
,
&
outputs
));
VLOG
(
0
)
<<
"tid : "
<<
tid
<<
" run: "
<<
i
<<
"finished"
;
// std::cout <<"tid : " << tid << " run: " << i << "finished" <<
// std::endl;
ASSERT_EQ
(
outputs
.
size
(),
1UL
);
// int64_t* data_o = static_cast<int64_t*>(outputs[0].data.data());
// int64_t sum_out = 0;
// for (size_t j = 0; j < outputs[0].data.length() / sizeof(int64_t);
// ++j) {
// sum_out += data_o[j];
// }
// std::cout << "tid : " << tid << "pass : " << i << " " << sum_out
// << std::endl;
}
});
}
for
(
int
i
=
0
;
i
<
num_jobs
;
++
i
)
{
threads
[
i
].
join
();
}
}
// }
}
// namespace paddle
// Entry point: starts the multi-threaded stress run with a batch of one and
// an empty model path.
int main(int argc, char** argv) {
  const int batch_size = 1 << 0;
  paddle::test_naive(batch_size, "");
  return 0;
}
paddle/fluid/operators/conv_op.cc
浏览文件 @
eb2f7ed2
...
...
@@ -11,9 +11,6 @@ distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#define GLOG_NO_ABBREVIATED_SEVERITIES
#define GOOGLE_GLOG_DLL_DECL
#include <glog/logging.h>
#include "paddle/fluid/operators/conv_op.h"
...
...
@@ -38,7 +35,6 @@ void ConvOp::InferShape(framework::InferShapeContext* ctx) const {
PADDLE_ENFORCE
(
ctx
->
HasOutput
(
"Output"
),
"Output(Output) of ConvOp should not be null."
);
VLOG
(
3
)
<<
"Conv op infershape"
;
auto
in_dims
=
ctx
->
GetInputDim
(
"Input"
);
auto
filter_dims
=
ctx
->
GetInputDim
(
"Filter"
);
...
...
@@ -46,51 +42,32 @@ void ConvOp::InferShape(framework::InferShapeContext* ctx) const {
std
::
vector
<
int
>
paddings
=
ctx
->
Attrs
().
Get
<
std
::
vector
<
int
>>
(
"paddings"
);
int
groups
=
ctx
->
Attrs
().
Get
<
int
>
(
"groups"
);
std
::
vector
<
int
>
dilations
=
ctx
->
Attrs
().
Get
<
std
::
vector
<
int
>>
(
"dilations"
);
VLOG
(
3
)
<<
"Conv op Before check"
;
in_dims
.
size
()
==
4
||
in_dims
.
size
()
==
5
;
// PADDLE_ENFORCE(in_dims.size() == 4 || in_dims.size() == 5,
// "Conv intput should be 4-D or 5-D tensor.");
VLOG
(
3
)
<<
"check0"
;
// PADDLE_ENFORCE_EQ(
// in_dims.size(), filter_dims.size(),
// "Conv input dimension and filter dimension should be the same.");
in_dims
.
size
()
==
filter_dims
.
size
();
VLOG
(
3
)
<<
"enforce check0"
;
PADDLE_ENFORCE
(
in_dims
.
size
()
==
4
||
in_dims
.
size
()
==
5
,
"Conv intput should be 4-D or 5-D tensor."
);
PADDLE_ENFORCE_EQ
(
in_dims
.
size
(),
filter_dims
.
size
(),
"Conv input dimension and filter dimension should be the same."
);
PADDLE_ENFORCE
(
in_dims
.
size
()
-
strides
.
size
()
==
2U
,
"Conv input dimension and strides dimension should be consistent."
);
VLOG
(
3
)
<<
"check1"
;
PADDLE_ENFORCE_EQ
(
paddings
.
size
(),
strides
.
size
(),
"Conv paddings dimension and Conv strides dimension should be the same."
);
VLOG
(
3
)
<<
"check2"
;
// in_dims[1] == filter_dims[1] * groups;
// PADDLE_ENFORCE_EQ(in_dims[1], filter_dims[1] * groups,
// "The number of input channels should be equal to filter "
// "channels * groups.");
VLOG
(
3
)
<<
"check3"
;
// filter_dims[0] % groups == 0 ;
// PADDLE_ENFORCE_EQ(
// filter_dims[0] % groups, 0,
// "The number of output channels should be divided by groups.");
VLOG
(
3
)
<<
"filter"
<<
filter_dims
.
size
();
VLOG
(
3
)
<<
"filter"
<<
filter_dims
[
0
];
VLOG
(
3
)
<<
"check4"
;
VLOG
(
3
)
<<
"filter"
<<
filter_dims
[
1
];
VLOG
(
3
)
<<
"dims"
<<
in_dims
[
0
];
PADDLE_ENFORCE_EQ
(
in_dims
[
1
],
filter_dims
[
1
]
*
groups
,
"The number of input channels should be equal to filter "
"channels * groups."
);
PADDLE_ENFORCE_EQ
(
filter_dims
[
0
]
%
groups
,
0
,
"The number of output channels should be divided by groups."
);
std
::
vector
<
int64_t
>
output_shape
({
in_dims
[
0
],
filter_dims
[
0
]});
VLOG
(
3
)
<<
"output shape"
;
for
(
size_t
i
=
0
;
i
<
strides
.
size
();
++
i
)
{
VLOG
(
3
)
<<
"check5"
;
output_shape
.
push_back
(
ConvOutputSize
(
in_dims
[
i
+
2
],
filter_dims
[
i
+
2
],
dilations
[
i
],
paddings
[
i
],
strides
[
i
]));
VLOG
(
3
)
<<
"check pass"
;
}
VLOG
(
3
)
<<
"Conv InferShape Pass"
;
ctx
->
SetOutputDim
(
"Output"
,
framework
::
make_ddim
(
output_shape
));
ctx
->
ShareLoD
(
"Input"
,
"Output"
);
}
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录