Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
PaddlePaddle
Paddle
提交
b8ea7a08
P
Paddle
项目概览
PaddlePaddle
/
Paddle
大约 1 年 前同步成功
通知
2298
Star
20931
Fork
5422
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
1423
列表
看板
标记
里程碑
合并请求
543
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
1,423
Issue
1,423
列表
看板
标记
里程碑
合并请求
543
合并请求
543
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
体验新版 GitCode,发现更多精彩内容 >>
提交
b8ea7a08
编写于
7月 11, 2018
作者:
C
chenweihang
浏览文件
操作
浏览文件
下载
差异文件
Merge branch 'develop' of
https://github.com/PaddlePaddle/Paddle
into unsqueeze_op
上级
fbef49e7
f9202447
变更
18
隐藏空白更改
内联
并排
Showing
18 changed file
with
496 addition
and
133 deletion
+496
-133
paddle/contrib/inference/CMakeLists.txt
paddle/contrib/inference/CMakeLists.txt
+16
-1
paddle/contrib/inference/check_symbol.sh
paddle/contrib/inference/check_symbol.sh
+12
-0
paddle/contrib/inference/demo/CMakeLists.txt
paddle/contrib/inference/demo/CMakeLists.txt
+0
-2
paddle/contrib/inference/demo_ci/CMakeLists.txt
paddle/contrib/inference/demo_ci/CMakeLists.txt
+77
-0
paddle/contrib/inference/demo_ci/run.sh
paddle/contrib/inference/demo_ci/run.sh
+34
-0
paddle/contrib/inference/demo_ci/simple_on_word2vec.cc
paddle/contrib/inference/demo_ci/simple_on_word2vec.cc
+39
-23
paddle/contrib/inference/paddle_inference_api.map
paddle/contrib/inference/paddle_inference_api.map
+6
-0
paddle/contrib/inference/paddle_inference_api.sym
paddle/contrib/inference/paddle_inference_api.sym
+1
-0
paddle/fluid/inference/CMakeLists.txt
paddle/fluid/inference/CMakeLists.txt
+6
-0
paddle/fluid/inference/paddle_fluid.sym
paddle/fluid/inference/paddle_fluid.sym
+1
-0
paddle/fluid/operators/conv_mkldnn_op.cc
paddle/fluid/operators/conv_mkldnn_op.cc
+120
-52
paddle/fluid/operators/detection/prior_box_op.cc
paddle/fluid/operators/detection/prior_box_op.cc
+7
-0
paddle/fluid/operators/detection/prior_box_op.cu
paddle/fluid/operators/detection/prior_box_op.cu
+26
-10
paddle/fluid/operators/detection/prior_box_op.h
paddle/fluid/operators/detection/prior_box_op.h
+50
-15
paddle/fluid/platform/mkldnn_helper.h
paddle/fluid/platform/mkldnn_helper.h
+9
-8
paddle/scripts/paddle_build.sh
paddle/scripts/paddle_build.sh
+14
-1
python/paddle/fluid/layers/detection.py
python/paddle/fluid/layers/detection.py
+18
-4
python/paddle/fluid/tests/unittests/test_prior_box_op.py
python/paddle/fluid/tests/unittests/test_prior_box_op.py
+60
-17
未找到文件。
paddle/contrib/inference/CMakeLists.txt
浏览文件 @
b8ea7a08
...
@@ -45,6 +45,10 @@ endfunction(inference_api_test)
...
@@ -45,6 +45,10 @@ endfunction(inference_api_test)
cc_library
(
paddle_inference_api
cc_library
(
paddle_inference_api
SRCS paddle_inference_api.cc paddle_inference_api_impl.cc
SRCS paddle_inference_api.cc paddle_inference_api_impl.cc
DEPS
${
FLUID_CORE_MODULES
}
${
GLOB_OP_LIB
}
)
DEPS
${
FLUID_CORE_MODULES
}
${
GLOB_OP_LIB
}
)
if
(
NOT APPLE
)
set
(
LINK_FLAGS
"-Wl,--retain-symbols-file
${
CMAKE_CURRENT_SOURCE_DIR
}
/paddle_inference_api.sym"
)
set_target_properties
(
paddle_inference_api PROPERTIES LINK_FLAGS
"
${
LINK_FLAGS
}
"
)
endif
()
# Here the shared library doesn't depend on other fluid libraries, or double free will occur.
# Here the shared library doesn't depend on other fluid libraries, or double free will occur.
cc_library
(
paddle_inference_api_shared SHARED
cc_library
(
paddle_inference_api_shared SHARED
...
@@ -53,8 +57,19 @@ add_dependencies(paddle_inference_api_shared ${FLUID_CORE_MODULES} ${GLOB_OP_LIB
...
@@ -53,8 +57,19 @@ add_dependencies(paddle_inference_api_shared ${FLUID_CORE_MODULES} ${GLOB_OP_LIB
set_target_properties
(
paddle_inference_api_shared PROPERTIES OUTPUT_NAME paddle_inference_api
)
set_target_properties
(
paddle_inference_api_shared PROPERTIES OUTPUT_NAME paddle_inference_api
)
if
(
NOT APPLE
)
if
(
NOT APPLE
)
set
(
LINK_FLAGS
"-
fPIC -fvisibility=hidden
"
)
set
(
LINK_FLAGS
"-
Wl,--version-script
${
CMAKE_CURRENT_SOURCE_DIR
}
/paddle_inference_api.map
"
)
set_target_properties
(
paddle_inference_api_shared PROPERTIES LINK_FLAGS
"
${
LINK_FLAGS
}
"
)
set_target_properties
(
paddle_inference_api_shared PROPERTIES LINK_FLAGS
"
${
LINK_FLAGS
}
"
)
FILE
(
WRITE
${
CMAKE_CURRENT_BINARY_DIR
}
/check_symbol.cmake
"execute_process(COMMAND bash -c
\"
${
CMAKE_CURRENT_SOURCE_DIR
}
/check_symbol.sh"
"
${
CMAKE_CURRENT_BINARY_DIR
}
/libpaddle_inference_api.so
\"
RESULT_VARIABLE symbol_res)
\n
"
"if(NOT
\"\$
{symbol_res}
\"
STREQUAL
\"
0
\"
)
\n
"
" message(FATAL_ERROR
\"
Check symbol failed.
\"
)
\n
"
"endif()
\n
"
)
add_custom_command
(
OUTPUT
"
${
CMAKE_CURRENT_BINARY_DIR
}
/.check_symbol"
COMMAND
${
CMAKE_COMMAND
}
-P
"
${
CMAKE_CURRENT_BINARY_DIR
}
/check_symbol.cmake"
DEPENDS paddle_inference_api_shared
)
add_custom_target
(
check_symbol ALL DEPENDS
"
${
CMAKE_CURRENT_BINARY_DIR
}
/.check_symbol"
)
endif
()
endif
()
cc_test
(
test_paddle_inference_api
cc_test
(
test_paddle_inference_api
...
...
paddle/contrib/inference/check_symbol.sh
0 → 100755
浏览文件 @
b8ea7a08
#!/bin/bash
lib
=
$1
if
[
$#
-ne
1
]
;
then
echo
"No input library"
;
exit
-1
;
fi
num_paddle_syms
=
$(
nm
-D
--defined-only
${
lib
}
|
grep
paddle |
wc
-l
)
num_google_syms
=
$(
nm
-D
--defined-only
${
lib
}
|
grep
google |
wc
-l
)
if
[
$num_paddle_syms
-le
0
]
;
then
echo
"Have no paddle symbols"
;
exit
-1
;
fi
if
[
$num_google_syms
-ge
1
]
;
then
echo
"Have some google symbols"
;
exit
-1
;
fi
exit
0
paddle/contrib/inference/demo/CMakeLists.txt
浏览文件 @
b8ea7a08
...
@@ -13,8 +13,6 @@
...
@@ -13,8 +13,6 @@
# limitations under the License.
# limitations under the License.
#
#
inference_api_test
(
simple_on_word2vec ARGS test_word2vec
)
option
(
WITH_INFERENCE_DEMO
"Compile with Inference demo"
OFF
)
option
(
WITH_INFERENCE_DEMO
"Compile with Inference demo"
OFF
)
if
(
NOT WITH_INFERENCE_DEMO
)
if
(
NOT WITH_INFERENCE_DEMO
)
return
()
return
()
...
...
paddle/contrib/inference/demo_ci/CMakeLists.txt
0 → 100644
浏览文件 @
b8ea7a08
cmake_minimum_required
(
VERSION 3.0
)
project
(
cpp_inference_demo CXX C
)
set
(
CMAKE_CXX_FLAGS
"
${
CMAKE_CXX_FLAGS
}
-std=c++11"
)
if
(
NOT DEFINED PADDLE_LIB
)
message
(
FATAL_ERROR
"please set PADDLE_LIB with -DPADDLE_LIB=/path/paddle/lib"
)
endif
()
if
(
NOT DEFINED DEMO_NAME
)
message
(
FATAL_ERROR
"please set DEMO_NAME with -DDEMO_NAME=demo_name"
)
endif
()
option
(
WITH_MKL
"Compile demo with MKL/OpenBlas support, default use MKL."
ON
)
option
(
WITH_GPU
"Compile demo with GPU/CPU, default use CPU."
OFF
)
option
(
WITH_STATIC_LIB
"Compile demo with static/shared library, default use static."
ON
)
if
(
WITH_GPU
)
set
(
CUDA_LIB
"/usr/local/cuda/lib64/"
CACHE STRING
"CUDA Library"
)
endif
()
include_directories
(
"
${
PADDLE_LIB
}
"
)
include_directories
(
"
${
PADDLE_LIB
}
/third_party/install/protobuf/include"
)
include_directories
(
"
${
PADDLE_LIB
}
/third_party/install/glog/include"
)
include_directories
(
"
${
PADDLE_LIB
}
/third_party/install/gflags/include"
)
include_directories
(
"
${
PADDLE_LIB
}
/third_party/install/snappy/include"
)
include_directories
(
"
${
PADDLE_LIB
}
/third_party/install/snappystream/include"
)
include_directories
(
"
${
PADDLE_LIB
}
/third_party/install/zlib/include"
)
include_directories
(
"
${
PADDLE_LIB
}
/third_party/boost"
)
include_directories
(
"
${
PADDLE_LIB
}
/third_party/eigen3"
)
link_directories
(
"
${
PADDLE_LIB
}
/third_party/install/snappy/lib"
)
link_directories
(
"
${
PADDLE_LIB
}
/third_party/install/snappystream/lib"
)
link_directories
(
"
${
PADDLE_LIB
}
/third_party/install/protobuf/lib"
)
link_directories
(
"
${
PADDLE_LIB
}
/third_party/install/glog/lib"
)
link_directories
(
"
${
PADDLE_LIB
}
/third_party/install/gflags/lib"
)
link_directories
(
"
${
PADDLE_LIB
}
/third_party/install/zlib/lib"
)
add_executable
(
${
DEMO_NAME
}
${
DEMO_NAME
}
.cc
)
if
(
WITH_MKL
)
include_directories
(
"
${
PADDLE_LIB
}
/third_party/install/mklml/include"
)
set
(
MATH_LIB
${
PADDLE_LIB
}
/third_party/install/mklml/lib/libmklml_intel.so
${
PADDLE_LIB
}
/third_party/install/mklml/lib/libiomp5.so
)
set
(
MKLDNN_PATH
"
${
PADDLE_LIB
}
/third_party/install/mkldnn"
)
if
(
EXISTS
${
MKLDNN_PATH
}
)
include_directories
(
"
${
MKLDNN_PATH
}
/include"
)
set
(
MKLDNN_LIB
${
MKLDNN_PATH
}
/lib/libmkldnn.so.0
)
endif
()
else
()
set
(
MATH_LIB
${
PADDLE_LIB
}
/third_party/install/openblas/lib/libopenblas.a
)
endif
()
if
(
WITH_STATIC_LIB
)
set
(
DEPS
"-Wl,--whole-archive"
${
PADDLE_LIB
}
/paddle/fluid/inference/libpaddle_fluid.a
"-Wl,--no-whole-archive"
${
PADDLE_LIB
}
/contrib/inference/libpaddle_inference_api.a
)
else
()
# Note: libpaddle_inference_api.so must put before libpaddle_fluid.so
set
(
DEPS
${
PADDLE_LIB
}
/contrib/inference/libpaddle_inference_api.so
${
PADDLE_LIB
}
/paddle/fluid/inference/libpaddle_fluid.so
)
endif
()
set
(
EXTERNAL_LIB
"-lrt -ldl -lpthread"
)
set
(
DEPS
${
DEPS
}
${
MATH_LIB
}
${
MKLDNN_LIB
}
glog gflags protobuf snappystream snappy z
${
EXTERNAL_LIB
}
)
if
(
WITH_GPU
)
set
(
DEPS
${
DEPS
}
${
CUDA_LIB
}
/libcudart.so
)
endif
()
target_link_libraries
(
${
DEMO_NAME
}
${
DEPS
}
)
paddle/contrib/inference/demo_ci/run.sh
0 → 100755
浏览文件 @
b8ea7a08
set
-x
PADDLE_ROOT
=
$1
WITH_MKL
=
$2
WITH_GPU
=
$3
if
[
$3
==
"ON"
]
;
then
use_gpu_list
=
'true false'
else
use_gpu_list
=
'false'
fi
mkdir
-p
build
cd
build
for
WITH_STATIC_LIB
in
false
;
do
rm
-rf
*
cmake ..
-DPADDLE_LIB
=
${
PADDLE_ROOT
}
/build/fluid_install_dir/
\
-DWITH_MKL
=
$WITH_MKL
\
-DDEMO_NAME
=
simple_on_word2vec
\
-DWITH_GPU
=
$WITH_GPU
\
-DWITH_STATIC_LIB
=
$WITH_STATIC_LIB
make
for
use_gpu
in
$use_gpu_list
;
do
./simple_on_word2vec
\
--dirname
=
${
PADDLE_ROOT
}
/build/python/paddle/fluid/tests/book/word2vec.inference.model
\
--use_gpu
=
$use_gpu
done
done
if
[
$?
-eq
0
]
;
then
exit
0
else
echo
"inference demo runs fail."
exit
1
fi
set
+x
paddle/contrib/inference/demo/simple_on_word2vec.cc
→
paddle/contrib/inference/demo
_ci
/simple_on_word2vec.cc
浏览文件 @
b8ea7a08
...
@@ -16,21 +16,27 @@ limitations under the License. */
...
@@ -16,21 +16,27 @@ limitations under the License. */
* This file contains a simple demo for how to take a model for inference.
* This file contains a simple demo for how to take a model for inference.
*/
*/
#include <gflags/gflags.h>
#include <glog/logging.h>
#include <glog/logging.h>
#include <gtest/gtest.h>
#include <memory>
#include <memory>
#include <thread>
#include <thread>
#include "paddle/contrib/inference/paddle_inference_api.h"
#include "contrib/inference/paddle_inference_api.h"
#include "paddle/fluid/platform/enforce.h"
DEFINE_string
(
dirname
,
""
,
"Directory of the inference model."
);
DEFINE_bool
(
use_gpu
,
false
,
"Whether use gpu."
);
namespace
paddle
{
namespace
paddle
{
namespace
demo
{
namespace
demo
{
DEFINE_string
(
dirname
,
""
,
"Directory of the inference model."
);
void
Main
(
bool
use_gpu
)
{
void
Main
(
bool
use_gpu
)
{
//# 1. Create PaddlePredictor with a config.
//# 1. Create PaddlePredictor with a config.
NativeConfig
config
;
NativeConfig
config
;
config
.
model_dir
=
FLAGS_dirname
+
"word2vec.inference.model"
;
if
(
FLAGS_dirname
.
empty
())
{
LOG
(
INFO
)
<<
"Usage: ./simple_on_word2vec --dirname=path/to/your/model"
;
exit
(
1
);
}
config
.
model_dir
=
FLAGS_dirname
;
config
.
use_gpu
=
use_gpu
;
config
.
use_gpu
=
use_gpu
;
config
.
fraction_of_gpu_memory
=
0.15
;
config
.
fraction_of_gpu_memory
=
0.15
;
config
.
device
=
0
;
config
.
device
=
0
;
...
@@ -54,12 +60,16 @@ void Main(bool use_gpu) {
...
@@ -54,12 +60,16 @@ void Main(bool use_gpu) {
CHECK
(
predictor
->
Run
(
slots
,
&
outputs
));
CHECK
(
predictor
->
Run
(
slots
,
&
outputs
));
//# 4. Get output.
//# 4. Get output.
ASSERT_EQ
(
outputs
.
size
(),
1UL
);
PADDLE_ENFORCE
(
outputs
.
size
(),
1UL
);
LOG
(
INFO
)
<<
"output buffer size: "
<<
outputs
.
front
().
data
.
length
();
// Check the output buffer size and result of each tid.
PADDLE_ENFORCE
(
outputs
.
front
().
data
.
length
(),
33168UL
);
float
result
[
5
]
=
{
0.00129761
,
0.00151112
,
0.000423564
,
0.00108815
,
0.000932706
};
const
size_t
num_elements
=
outputs
.
front
().
data
.
length
()
/
sizeof
(
float
);
const
size_t
num_elements
=
outputs
.
front
().
data
.
length
()
/
sizeof
(
float
);
// The outputs' buffers are in CPU memory.
// The outputs' buffers are in CPU memory.
for
(
size_t
i
=
0
;
i
<
std
::
min
(
5UL
,
num_elements
);
i
++
)
{
for
(
size_t
i
=
0
;
i
<
std
::
min
(
5UL
,
num_elements
);
i
++
)
{
LOG
(
INFO
)
<<
static_cast
<
float
*>
(
outputs
.
front
().
data
.
data
())[
i
];
PADDLE_ENFORCE
(
static_cast
<
float
*>
(
outputs
.
front
().
data
.
data
())[
i
],
result
[
i
]);
}
}
}
}
}
}
...
@@ -68,7 +78,7 @@ void MainThreads(int num_threads, bool use_gpu) {
...
@@ -68,7 +78,7 @@ void MainThreads(int num_threads, bool use_gpu) {
// Multi-threads only support on CPU
// Multi-threads only support on CPU
// 0. Create PaddlePredictor with a config.
// 0. Create PaddlePredictor with a config.
NativeConfig
config
;
NativeConfig
config
;
config
.
model_dir
=
FLAGS_dirname
+
"word2vec.inference.model"
;
config
.
model_dir
=
FLAGS_dirname
;
config
.
use_gpu
=
use_gpu
;
config
.
use_gpu
=
use_gpu
;
config
.
fraction_of_gpu_memory
=
0.15
;
config
.
fraction_of_gpu_memory
=
0.15
;
config
.
device
=
0
;
config
.
device
=
0
;
...
@@ -94,14 +104,17 @@ void MainThreads(int num_threads, bool use_gpu) {
...
@@ -94,14 +104,17 @@ void MainThreads(int num_threads, bool use_gpu) {
CHECK
(
predictor
->
Run
(
inputs
,
&
outputs
));
CHECK
(
predictor
->
Run
(
inputs
,
&
outputs
));
// 4. Get output.
// 4. Get output.
ASSERT_EQ
(
outputs
.
size
(),
1UL
);
PADDLE_ENFORCE
(
outputs
.
size
(),
1UL
);
LOG
(
INFO
)
<<
"TID: "
<<
tid
<<
", "
// Check the output buffer size and result of each tid.
<<
"output buffer size: "
<<
outputs
.
front
().
data
.
length
();
PADDLE_ENFORCE
(
outputs
.
front
().
data
.
length
(),
33168UL
);
float
result
[
5
]
=
{
0.00129761
,
0.00151112
,
0.000423564
,
0.00108815
,
0.000932706
};
const
size_t
num_elements
=
const
size_t
num_elements
=
outputs
.
front
().
data
.
length
()
/
sizeof
(
float
);
outputs
.
front
().
data
.
length
()
/
sizeof
(
float
);
// The outputs' buffers are in CPU memory.
// The outputs' buffers are in CPU memory.
for
(
size_t
i
=
0
;
i
<
std
::
min
(
5UL
,
num_elements
);
i
++
)
{
for
(
size_t
i
=
0
;
i
<
std
::
min
(
5UL
,
num_elements
);
i
++
)
{
LOG
(
INFO
)
<<
static_cast
<
float
*>
(
outputs
.
front
().
data
.
data
())[
i
];
PADDLE_ENFORCE
(
static_cast
<
float
*>
(
outputs
.
front
().
data
.
data
())[
i
],
result
[
i
]);
}
}
}
}
});
});
...
@@ -111,15 +124,18 @@ void MainThreads(int num_threads, bool use_gpu) {
...
@@ -111,15 +124,18 @@ void MainThreads(int num_threads, bool use_gpu) {
}
}
}
}
TEST
(
demo
,
word2vec_cpu
)
{
Main
(
false
/*use_gpu*/
);
}
TEST
(
demo_multi_threads
,
word2vec_cpu_1
)
{
MainThreads
(
1
,
false
/*use_gpu*/
);
}
TEST
(
demo_multi_threads
,
word2vec_cpu_4
)
{
MainThreads
(
4
,
false
/*use_gpu*/
);
}
#ifdef PADDLE_WITH_CUDA
TEST
(
demo
,
word2vec_gpu
)
{
Main
(
true
/*use_gpu*/
);
}
TEST
(
demo_multi_threads
,
word2vec_gpu_1
)
{
MainThreads
(
1
,
true
/*use_gpu*/
);
}
TEST
(
demo_multi_threads
,
word2vec_gpu_4
)
{
MainThreads
(
4
,
true
/*use_gpu*/
);
}
#endif
}
// namespace demo
}
// namespace demo
}
// namespace paddle
}
// namespace paddle
int
main
(
int
argc
,
char
**
argv
)
{
google
::
ParseCommandLineFlags
(
&
argc
,
&
argv
,
true
);
paddle
::
demo
::
Main
(
false
/* use_gpu*/
);
paddle
::
demo
::
MainThreads
(
1
,
false
/* use_gpu*/
);
paddle
::
demo
::
MainThreads
(
4
,
false
/* use_gpu*/
);
if
(
FLAGS_use_gpu
)
{
paddle
::
demo
::
Main
(
true
/*use_gpu*/
);
paddle
::
demo
::
MainThreads
(
1
,
true
/*use_gpu*/
);
paddle
::
demo
::
MainThreads
(
4
,
true
/*use_gpu*/
);
}
return
0
;
}
paddle/contrib/inference/paddle_inference_api.map
0 → 100644
浏览文件 @
b8ea7a08
{
global:
*paddle*;
local:
*;
};
paddle/contrib/inference/paddle_inference_api.sym
0 → 100644
浏览文件 @
b8ea7a08
*paddle*
paddle/fluid/inference/CMakeLists.txt
浏览文件 @
b8ea7a08
...
@@ -13,6 +13,12 @@ endif()
...
@@ -13,6 +13,12 @@ endif()
# Create static library
# Create static library
cc_library
(
paddle_fluid DEPS
${
fluid_modules
}
paddle_fluid_api
)
cc_library
(
paddle_fluid DEPS
${
fluid_modules
}
paddle_fluid_api
)
if
(
NOT APPLE
)
# TODO(liuyiqu: Temporarily disable the link flag because it is not support on Mac.
set
(
LINK_FLAGS
"-Wl,--retain-symbols-file
${
CMAKE_CURRENT_SOURCE_DIR
}
/paddle_fluid.sym"
)
set_target_properties
(
paddle_fluid PROPERTIES LINK_FLAGS
"
${
LINK_FLAGS
}
"
)
endif
()
# Create shared library
# Create shared library
cc_library
(
paddle_fluid_shared SHARED
cc_library
(
paddle_fluid_shared SHARED
SRCS io.cc
SRCS io.cc
...
...
paddle/fluid/inference/paddle_fluid.sym
0 → 100644
浏览文件 @
b8ea7a08
*paddle*
paddle/fluid/operators/conv_mkldnn_op.cc
浏览文件 @
b8ea7a08
...
@@ -29,6 +29,79 @@ using mkldnn::stream;
...
@@ -29,6 +29,79 @@ using mkldnn::stream;
using
platform
::
to_void_cast
;
using
platform
::
to_void_cast
;
using
platform
::
GetMKLDNNFormat
;
using
platform
::
GetMKLDNNFormat
;
class
ConvMKLDNNHandler
:
public
platform
::
MKLDNNHandler
{
public:
ConvMKLDNNHandler
(
std
::
shared_ptr
<
mkldnn
::
convolution_forward
::
primitive_desc
>
conv_pd
,
const
platform
::
MKLDNNDeviceContext
&
dev_ctx
,
mkldnn
::
engine
engine
,
const
std
::
string
&
base_key
)
:
platform
::
MKLDNNHandler
(
dev_ctx
,
engine
,
base_key
)
{
conv_pd_
=
conv_pd
;
}
std
::
shared_ptr
<
mkldnn
::
memory
>
AcquireDstMemoryFromPrimitive
(
void
*
ptr
)
{
return
this
->
AcquireMemoryFromPrimitive
(
conv_pd_
->
dst_primitive_desc
(),
ptr
,
"@dst_mem_p"
);
}
std
::
shared_ptr
<
mkldnn
::
memory
>
AcquireSrcMemoryFromPrimitive
(
const
std
::
shared_ptr
<
mkldnn
::
memory
>
user_memory_p
,
std
::
vector
<
mkldnn
::
primitive
>&
pipeline
)
{
auto
src_pd
=
conv_pd_
->
src_primitive_desc
();
auto
user_pd
=
user_memory_p
->
get_primitive_desc
();
return
this
->
AcquireMemory
(
src_pd
,
user_pd
,
user_memory_p
,
"@src_mem_p"
,
pipeline
);
}
std
::
shared_ptr
<
mkldnn
::
memory
>
AcquireWeightsMemoryFromPrimitive
(
const
std
::
shared_ptr
<
mkldnn
::
memory
>
user_weights_memory_p
,
std
::
vector
<
mkldnn
::
primitive
>&
pipeline
)
{
auto
user_weights_pd
=
user_weights_memory_p
->
get_primitive_desc
();
auto
weights_pd
=
conv_pd_
->
weights_primitive_desc
();
return
this
->
AcquireMemory
(
weights_pd
,
user_weights_pd
,
user_weights_memory_p
,
"@weights_mem_p"
,
pipeline
);
}
std
::
shared_ptr
<
mkldnn
::
convolution_forward
>
AcquireConvolution
(
std
::
shared_ptr
<
mkldnn
::
memory
>
src_memory_p
,
std
::
shared_ptr
<
mkldnn
::
memory
>
weights_memory_p
,
std
::
shared_ptr
<
mkldnn
::
memory
>
dst_memory_p
)
{
auto
prim_key
=
key_
+
"@conv_p"
;
auto
prim_desc_key
=
key_
+
"@conv_pd"
;
auto
conv_p
=
std
::
static_pointer_cast
<
mkldnn
::
convolution_forward
>
(
dev_ctx_
.
GetBlob
(
prim_key
));
PADDLE_ENFORCE
((
conv_p
!=
nullptr
)
||
(
is_reusing_
==
false
),
"Fail to find convolution primitive in device context"
);
if
(
conv_p
==
nullptr
)
{
conv_p
=
std
::
make_shared
<
mkldnn
::
convolution_forward
>
(
*
conv_pd_
,
*
(
src_memory_p
),
*
(
weights_memory_p
.
get
()),
*
(
dst_memory_p
.
get
()));
dev_ctx_
.
SetBlob
(
prim_key
,
conv_p
);
}
else
{
is_reusing_
=
true
;
}
return
conv_p
;
}
// Generate keys for storing/retriving primitives for this operator
// TODO(jczaja): Make hashing function more optimial
static
std
::
string
GetHash
(
memory
::
dims
&
input_dims
,
memory
::
dims
&
weights_dims
,
std
::
vector
<
int
>&
strides
,
std
::
vector
<
int
>&
paddings
,
std
::
vector
<
int
>&
dilations
,
int
groups
,
const
std
::
string
&
suffix
)
{
return
dims2str
(
input_dims
)
+
dims2str
(
weights_dims
)
+
dims2str
(
strides
)
+
dims2str
(
paddings
)
+
dims2str
(
dilations
)
+
std
::
to_string
(
groups
)
+
suffix
;
}
private:
std
::
shared_ptr
<
mkldnn
::
convolution_forward
::
primitive_desc
>
conv_pd_
;
};
template
<
typename
T
>
template
<
typename
T
>
class
ConvMKLDNNOpKernel
:
public
paddle
::
framework
::
OpKernel
<
T
>
{
class
ConvMKLDNNOpKernel
:
public
paddle
::
framework
::
OpKernel
<
T
>
{
public:
public:
...
@@ -36,10 +109,6 @@ class ConvMKLDNNOpKernel : public paddle::framework::OpKernel<T> {
...
@@ -36,10 +109,6 @@ class ConvMKLDNNOpKernel : public paddle::framework::OpKernel<T> {
PADDLE_ENFORCE
(
paddle
::
platform
::
is_cpu_place
(
ctx
.
GetPlace
()),
PADDLE_ENFORCE
(
paddle
::
platform
::
is_cpu_place
(
ctx
.
GetPlace
()),
"It must use CPUPlace."
);
"It must use CPUPlace."
);
// Get unique name for index
const
std
::
string
key
=
ctx
.
op
().
Output
(
"Output"
);
const
std
::
string
key_conv_pd
=
key
+
"@conv_pd"
;
auto
&
dev_ctx
=
auto
&
dev_ctx
=
ctx
.
template
device_context
<
paddle
::
platform
::
MKLDNNDeviceContext
>();
ctx
.
template
device_context
<
paddle
::
platform
::
MKLDNNDeviceContext
>();
const
auto
&
mkldnn_engine
=
dev_ctx
.
GetEngine
();
const
auto
&
mkldnn_engine
=
dev_ctx
.
GetEngine
();
...
@@ -80,68 +149,62 @@ class ConvMKLDNNOpKernel : public paddle::framework::OpKernel<T> {
...
@@ -80,68 +149,62 @@ class ConvMKLDNNOpKernel : public paddle::framework::OpKernel<T> {
paddle
::
framework
::
vectorize2int
(
filter
->
dims
());
paddle
::
framework
::
vectorize2int
(
filter
->
dims
());
std
::
vector
<
int
>
dst_tz
=
paddle
::
framework
::
vectorize2int
(
output
->
dims
());
std
::
vector
<
int
>
dst_tz
=
paddle
::
framework
::
vectorize2int
(
output
->
dims
());
// create mkldnn memory from input tensors (data/weights)
// Get unique name for storing MKLDNN primitives
auto
user_src_memory
=
memory
(
const
std
::
string
key
=
ConvMKLDNNHandler
::
GetHash
(
{{{
src_tz
},
memory
::
data_type
::
f32
,
input
->
format
()},
mkldnn_engine
},
src_tz
,
weights_tz
,
strides
,
paddings
,
dilations
,
groups
,
to_void_cast
(
input_data
));
ctx
.
op
().
Output
(
"Output"
));
auto
user_weights_memory
=
const
std
::
string
key_conv_pd
=
key
+
"@conv_pd"
;
memory
({{{
weights_tz
},
memory
::
data_type
::
f32
,
filter
->
format
()},
mkldnn_engine
},
std
::
vector
<
primitive
>
pipeline
;
to_void_cast
(
filter_data
));
auto
user_src_md
=
platform
::
MKLDNNMemDesc
(
{
src_tz
},
platform
::
MKLDNNGetDataType
<
T
>
(),
input
->
format
());
auto
user_weights_md
=
platform
::
MKLDNNMemDesc
(
{
weights_tz
},
platform
::
MKLDNNGetDataType
<
T
>
(),
filter
->
format
());
/* create memory descriptor for convolution without specified format
/* create memory descriptor for convolution without specified format
* ('any') which lets a primitive (convolution in this case) choose
* ('any') which lets a primitive (convolution in this case) choose
* the memory format preferred for best performance
* the memory format preferred for best performance
*/
*/
auto
src_md
=
platform
::
MKLDNNMemDesc
(
src_tz
,
memory
::
data_type
::
f32
,
auto
src_md
=
platform
::
MKLDNNMemDesc
(
memory
::
format
::
any
);
src_tz
,
platform
::
MKLDNNGetDataType
<
T
>
(),
memory
::
format
::
any
);
auto
weights_md
=
platform
::
MKLDNNMemDesc
(
auto
weights_md
=
platform
::
MKLDNNMemDesc
(
weights_tz
,
memory
::
data_type
::
f32
,
memory
::
format
::
any
);
weights_tz
,
platform
::
MKLDNNGetDataType
<
T
>
()
,
memory
::
format
::
any
);
auto
dst_md
=
platform
::
MKLDNNMemDesc
(
dst_tz
,
memory
::
data_type
::
f32
,
auto
dst_md
=
platform
::
MKLDNNMemDesc
(
memory
::
format
::
any
);
dst_tz
,
platform
::
MKLDNNGetDataType
<
T
>
(),
memory
::
format
::
any
);
// create a conv primitive descriptor and save it for usage in backward
// create a conv primitive descriptor and save it for usage in backward
std
::
shared_ptr
<
conv_fwd
::
primitive_desc
>
conv_pd
=
ConvFwdPrimitiveDesc
(
std
::
shared_ptr
<
conv_fwd
::
primitive_desc
>
conv_pd
=
ConvFwdPrimitiveDesc
(
src_md
,
weights_md
,
dst_md
,
strides
,
paddings
,
mkldnn_engine
);
src_md
,
weights_md
,
dst_md
,
strides
,
paddings
,
mkldnn_engine
);
// Save conv_pd/src_memory/weights_memory for backward pass
dev_ctx
.
SetBlob
(
key_conv_pd
,
conv_pd
);
// create reorder primitive if the input format is not the preferred one
ConvMKLDNNHandler
handler
(
conv_pd
,
dev_ctx
,
mkldnn_engine
,
key
);
auto
src_memory
=
user_src_memory
;
primitive
reorder_src
;
bool
is_src_reordered
=
false
;
if
(
memory
::
primitive_desc
(
conv_pd
->
src_primitive_desc
())
!=
user_src_memory
.
get_primitive_desc
())
{
src_memory
=
memory
(
conv_pd
->
src_primitive_desc
());
reorder_src
=
reorder
(
user_src_memory
,
src_memory
);
is_src_reordered
=
true
;
}
auto
weights_memory
=
user_weights_memory
;
primitive
reorder_weights
;
bool
is_weights_reordered
=
false
;
if
(
memory
::
primitive_desc
(
conv_pd
->
weights_primitive_desc
())
!=
user_weights_memory
.
get_primitive_desc
())
{
weights_memory
=
memory
(
conv_pd
->
weights_primitive_desc
());
reorder_weights
=
reorder
(
user_weights_memory
,
weights_memory
);
is_weights_reordered
=
true
;
}
// create memory primitive for conv dst
// create mkldnn memory from input tensors (data/weights)
auto
dst_memory
=
memory
(
conv_pd
->
dst_primitive_desc
(),
output_data
);
auto
user_src_memory_p
=
handler
.
AcquireSrcMemory
(
user_src_md
,
to_void_cast
<
T
>
(
input_data
));
auto
user_weights_memory_p
=
handler
.
AcquireWeightsMemory
(
user_weights_md
,
to_void_cast
<
T
>
(
filter_data
));
// create reorder primitive if the input format is not the preferred one
auto
src_memory_p
=
handler
.
AcquireSrcMemoryFromPrimitive
(
user_src_memory_p
,
pipeline
);
auto
weights_memory_p
=
handler
.
AcquireWeightsMemoryFromPrimitive
(
user_weights_memory_p
,
pipeline
);
auto
dst_memory_p
=
handler
.
AcquireDstMemoryFromPrimitive
(
to_void_cast
<
T
>
(
output_data
));
// create convolution op primitive
// create convolution op primitive
auto
conv_prim
=
conv_fwd
(
*
conv_pd
,
src_memory
,
weights_memory
,
dst_memory
);
auto
conv_p
=
handler
.
AcquireConvolution
(
src_memory_p
,
weights_memory_p
,
dst_memory_p
);
// push primitive to stream and wait until it's executed
// push primitive to stream and wait until it's executed
std
::
vector
<
primitive
>
pipeline
;
pipeline
.
push_back
(
*
conv_p
);
if
(
is_src_reordered
)
pipeline
.
push_back
(
reorder_src
);
if
(
is_weights_reordered
)
pipeline
.
push_back
(
reorder_weights
);
pipeline
.
push_back
(
conv_prim
);
stream
(
stream
::
kind
::
eager
).
submit
(
pipeline
).
wait
();
stream
(
stream
::
kind
::
eager
).
submit
(
pipeline
).
wait
();
// Save conv_pd/src_memory/weights_memory for backward pass
dev_ctx
.
SetBlob
(
key_conv_pd
,
conv_pd
);
output
->
set_layout
(
DataLayout
::
kMKLDNN
);
output
->
set_layout
(
DataLayout
::
kMKLDNN
);
output
->
set_format
(
GetMKLDNNFormat
(
dst_memory
));
output
->
set_format
(
GetMKLDNNFormat
(
*
dst_memory_p
));
}
}
private:
private:
...
@@ -197,13 +260,10 @@ class ConvMKLDNNGradOpKernel : public paddle::framework::OpKernel<T> {
...
@@ -197,13 +260,10 @@ class ConvMKLDNNGradOpKernel : public paddle::framework::OpKernel<T> {
if
(
!
input_grad
&&
!
filter_grad
)
return
;
if
(
!
input_grad
&&
!
filter_grad
)
return
;
// Get an unique name from "argument" name of "Output" variable
// This name will be used as key when saving info into device context
const
std
::
string
key
=
ctx
.
op
().
Input
(
"Output"
);
const
std
::
string
key_conv_pd
=
key
+
"@conv_pd"
;
std
::
vector
<
int
>
strides
=
ctx
.
Attr
<
std
::
vector
<
int
>>
(
"strides"
);
std
::
vector
<
int
>
strides
=
ctx
.
Attr
<
std
::
vector
<
int
>>
(
"strides"
);
std
::
vector
<
int
>
paddings
=
ctx
.
Attr
<
std
::
vector
<
int
>>
(
"paddings"
);
std
::
vector
<
int
>
paddings
=
ctx
.
Attr
<
std
::
vector
<
int
>>
(
"paddings"
);
std
::
vector
<
int
>
dilations
=
ctx
.
Attr
<
std
::
vector
<
int
>>
(
"dilations"
);
int
groups
=
ctx
.
Attr
<
int
>
(
"groups"
);
const
T
*
input_data
=
input
->
data
<
T
>
();
const
T
*
input_data
=
input
->
data
<
T
>
();
const
T
*
filter_data
=
filter
->
data
<
T
>
();
const
T
*
filter_data
=
filter
->
data
<
T
>
();
...
@@ -223,6 +283,14 @@ class ConvMKLDNNGradOpKernel : public paddle::framework::OpKernel<T> {
...
@@ -223,6 +283,14 @@ class ConvMKLDNNGradOpKernel : public paddle::framework::OpKernel<T> {
paddle
::
framework
::
vectorize2int
(
filter
->
dims
());
paddle
::
framework
::
vectorize2int
(
filter
->
dims
());
std
::
vector
<
int
>
dst_tz
=
paddle
::
framework
::
vectorize2int
(
output
->
dims
());
std
::
vector
<
int
>
dst_tz
=
paddle
::
framework
::
vectorize2int
(
output
->
dims
());
// Get an unique name from "argument" name of "Output" variable
// This name will be used as key when saving info into device context
const
std
::
string
key
=
ConvMKLDNNHandler
::
GetHash
(
src_tz
,
weights_tz
,
strides
,
paddings
,
dilations
,
groups
,
ctx
.
op
().
Input
(
"Output"
));
const
std
::
string
key_conv_pd
=
key
+
"@conv_pd"
;
// create mkldnn memory from input tensors (input/weights/output_grad)
// create mkldnn memory from input tensors (input/weights/output_grad)
auto
user_src_memory
=
memory
(
auto
user_src_memory
=
memory
(
{{{
src_tz
},
memory
::
data_type
::
f32
,
input
->
format
()},
mkldnn_engine
},
{{{
src_tz
},
memory
::
data_type
::
f32
,
input
->
format
()},
mkldnn_engine
},
...
...
paddle/fluid/operators/detection/prior_box_op.cc
浏览文件 @
b8ea7a08
...
@@ -149,6 +149,13 @@ class PriorBoxOpMaker : public framework::OpProtoAndCheckerMaker {
...
@@ -149,6 +149,13 @@ class PriorBoxOpMaker : public framework::OpProtoAndCheckerMaker {
"(float) "
"(float) "
"Prior boxes center offset."
)
"Prior boxes center offset."
)
.
SetDefault
(
0.5
);
.
SetDefault
(
0.5
);
AddAttr
<
bool
>
(
"min_max_aspect_ratios_order"
,
"(bool) If set True, the output prior box is in order of"
"[min, max, aspect_ratios], which is consistent with Caffe."
"Please note, this order affects the weights order of convolution layer"
"followed by and does not affect the final detection results."
)
.
SetDefault
(
false
);
AddComment
(
R"DOC(
AddComment
(
R"DOC(
Prior box operator
Prior box operator
Generate prior boxes for SSD(Single Shot MultiBox Detector) algorithm.
Generate prior boxes for SSD(Single Shot MultiBox Detector) algorithm.
...
...
paddle/fluid/operators/detection/prior_box_op.cu
浏览文件 @
b8ea7a08
...
@@ -28,8 +28,8 @@ __global__ void GenPriorBox(T* out, const T* aspect_ratios, const int height,
...
@@ -28,8 +28,8 @@ __global__ void GenPriorBox(T* out, const T* aspect_ratios, const int height,
const
int
im_width
,
const
int
as_num
,
const
int
im_width
,
const
int
as_num
,
const
T
offset
,
const
T
step_width
,
const
T
offset
,
const
T
step_width
,
const
T
step_height
,
const
T
*
min_sizes
,
const
T
step_height
,
const
T
*
min_sizes
,
const
T
*
max_sizes
,
const
int
min_num
,
const
T
*
max_sizes
,
const
int
min_num
,
bool
is_clip
,
bool
is_clip
)
{
bool
min_max_aspect_ratios_order
)
{
int
num_priors
=
max_sizes
?
as_num
*
min_num
+
min_num
:
as_num
*
min_num
;
int
num_priors
=
max_sizes
?
as_num
*
min_num
+
min_num
:
as_num
*
min_num
;
int
box_num
=
height
*
width
*
num_priors
;
int
box_num
=
height
*
width
*
num_priors
;
for
(
int
i
=
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
;
i
<
box_num
;
for
(
int
i
=
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
;
i
<
box_num
;
...
@@ -44,14 +44,28 @@ __global__ void GenPriorBox(T* out, const T* aspect_ratios, const int height,
...
@@ -44,14 +44,28 @@ __global__ void GenPriorBox(T* out, const T* aspect_ratios, const int height,
T
min_size
=
min_sizes
[
m
];
T
min_size
=
min_sizes
[
m
];
if
(
max_sizes
)
{
if
(
max_sizes
)
{
int
s
=
p
%
(
as_num
+
1
);
int
s
=
p
%
(
as_num
+
1
);
if
(
s
<
as_num
)
{
if
(
!
min_max_aspect_ratios_order
)
{
T
ar
=
aspect_ratios
[
s
];
if
(
s
<
as_num
)
{
bw
=
min_size
*
sqrt
(
ar
)
/
2.
;
T
ar
=
aspect_ratios
[
s
];
bh
=
min_size
/
sqrt
(
ar
)
/
2.
;
bw
=
min_size
*
sqrt
(
ar
)
/
2.
;
bh
=
min_size
/
sqrt
(
ar
)
/
2.
;
}
else
{
T
max_size
=
max_sizes
[
m
];
bw
=
sqrt
(
min_size
*
max_size
)
/
2.
;
bh
=
bw
;
}
}
else
{
}
else
{
T
max_size
=
max_sizes
[
m
];
if
(
s
==
0
)
{
bw
=
sqrt
(
min_size
*
max_size
)
/
2.
;
bw
=
bh
=
min_size
/
2.
;
bh
=
bw
;
}
else
if
(
s
==
1
)
{
T
max_size
=
max_sizes
[
m
];
bw
=
sqrt
(
min_size
*
max_size
)
/
2.
;
bh
=
bw
;
}
else
{
T
ar
=
aspect_ratios
[
s
-
1
];
bw
=
min_size
*
sqrt
(
ar
)
/
2.
;
bh
=
min_size
/
sqrt
(
ar
)
/
2.
;
}
}
}
}
else
{
}
else
{
int
s
=
p
%
as_num
;
int
s
=
p
%
as_num
;
...
@@ -94,6 +108,8 @@ class PriorBoxOpCUDAKernel : public framework::OpKernel<T> {
...
@@ -94,6 +108,8 @@ class PriorBoxOpCUDAKernel : public framework::OpKernel<T> {
auto
variances
=
ctx
.
Attr
<
std
::
vector
<
float
>>
(
"variances"
);
auto
variances
=
ctx
.
Attr
<
std
::
vector
<
float
>>
(
"variances"
);
auto
flip
=
ctx
.
Attr
<
bool
>
(
"flip"
);
auto
flip
=
ctx
.
Attr
<
bool
>
(
"flip"
);
auto
clip
=
ctx
.
Attr
<
bool
>
(
"clip"
);
auto
clip
=
ctx
.
Attr
<
bool
>
(
"clip"
);
auto
min_max_aspect_ratios_order
=
ctx
.
Attr
<
bool
>
(
"min_max_aspect_ratios_order"
);
std
::
vector
<
float
>
aspect_ratios
;
std
::
vector
<
float
>
aspect_ratios
;
ExpandAspectRatios
(
input_aspect_ratio
,
flip
,
&
aspect_ratios
);
ExpandAspectRatios
(
input_aspect_ratio
,
flip
,
&
aspect_ratios
);
...
@@ -149,7 +165,7 @@ class PriorBoxOpCUDAKernel : public framework::OpKernel<T> {
...
@@ -149,7 +165,7 @@ class PriorBoxOpCUDAKernel : public framework::OpKernel<T> {
GenPriorBox
<
T
><<<
grid
,
block
,
0
,
stream
>>>
(
GenPriorBox
<
T
><<<
grid
,
block
,
0
,
stream
>>>
(
boxes
->
data
<
T
>
(),
r
.
data
<
T
>
(),
height
,
width
,
im_height
,
im_width
,
boxes
->
data
<
T
>
(),
r
.
data
<
T
>
(),
height
,
width
,
im_height
,
im_width
,
aspect_ratios
.
size
(),
offset
,
step_width
,
step_height
,
min
.
data
<
T
>
(),
aspect_ratios
.
size
(),
offset
,
step_width
,
step_height
,
min
.
data
<
T
>
(),
max_data
,
min_num
,
clip
);
max_data
,
min_num
,
clip
,
min_max_aspect_ratios_order
);
framework
::
Tensor
v
;
framework
::
Tensor
v
;
framework
::
TensorFromVector
(
variances
,
ctx
.
device_context
(),
&
v
);
framework
::
TensorFromVector
(
variances
,
ctx
.
device_context
(),
&
v
);
...
...
paddle/fluid/operators/detection/prior_box_op.h
浏览文件 @
b8ea7a08
...
@@ -68,6 +68,8 @@ class PriorBoxOpKernel : public framework::OpKernel<T> {
...
@@ -68,6 +68,8 @@ class PriorBoxOpKernel : public framework::OpKernel<T> {
auto
variances
=
ctx
.
Attr
<
std
::
vector
<
float
>>
(
"variances"
);
auto
variances
=
ctx
.
Attr
<
std
::
vector
<
float
>>
(
"variances"
);
auto
flip
=
ctx
.
Attr
<
bool
>
(
"flip"
);
auto
flip
=
ctx
.
Attr
<
bool
>
(
"flip"
);
auto
clip
=
ctx
.
Attr
<
bool
>
(
"clip"
);
auto
clip
=
ctx
.
Attr
<
bool
>
(
"clip"
);
auto
min_max_aspect_ratios_order
=
ctx
.
Attr
<
bool
>
(
"min_max_aspect_ratios_order"
);
std
::
vector
<
float
>
aspect_ratios
;
std
::
vector
<
float
>
aspect_ratios
;
ExpandAspectRatios
(
input_aspect_ratio
,
flip
,
&
aspect_ratios
);
ExpandAspectRatios
(
input_aspect_ratio
,
flip
,
&
aspect_ratios
);
...
@@ -108,26 +110,59 @@ class PriorBoxOpKernel : public framework::OpKernel<T> {
...
@@ -108,26 +110,59 @@ class PriorBoxOpKernel : public framework::OpKernel<T> {
int
idx
=
0
;
int
idx
=
0
;
for
(
size_t
s
=
0
;
s
<
min_sizes
.
size
();
++
s
)
{
for
(
size_t
s
=
0
;
s
<
min_sizes
.
size
();
++
s
)
{
auto
min_size
=
min_sizes
[
s
];
auto
min_size
=
min_sizes
[
s
];
// priors with different aspect ratios
if
(
min_max_aspect_ratios_order
)
{
for
(
size_t
r
=
0
;
r
<
aspect_ratios
.
size
();
++
r
)
{
box_width
=
box_height
=
min_size
/
2.
;
float
ar
=
aspect_ratios
[
r
];
box_width
=
min_size
*
sqrt
(
ar
)
/
2.
;
box_height
=
min_size
/
sqrt
(
ar
)
/
2.
;
e_boxes
(
h
,
w
,
idx
,
0
)
=
(
center_x
-
box_width
)
/
img_width
;
e_boxes
(
h
,
w
,
idx
,
1
)
=
(
center_y
-
box_height
)
/
img_height
;
e_boxes
(
h
,
w
,
idx
,
2
)
=
(
center_x
+
box_width
)
/
img_width
;
e_boxes
(
h
,
w
,
idx
,
3
)
=
(
center_y
+
box_height
)
/
img_height
;
idx
++
;
}
if
(
max_sizes
.
size
()
>
0
)
{
auto
max_size
=
max_sizes
[
s
];
// square prior with size sqrt(minSize * maxSize)
box_width
=
box_height
=
sqrt
(
min_size
*
max_size
)
/
2.
;
e_boxes
(
h
,
w
,
idx
,
0
)
=
(
center_x
-
box_width
)
/
img_width
;
e_boxes
(
h
,
w
,
idx
,
0
)
=
(
center_x
-
box_width
)
/
img_width
;
e_boxes
(
h
,
w
,
idx
,
1
)
=
(
center_y
-
box_height
)
/
img_height
;
e_boxes
(
h
,
w
,
idx
,
1
)
=
(
center_y
-
box_height
)
/
img_height
;
e_boxes
(
h
,
w
,
idx
,
2
)
=
(
center_x
+
box_width
)
/
img_width
;
e_boxes
(
h
,
w
,
idx
,
2
)
=
(
center_x
+
box_width
)
/
img_width
;
e_boxes
(
h
,
w
,
idx
,
3
)
=
(
center_y
+
box_height
)
/
img_height
;
e_boxes
(
h
,
w
,
idx
,
3
)
=
(
center_y
+
box_height
)
/
img_height
;
idx
++
;
idx
++
;
if
(
max_sizes
.
size
()
>
0
)
{
auto
max_size
=
max_sizes
[
s
];
// square prior with size sqrt(minSize * maxSize)
box_width
=
box_height
=
sqrt
(
min_size
*
max_size
)
/
2.
;
e_boxes
(
h
,
w
,
idx
,
0
)
=
(
center_x
-
box_width
)
/
img_width
;
e_boxes
(
h
,
w
,
idx
,
1
)
=
(
center_y
-
box_height
)
/
img_height
;
e_boxes
(
h
,
w
,
idx
,
2
)
=
(
center_x
+
box_width
)
/
img_width
;
e_boxes
(
h
,
w
,
idx
,
3
)
=
(
center_y
+
box_height
)
/
img_height
;
idx
++
;
}
// priors with different aspect ratios
for
(
size_t
r
=
0
;
r
<
aspect_ratios
.
size
();
++
r
)
{
float
ar
=
aspect_ratios
[
r
];
if
(
fabs
(
ar
-
1.
)
<
1e-6
)
{
continue
;
}
box_width
=
min_size
*
sqrt
(
ar
)
/
2.
;
box_height
=
min_size
/
sqrt
(
ar
)
/
2.
;
e_boxes
(
h
,
w
,
idx
,
0
)
=
(
center_x
-
box_width
)
/
img_width
;
e_boxes
(
h
,
w
,
idx
,
1
)
=
(
center_y
-
box_height
)
/
img_height
;
e_boxes
(
h
,
w
,
idx
,
2
)
=
(
center_x
+
box_width
)
/
img_width
;
e_boxes
(
h
,
w
,
idx
,
3
)
=
(
center_y
+
box_height
)
/
img_height
;
idx
++
;
}
}
else
{
// priors with different aspect ratios
for
(
size_t
r
=
0
;
r
<
aspect_ratios
.
size
();
++
r
)
{
float
ar
=
aspect_ratios
[
r
];
box_width
=
min_size
*
sqrt
(
ar
)
/
2.
;
box_height
=
min_size
/
sqrt
(
ar
)
/
2.
;
e_boxes
(
h
,
w
,
idx
,
0
)
=
(
center_x
-
box_width
)
/
img_width
;
e_boxes
(
h
,
w
,
idx
,
1
)
=
(
center_y
-
box_height
)
/
img_height
;
e_boxes
(
h
,
w
,
idx
,
2
)
=
(
center_x
+
box_width
)
/
img_width
;
e_boxes
(
h
,
w
,
idx
,
3
)
=
(
center_y
+
box_height
)
/
img_height
;
idx
++
;
}
if
(
max_sizes
.
size
()
>
0
)
{
auto
max_size
=
max_sizes
[
s
];
// square prior with size sqrt(minSize * maxSize)
box_width
=
box_height
=
sqrt
(
min_size
*
max_size
)
/
2.
;
e_boxes
(
h
,
w
,
idx
,
0
)
=
(
center_x
-
box_width
)
/
img_width
;
e_boxes
(
h
,
w
,
idx
,
1
)
=
(
center_y
-
box_height
)
/
img_height
;
e_boxes
(
h
,
w
,
idx
,
2
)
=
(
center_x
+
box_width
)
/
img_width
;
e_boxes
(
h
,
w
,
idx
,
3
)
=
(
center_y
+
box_height
)
/
img_height
;
idx
++
;
}
}
}
}
}
}
}
...
...
paddle/fluid/platform/mkldnn_helper.h
浏览文件 @
b8ea7a08
...
@@ -222,15 +222,16 @@ class MKLDNNHandler {
...
@@ -222,15 +222,16 @@ class MKLDNNHandler {
static
std
::
string
GetHash
(
mkldnn
::
memory
::
dims
&
operand_dims
,
// NOLINT
static
std
::
string
GetHash
(
mkldnn
::
memory
::
dims
&
operand_dims
,
// NOLINT
const
std
::
string
&
suffix
)
{
const
std
::
string
&
suffix
)
{
auto
dims2str
=
[](
const
mkldnn
::
memory
::
dims
&
operand_dims
)
{
std
::
string
dstr
=
""
;
for
(
size_t
i
=
0
;
i
<
operand_dims
.
size
();
++
i
)
{
dstr
+=
std
::
to_string
(
operand_dims
[
i
])
+
"-"
;
}
return
dstr
;
};
return
dims2str
(
operand_dims
)
+
suffix
;
return
dims2str
(
operand_dims
)
+
suffix
;
};
protected:
static
std
::
string
dims2str
(
const
mkldnn
::
memory
::
dims
&
operand_dims
)
{
std
::
string
dstr
=
""
;
for
(
size_t
i
=
0
;
i
<
operand_dims
.
size
();
++
i
)
{
dstr
+=
std
::
to_string
(
operand_dims
[
i
])
+
"-"
;
}
return
dstr
;
}
}
protected:
protected:
...
...
paddle/scripts/paddle_build.sh
浏览文件 @
b8ea7a08
...
@@ -510,11 +510,23 @@ function gen_fluid_inference_lib() {
...
@@ -510,11 +510,23 @@ function gen_fluid_inference_lib() {
EOF
EOF
make
-j
`
nproc
`
inference_lib_dist
make
-j
`
nproc
`
inference_lib_dist
cd
${
PADDLE_ROOT
}
/build
cd
${
PADDLE_ROOT
}
/build
mv
fluid_install_dir fluid
cp
-r
fluid_install_dir fluid
tar
-cf
fluid.tgz fluid
tar
-cf
fluid.tgz fluid
fi
fi
}
}
function
test_fluid_inference_lib
()
{
if
[
${
WITH_C_API
:-
OFF
}
==
"OFF"
]
;
then
cat
<<
EOF
========================================
Testing fluid inference library ...
========================================
EOF
cd
${
PADDLE_ROOT
}
/paddle/contrib/inference/demo_ci
sh run.sh
${
PADDLE_ROOT
}
${
WITH_MKL
:-
ON
}
${
WITH_GPU
:-
OFF
}
fi
}
function
main
()
{
function
main
()
{
set
-e
set
-e
local
CMD
=
$1
local
CMD
=
$1
...
@@ -568,6 +580,7 @@ function main() {
...
@@ -568,6 +580,7 @@ function main() {
run_test
run_test
gen_capi_package
gen_capi_package
gen_fluid_inference_lib
gen_fluid_inference_lib
test_fluid_inference_lib
;;
;;
*
)
*
)
print_usage
print_usage
...
...
python/paddle/fluid/layers/detection.py
浏览文件 @
b8ea7a08
...
@@ -789,7 +789,8 @@ def prior_box(input,
...
@@ -789,7 +789,8 @@ def prior_box(input,
clip
=
False
,
clip
=
False
,
steps
=
[
0.0
,
0.0
],
steps
=
[
0.0
,
0.0
],
offset
=
0.5
,
offset
=
0.5
,
name
=
None
):
name
=
None
,
min_max_aspect_ratios_order
=
False
):
"""
"""
**Prior Box Operator**
**Prior Box Operator**
...
@@ -818,6 +819,11 @@ def prior_box(input,
...
@@ -818,6 +819,11 @@ def prior_box(input,
Default: [0., 0.]
Default: [0., 0.]
offset(float): Prior boxes center offset. Default: 0.5
offset(float): Prior boxes center offset. Default: 0.5
name(str): Name of the prior box op. Default: None.
name(str): Name of the prior box op. Default: None.
min_max_aspect_ratios_order(bool): If set True, the output prior box is
in order of [min, max, aspect_ratios], which is consistent with
Caffe. Please note, this order affects the weights order of
convolution layer followed by and does not affect the final
detection results. Default: False.
Returns:
Returns:
tuple: A tuple with two Variable (boxes, variances)
tuple: A tuple with two Variable (boxes, variances)
...
@@ -871,7 +877,8 @@ def prior_box(input,
...
@@ -871,7 +877,8 @@ def prior_box(input,
'clip'
:
clip
,
'clip'
:
clip
,
'step_w'
:
steps
[
0
],
'step_w'
:
steps
[
0
],
'step_h'
:
steps
[
1
],
'step_h'
:
steps
[
1
],
'offset'
:
offset
'offset'
:
offset
,
'min_max_aspect_ratios_order'
:
min_max_aspect_ratios_order
}
}
if
max_sizes
is
not
None
and
len
(
max_sizes
)
>
0
and
max_sizes
[
0
]
>
0
:
if
max_sizes
is
not
None
and
len
(
max_sizes
)
>
0
and
max_sizes
[
0
]
>
0
:
if
not
_is_list_or_tuple_
(
max_sizes
):
if
not
_is_list_or_tuple_
(
max_sizes
):
...
@@ -911,7 +918,8 @@ def multi_box_head(inputs,
...
@@ -911,7 +918,8 @@ def multi_box_head(inputs,
kernel_size
=
1
,
kernel_size
=
1
,
pad
=
0
,
pad
=
0
,
stride
=
1
,
stride
=
1
,
name
=
None
):
name
=
None
,
min_max_aspect_ratios_order
=
False
):
"""
"""
Generate prior boxes for SSD(Single Shot MultiBox Detector)
Generate prior boxes for SSD(Single Shot MultiBox Detector)
algorithm. The details of this algorithm, please refer the
algorithm. The details of this algorithm, please refer the
...
@@ -954,6 +962,11 @@ def multi_box_head(inputs,
...
@@ -954,6 +962,11 @@ def multi_box_head(inputs,
pad(int|list|tuple): The padding of conv2d. Default:0.
pad(int|list|tuple): The padding of conv2d. Default:0.
stride(int|list|tuple): The stride of conv2d. Default:1,
stride(int|list|tuple): The stride of conv2d. Default:1,
name(str): Name of the prior box layer. Default: None.
name(str): Name of the prior box layer. Default: None.
min_max_aspect_ratios_order(bool): If set True, the output prior box is
in order of [min, max, aspect_ratios], which is consistent with
Caffe. Please note, this order affects the weights order of
convolution layer followed by and does not affect the fininal
detection results. Default: False.
Returns:
Returns:
tuple: A tuple with four Variables. (mbox_loc, mbox_conf, boxes, variances)
tuple: A tuple with four Variables. (mbox_loc, mbox_conf, boxes, variances)
...
@@ -1068,7 +1081,8 @@ def multi_box_head(inputs,
...
@@ -1068,7 +1081,8 @@ def multi_box_head(inputs,
step
=
[
step_w
[
i
]
if
step_w
else
0.0
,
step_h
[
i
]
if
step_w
else
0.0
]
step
=
[
step_w
[
i
]
if
step_w
else
0.0
,
step_h
[
i
]
if
step_w
else
0.0
]
box
,
var
=
prior_box
(
input
,
image
,
min_size
,
max_size
,
aspect_ratio
,
box
,
var
=
prior_box
(
input
,
image
,
min_size
,
max_size
,
aspect_ratio
,
variance
,
flip
,
clip
,
step
,
offset
)
variance
,
flip
,
clip
,
step
,
offset
,
None
,
min_max_aspect_ratios_order
)
box_results
.
append
(
box
)
box_results
.
append
(
box
)
var_results
.
append
(
var
)
var_results
.
append
(
var
)
...
...
python/paddle/fluid/tests/unittests/test_prior_box_op.py
浏览文件 @
b8ea7a08
...
@@ -32,6 +32,7 @@ class TestPriorBoxOp(OpTest):
...
@@ -32,6 +32,7 @@ class TestPriorBoxOp(OpTest):
'variances'
:
self
.
variances
,
'variances'
:
self
.
variances
,
'flip'
:
self
.
flip
,
'flip'
:
self
.
flip
,
'clip'
:
self
.
clip
,
'clip'
:
self
.
clip
,
'min_max_aspect_ratios_order'
:
self
.
min_max_aspect_ratios_order
,
'step_w'
:
self
.
step_w
,
'step_w'
:
self
.
step_w
,
'step_h'
:
self
.
step_h
,
'step_h'
:
self
.
step_h
,
'offset'
:
self
.
offset
'offset'
:
self
.
offset
...
@@ -52,6 +53,9 @@ class TestPriorBoxOp(OpTest):
...
@@ -52,6 +53,9 @@ class TestPriorBoxOp(OpTest):
max_sizes
=
[
5
,
10
]
max_sizes
=
[
5
,
10
]
self
.
max_sizes
=
np
.
array
(
max_sizes
).
astype
(
'float32'
).
tolist
()
self
.
max_sizes
=
np
.
array
(
max_sizes
).
astype
(
'float32'
).
tolist
()
def
set_min_max_aspect_ratios_order
(
self
):
self
.
min_max_aspect_ratios_order
=
False
def
init_test_params
(
self
):
def
init_test_params
(
self
):
self
.
layer_w
=
32
self
.
layer_w
=
32
self
.
layer_h
=
32
self
.
layer_h
=
32
...
@@ -71,6 +75,7 @@ class TestPriorBoxOp(OpTest):
...
@@ -71,6 +75,7 @@ class TestPriorBoxOp(OpTest):
self
.
set_max_sizes
()
self
.
set_max_sizes
()
self
.
aspect_ratios
=
[
2.0
,
3.0
]
self
.
aspect_ratios
=
[
2.0
,
3.0
]
self
.
flip
=
True
self
.
flip
=
True
self
.
set_min_max_aspect_ratios_order
()
self
.
real_aspect_ratios
=
[
1
,
2.0
,
1.0
/
2.0
,
3.0
,
1.0
/
3.0
]
self
.
real_aspect_ratios
=
[
1
,
2.0
,
1.0
/
2.0
,
3.0
,
1.0
/
3.0
]
self
.
aspect_ratios
=
np
.
array
(
self
.
aspect_ratios
=
np
.
array
(
self
.
aspect_ratios
,
dtype
=
np
.
float
).
flatten
()
self
.
aspect_ratios
,
dtype
=
np
.
float
).
flatten
()
...
@@ -78,7 +83,6 @@ class TestPriorBoxOp(OpTest):
...
@@ -78,7 +83,6 @@ class TestPriorBoxOp(OpTest):
self
.
variances
=
np
.
array
(
self
.
variances
,
dtype
=
np
.
float
).
flatten
()
self
.
variances
=
np
.
array
(
self
.
variances
,
dtype
=
np
.
float
).
flatten
()
self
.
clip
=
True
self
.
clip
=
True
self
.
num_priors
=
len
(
self
.
real_aspect_ratios
)
*
len
(
self
.
min_sizes
)
self
.
num_priors
=
len
(
self
.
real_aspect_ratios
)
*
len
(
self
.
min_sizes
)
if
len
(
self
.
max_sizes
)
>
0
:
if
len
(
self
.
max_sizes
)
>
0
:
self
.
num_priors
+=
len
(
self
.
max_sizes
)
self
.
num_priors
+=
len
(
self
.
max_sizes
)
...
@@ -106,26 +110,60 @@ class TestPriorBoxOp(OpTest):
...
@@ -106,26 +110,60 @@ class TestPriorBoxOp(OpTest):
idx
=
0
idx
=
0
for
s
in
range
(
len
(
self
.
min_sizes
)):
for
s
in
range
(
len
(
self
.
min_sizes
)):
min_size
=
self
.
min_sizes
[
s
]
min_size
=
self
.
min_sizes
[
s
]
# rest of priors
if
not
self
.
min_max_aspect_ratios_order
:
for
r
in
range
(
len
(
self
.
real_aspect_ratios
)):
# rest of priors
ar
=
self
.
real_aspect_ratios
[
r
]
for
r
in
range
(
len
(
self
.
real_aspect_ratios
)):
c_w
=
min_size
*
math
.
sqrt
(
ar
)
/
2
ar
=
self
.
real_aspect_ratios
[
r
]
c_h
=
(
min_size
/
math
.
sqrt
(
ar
))
/
2
c_w
=
min_size
*
math
.
sqrt
(
ar
)
/
2
out_boxes
[
h
,
w
,
idx
,
:]
=
[(
c_x
-
c_w
)
/
self
.
image_w
,
c_h
=
(
min_size
/
math
.
sqrt
(
ar
))
/
2
(
c_y
-
c_h
)
/
self
.
image_h
,
out_boxes
[
h
,
w
,
idx
,
:]
=
[
(
c_x
+
c_w
)
/
self
.
image_w
,
(
c_x
-
c_w
)
/
self
.
image_w
,
(
c_y
-
c_h
)
/
(
c_y
+
c_h
)
/
self
.
image_h
]
self
.
image_h
,
(
c_x
+
c_w
)
/
self
.
image_w
,
idx
+=
1
(
c_y
+
c_h
)
/
self
.
image_h
]
if
len
(
self
.
max_sizes
)
>
0
:
idx
+=
1
max_size
=
self
.
max_sizes
[
s
]
# second prior: aspect_ratio = 1,
if
len
(
self
.
max_sizes
)
>
0
:
c_w
=
c_h
=
math
.
sqrt
(
min_size
*
max_size
)
/
2
max_size
=
self
.
max_sizes
[
s
]
# second prior: aspect_ratio = 1,
c_w
=
c_h
=
math
.
sqrt
(
min_size
*
max_size
)
/
2
out_boxes
[
h
,
w
,
idx
,
:]
=
[
(
c_x
-
c_w
)
/
self
.
image_w
,
(
c_y
-
c_h
)
/
self
.
image_h
,
(
c_x
+
c_w
)
/
self
.
image_w
,
(
c_y
+
c_h
)
/
self
.
image_h
]
idx
+=
1
else
:
c_w
=
c_h
=
min_size
/
2.
out_boxes
[
h
,
w
,
idx
,
:]
=
[(
c_x
-
c_w
)
/
self
.
image_w
,
out_boxes
[
h
,
w
,
idx
,
:]
=
[(
c_x
-
c_w
)
/
self
.
image_w
,
(
c_y
-
c_h
)
/
self
.
image_h
,
(
c_y
-
c_h
)
/
self
.
image_h
,
(
c_x
+
c_w
)
/
self
.
image_w
,
(
c_x
+
c_w
)
/
self
.
image_w
,
(
c_y
+
c_h
)
/
self
.
image_h
]
(
c_y
+
c_h
)
/
self
.
image_h
]
idx
+=
1
idx
+=
1
if
len
(
self
.
max_sizes
)
>
0
:
max_size
=
self
.
max_sizes
[
s
]
# second prior: aspect_ratio = 1,
c_w
=
c_h
=
math
.
sqrt
(
min_size
*
max_size
)
/
2
out_boxes
[
h
,
w
,
idx
,
:]
=
[
(
c_x
-
c_w
)
/
self
.
image_w
,
(
c_y
-
c_h
)
/
self
.
image_h
,
(
c_x
+
c_w
)
/
self
.
image_w
,
(
c_y
+
c_h
)
/
self
.
image_h
]
idx
+=
1
# rest of priors
for
r
in
range
(
len
(
self
.
real_aspect_ratios
)):
ar
=
self
.
real_aspect_ratios
[
r
]
if
abs
(
ar
-
1.
)
<
1e-6
:
continue
c_w
=
min_size
*
math
.
sqrt
(
ar
)
/
2
c_h
=
(
min_size
/
math
.
sqrt
(
ar
))
/
2
out_boxes
[
h
,
w
,
idx
,
:]
=
[
(
c_x
-
c_w
)
/
self
.
image_w
,
(
c_y
-
c_h
)
/
self
.
image_h
,
(
c_x
+
c_w
)
/
self
.
image_w
,
(
c_y
+
c_h
)
/
self
.
image_h
]
idx
+=
1
# clip the prior's coordidate such that it is within[0, 1]
# clip the prior's coordidate such that it is within[0, 1]
if
self
.
clip
:
if
self
.
clip
:
...
@@ -137,10 +175,15 @@ class TestPriorBoxOp(OpTest):
...
@@ -137,10 +175,15 @@ class TestPriorBoxOp(OpTest):
self
.
out_var
=
out_var
.
astype
(
'float32'
)
self
.
out_var
=
out_var
.
astype
(
'float32'
)
class
TestPriorBoxOpWithMaxSize
(
TestPriorBoxOp
):
class
TestPriorBoxOpWith
out
MaxSize
(
TestPriorBoxOp
):
def
set_max_sizes
(
self
):
def
set_max_sizes
(
self
):
self
.
max_sizes
=
[]
self
.
max_sizes
=
[]
class
TestPriorBoxOpWithSpecifiedOutOrder
(
TestPriorBoxOp
):
def
set_min_max_aspect_ratios_order
(
self
):
self
.
min_max_aspect_ratios_order
=
True
if
__name__
==
'__main__'
:
if
__name__
==
'__main__'
:
unittest
.
main
()
unittest
.
main
()
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录