Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
PaddlePaddle
Paddle
提交
9130a884
P
Paddle
项目概览
PaddlePaddle
/
Paddle
大约 1 年 前同步成功
通知
2298
Star
20931
Fork
5422
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
1423
列表
看板
标记
里程碑
合并请求
543
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
1,423
Issue
1,423
列表
看板
标记
里程碑
合并请求
543
合并请求
543
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
9130a884
编写于
7月 11, 2018
作者:
M
minqiyang
浏览文件
操作
浏览文件
下载
差异文件
Merge branch 'develop' of
https://github.com/PaddlePaddle/Paddle
into fix_is_taged
上级
a685566c
e568acbe
变更
27
隐藏空白更改
内联
并排
Showing
27 changed file
with
876 addition
and
258 deletion
+876
-258
paddle/contrib/inference/CMakeLists.txt
paddle/contrib/inference/CMakeLists.txt
+16
-1
paddle/contrib/inference/check_symbol.sh
paddle/contrib/inference/check_symbol.sh
+12
-0
paddle/contrib/inference/demo/CMakeLists.txt
paddle/contrib/inference/demo/CMakeLists.txt
+0
-2
paddle/contrib/inference/demo_ci/CMakeLists.txt
paddle/contrib/inference/demo_ci/CMakeLists.txt
+77
-0
paddle/contrib/inference/demo_ci/run.sh
paddle/contrib/inference/demo_ci/run.sh
+34
-0
paddle/contrib/inference/demo_ci/simple_on_word2vec.cc
paddle/contrib/inference/demo_ci/simple_on_word2vec.cc
+39
-23
paddle/contrib/inference/paddle_inference_api.map
paddle/contrib/inference/paddle_inference_api.map
+6
-0
paddle/contrib/inference/paddle_inference_api.sym
paddle/contrib/inference/paddle_inference_api.sym
+1
-0
paddle/fluid/inference/CMakeLists.txt
paddle/fluid/inference/CMakeLists.txt
+6
-0
paddle/fluid/inference/analysis/data_flow_graph.cc
paddle/fluid/inference/analysis/data_flow_graph.cc
+85
-1
paddle/fluid/inference/analysis/data_flow_graph.h
paddle/fluid/inference/analysis/data_flow_graph.h
+36
-0
paddle/fluid/inference/analysis/data_flow_graph_tester.cc
paddle/fluid/inference/analysis/data_flow_graph_tester.cc
+67
-2
paddle/fluid/inference/paddle_fluid.sym
paddle/fluid/inference/paddle_fluid.sym
+1
-0
paddle/fluid/operators/conv_mkldnn_op.cc
paddle/fluid/operators/conv_mkldnn_op.cc
+120
-52
paddle/fluid/operators/detection/rpn_target_assign_op.cc
paddle/fluid/operators/detection/rpn_target_assign_op.cc
+3
-2
paddle/fluid/operators/im2sequence_op.cc
paddle/fluid/operators/im2sequence_op.cc
+18
-7
paddle/fluid/operators/im2sequence_op.h
paddle/fluid/operators/im2sequence_op.h
+91
-34
paddle/fluid/operators/math/im2col.cc
paddle/fluid/operators/math/im2col.cc
+0
-26
paddle/fluid/operators/math/im2col.cu
paddle/fluid/operators/math/im2col.cu
+0
-30
paddle/fluid/platform/CMakeLists.txt
paddle/fluid/platform/CMakeLists.txt
+1
-1
paddle/fluid/platform/mkldnn_helper.h
paddle/fluid/platform/mkldnn_helper.h
+9
-8
paddle/scripts/paddle_build.sh
paddle/scripts/paddle_build.sh
+14
-1
python/paddle/fluid/backward.py
python/paddle/fluid/backward.py
+39
-21
python/paddle/fluid/layers/nn.py
python/paddle/fluid/layers/nn.py
+40
-12
python/paddle/fluid/tests/unittests/test_im2sequence_op.py
python/paddle/fluid/tests/unittests/test_im2sequence_op.py
+147
-33
python/paddle/fluid/tests/unittests/test_layers.py
python/paddle/fluid/tests/unittests/test_layers.py
+6
-2
python/setup.py.in
python/setup.py.in
+8
-0
未找到文件。
paddle/contrib/inference/CMakeLists.txt
浏览文件 @
9130a884
...
@@ -45,6 +45,10 @@ endfunction(inference_api_test)
...
@@ -45,6 +45,10 @@ endfunction(inference_api_test)
cc_library
(
paddle_inference_api
cc_library
(
paddle_inference_api
SRCS paddle_inference_api.cc paddle_inference_api_impl.cc
SRCS paddle_inference_api.cc paddle_inference_api_impl.cc
DEPS
${
FLUID_CORE_MODULES
}
${
GLOB_OP_LIB
}
)
DEPS
${
FLUID_CORE_MODULES
}
${
GLOB_OP_LIB
}
)
if
(
NOT APPLE
)
set
(
LINK_FLAGS
"-Wl,--retain-symbols-file
${
CMAKE_CURRENT_SOURCE_DIR
}
/paddle_inference_api.sym"
)
set_target_properties
(
paddle_inference_api PROPERTIES LINK_FLAGS
"
${
LINK_FLAGS
}
"
)
endif
()
# Here the shared library doesn't depend on other fluid libraries, or double free will occur.
# Here the shared library doesn't depend on other fluid libraries, or double free will occur.
cc_library
(
paddle_inference_api_shared SHARED
cc_library
(
paddle_inference_api_shared SHARED
...
@@ -53,8 +57,19 @@ add_dependencies(paddle_inference_api_shared ${FLUID_CORE_MODULES} ${GLOB_OP_LIB
...
@@ -53,8 +57,19 @@ add_dependencies(paddle_inference_api_shared ${FLUID_CORE_MODULES} ${GLOB_OP_LIB
set_target_properties
(
paddle_inference_api_shared PROPERTIES OUTPUT_NAME paddle_inference_api
)
set_target_properties
(
paddle_inference_api_shared PROPERTIES OUTPUT_NAME paddle_inference_api
)
if
(
NOT APPLE
)
if
(
NOT APPLE
)
set
(
LINK_FLAGS
"-
fPIC -fvisibility=hidden
"
)
set
(
LINK_FLAGS
"-
Wl,--version-script
${
CMAKE_CURRENT_SOURCE_DIR
}
/paddle_inference_api.map
"
)
set_target_properties
(
paddle_inference_api_shared PROPERTIES LINK_FLAGS
"
${
LINK_FLAGS
}
"
)
set_target_properties
(
paddle_inference_api_shared PROPERTIES LINK_FLAGS
"
${
LINK_FLAGS
}
"
)
FILE
(
WRITE
${
CMAKE_CURRENT_BINARY_DIR
}
/check_symbol.cmake
"execute_process(COMMAND bash -c
\"
${
CMAKE_CURRENT_SOURCE_DIR
}
/check_symbol.sh"
"
${
CMAKE_CURRENT_BINARY_DIR
}
/libpaddle_inference_api.so
\"
RESULT_VARIABLE symbol_res)
\n
"
"if(NOT
\"\$
{symbol_res}
\"
STREQUAL
\"
0
\"
)
\n
"
" message(FATAL_ERROR
\"
Check symbol failed.
\"
)
\n
"
"endif()
\n
"
)
add_custom_command
(
OUTPUT
"
${
CMAKE_CURRENT_BINARY_DIR
}
/.check_symbol"
COMMAND
${
CMAKE_COMMAND
}
-P
"
${
CMAKE_CURRENT_BINARY_DIR
}
/check_symbol.cmake"
DEPENDS paddle_inference_api_shared
)
add_custom_target
(
check_symbol ALL DEPENDS
"
${
CMAKE_CURRENT_BINARY_DIR
}
/.check_symbol"
)
endif
()
endif
()
cc_test
(
test_paddle_inference_api
cc_test
(
test_paddle_inference_api
...
...
paddle/contrib/inference/check_symbol.sh
0 → 100755
浏览文件 @
9130a884
#!/bin/bash
lib
=
$1
if
[
$#
-ne
1
]
;
then
echo
"No input library"
;
exit
-1
;
fi
num_paddle_syms
=
$(
nm
-D
--defined-only
${
lib
}
|
grep
paddle |
wc
-l
)
num_google_syms
=
$(
nm
-D
--defined-only
${
lib
}
|
grep
google |
wc
-l
)
if
[
$num_paddle_syms
-le
0
]
;
then
echo
"Have no paddle symbols"
;
exit
-1
;
fi
if
[
$num_google_syms
-ge
1
]
;
then
echo
"Have some google symbols"
;
exit
-1
;
fi
exit
0
paddle/contrib/inference/demo/CMakeLists.txt
浏览文件 @
9130a884
...
@@ -13,8 +13,6 @@
...
@@ -13,8 +13,6 @@
# limitations under the License.
# limitations under the License.
#
#
inference_api_test
(
simple_on_word2vec ARGS test_word2vec
)
option
(
WITH_INFERENCE_DEMO
"Compile with Inference demo"
OFF
)
option
(
WITH_INFERENCE_DEMO
"Compile with Inference demo"
OFF
)
if
(
NOT WITH_INFERENCE_DEMO
)
if
(
NOT WITH_INFERENCE_DEMO
)
return
()
return
()
...
...
paddle/contrib/inference/demo_ci/CMakeLists.txt
0 → 100644
浏览文件 @
9130a884
cmake_minimum_required
(
VERSION 3.0
)
project
(
cpp_inference_demo CXX C
)
set
(
CMAKE_CXX_FLAGS
"
${
CMAKE_CXX_FLAGS
}
-std=c++11"
)
if
(
NOT DEFINED PADDLE_LIB
)
message
(
FATAL_ERROR
"please set PADDLE_LIB with -DPADDLE_LIB=/path/paddle/lib"
)
endif
()
if
(
NOT DEFINED DEMO_NAME
)
message
(
FATAL_ERROR
"please set DEMO_NAME with -DDEMO_NAME=demo_name"
)
endif
()
option
(
WITH_MKL
"Compile demo with MKL/OpenBlas support, default use MKL."
ON
)
option
(
WITH_GPU
"Compile demo with GPU/CPU, default use CPU."
OFF
)
option
(
WITH_STATIC_LIB
"Compile demo with static/shared library, default use static."
ON
)
if
(
WITH_GPU
)
set
(
CUDA_LIB
"/usr/local/cuda/lib64/"
CACHE STRING
"CUDA Library"
)
endif
()
include_directories
(
"
${
PADDLE_LIB
}
"
)
include_directories
(
"
${
PADDLE_LIB
}
/third_party/install/protobuf/include"
)
include_directories
(
"
${
PADDLE_LIB
}
/third_party/install/glog/include"
)
include_directories
(
"
${
PADDLE_LIB
}
/third_party/install/gflags/include"
)
include_directories
(
"
${
PADDLE_LIB
}
/third_party/install/snappy/include"
)
include_directories
(
"
${
PADDLE_LIB
}
/third_party/install/snappystream/include"
)
include_directories
(
"
${
PADDLE_LIB
}
/third_party/install/zlib/include"
)
include_directories
(
"
${
PADDLE_LIB
}
/third_party/boost"
)
include_directories
(
"
${
PADDLE_LIB
}
/third_party/eigen3"
)
link_directories
(
"
${
PADDLE_LIB
}
/third_party/install/snappy/lib"
)
link_directories
(
"
${
PADDLE_LIB
}
/third_party/install/snappystream/lib"
)
link_directories
(
"
${
PADDLE_LIB
}
/third_party/install/protobuf/lib"
)
link_directories
(
"
${
PADDLE_LIB
}
/third_party/install/glog/lib"
)
link_directories
(
"
${
PADDLE_LIB
}
/third_party/install/gflags/lib"
)
link_directories
(
"
${
PADDLE_LIB
}
/third_party/install/zlib/lib"
)
add_executable
(
${
DEMO_NAME
}
${
DEMO_NAME
}
.cc
)
if
(
WITH_MKL
)
include_directories
(
"
${
PADDLE_LIB
}
/third_party/install/mklml/include"
)
set
(
MATH_LIB
${
PADDLE_LIB
}
/third_party/install/mklml/lib/libmklml_intel.so
${
PADDLE_LIB
}
/third_party/install/mklml/lib/libiomp5.so
)
set
(
MKLDNN_PATH
"
${
PADDLE_LIB
}
/third_party/install/mkldnn"
)
if
(
EXISTS
${
MKLDNN_PATH
}
)
include_directories
(
"
${
MKLDNN_PATH
}
/include"
)
set
(
MKLDNN_LIB
${
MKLDNN_PATH
}
/lib/libmkldnn.so.0
)
endif
()
else
()
set
(
MATH_LIB
${
PADDLE_LIB
}
/third_party/install/openblas/lib/libopenblas.a
)
endif
()
if
(
WITH_STATIC_LIB
)
set
(
DEPS
"-Wl,--whole-archive"
${
PADDLE_LIB
}
/paddle/fluid/inference/libpaddle_fluid.a
"-Wl,--no-whole-archive"
${
PADDLE_LIB
}
/contrib/inference/libpaddle_inference_api.a
)
else
()
# Note: libpaddle_inference_api.so must put before libpaddle_fluid.so
set
(
DEPS
${
PADDLE_LIB
}
/contrib/inference/libpaddle_inference_api.so
${
PADDLE_LIB
}
/paddle/fluid/inference/libpaddle_fluid.so
)
endif
()
set
(
EXTERNAL_LIB
"-lrt -ldl -lpthread"
)
set
(
DEPS
${
DEPS
}
${
MATH_LIB
}
${
MKLDNN_LIB
}
glog gflags protobuf snappystream snappy z
${
EXTERNAL_LIB
}
)
if
(
WITH_GPU
)
set
(
DEPS
${
DEPS
}
${
CUDA_LIB
}
/libcudart.so
)
endif
()
target_link_libraries
(
${
DEMO_NAME
}
${
DEPS
}
)
paddle/contrib/inference/demo_ci/run.sh
0 → 100755
浏览文件 @
9130a884
set
-x
PADDLE_ROOT
=
$1
WITH_MKL
=
$2
WITH_GPU
=
$3
if
[
$3
==
"ON"
]
;
then
use_gpu_list
=
'true false'
else
use_gpu_list
=
'false'
fi
mkdir
-p
build
cd
build
for
WITH_STATIC_LIB
in
false
;
do
rm
-rf
*
cmake ..
-DPADDLE_LIB
=
${
PADDLE_ROOT
}
/build/fluid_install_dir/
\
-DWITH_MKL
=
$WITH_MKL
\
-DDEMO_NAME
=
simple_on_word2vec
\
-DWITH_GPU
=
$WITH_GPU
\
-DWITH_STATIC_LIB
=
$WITH_STATIC_LIB
make
for
use_gpu
in
$use_gpu_list
;
do
./simple_on_word2vec
\
--dirname
=
${
PADDLE_ROOT
}
/build/python/paddle/fluid/tests/book/word2vec.inference.model
\
--use_gpu
=
$use_gpu
done
done
if
[
$?
-eq
0
]
;
then
exit
0
else
echo
"inference demo runs fail."
exit
1
fi
set
+x
paddle/contrib/inference/demo/simple_on_word2vec.cc
→
paddle/contrib/inference/demo
_ci
/simple_on_word2vec.cc
浏览文件 @
9130a884
...
@@ -16,21 +16,27 @@ limitations under the License. */
...
@@ -16,21 +16,27 @@ limitations under the License. */
* This file contains a simple demo for how to take a model for inference.
* This file contains a simple demo for how to take a model for inference.
*/
*/
#include <gflags/gflags.h>
#include <glog/logging.h>
#include <glog/logging.h>
#include <gtest/gtest.h>
#include <memory>
#include <memory>
#include <thread>
#include <thread>
#include "paddle/contrib/inference/paddle_inference_api.h"
#include "contrib/inference/paddle_inference_api.h"
#include "paddle/fluid/platform/enforce.h"
DEFINE_string
(
dirname
,
""
,
"Directory of the inference model."
);
DEFINE_bool
(
use_gpu
,
false
,
"Whether use gpu."
);
namespace
paddle
{
namespace
paddle
{
namespace
demo
{
namespace
demo
{
DEFINE_string
(
dirname
,
""
,
"Directory of the inference model."
);
void
Main
(
bool
use_gpu
)
{
void
Main
(
bool
use_gpu
)
{
//# 1. Create PaddlePredictor with a config.
//# 1. Create PaddlePredictor with a config.
NativeConfig
config
;
NativeConfig
config
;
config
.
model_dir
=
FLAGS_dirname
+
"word2vec.inference.model"
;
if
(
FLAGS_dirname
.
empty
())
{
LOG
(
INFO
)
<<
"Usage: ./simple_on_word2vec --dirname=path/to/your/model"
;
exit
(
1
);
}
config
.
model_dir
=
FLAGS_dirname
;
config
.
use_gpu
=
use_gpu
;
config
.
use_gpu
=
use_gpu
;
config
.
fraction_of_gpu_memory
=
0.15
;
config
.
fraction_of_gpu_memory
=
0.15
;
config
.
device
=
0
;
config
.
device
=
0
;
...
@@ -54,12 +60,16 @@ void Main(bool use_gpu) {
...
@@ -54,12 +60,16 @@ void Main(bool use_gpu) {
CHECK
(
predictor
->
Run
(
slots
,
&
outputs
));
CHECK
(
predictor
->
Run
(
slots
,
&
outputs
));
//# 4. Get output.
//# 4. Get output.
ASSERT_EQ
(
outputs
.
size
(),
1UL
);
PADDLE_ENFORCE
(
outputs
.
size
(),
1UL
);
LOG
(
INFO
)
<<
"output buffer size: "
<<
outputs
.
front
().
data
.
length
();
// Check the output buffer size and result of each tid.
PADDLE_ENFORCE
(
outputs
.
front
().
data
.
length
(),
33168UL
);
float
result
[
5
]
=
{
0.00129761
,
0.00151112
,
0.000423564
,
0.00108815
,
0.000932706
};
const
size_t
num_elements
=
outputs
.
front
().
data
.
length
()
/
sizeof
(
float
);
const
size_t
num_elements
=
outputs
.
front
().
data
.
length
()
/
sizeof
(
float
);
// The outputs' buffers are in CPU memory.
// The outputs' buffers are in CPU memory.
for
(
size_t
i
=
0
;
i
<
std
::
min
(
5UL
,
num_elements
);
i
++
)
{
for
(
size_t
i
=
0
;
i
<
std
::
min
(
5UL
,
num_elements
);
i
++
)
{
LOG
(
INFO
)
<<
static_cast
<
float
*>
(
outputs
.
front
().
data
.
data
())[
i
];
PADDLE_ENFORCE
(
static_cast
<
float
*>
(
outputs
.
front
().
data
.
data
())[
i
],
result
[
i
]);
}
}
}
}
}
}
...
@@ -68,7 +78,7 @@ void MainThreads(int num_threads, bool use_gpu) {
...
@@ -68,7 +78,7 @@ void MainThreads(int num_threads, bool use_gpu) {
// Multi-threads only support on CPU
// Multi-threads only support on CPU
// 0. Create PaddlePredictor with a config.
// 0. Create PaddlePredictor with a config.
NativeConfig
config
;
NativeConfig
config
;
config
.
model_dir
=
FLAGS_dirname
+
"word2vec.inference.model"
;
config
.
model_dir
=
FLAGS_dirname
;
config
.
use_gpu
=
use_gpu
;
config
.
use_gpu
=
use_gpu
;
config
.
fraction_of_gpu_memory
=
0.15
;
config
.
fraction_of_gpu_memory
=
0.15
;
config
.
device
=
0
;
config
.
device
=
0
;
...
@@ -94,14 +104,17 @@ void MainThreads(int num_threads, bool use_gpu) {
...
@@ -94,14 +104,17 @@ void MainThreads(int num_threads, bool use_gpu) {
CHECK
(
predictor
->
Run
(
inputs
,
&
outputs
));
CHECK
(
predictor
->
Run
(
inputs
,
&
outputs
));
// 4. Get output.
// 4. Get output.
ASSERT_EQ
(
outputs
.
size
(),
1UL
);
PADDLE_ENFORCE
(
outputs
.
size
(),
1UL
);
LOG
(
INFO
)
<<
"TID: "
<<
tid
<<
", "
// Check the output buffer size and result of each tid.
<<
"output buffer size: "
<<
outputs
.
front
().
data
.
length
();
PADDLE_ENFORCE
(
outputs
.
front
().
data
.
length
(),
33168UL
);
float
result
[
5
]
=
{
0.00129761
,
0.00151112
,
0.000423564
,
0.00108815
,
0.000932706
};
const
size_t
num_elements
=
const
size_t
num_elements
=
outputs
.
front
().
data
.
length
()
/
sizeof
(
float
);
outputs
.
front
().
data
.
length
()
/
sizeof
(
float
);
// The outputs' buffers are in CPU memory.
// The outputs' buffers are in CPU memory.
for
(
size_t
i
=
0
;
i
<
std
::
min
(
5UL
,
num_elements
);
i
++
)
{
for
(
size_t
i
=
0
;
i
<
std
::
min
(
5UL
,
num_elements
);
i
++
)
{
LOG
(
INFO
)
<<
static_cast
<
float
*>
(
outputs
.
front
().
data
.
data
())[
i
];
PADDLE_ENFORCE
(
static_cast
<
float
*>
(
outputs
.
front
().
data
.
data
())[
i
],
result
[
i
]);
}
}
}
}
});
});
...
@@ -111,15 +124,18 @@ void MainThreads(int num_threads, bool use_gpu) {
...
@@ -111,15 +124,18 @@ void MainThreads(int num_threads, bool use_gpu) {
}
}
}
}
TEST
(
demo
,
word2vec_cpu
)
{
Main
(
false
/*use_gpu*/
);
}
TEST
(
demo_multi_threads
,
word2vec_cpu_1
)
{
MainThreads
(
1
,
false
/*use_gpu*/
);
}
TEST
(
demo_multi_threads
,
word2vec_cpu_4
)
{
MainThreads
(
4
,
false
/*use_gpu*/
);
}
#ifdef PADDLE_WITH_CUDA
TEST
(
demo
,
word2vec_gpu
)
{
Main
(
true
/*use_gpu*/
);
}
TEST
(
demo_multi_threads
,
word2vec_gpu_1
)
{
MainThreads
(
1
,
true
/*use_gpu*/
);
}
TEST
(
demo_multi_threads
,
word2vec_gpu_4
)
{
MainThreads
(
4
,
true
/*use_gpu*/
);
}
#endif
}
// namespace demo
}
// namespace demo
}
// namespace paddle
}
// namespace paddle
int
main
(
int
argc
,
char
**
argv
)
{
google
::
ParseCommandLineFlags
(
&
argc
,
&
argv
,
true
);
paddle
::
demo
::
Main
(
false
/* use_gpu*/
);
paddle
::
demo
::
MainThreads
(
1
,
false
/* use_gpu*/
);
paddle
::
demo
::
MainThreads
(
4
,
false
/* use_gpu*/
);
if
(
FLAGS_use_gpu
)
{
paddle
::
demo
::
Main
(
true
/*use_gpu*/
);
paddle
::
demo
::
MainThreads
(
1
,
true
/*use_gpu*/
);
paddle
::
demo
::
MainThreads
(
4
,
true
/*use_gpu*/
);
}
return
0
;
}
paddle/contrib/inference/paddle_inference_api.map
0 → 100644
浏览文件 @
9130a884
{
global:
*paddle*;
local:
*;
};
paddle/contrib/inference/paddle_inference_api.sym
0 → 100644
浏览文件 @
9130a884
*paddle*
paddle/fluid/inference/CMakeLists.txt
浏览文件 @
9130a884
...
@@ -13,6 +13,12 @@ endif()
...
@@ -13,6 +13,12 @@ endif()
# Create static library
# Create static library
cc_library
(
paddle_fluid DEPS
${
fluid_modules
}
paddle_fluid_api
)
cc_library
(
paddle_fluid DEPS
${
fluid_modules
}
paddle_fluid_api
)
if
(
NOT APPLE
)
# TODO(liuyiqu: Temporarily disable the link flag because it is not support on Mac.
set
(
LINK_FLAGS
"-Wl,--retain-symbols-file
${
CMAKE_CURRENT_SOURCE_DIR
}
/paddle_fluid.sym"
)
set_target_properties
(
paddle_fluid PROPERTIES LINK_FLAGS
"
${
LINK_FLAGS
}
"
)
endif
()
# Create shared library
# Create shared library
cc_library
(
paddle_fluid_shared SHARED
cc_library
(
paddle_fluid_shared SHARED
SRCS io.cc
SRCS io.cc
...
...
paddle/fluid/inference/analysis/data_flow_graph.cc
浏览文件 @
9130a884
...
@@ -90,6 +90,20 @@ std::string DataFlowGraph::DotString() const {
...
@@ -90,6 +90,20 @@ std::string DataFlowGraph::DotString() const {
return
dot
.
Build
();
return
dot
.
Build
();
}
}
std
::
string
DataFlowGraph
::
HumanReadableInfo
(
bool
show_values
,
bool
show_functions
)
const
{
std
::
stringstream
values
,
functions
;
for
(
auto
&
n
:
nodes
.
nodes
())
{
if
(
show_values
&&
n
->
IsValue
())
{
values
<<
n
->
repr
()
<<
"
\n
"
;
}
if
(
show_functions
&&
n
->
IsFunction
())
{
functions
<<
n
->
repr
()
<<
"
\n
"
;
}
}
return
"Values:
\n
"
+
values
.
str
()
+
"
\n\n
"
+
"Functions:
\n
"
+
functions
.
str
();
}
//
//
// NodesBFSIterator
// NodesBFSIterator
//
//
...
@@ -146,7 +160,7 @@ bool GraphTraits<DataFlowGraph>::NodesBFSIterator::operator==(
...
@@ -146,7 +160,7 @@ bool GraphTraits<DataFlowGraph>::NodesBFSIterator::operator==(
if
((
!
queue_
.
empty
())
&&
(
!
other
.
queue_
.
empty
()))
{
if
((
!
queue_
.
empty
())
&&
(
!
other
.
queue_
.
empty
()))
{
return
queue_
.
front
()
==
other
.
queue_
.
front
()
&&
return
queue_
.
front
()
==
other
.
queue_
.
front
()
&&
visited_
.
size
()
==
other
.
visited_
.
size
();
// here need to check the
visited_
.
size
()
==
other
.
visited_
.
size
();
// here need to check the
// equality of queue and
// equality of queue and
// visited. Just a light but week implementation.
// visited. Just a light but week implementation.
}
}
return
false
;
return
false
;
...
@@ -208,6 +222,76 @@ Node *GraphTraits<DataFlowGraph>::NodesDFSIterator::operator->() {
...
@@ -208,6 +222,76 @@ Node *GraphTraits<DataFlowGraph>::NodesDFSIterator::operator->() {
return
stack_
.
top
();
return
stack_
.
top
();
}
}
GraphTraits
<
DataFlowGraph
>::
NodesTSIterator
::
NodesTSIterator
(
const
std
::
vector
<
Node
*>
&
source
)
{
PADDLE_ENFORCE
(
!
source
.
empty
(),
"Start points of topological sorting should not be empty!"
);
std
::
unordered_set
<
Node
*>
visited
;
std
::
unordered_set
<
Node
*>
to_visit
{
source
.
begin
(),
source
.
end
()};
std
::
vector
<
Node
*>
inlink_visited
;
while
(
!
to_visit
.
empty
())
{
std
::
vector
<
Node
*>
queue
(
to_visit
.
begin
(),
to_visit
.
end
());
for
(
auto
*
p
:
queue
)
{
inlink_visited
.
clear
();
std
::
copy_if
(
p
->
inlinks
.
begin
(),
p
->
inlinks
.
end
(),
std
::
back_inserter
(
inlink_visited
),
[
&
](
Node
*
x
)
{
return
visited
.
count
(
x
);
});
if
(
inlink_visited
.
size
()
==
p
->
inlinks
.
size
())
{
sorted_
.
push_back
(
p
);
for
(
auto
*
_
:
p
->
outlinks
)
{
if
(
!
visited
.
count
(
_
))
{
to_visit
.
insert
(
_
);
}
}
to_visit
.
erase
(
p
);
visited
.
insert
(
p
);
}
}
}
}
GraphTraits
<
DataFlowGraph
>::
NodesTSIterator
::
NodesTSIterator
(
const
paddle
::
inference
::
analysis
::
GraphTraits
<
DataFlowGraph
>::
NodesTSIterator
&
other
)
:
sorted_
(
other
.
sorted_
),
cursor_
(
other
.
cursor_
)
{}
Node
&
GraphTraits
<
DataFlowGraph
>::
NodesTSIterator
::
operator
*
()
{
PADDLE_ENFORCE_LT
(
cursor_
,
sorted_
.
size
());
return
*
sorted_
[
cursor_
];
}
paddle
::
inference
::
analysis
::
GraphTraits
<
DataFlowGraph
>::
NodesTSIterator
&
GraphTraits
<
DataFlowGraph
>::
NodesTSIterator
::
operator
++
()
{
if
(
++
cursor_
>=
sorted_
.
size
())
{
sorted_
.
clear
();
cursor_
=
0
;
}
return
*
this
;
}
paddle
::
inference
::
analysis
::
GraphTraits
<
DataFlowGraph
>::
NodesTSIterator
&
GraphTraits
<
DataFlowGraph
>::
NodesTSIterator
::
operator
=
(
const
paddle
::
inference
::
analysis
::
GraphTraits
<
DataFlowGraph
>::
NodesTSIterator
&
other
)
{
cursor_
=
other
.
cursor_
;
sorted_
=
other
.
sorted_
;
return
*
this
;
}
bool
GraphTraits
<
DataFlowGraph
>::
NodesTSIterator
::
operator
==
(
const
paddle
::
inference
::
analysis
::
GraphTraits
<
DataFlowGraph
>::
NodesTSIterator
&
other
)
{
return
sorted_
==
other
.
sorted_
&&
cursor_
==
other
.
cursor_
;
}
Node
*
GraphTraits
<
DataFlowGraph
>::
NodesTSIterator
::
operator
->
()
{
PADDLE_ENFORCE_LT
(
cursor_
,
sorted_
.
size
());
return
sorted_
[
cursor_
];
}
}
// namespace analysis
}
// namespace analysis
}
// namespace inference
}
// namespace inference
}
// namespace paddle
}
// namespace paddle
paddle/fluid/inference/analysis/data_flow_graph.h
浏览文件 @
9130a884
...
@@ -48,6 +48,9 @@ struct DataFlowGraph {
...
@@ -48,6 +48,9 @@ struct DataFlowGraph {
// Output a DOT graph file for debug.
// Output a DOT graph file for debug.
std
::
string
DotString
()
const
;
std
::
string
DotString
()
const
;
std
::
string
HumanReadableInfo
(
bool
show_values
=
true
,
bool
show_functions
=
true
)
const
;
private:
private:
// Remove duplicate edges and so on.
// Remove duplicate edges and so on.
void
Clean
();
void
Clean
();
...
@@ -107,6 +110,32 @@ struct GraphTraits<DataFlowGraph> {
...
@@ -107,6 +110,32 @@ struct GraphTraits<DataFlowGraph> {
std
::
unordered_set
<
Node
*>
visited_
;
std
::
unordered_set
<
Node
*>
visited_
;
};
};
// Topological sorting iterator on nodes.
struct
NodesTSIterator
:
public
std
::
iterator
<
std
::
forward_iterator_tag
,
Node
*>
{
NodesTSIterator
()
=
default
;
explicit
NodesTSIterator
(
const
std
::
vector
<
Node
*>
&
source
);
NodesTSIterator
(
NodesTSIterator
&&
other
)
:
sorted_
(
std
::
move
(
other
.
sorted_
)),
cursor_
(
other
.
cursor_
)
{
other
.
cursor_
=
0
;
}
NodesTSIterator
(
const
NodesTSIterator
&
other
);
Node
&
operator
*
();
NodesTSIterator
&
operator
++
();
// TODO(Superjomn) current implementation just compare the first
// element, need to compare the graph and all the elements in the queue and
// set.
NodesTSIterator
&
operator
=
(
const
NodesTSIterator
&
other
);
bool
operator
==
(
const
NodesTSIterator
&
other
);
bool
operator
!=
(
const
NodesTSIterator
&
other
)
{
return
!
(
*
this
==
other
);
}
Node
*
operator
->
();
private:
std
::
vector
<
Node
*>
sorted_
;
int
cursor_
{
0
};
};
explicit
GraphTraits
(
DataFlowGraph
*
graph
)
:
graph_
(
graph
)
{}
explicit
GraphTraits
(
DataFlowGraph
*
graph
)
:
graph_
(
graph
)
{}
// default use BFS to visit the nodes.
// default use BFS to visit the nodes.
...
@@ -119,17 +148,24 @@ struct GraphTraits<DataFlowGraph> {
...
@@ -119,17 +148,24 @@ struct GraphTraits<DataFlowGraph> {
iterator_range
<
NodesDFSIterator
>
nodes_in_DFS
()
{
iterator_range
<
NodesDFSIterator
>
nodes_in_DFS
()
{
return
iterator_range
<
NodesDFSIterator
>
(
nodes_dfs_begin
(),
nodes_dfs_end
());
return
iterator_range
<
NodesDFSIterator
>
(
nodes_dfs_begin
(),
nodes_dfs_end
());
}
}
iterator_range
<
NodesTSIterator
>
nodes_in_TS
()
{
return
iterator_range
<
NodesTSIterator
>
(
nodes_ts_begin
(),
nodes_ts_end
());
}
private:
private:
NodesBFSIterator
nodes_bfs_begin
()
{
NodesBFSIterator
nodes_bfs_begin
()
{
return
NodesBFSIterator
(
graph_
->
inputs
);
return
NodesBFSIterator
(
graph_
->
inputs
);
}
}
NodesBFSIterator
nodes_bfs_end
()
{
return
NodesBFSIterator
();
}
NodesBFSIterator
nodes_bfs_end
()
{
return
NodesBFSIterator
();
}
NodesDFSIterator
nodes_dfs_begin
()
{
NodesDFSIterator
nodes_dfs_begin
()
{
return
NodesDFSIterator
(
graph_
->
inputs
);
return
NodesDFSIterator
(
graph_
->
inputs
);
}
}
NodesDFSIterator
nodes_dfs_end
()
{
return
NodesDFSIterator
();
}
NodesDFSIterator
nodes_dfs_end
()
{
return
NodesDFSIterator
();
}
NodesTSIterator
nodes_ts_begin
()
{
return
NodesTSIterator
(
graph_
->
inputs
);
}
NodesTSIterator
nodes_ts_end
()
{
return
NodesTSIterator
();
}
private:
private:
DataFlowGraph
*
graph_
;
DataFlowGraph
*
graph_
;
};
};
...
...
paddle/fluid/inference/analysis/data_flow_graph_tester.cc
浏览文件 @
9130a884
...
@@ -24,11 +24,11 @@ TEST(DataFlowGraph, BFS) {
...
@@ -24,11 +24,11 @@ TEST(DataFlowGraph, BFS) {
auto
dfg
=
ProgramDescToDFG
(
desc
);
auto
dfg
=
ProgramDescToDFG
(
desc
);
dfg
.
Build
();
dfg
.
Build
();
for
(
auto
*
in
:
dfg
.
inputs
)
{
for
(
auto
*
in
:
dfg
.
inputs
)
{
LOG
(
INFO
)
<<
"inputs: "
<<
in
->
name
()
<<
" "
LOG
(
INFO
)
<<
"inputs: "
<<
in
->
name
()
<<
" "
<<
static_cast
<
int
>
(
in
->
type
());
<<
static_cast
<
int
>
(
in
->
type
());
}
}
for
(
auto
*
out
:
dfg
.
outputs
)
{
for
(
auto
*
out
:
dfg
.
outputs
)
{
LOG
(
INFO
)
<<
"outputs: "
<<
out
->
name
()
<<
" "
LOG
(
INFO
)
<<
"outputs: "
<<
out
->
name
()
<<
" "
<<
static_cast
<
int
>
(
out
->
type
());
<<
static_cast
<
int
>
(
out
->
type
());
}
}
...
@@ -57,6 +57,71 @@ TEST(DataFlowGraph, DFS) {
...
@@ -57,6 +57,71 @@ TEST(DataFlowGraph, DFS) {
ASSERT_EQ
(
count
,
dfg
.
nodes
.
size
());
ASSERT_EQ
(
count
,
dfg
.
nodes
.
size
());
}
}
// Topological sorting.
/*
* Graph topology
* inputs: 0, 1, 2
* 0 -> 4
* 0 -> 5
* 1 -> 6
* 2 -> 7
* 4 -> 5
* 4 -> 7
* 4 -> 3
* 7 -> 3
*/
TEST
(
DataFlowGraph
,
TS
)
{
DataFlowGraph
graph
;
for
(
int
i
=
0
;
i
<
8
;
i
++
)
{
auto
*
node
=
graph
.
nodes
.
Create
(
Node
::
Type
::
kValue
);
node
->
SetName
(
"node-"
+
std
::
to_string
(
i
));
}
auto
add_link
=
[
&
](
int
i
,
int
j
)
{
Node
*
source
=
graph
.
nodes
.
GetMutable
(
i
);
Node
*
target
=
graph
.
nodes
.
GetMutable
(
j
);
target
->
inlinks
.
push_back
(
source
);
source
->
outlinks
.
push_back
(
target
);
};
graph
.
inputs
.
push_back
(
graph
.
nodes
.
GetMutable
(
0
));
graph
.
inputs
.
push_back
(
graph
.
nodes
.
GetMutable
(
1
));
graph
.
inputs
.
push_back
(
graph
.
nodes
.
GetMutable
(
2
));
add_link
(
0
,
4
);
add_link
(
0
,
5
);
add_link
(
1
,
6
);
add_link
(
2
,
7
);
add_link
(
4
,
5
);
add_link
(
4
,
7
);
add_link
(
4
,
3
);
add_link
(
7
,
3
);
auto
its
=
GraphTraits
<
DataFlowGraph
>
(
&
graph
).
nodes_in_TS
();
std
::
vector
<
int
>
sorted_ids
;
for
(
auto
it
=
its
.
begin
();
it
!=
its
.
end
();
++
it
)
{
LOG
(
INFO
)
<<
it
->
name
();
sorted_ids
.
push_back
(
it
->
id
());
}
// Assert a occurs prior to b in the sorted_ids.
auto
assert_positive_sequence_pair
=
[
&
](
int
a
,
int
b
)
{
auto
a_offset
=
std
::
find
(
sorted_ids
.
begin
(),
sorted_ids
.
end
(),
a
);
auto
b_offset
=
std
::
find
(
sorted_ids
.
begin
(),
sorted_ids
.
end
(),
b
);
ASSERT_LT
(
a_offset
,
b_offset
);
};
assert_positive_sequence_pair
(
2
,
7
);
assert_positive_sequence_pair
(
7
,
3
);
assert_positive_sequence_pair
(
4
,
3
);
assert_positive_sequence_pair
(
0
,
4
);
assert_positive_sequence_pair
(
0
,
5
);
assert_positive_sequence_pair
(
1
,
6
);
assert_positive_sequence_pair
(
4
,
5
);
assert_positive_sequence_pair
(
4
,
7
);
}
}
// namespace analysis
}
// namespace analysis
}
// namespace inference
}
// namespace inference
}
// namespace paddle
}
// namespace paddle
paddle/fluid/inference/paddle_fluid.sym
0 → 100644
浏览文件 @
9130a884
*paddle*
paddle/fluid/operators/conv_mkldnn_op.cc
浏览文件 @
9130a884
...
@@ -29,6 +29,79 @@ using mkldnn::stream;
...
@@ -29,6 +29,79 @@ using mkldnn::stream;
using
platform
::
to_void_cast
;
using
platform
::
to_void_cast
;
using
platform
::
GetMKLDNNFormat
;
using
platform
::
GetMKLDNNFormat
;
class
ConvMKLDNNHandler
:
public
platform
::
MKLDNNHandler
{
public:
ConvMKLDNNHandler
(
std
::
shared_ptr
<
mkldnn
::
convolution_forward
::
primitive_desc
>
conv_pd
,
const
platform
::
MKLDNNDeviceContext
&
dev_ctx
,
mkldnn
::
engine
engine
,
const
std
::
string
&
base_key
)
:
platform
::
MKLDNNHandler
(
dev_ctx
,
engine
,
base_key
)
{
conv_pd_
=
conv_pd
;
}
std
::
shared_ptr
<
mkldnn
::
memory
>
AcquireDstMemoryFromPrimitive
(
void
*
ptr
)
{
return
this
->
AcquireMemoryFromPrimitive
(
conv_pd_
->
dst_primitive_desc
(),
ptr
,
"@dst_mem_p"
);
}
std
::
shared_ptr
<
mkldnn
::
memory
>
AcquireSrcMemoryFromPrimitive
(
const
std
::
shared_ptr
<
mkldnn
::
memory
>
user_memory_p
,
std
::
vector
<
mkldnn
::
primitive
>&
pipeline
)
{
auto
src_pd
=
conv_pd_
->
src_primitive_desc
();
auto
user_pd
=
user_memory_p
->
get_primitive_desc
();
return
this
->
AcquireMemory
(
src_pd
,
user_pd
,
user_memory_p
,
"@src_mem_p"
,
pipeline
);
}
std
::
shared_ptr
<
mkldnn
::
memory
>
AcquireWeightsMemoryFromPrimitive
(
const
std
::
shared_ptr
<
mkldnn
::
memory
>
user_weights_memory_p
,
std
::
vector
<
mkldnn
::
primitive
>&
pipeline
)
{
auto
user_weights_pd
=
user_weights_memory_p
->
get_primitive_desc
();
auto
weights_pd
=
conv_pd_
->
weights_primitive_desc
();
return
this
->
AcquireMemory
(
weights_pd
,
user_weights_pd
,
user_weights_memory_p
,
"@weights_mem_p"
,
pipeline
);
}
std
::
shared_ptr
<
mkldnn
::
convolution_forward
>
AcquireConvolution
(
std
::
shared_ptr
<
mkldnn
::
memory
>
src_memory_p
,
std
::
shared_ptr
<
mkldnn
::
memory
>
weights_memory_p
,
std
::
shared_ptr
<
mkldnn
::
memory
>
dst_memory_p
)
{
auto
prim_key
=
key_
+
"@conv_p"
;
auto
prim_desc_key
=
key_
+
"@conv_pd"
;
auto
conv_p
=
std
::
static_pointer_cast
<
mkldnn
::
convolution_forward
>
(
dev_ctx_
.
GetBlob
(
prim_key
));
PADDLE_ENFORCE
((
conv_p
!=
nullptr
)
||
(
is_reusing_
==
false
),
"Fail to find convolution primitive in device context"
);
if
(
conv_p
==
nullptr
)
{
conv_p
=
std
::
make_shared
<
mkldnn
::
convolution_forward
>
(
*
conv_pd_
,
*
(
src_memory_p
),
*
(
weights_memory_p
.
get
()),
*
(
dst_memory_p
.
get
()));
dev_ctx_
.
SetBlob
(
prim_key
,
conv_p
);
}
else
{
is_reusing_
=
true
;
}
return
conv_p
;
}
// Generate keys for storing/retriving primitives for this operator
// TODO(jczaja): Make hashing function more optimial
static
std
::
string
GetHash
(
memory
::
dims
&
input_dims
,
memory
::
dims
&
weights_dims
,
std
::
vector
<
int
>&
strides
,
std
::
vector
<
int
>&
paddings
,
std
::
vector
<
int
>&
dilations
,
int
groups
,
const
std
::
string
&
suffix
)
{
return
dims2str
(
input_dims
)
+
dims2str
(
weights_dims
)
+
dims2str
(
strides
)
+
dims2str
(
paddings
)
+
dims2str
(
dilations
)
+
std
::
to_string
(
groups
)
+
suffix
;
}
private:
std
::
shared_ptr
<
mkldnn
::
convolution_forward
::
primitive_desc
>
conv_pd_
;
};
template
<
typename
T
>
template
<
typename
T
>
class
ConvMKLDNNOpKernel
:
public
paddle
::
framework
::
OpKernel
<
T
>
{
class
ConvMKLDNNOpKernel
:
public
paddle
::
framework
::
OpKernel
<
T
>
{
public:
public:
...
@@ -36,10 +109,6 @@ class ConvMKLDNNOpKernel : public paddle::framework::OpKernel<T> {
...
@@ -36,10 +109,6 @@ class ConvMKLDNNOpKernel : public paddle::framework::OpKernel<T> {
PADDLE_ENFORCE
(
paddle
::
platform
::
is_cpu_place
(
ctx
.
GetPlace
()),
PADDLE_ENFORCE
(
paddle
::
platform
::
is_cpu_place
(
ctx
.
GetPlace
()),
"It must use CPUPlace."
);
"It must use CPUPlace."
);
// Get unique name for index
const
std
::
string
key
=
ctx
.
op
().
Output
(
"Output"
);
const
std
::
string
key_conv_pd
=
key
+
"@conv_pd"
;
auto
&
dev_ctx
=
auto
&
dev_ctx
=
ctx
.
template
device_context
<
paddle
::
platform
::
MKLDNNDeviceContext
>();
ctx
.
template
device_context
<
paddle
::
platform
::
MKLDNNDeviceContext
>();
const
auto
&
mkldnn_engine
=
dev_ctx
.
GetEngine
();
const
auto
&
mkldnn_engine
=
dev_ctx
.
GetEngine
();
...
@@ -80,68 +149,62 @@ class ConvMKLDNNOpKernel : public paddle::framework::OpKernel<T> {
...
@@ -80,68 +149,62 @@ class ConvMKLDNNOpKernel : public paddle::framework::OpKernel<T> {
paddle
::
framework
::
vectorize2int
(
filter
->
dims
());
paddle
::
framework
::
vectorize2int
(
filter
->
dims
());
std
::
vector
<
int
>
dst_tz
=
paddle
::
framework
::
vectorize2int
(
output
->
dims
());
std
::
vector
<
int
>
dst_tz
=
paddle
::
framework
::
vectorize2int
(
output
->
dims
());
// create mkldnn memory from input tensors (data/weights)
// Get unique name for storing MKLDNN primitives
auto
user_src_memory
=
memory
(
const
std
::
string
key
=
ConvMKLDNNHandler
::
GetHash
(
{{{
src_tz
},
memory
::
data_type
::
f32
,
input
->
format
()},
mkldnn_engine
},
src_tz
,
weights_tz
,
strides
,
paddings
,
dilations
,
groups
,
to_void_cast
(
input_data
));
ctx
.
op
().
Output
(
"Output"
));
auto
user_weights_memory
=
const
std
::
string
key_conv_pd
=
key
+
"@conv_pd"
;
memory
({{{
weights_tz
},
memory
::
data_type
::
f32
,
filter
->
format
()},
mkldnn_engine
},
std
::
vector
<
primitive
>
pipeline
;
to_void_cast
(
filter_data
));
auto
user_src_md
=
platform
::
MKLDNNMemDesc
(
{
src_tz
},
platform
::
MKLDNNGetDataType
<
T
>
(),
input
->
format
());
auto
user_weights_md
=
platform
::
MKLDNNMemDesc
(
{
weights_tz
},
platform
::
MKLDNNGetDataType
<
T
>
(),
filter
->
format
());
/* create memory descriptor for convolution without specified format
/* create memory descriptor for convolution without specified format
* ('any') which lets a primitive (convolution in this case) choose
* ('any') which lets a primitive (convolution in this case) choose
* the memory format preferred for best performance
* the memory format preferred for best performance
*/
*/
auto
src_md
=
platform
::
MKLDNNMemDesc
(
src_tz
,
memory
::
data_type
::
f32
,
auto
src_md
=
platform
::
MKLDNNMemDesc
(
memory
::
format
::
any
);
src_tz
,
platform
::
MKLDNNGetDataType
<
T
>
(),
memory
::
format
::
any
);
auto
weights_md
=
platform
::
MKLDNNMemDesc
(
auto
weights_md
=
platform
::
MKLDNNMemDesc
(
weights_tz
,
memory
::
data_type
::
f32
,
memory
::
format
::
any
);
weights_tz
,
platform
::
MKLDNNGetDataType
<
T
>
()
,
memory
::
format
::
any
);
auto
dst_md
=
platform
::
MKLDNNMemDesc
(
dst_tz
,
memory
::
data_type
::
f32
,
auto
dst_md
=
platform
::
MKLDNNMemDesc
(
memory
::
format
::
any
);
dst_tz
,
platform
::
MKLDNNGetDataType
<
T
>
(),
memory
::
format
::
any
);
// create a conv primitive descriptor and save it for usage in backward
// create a conv primitive descriptor and save it for usage in backward
std
::
shared_ptr
<
conv_fwd
::
primitive_desc
>
conv_pd
=
ConvFwdPrimitiveDesc
(
std
::
shared_ptr
<
conv_fwd
::
primitive_desc
>
conv_pd
=
ConvFwdPrimitiveDesc
(
src_md
,
weights_md
,
dst_md
,
strides
,
paddings
,
mkldnn_engine
);
src_md
,
weights_md
,
dst_md
,
strides
,
paddings
,
mkldnn_engine
);
// Save conv_pd/src_memory/weights_memory for backward pass
dev_ctx
.
SetBlob
(
key_conv_pd
,
conv_pd
);
// create reorder primitive if the input format is not the preferred one
ConvMKLDNNHandler
handler
(
conv_pd
,
dev_ctx
,
mkldnn_engine
,
key
);
auto
src_memory
=
user_src_memory
;
primitive
reorder_src
;
bool
is_src_reordered
=
false
;
if
(
memory
::
primitive_desc
(
conv_pd
->
src_primitive_desc
())
!=
user_src_memory
.
get_primitive_desc
())
{
src_memory
=
memory
(
conv_pd
->
src_primitive_desc
());
reorder_src
=
reorder
(
user_src_memory
,
src_memory
);
is_src_reordered
=
true
;
}
auto
weights_memory
=
user_weights_memory
;
primitive
reorder_weights
;
bool
is_weights_reordered
=
false
;
if
(
memory
::
primitive_desc
(
conv_pd
->
weights_primitive_desc
())
!=
user_weights_memory
.
get_primitive_desc
())
{
weights_memory
=
memory
(
conv_pd
->
weights_primitive_desc
());
reorder_weights
=
reorder
(
user_weights_memory
,
weights_memory
);
is_weights_reordered
=
true
;
}
// create memory primitive for conv dst
// create mkldnn memory from input tensors (data/weights)
auto
dst_memory
=
memory
(
conv_pd
->
dst_primitive_desc
(),
output_data
);
auto
user_src_memory_p
=
handler
.
AcquireSrcMemory
(
user_src_md
,
to_void_cast
<
T
>
(
input_data
));
auto
user_weights_memory_p
=
handler
.
AcquireWeightsMemory
(
user_weights_md
,
to_void_cast
<
T
>
(
filter_data
));
// create reorder primitive if the input format is not the preferred one
auto
src_memory_p
=
handler
.
AcquireSrcMemoryFromPrimitive
(
user_src_memory_p
,
pipeline
);
auto
weights_memory_p
=
handler
.
AcquireWeightsMemoryFromPrimitive
(
user_weights_memory_p
,
pipeline
);
auto
dst_memory_p
=
handler
.
AcquireDstMemoryFromPrimitive
(
to_void_cast
<
T
>
(
output_data
));
// create convolution op primitive
// create convolution op primitive
auto
conv_prim
=
conv_fwd
(
*
conv_pd
,
src_memory
,
weights_memory
,
dst_memory
);
auto
conv_p
=
handler
.
AcquireConvolution
(
src_memory_p
,
weights_memory_p
,
dst_memory_p
);
// push primitive to stream and wait until it's executed
// push primitive to stream and wait until it's executed
std
::
vector
<
primitive
>
pipeline
;
pipeline
.
push_back
(
*
conv_p
);
if
(
is_src_reordered
)
pipeline
.
push_back
(
reorder_src
);
if
(
is_weights_reordered
)
pipeline
.
push_back
(
reorder_weights
);
pipeline
.
push_back
(
conv_prim
);
stream
(
stream
::
kind
::
eager
).
submit
(
pipeline
).
wait
();
stream
(
stream
::
kind
::
eager
).
submit
(
pipeline
).
wait
();
// Save conv_pd/src_memory/weights_memory for backward pass
dev_ctx
.
SetBlob
(
key_conv_pd
,
conv_pd
);
output
->
set_layout
(
DataLayout
::
kMKLDNN
);
output
->
set_layout
(
DataLayout
::
kMKLDNN
);
output
->
set_format
(
GetMKLDNNFormat
(
dst_memory
));
output
->
set_format
(
GetMKLDNNFormat
(
*
dst_memory_p
));
}
}
private:
private:
...
@@ -197,13 +260,10 @@ class ConvMKLDNNGradOpKernel : public paddle::framework::OpKernel<T> {
...
@@ -197,13 +260,10 @@ class ConvMKLDNNGradOpKernel : public paddle::framework::OpKernel<T> {
if
(
!
input_grad
&&
!
filter_grad
)
return
;
if
(
!
input_grad
&&
!
filter_grad
)
return
;
// Get an unique name from "argument" name of "Output" variable
// This name will be used as key when saving info into device context
const
std
::
string
key
=
ctx
.
op
().
Input
(
"Output"
);
const
std
::
string
key_conv_pd
=
key
+
"@conv_pd"
;
std
::
vector
<
int
>
strides
=
ctx
.
Attr
<
std
::
vector
<
int
>>
(
"strides"
);
std
::
vector
<
int
>
strides
=
ctx
.
Attr
<
std
::
vector
<
int
>>
(
"strides"
);
std
::
vector
<
int
>
paddings
=
ctx
.
Attr
<
std
::
vector
<
int
>>
(
"paddings"
);
std
::
vector
<
int
>
paddings
=
ctx
.
Attr
<
std
::
vector
<
int
>>
(
"paddings"
);
std
::
vector
<
int
>
dilations
=
ctx
.
Attr
<
std
::
vector
<
int
>>
(
"dilations"
);
int
groups
=
ctx
.
Attr
<
int
>
(
"groups"
);
const
T
*
input_data
=
input
->
data
<
T
>
();
const
T
*
input_data
=
input
->
data
<
T
>
();
const
T
*
filter_data
=
filter
->
data
<
T
>
();
const
T
*
filter_data
=
filter
->
data
<
T
>
();
...
@@ -223,6 +283,14 @@ class ConvMKLDNNGradOpKernel : public paddle::framework::OpKernel<T> {
...
@@ -223,6 +283,14 @@ class ConvMKLDNNGradOpKernel : public paddle::framework::OpKernel<T> {
paddle
::
framework
::
vectorize2int
(
filter
->
dims
());
paddle
::
framework
::
vectorize2int
(
filter
->
dims
());
std
::
vector
<
int
>
dst_tz
=
paddle
::
framework
::
vectorize2int
(
output
->
dims
());
std
::
vector
<
int
>
dst_tz
=
paddle
::
framework
::
vectorize2int
(
output
->
dims
());
// Get an unique name from "argument" name of "Output" variable
// This name will be used as key when saving info into device context
const
std
::
string
key
=
ConvMKLDNNHandler
::
GetHash
(
src_tz
,
weights_tz
,
strides
,
paddings
,
dilations
,
groups
,
ctx
.
op
().
Input
(
"Output"
));
const
std
::
string
key_conv_pd
=
key
+
"@conv_pd"
;
// create mkldnn memory from input tensors (input/weights/output_grad)
// create mkldnn memory from input tensors (input/weights/output_grad)
auto
user_src_memory
=
memory
(
auto
user_src_memory
=
memory
(
{{{
src_tz
},
memory
::
data_type
::
f32
,
input
->
format
()},
mkldnn_engine
},
{{{
src_tz
},
memory
::
data_type
::
f32
,
input
->
format
()},
mkldnn_engine
},
...
...
paddle/fluid/operators/detection/rpn_target_assign_op.cc
浏览文件 @
9130a884
...
@@ -86,8 +86,9 @@ class RpnTargetAssignKernel : public framework::OpKernel<T> {
...
@@ -86,8 +86,9 @@ class RpnTargetAssignKernel : public framework::OpKernel<T> {
std
::
minstd_rand
engine
,
std
::
minstd_rand
engine
,
std
::
vector
<
int
>*
inds
)
const
{
std
::
vector
<
int
>*
inds
)
const
{
std
::
uniform_real_distribution
<
float
>
uniform
(
0
,
1
);
std
::
uniform_real_distribution
<
float
>
uniform
(
0
,
1
);
if
(
inds
->
size
()
>
num
)
{
const
int64_t
size
=
static_cast
<
int64_t
>
(
inds
->
size
());
for
(
int
i
=
num
;
i
<
inds
->
size
();
++
i
)
{
if
(
size
>
num
)
{
for
(
int64_t
i
=
num
;
i
<
size
;
++
i
)
{
int
rng_ind
=
std
::
floor
(
uniform
(
engine
)
*
i
);
int
rng_ind
=
std
::
floor
(
uniform
(
engine
)
*
i
);
if
(
rng_ind
<
num
)
if
(
rng_ind
<
num
)
std
::
iter_swap
(
inds
->
begin
()
+
rng_ind
+
offset
,
std
::
iter_swap
(
inds
->
begin
()
+
rng_ind
+
offset
,
...
...
paddle/fluid/operators/im2sequence_op.cc
浏览文件 @
9130a884
...
@@ -13,6 +13,7 @@ See the License for the specific language governing permissions and
...
@@ -13,6 +13,7 @@ See the License for the specific language governing permissions and
limitations under the License. */
limitations under the License. */
#include "paddle/fluid/operators/im2sequence_op.h"
#include "paddle/fluid/operators/im2sequence_op.h"
#include <string>
#include <vector>
#include <vector>
namespace
paddle
{
namespace
paddle
{
...
@@ -28,20 +29,19 @@ class Im2SequenceOp : public framework::OperatorWithKernel {
...
@@ -28,20 +29,19 @@ class Im2SequenceOp : public framework::OperatorWithKernel {
"Input(X) of Im2SequenceOp should not be null."
);
"Input(X) of Im2SequenceOp should not be null."
);
PADDLE_ENFORCE
(
ctx
->
HasOutput
(
"Out"
),
PADDLE_ENFORCE
(
ctx
->
HasOutput
(
"Out"
),
"Output(Out) of Im2SequenceOp op should not be null."
);
"Output(Out) of Im2SequenceOp op should not be null."
);
auto
in_dim
=
ctx
->
GetInputDim
(
"X"
);
auto
in_dim
=
ctx
->
GetInputDim
(
"X"
);
PADDLE_ENFORCE_EQ
(
in_dim
.
size
(),
4
,
PADDLE_ENFORCE_EQ
(
in_dim
.
size
(),
4
,
"Input(X) format must be 4D tensor, eg., NCHW."
);
"Input(X) format must be 4D tensor, eg., NCHW."
);
auto
kernels
=
ctx
->
Attrs
().
Get
<
std
::
vector
<
int
>>
(
"kernels"
);
auto
strides
=
ctx
->
Attrs
().
Get
<
std
::
vector
<
int
>>
(
"strides"
);
auto
paddings
=
ctx
->
Attrs
().
Get
<
std
::
vector
<
int
>>
(
"paddings"
);
int
batch_size
=
in_dim
[
0
];
int
batch_size
=
in_dim
[
0
];
int
img_channels
=
in_dim
[
1
];
int
img_channels
=
in_dim
[
1
];
int
img_height
=
in_dim
[
2
];
int
img_height
=
in_dim
[
2
];
int
img_width
=
in_dim
[
3
];
int
img_width
=
in_dim
[
3
];
auto
kernels
=
ctx
->
Attrs
().
Get
<
std
::
vector
<
int
>>
(
"kernels"
);
auto
strides
=
ctx
->
Attrs
().
Get
<
std
::
vector
<
int
>>
(
"strides"
);
auto
paddings
=
ctx
->
Attrs
().
Get
<
std
::
vector
<
int
>>
(
"paddings"
);
int
output_height
=
Im2SeqOutputSize
(
img_height
,
kernels
[
0
],
paddings
[
0
],
int
output_height
=
Im2SeqOutputSize
(
img_height
,
kernels
[
0
],
paddings
[
0
],
paddings
[
2
],
strides
[
0
]);
paddings
[
2
],
strides
[
0
]);
int
output_width
=
Im2SeqOutputSize
(
img_width
,
kernels
[
1
],
paddings
[
1
],
int
output_width
=
Im2SeqOutputSize
(
img_width
,
kernels
[
1
],
paddings
[
1
],
...
@@ -61,6 +61,10 @@ class Im2SequenceOpMaker : public framework::OpProtoAndCheckerMaker {
...
@@ -61,6 +61,10 @@ class Im2SequenceOpMaker : public framework::OpProtoAndCheckerMaker {
"C: channels"
"C: channels"
"H: height"
"H: height"
"W: width"
);
"W: width"
);
AddInput
(
"Y"
,
"(Tensor) The input tensor of image real size(H, W)."
"2-D with shape [batchsize, 2]"
)
.
AsDispensable
();
AddOutput
(
"Out"
,
"(LodTensor) The output data of im2sequence op,"
);
AddOutput
(
"Out"
,
"(LodTensor) The output data of im2sequence op,"
);
AddAttr
<
std
::
vector
<
int
>>
(
"kernels"
,
AddAttr
<
std
::
vector
<
int
>>
(
"kernels"
,
"(vector<int>), the "
"(vector<int>), the "
...
@@ -73,6 +77,13 @@ class Im2SequenceOpMaker : public framework::OpProtoAndCheckerMaker {
...
@@ -73,6 +77,13 @@ class Im2SequenceOpMaker : public framework::OpProtoAndCheckerMaker {
"(vector<int> default:{0, 0, 0, 0}), the "
"(vector<int> default:{0, 0, 0, 0}), the "
"paddings(up_pad, left_pad, down_pad, right_pad)"
)
"paddings(up_pad, left_pad, down_pad, right_pad)"
)
.
SetDefault
({
0
,
0
,
0
,
0
});
.
SetDefault
({
0
,
0
,
0
,
0
});
AddAttr
<
std
::
vector
<
int
>>
(
"out_stride"
,
"the attribute is valid only when input(Y)"
"is not NULL.this attribute represents the"
"scaling of the pic through the CNN"
"(vector<int> dedault:{1,1}),the out_stride"
" (out_stride_height, out_stride_width)"
)
.
SetDefault
({
1
,
1
});
AddComment
(
R"DOC(
AddComment
(
R"DOC(
This op uses kernels to scan images and converts these images to sequences.
This op uses kernels to scan images and converts these images to sequences.
After expanding, The number of time steps are output_height * output_width
After expanding, The number of time steps are output_height * output_width
...
@@ -123,7 +134,7 @@ output.data = [[ 6. 2. 8. 3. 2. 4. 6. 3.]
...
@@ -123,7 +134,7 @@ output.data = [[ 6. 2. 8. 3. 2. 4. 6. 3.]
[ 7. 1. 7. 9. 2. 1. 3. 5.]
[ 7. 1. 7. 9. 2. 1. 3. 5.]
[ 5. 7. 2. 4. 1. 3. 9. 0.]
[ 5. 7. 2. 4. 1. 3. 9. 0.]
[ 7. 9. 4. 8. 3. 5. 0. 8.]]
[ 7. 9. 4. 8. 3. 5. 0. 8.]]
output.dims = {8,
9
}
output.dims = {8,
8
}
output.lod = [[0, 4, 8]]
output.lod = [[0, 4, 8]]
)DOC"
);
)DOC"
);
...
...
paddle/fluid/operators/im2sequence_op.h
浏览文件 @
9130a884
...
@@ -13,6 +13,7 @@
...
@@ -13,6 +13,7 @@
limitations under the License. */
limitations under the License. */
#pragma once
#pragma once
#include <string>
#include <vector>
#include <vector>
#include "paddle/fluid/framework/data_layout.h"
#include "paddle/fluid/framework/data_layout.h"
#include "paddle/fluid/framework/eigen.h"
#include "paddle/fluid/framework/eigen.h"
...
@@ -39,50 +40,106 @@ class Im2SequenceKernel : public framework::OpKernel<T> {
...
@@ -39,50 +40,106 @@ class Im2SequenceKernel : public framework::OpKernel<T> {
void
Compute
(
const
framework
::
ExecutionContext
&
ctx
)
const
override
{
void
Compute
(
const
framework
::
ExecutionContext
&
ctx
)
const
override
{
const
Tensor
*
in
=
ctx
.
Input
<
Tensor
>
(
"X"
);
const
Tensor
*
in
=
ctx
.
Input
<
Tensor
>
(
"X"
);
LoDTensor
*
out
=
ctx
.
Output
<
LoDTensor
>
(
"Out"
);
LoDTensor
*
out
=
ctx
.
Output
<
LoDTensor
>
(
"Out"
);
out
->
mutable_data
<
T
>
(
ctx
.
GetPlace
());
// TODO(wanghaoshuang): Add layout checker after 'set_layout'
// being available for python API
// PADDLE_ENFORCE_EQ(in->layout(), framework::DataLayout::kNCHW,
// "Input(X) layout must be NCHW");
auto
in_dim
=
in
->
dims
();
auto
in_dim
=
in
->
dims
();
int
batch_size
=
in_dim
[
0
];
int
batch_size
=
in_dim
[
0
];
int
img_channels
=
in_dim
[
1
];
int
img_channels
=
in_dim
[
1
];
int
img_height
=
in_dim
[
2
];
int
img_height
=
in_dim
[
2
];
int
img_width
=
in_dim
[
3
];
int
img_width
=
in_dim
[
3
];
auto
kernels
=
ctx
.
Attr
<
std
::
vector
<
int
>>
(
"kernels"
);
auto
kernels
=
ctx
.
Attr
<
std
::
vector
<
int
>>
(
"kernels"
);
auto
strides
=
ctx
.
Attr
<
std
::
vector
<
int
>>
(
"strides"
);
auto
strides
=
ctx
.
Attr
<
std
::
vector
<
int
>>
(
"strides"
);
auto
paddings
=
ctx
.
Attr
<
std
::
vector
<
int
>>
(
"paddings"
);
auto
paddings
=
ctx
.
Attr
<
std
::
vector
<
int
>>
(
"paddings"
);
int
output_height
=
Im2SeqOutputSize
(
img_height
,
kernels
[
0
],
paddings
[
0
],
if
(
ctx
.
HasInput
(
"Y"
)
&&
batch_size
>
1
)
{
paddings
[
2
],
strides
[
0
]);
const
Tensor
*
imgrealsize
=
ctx
.
Input
<
Tensor
>
(
"Y"
);
int
output_width
=
Im2SeqOutputSize
(
img_width
,
kernels
[
1
],
paddings
[
1
],
auto
out_stride
=
ctx
.
Attr
<
std
::
vector
<
int
>>
(
"out_stride"
);
paddings
[
3
],
strides
[
1
]);
Tensor
cpu_shape_tensor
;
TensorCopySync
(
*
imgrealsize
,
platform
::
CPUPlace
(),
&
cpu_shape_tensor
);
const
std
::
vector
<
int
>
dilations
({
1
,
1
});
std
::
vector
<
int
>
imgreal_h
;
std
::
vector
<
int
>
imgreal_w
;
auto
out_dims
=
out
->
dims
();
std
::
vector
<
int
>
output_height
;
out
->
Resize
({
batch_size
,
out
->
numel
()
/
batch_size
});
std
::
vector
<
int
>
output_width
;
for
(
int
i
=
0
;
i
<
batch_size
;
i
++
)
{
int
result
=
0
;
const
Tensor
src
=
for
(
int
i
=
0
;
i
<
batch_size
;
i
++
)
{
in
->
Slice
(
i
,
i
+
1
).
Resize
({
img_channels
,
img_height
,
img_width
});
int
tmp_real_h
=
static_cast
<
int
>
((
cpu_shape_tensor
.
data
<
T
>
())[
2
*
i
]);
Tensor
dst
=
out
->
Slice
(
i
,
i
+
1
).
Resize
(
int
tmp_real_w
=
{
output_height
,
output_width
,
img_channels
,
kernels
[
0
],
kernels
[
1
]});
static_cast
<
int
>
((
cpu_shape_tensor
.
data
<
T
>
())[
2
*
i
+
1
]);
if
(
tmp_real_h
%
out_stride
[
0
]
==
0
)
{
math
::
Im2ColFunctor
<
math
::
ColFormat
::
kOCF
,
DeviceContext
,
T
>
f
;
tmp_real_h
=
tmp_real_h
/
out_stride
[
0
];
auto
&
dev_ctx
=
ctx
.
template
device_context
<
DeviceContext
>();
}
else
{
f
(
dev_ctx
,
src
,
dilations
,
strides
,
paddings
,
&
dst
);
tmp_real_h
=
tmp_real_h
/
out_stride
[
0
]
+
1
;
}
}
out
->
Resize
(
out_dims
);
if
(
tmp_real_w
%
out_stride
[
1
]
==
0
)
{
tmp_real_w
=
tmp_real_w
/
out_stride
[
1
];
// set lod information
}
else
{
// TODO(wanghaoshuang): Move this to InferShape
tmp_real_w
=
tmp_real_w
/
out_stride
[
1
]
+
1
;
framework
::
LoD
lod
(
1
);
}
lod
[
0
].
reserve
(
batch_size
+
1
);
imgreal_h
.
push_back
(
tmp_real_h
);
for
(
int
i
=
0
,
offset
=
0
;
i
<
batch_size
+
1
;
++
i
)
{
imgreal_w
.
push_back
(
tmp_real_w
);
output_height
.
push_back
(
Im2SeqOutputSize
(
imgreal_h
[
i
],
kernels
[
0
],
paddings
[
0
],
paddings
[
2
],
strides
[
0
]));
output_width
.
push_back
(
Im2SeqOutputSize
(
imgreal_w
[
i
],
kernels
[
1
],
paddings
[
1
],
paddings
[
3
],
strides
[
1
]));
result
+=
output_height
[
i
]
*
output_width
[
i
];
}
out
->
mutable_data
<
T
>
({
result
,
img_channels
*
kernels
[
0
]
*
kernels
[
1
]},
ctx
.
GetPlace
());
const
std
::
vector
<
int
>
dilations
({
1
,
1
});
int
offset_out
=
0
;
for
(
int
i
=
0
;
i
<
batch_size
;
i
++
)
{
const
Tensor
src
=
in
->
Slice
(
i
,
i
+
1
).
Resize
({
img_channels
,
img_height
,
img_width
});
Tensor
dst
=
out
->
Slice
(
offset_out
,
offset_out
+
output_height
[
i
]
*
output_width
[
i
])
.
Resize
({
output_height
[
i
],
output_width
[
i
],
img_channels
,
kernels
[
0
],
kernels
[
1
]});
offset_out
+=
output_height
[
i
]
*
output_width
[
i
];
math
::
Im2ColFunctor
<
math
::
ColFormat
::
kOCF
,
DeviceContext
,
T
>
f
;
auto
&
dev_ctx
=
ctx
.
template
device_context
<
DeviceContext
>();
f
(
dev_ctx
,
src
,
dilations
,
strides
,
paddings
,
&
dst
);
}
framework
::
LoD
lod
(
1
);
lod
[
0
].
reserve
(
batch_size
+
1
);
int
offset
=
0
;
lod
[
0
].
push_back
(
offset
);
for
(
int
i
=
0
;
i
<
batch_size
;
++
i
)
{
offset
+=
output_height
[
i
]
*
output_width
[
i
];
lod
[
0
].
push_back
(
offset
);
}
out
->
set_lod
(
lod
);
}
else
{
out
->
mutable_data
<
T
>
(
ctx
.
GetPlace
());
int
output_height
=
Im2SeqOutputSize
(
img_height
,
kernels
[
0
],
paddings
[
0
],
paddings
[
2
],
strides
[
0
]);
int
output_width
=
Im2SeqOutputSize
(
img_width
,
kernels
[
1
],
paddings
[
1
],
paddings
[
3
],
strides
[
1
]);
const
std
::
vector
<
int
>
dilations
({
1
,
1
});
auto
out_dims
=
out
->
dims
();
out
->
Resize
({
batch_size
,
out
->
numel
()
/
batch_size
});
for
(
int
i
=
0
;
i
<
batch_size
;
i
++
)
{
const
Tensor
src
=
in
->
Slice
(
i
,
i
+
1
).
Resize
({
img_channels
,
img_height
,
img_width
});
Tensor
dst
=
out
->
Slice
(
i
,
i
+
1
).
Resize
({
output_height
,
output_width
,
img_channels
,
kernels
[
0
],
kernels
[
1
]});
math
::
Im2ColFunctor
<
math
::
ColFormat
::
kOCF
,
DeviceContext
,
T
>
f
;
auto
&
dev_ctx
=
ctx
.
template
device_context
<
DeviceContext
>();
f
(
dev_ctx
,
src
,
dilations
,
strides
,
paddings
,
&
dst
);
}
out
->
Resize
(
out_dims
);
framework
::
LoD
lod
(
1
);
lod
[
0
].
reserve
(
batch_size
+
1
);
int
offset
=
0
;
lod
[
0
].
push_back
(
offset
);
lod
[
0
].
push_back
(
offset
);
offset
+=
output_height
*
output_width
;
for
(
int
i
=
0
;
i
<
batch_size
;
++
i
)
{
offset
+=
output_height
*
output_width
;
lod
[
0
].
push_back
(
offset
);
}
out
->
set_lod
(
lod
);
}
}
out
->
set_lod
(
lod
);
}
}
};
};
...
...
paddle/fluid/operators/math/im2col.cc
浏览文件 @
9130a884
...
@@ -43,21 +43,6 @@ class Im2ColFunctor<paddle::operators::math::ColFormat::kCFO,
...
@@ -43,21 +43,6 @@ class Im2ColFunctor<paddle::operators::math::ColFormat::kCFO,
int
col_height
=
col
->
dims
()[
3
];
int
col_height
=
col
->
dims
()[
3
];
int
col_width
=
col
->
dims
()[
4
];
int
col_width
=
col
->
dims
()[
4
];
PADDLE_ENFORCE_EQ
((
im_height
+
padding
[
0
]
+
padding
[
2
]
-
((
dilation
[
0
]
*
(
filter_height
-
1
)
+
1
)))
/
stride
[
0
]
+
1
,
col_height
,
"Output_height and padding(padding_up, padding_down) are "
"inconsistent."
);
PADDLE_ENFORCE_EQ
((
im_width
+
padding
[
1
]
+
padding
[
3
]
-
((
dilation
[
1
]
*
(
filter_width
-
1
)
+
1
)))
/
stride
[
1
]
+
1
,
col_width
,
"Output_height and padding(padding_up, padding_down) are "
"inconsistent."
);
int
channels_col
=
im_channels
*
filter_height
*
filter_width
;
int
channels_col
=
im_channels
*
filter_height
*
filter_width
;
const
T
*
im_data
=
im
.
data
<
T
>
();
const
T
*
im_data
=
im
.
data
<
T
>
();
...
@@ -178,17 +163,6 @@ class Im2ColFunctor<paddle::operators::math::ColFormat::kOCF,
...
@@ -178,17 +163,6 @@ class Im2ColFunctor<paddle::operators::math::ColFormat::kOCF,
int
col_height
=
col
->
dims
()[
0
];
int
col_height
=
col
->
dims
()[
0
];
int
col_width
=
col
->
dims
()[
1
];
int
col_width
=
col
->
dims
()[
1
];
PADDLE_ENFORCE_EQ
(
(
im_height
+
padding
[
0
]
+
padding
[
2
]
-
filter_height
)
/
stride
[
0
]
+
1
,
col_height
,
"Output_height and padding(padding_up, padding_down) are "
"inconsistent."
);
PADDLE_ENFORCE_EQ
(
(
im_width
+
padding
[
1
]
+
padding
[
3
]
-
filter_width
)
/
stride
[
1
]
+
1
,
col_width
,
"col_width and padding(padding_left, padding_right) are "
"inconsistent."
);
const
T
*
im_data
=
im
.
data
<
T
>
();
const
T
*
im_data
=
im
.
data
<
T
>
();
T
*
col_data
=
col
->
data
<
T
>
();
T
*
col_data
=
col
->
data
<
T
>
();
...
...
paddle/fluid/operators/math/im2col.cu
浏览文件 @
9130a884
...
@@ -77,21 +77,6 @@ class Im2ColFunctor<paddle::operators::math::ColFormat::kCFO,
...
@@ -77,21 +77,6 @@ class Im2ColFunctor<paddle::operators::math::ColFormat::kCFO,
int
col_height
=
col
->
dims
()[
3
];
int
col_height
=
col
->
dims
()[
3
];
int
col_width
=
col
->
dims
()[
4
];
int
col_width
=
col
->
dims
()[
4
];
PADDLE_ENFORCE_EQ
((
im_height
+
padding
[
0
]
+
padding
[
2
]
-
(
dilation
[
0
]
*
(
filter_height
-
1
)
+
1
))
/
stride
[
0
]
+
1
,
col_height
,
"Output_height and padding(padding_up, padding_down) are "
"inconsistent."
);
PADDLE_ENFORCE_EQ
((
im_width
+
padding
[
1
]
+
padding
[
3
]
-
(
dilation
[
1
]
*
(
filter_width
-
1
)
+
1
))
/
stride
[
1
]
+
1
,
col_width
,
"col_width and padding(padding_left, padding_right) are "
"inconsistent."
);
int
num_outputs
=
im_channels
*
col_height
*
col_width
;
int
num_outputs
=
im_channels
*
col_height
*
col_width
;
int
blocks
=
(
num_outputs
+
1024
-
1
)
/
1024
;
int
blocks
=
(
num_outputs
+
1024
-
1
)
/
1024
;
int
block_x
=
512
;
int
block_x
=
512
;
...
@@ -274,21 +259,6 @@ class Im2ColFunctor<paddle::operators::math::ColFormat::kOCF,
...
@@ -274,21 +259,6 @@ class Im2ColFunctor<paddle::operators::math::ColFormat::kOCF,
int
col_height
=
col
->
dims
()[
0
];
int
col_height
=
col
->
dims
()[
0
];
int
col_width
=
col
->
dims
()[
1
];
int
col_width
=
col
->
dims
()[
1
];
PADDLE_ENFORCE_EQ
((
im_height
+
padding
[
0
]
+
padding
[
2
]
-
(
dilation
[
0
]
*
(
filter_height
-
1
)
+
1
))
/
stride
[
0
]
+
1
,
col_height
,
"Output_height and padding(padding_up, padding_down) are "
"inconsistent."
);
PADDLE_ENFORCE_EQ
((
im_width
+
padding
[
1
]
+
padding
[
3
]
-
(
dilation
[
1
]
*
(
filter_width
-
1
)
+
1
))
/
stride
[
1
]
+
1
,
col_width
,
"col_width and padding(padding_left, padding_right) are "
"inconsistent."
);
int
block_dim_x
=
0
;
int
block_dim_x
=
0
;
int
block_dim_y
=
0
;
int
block_dim_y
=
0
;
if
(
filter_height
<=
4
&&
filter_width
<=
4
)
{
if
(
filter_height
<=
4
&&
filter_width
<=
4
)
{
...
...
paddle/fluid/platform/CMakeLists.txt
浏览文件 @
9130a884
...
@@ -46,7 +46,7 @@ ENDIF()
...
@@ -46,7 +46,7 @@ ENDIF()
# memcpy depends on device_context, here add deps individually for
# memcpy depends on device_context, here add deps individually for
# avoiding cycle dependencies
# avoiding cycle dependencies
cc_library
(
device_context SRCS device_context.cc init.cc DEPS malloc
cc_library
(
device_context SRCS device_context.cc init.cc DEPS malloc
place eigen3 stringpiece cpu_helper
${
GPU_CTX_DEPS
}
${
MKLDNN_CTX_DEPS
}
)
place eigen3 stringpiece cpu_helper
framework_proto
${
GPU_CTX_DEPS
}
${
MKLDNN_CTX_DEPS
}
)
nv_test
(
device_context_test SRCS device_context_test.cu DEPS device_context gpu_info
)
nv_test
(
device_context_test SRCS device_context_test.cu DEPS device_context gpu_info
)
cc_test
(
init_test SRCS init_test.cc DEPS device_context
)
cc_test
(
init_test SRCS init_test.cc DEPS device_context
)
...
...
paddle/fluid/platform/mkldnn_helper.h
浏览文件 @
9130a884
...
@@ -222,15 +222,16 @@ class MKLDNNHandler {
...
@@ -222,15 +222,16 @@ class MKLDNNHandler {
static
std
::
string
GetHash
(
mkldnn
::
memory
::
dims
&
operand_dims
,
// NOLINT
static
std
::
string
GetHash
(
mkldnn
::
memory
::
dims
&
operand_dims
,
// NOLINT
const
std
::
string
&
suffix
)
{
const
std
::
string
&
suffix
)
{
auto
dims2str
=
[](
const
mkldnn
::
memory
::
dims
&
operand_dims
)
{
std
::
string
dstr
=
""
;
for
(
size_t
i
=
0
;
i
<
operand_dims
.
size
();
++
i
)
{
dstr
+=
std
::
to_string
(
operand_dims
[
i
])
+
"-"
;
}
return
dstr
;
};
return
dims2str
(
operand_dims
)
+
suffix
;
return
dims2str
(
operand_dims
)
+
suffix
;
};
protected:
static
std
::
string
dims2str
(
const
mkldnn
::
memory
::
dims
&
operand_dims
)
{
std
::
string
dstr
=
""
;
for
(
size_t
i
=
0
;
i
<
operand_dims
.
size
();
++
i
)
{
dstr
+=
std
::
to_string
(
operand_dims
[
i
])
+
"-"
;
}
return
dstr
;
}
}
protected:
protected:
...
...
paddle/scripts/paddle_build.sh
浏览文件 @
9130a884
...
@@ -510,11 +510,23 @@ function gen_fluid_inference_lib() {
...
@@ -510,11 +510,23 @@ function gen_fluid_inference_lib() {
EOF
EOF
make
-j
`
nproc
`
inference_lib_dist
make
-j
`
nproc
`
inference_lib_dist
cd
${
PADDLE_ROOT
}
/build
cd
${
PADDLE_ROOT
}
/build
mv
fluid_install_dir fluid
cp
-r
fluid_install_dir fluid
tar
-cf
fluid.tgz fluid
tar
-cf
fluid.tgz fluid
fi
fi
}
}
function
test_fluid_inference_lib
()
{
if
[
${
WITH_C_API
:-
OFF
}
==
"OFF"
]
;
then
cat
<<
EOF
========================================
Testing fluid inference library ...
========================================
EOF
cd
${
PADDLE_ROOT
}
/paddle/contrib/inference/demo_ci
sh run.sh
${
PADDLE_ROOT
}
${
WITH_MKL
:-
ON
}
${
WITH_GPU
:-
OFF
}
fi
}
function
main
()
{
function
main
()
{
set
-e
set
-e
local
CMD
=
$1
local
CMD
=
$1
...
@@ -568,6 +580,7 @@ function main() {
...
@@ -568,6 +580,7 @@ function main() {
run_test
run_test
gen_capi_package
gen_capi_package
gen_fluid_inference_lib
gen_fluid_inference_lib
test_fluid_inference_lib
;;
;;
*
)
*
)
print_usage
print_usage
...
...
python/paddle/fluid/backward.py
浏览文件 @
9130a884
...
@@ -123,7 +123,8 @@ def _append_grad_suffix_(name):
...
@@ -123,7 +123,8 @@ def _append_grad_suffix_(name):
def
_addup_repetitive_outputs_
(
op_descs
):
def
_addup_repetitive_outputs_
(
op_descs
):
"""
"""
In backward part, an variable may be the output of more than one ops.
In backward part, an variable may be the output of more than one ops.
In this case, the variable should be the accumulation of all the outputs.
And one op may yield its multiple outputs to the same variable.
In these cases, the variable should be the accumulation of all the outputs.
`sum_op`s are added to implement the accumulate.
`sum_op`s are added to implement the accumulate.
"""
"""
pending_sum_ops
=
[]
pending_sum_ops
=
[]
...
@@ -136,29 +137,46 @@ def _addup_repetitive_outputs_(op_descs):
...
@@ -136,29 +137,46 @@ def _addup_repetitive_outputs_(op_descs):
"sum"
,
{
"X"
:
renamed_vars
[
var_name
]},
{
"Out"
:
[
var_name
]},
"sum"
,
{
"X"
:
renamed_vars
[
var_name
]},
{
"Out"
:
[
var_name
]},
{
"use_mkldnn"
:
False
}),
idx
))
{
"use_mkldnn"
:
False
}),
idx
))
renamed_vars
[
var_name
]
=
[
var_name
]
renamed_vars
[
var_name
]
=
[
var_name
]
for
var_name
in
op_desc
.
output_arg_names
():
for
param_idx
,
param_name
in
enumerate
(
op_desc
.
output_names
()):
if
var_name
==
core
.
empty_var_name
(
arg_names
=
op_desc
.
output
(
param_name
)
)
or
var_name
in
op_desc
.
input_arg_names
():
for
arg_idx
,
var_name
in
enumerate
(
arg_names
):
# empty variable or inplace op
if
var_name
==
core
.
empty_var_name
(
continue
)
or
var_name
in
op_desc
.
input_arg_names
():
if
len
(
renamed_vars
[
var_name
])
==
0
:
# empty variable or inplace op
# it's the first time we get the variable
continue
renamed_vars
[
var_name
]
=
[
var_name
]
if
len
(
renamed_vars
[
var_name
])
==
0
:
else
:
# it's the first time we get the variable
if
len
(
renamed_vars
[
var_name
])
==
1
:
renamed_vars
[
var_name
]
=
[
var_name
]
else
:
if
len
(
renamed_vars
[
var_name
])
==
1
:
new_name
=
var_name
+
"@RENAME@"
+
\
str
(
var_rename_count
[
var_name
])
var_rename_count
[
var_name
]
+=
1
# rename original var_name
renamed_vars
[
var_name
][
0
]
=
new_name
_rename_arg_
(
op_descs
,
var_name
,
new_name
,
0
,
idx
)
_rename_arg_
(
pending_sum_ops
,
var_name
,
new_name
)
for
p
in
op_desc
.
output_names
()[:
param_idx
]:
p_arg_names
=
op_desc
.
output
(
p
)
if
var_name
in
p_arg_names
:
op_desc
.
set_output
(
p
,
[
new_name
if
x
==
var_name
else
x
for
x
in
p_arg_names
])
arg_names
=
[
new_name
if
x
==
var_name
else
x
for
x
in
arg_names
[:
arg_idx
]
]
+
arg_names
[
arg_idx
:]
new_name
=
var_name
+
"@RENAME@"
+
\
new_name
=
var_name
+
"@RENAME@"
+
\
str
(
var_rename_count
[
var_name
])
str
(
var_rename_count
[
var_name
])
var_rename_count
[
var_name
]
+=
1
var_rename_count
[
var_name
]
+=
1
# rename original var_name
arg_names
[
arg_idx
]
=
new_name
renamed_vars
[
var_name
][
0
]
=
new_name
op_desc
.
set_output
(
param_name
,
arg_names
)
_rename_arg_
(
op_descs
,
var_name
,
new_name
,
0
,
idx
)
renamed_vars
[
var_name
].
append
(
new_name
)
_rename_arg_
(
pending_sum_ops
,
var_name
,
new_name
)
new_name
=
var_name
+
"@RENAME@"
+
\
str
(
var_rename_count
[
var_name
])
var_rename_count
[
var_name
]
+=
1
op_desc
.
rename_output
(
var_name
,
new_name
)
renamed_vars
[
var_name
].
append
(
new_name
)
for
var_name
,
inputs
in
renamed_vars
.
iteritems
():
for
var_name
,
inputs
in
renamed_vars
.
iteritems
():
if
len
(
inputs
)
>
1
:
if
len
(
inputs
)
>
1
:
pending_sum_ops
.
append
(
pending_sum_ops
.
append
(
...
...
python/paddle/fluid/layers/nn.py
浏览文件 @
9130a884
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Copyright (c ) 2018 PaddlePaddle Authors. All Rights Reserved.
#
#
# Licensed under the Apache License, Version 2.0 (the "License");
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# you may not use this file except in compliance with the License.
...
@@ -3900,7 +3914,13 @@ def transpose(x, perm, name=None):
...
@@ -3900,7 +3914,13 @@ def transpose(x, perm, name=None):
return
out
return
out
def
im2sequence
(
input
,
filter_size
=
1
,
stride
=
1
,
padding
=
0
,
name
=
None
):
def
im2sequence
(
input
,
filter_size
=
1
,
stride
=
1
,
padding
=
0
,
input_image_size
=
None
,
out_stride
=
1
,
name
=
None
):
"""
"""
Extracts image patches from the input tensor to form a tensor of shape
Extracts image patches from the input tensor to form a tensor of shape
{input.batch_size * output_height * output_width, filter_size_H *
{input.batch_size * output_height * output_width, filter_size_H *
...
@@ -3937,6 +3957,15 @@ def im2sequence(input, filter_size=1, stride=1, padding=0, name=None):
...
@@ -3937,6 +3957,15 @@ def im2sequence(input, filter_size=1, stride=1, padding=0, name=None):
padding_up = padding_down = padding_left = padding_right = padding
padding_up = padding_down = padding_left = padding_right = padding
Default: padding = 0.
Default: padding = 0.
input_image_size(Variable): The real size of each input image. Its shape
is [batchsize, 2]. It is optional and is only used for batch inference.
out_stride(int|tuple): The scaling of the image through the CNN. It is
optional and is only valid when input_image_size is not None.
If out_stride is a tuple, it must contain two integers,
(out_stride_H, out_stride_W). Otherwise,
out_stride_H = out_stride_W = out_stride.
name (int): The name of this layer. It is optional.
name (int): The name of this layer. It is optional.
Returns:
Returns:
...
@@ -3987,7 +4016,7 @@ def im2sequence(input, filter_size=1, stride=1, padding=0, name=None):
...
@@ -3987,7 +4016,7 @@ def im2sequence(input, filter_size=1, stride=1, padding=0, name=None):
[ 5. 7. 2. 4. 1. 3. 9. 0.]
[ 5. 7. 2. 4. 1. 3. 9. 0.]
[ 7. 9. 4. 8. 3. 5. 0. 8.]]
[ 7. 9. 4. 8. 3. 5. 0. 8.]]
output.dims = {8,
9
}
output.dims = {8,
8
}
output.lod = [[4, 4]]
output.lod = [[4, 4]]
...
@@ -4009,18 +4038,17 @@ def im2sequence(input, filter_size=1, stride=1, padding=0, name=None):
...
@@ -4009,18 +4038,17 @@ def im2sequence(input, filter_size=1, stride=1, padding=0, name=None):
if
len
(
padding
)
==
2
:
if
len
(
padding
)
==
2
:
padding
.
append
(
padding
[
0
])
padding
.
append
(
padding
[
0
])
padding
.
append
(
padding
[
1
])
padding
.
append
(
padding
[
1
])
inputs
=
{
"X"
:
input
}
attrs
=
{
"kernels"
:
filter_size
,
"strides"
:
stride
,
"padding"
:
padding
}
if
input_image_size
:
if
isinstance
(
out_stride
,
int
):
out_stride
=
[
out_stride
,
out_stride
]
inputs
[
"Y"
]
=
input_image_size
attrs
[
"out_stride"
]
=
out_stride
helper
=
LayerHelper
(
'im2sequence'
,
**
locals
())
helper
=
LayerHelper
(
'im2sequence'
,
**
locals
())
out
=
helper
.
create_tmp_variable
(
dtype
=
helper
.
input_dtype
())
out
=
helper
.
create_tmp_variable
(
dtype
=
helper
.
input_dtype
())
helper
.
append_op
(
helper
.
append_op
(
type
=
'im2sequence'
,
type
=
'im2sequence'
,
inputs
=
inputs
,
outputs
=
{
'Out'
:
out
},
attrs
=
attrs
)
inputs
=
{
'X'
:
input
},
outputs
=
{
'Out'
:
out
},
attrs
=
{
'kernels'
:
filter_size
,
'strides'
:
stride
,
'paddings'
:
padding
,
})
return
out
return
out
...
...
python/paddle/fluid/tests/unittests/test_im2sequence_op.py
浏览文件 @
9130a884
...
@@ -16,23 +16,48 @@ import numpy as np
...
@@ -16,23 +16,48 @@ import numpy as np
from
op_test
import
OpTest
from
op_test
import
OpTest
def get_output_shape(attrs, in_shape, img_real_size):
    """Compute the per-image output height/width of the im2sequence reference.

    Args:
        attrs (dict): Must contain 'paddings' (4 ints, indexed here as
            [0]/[2] for the height axis and [1]/[3] for the width axis),
            'kernels' (2 ints) and 'strides' (2 ints). When
            ``img_real_size`` is non-empty it must also contain
            'out_stride' (2 ints).
        in_shape (sequence): Shape of the input batch; index 0 is used as
            the batch size, index 2 as the image height and index 3 as the
            image width.
        img_real_size (array-like): Either empty, or one ``[height, width]``
            row per batch item giving the real image size. Rows beyond the
            batch size are ignored.

    Returns:
        tuple: ``(output_height, output_width)``, two int32 arrays of shape
        ``(1, batchsize)``.
    """
    batchsize = in_shape[0]
    img_height = in_shape[2]
    img_width = in_shape[3]

    paddings = np.array(attrs['paddings']).astype("int32")
    kernels = np.array(attrs['kernels']).astype("int32")
    strides = np.array(attrs['strides']).astype("int32")

    output_height = np.zeros((1, batchsize)).astype("int32")
    output_width = np.zeros((1, batchsize)).astype("int32")

    def _ceil_div(value, divisor):
        # Integer ceiling division; avoids the fractional intermediates that
        # true division produces on float-typed sizes.
        return -(-int(value) // int(divisor))

    def _out_extent(size, pad_begin, pad_end, kernel, stride):
        # Number of sliding-window positions along one axis.
        stride = int(stride)
        return 1 + (int(size) + int(pad_begin) + int(pad_end) - int(kernel) +
                    stride - 1) // stride

    # Accept plain nested lists as well as ndarrays.
    img_real_size = np.asarray(img_real_size)

    if len(img_real_size):
        out_stride = np.array(attrs['out_stride']).astype("int32")
        for index in range(batchsize):
            # Scale the real image size down by out_stride, rounding up.
            # Column 0 holds the height and column 1 the width.
            # NOTE(review): integer rounding is assumed to match the operator
            # under test -- confirm against the C++ kernel.
            imgreal_h = _ceil_div(img_real_size[index, 0], out_stride[0])
            imgreal_w = _ceil_div(img_real_size[index, 1], out_stride[1])
            output_height[0, index] = _out_extent(
                imgreal_h, paddings[0], paddings[2], kernels[0], strides[0])
            output_width[0, index] = _out_extent(
                imgreal_w, paddings[1], paddings[3], kernels[1], strides[1])
    else:
        # No real sizes supplied: every image uses the padded tensor size.
        for index in range(batchsize):
            output_height[0, index] = _out_extent(
                img_height, paddings[0], paddings[2], kernels[0], strides[0])
            output_width[0, index] = _out_extent(
                img_width, paddings[1], paddings[3], kernels[1], strides[1])

    return output_height, output_width
...
@@ -75,22 +100,25 @@ def im2col(attrs, im, col):
...
@@ -75,22 +100,25 @@ def im2col(attrs, im, col):
im_row_offset
][
im_col_offset
]
im_row_offset
][
im_col_offset
]
def Im2Sequence(inputs, img_real_size, attrs):
    """Reference implementation of the im2sequence operator.

    Args:
        inputs (numpy.ndarray): Input batch; ``shape[0]`` is used as the
            batch size and ``shape[1]`` as the channel count.
        img_real_size (numpy.ndarray): Forwarded unchanged to
            ``get_output_shape``.
        attrs (dict): Operator attributes; ``attrs['kernels']`` supplies the
            two kernel extents, and the dict is forwarded to
            ``get_output_shape`` and ``im2col``.

    Returns:
        numpy.ndarray: The per-image patch matrices concatenated along
        axis 0.
    """
    heights, widths = get_output_shape(attrs, inputs.shape, img_real_size)
    channels = inputs.shape[1]
    kernel_h = attrs['kernels'][0]
    kernel_w = attrs['kernels'][1]

    # One zero-filled float32 buffer per image, sized by that image's own
    # output extent.
    patches = [
        np.zeros([
            heights[0, i], widths[0, i], channels, kernel_h, kernel_w
        ]).astype("float32") for i in range(inputs.shape[0])
    ]

    for i in range(len(inputs)):
        # im2col writes into the buffer it is given.
        im2col(attrs, inputs[i], patches[i])
        # Flatten to (positions, channels * kernel_h * kernel_w).
        patches[i] = patches[i].reshape(
            [heights[0, i] * widths[0, i], channels * kernel_h * kernel_w])

    return np.concatenate(patches, axis=0)
...
@@ -103,7 +131,7 @@ class TestBlockExpandOp(OpTest):
...
@@ -103,7 +131,7 @@ class TestBlockExpandOp(OpTest):
self
.
attrs
=
{
self
.
attrs
=
{
'kernels'
:
[
2
,
2
],
'kernels'
:
[
2
,
2
],
'strides'
:
[
1
,
1
],
'strides'
:
[
1
,
1
],
'paddings'
:
[
1
,
1
,
1
,
1
]
'paddings'
:
[
1
,
1
,
1
,
1
]
,
}
}
def
setUp
(
self
):
def
setUp
(
self
):
...
@@ -113,7 +141,8 @@ class TestBlockExpandOp(OpTest):
...
@@ -113,7 +141,8 @@ class TestBlockExpandOp(OpTest):
self
.
batch_size
,
self
.
img_channels
,
self
.
img_height
,
self
.
img_width
self
.
batch_size
,
self
.
img_channels
,
self
.
img_height
,
self
.
img_width
]).
astype
(
"float32"
)
]).
astype
(
"float32"
)
out
=
Im2Sequence
(
x
,
self
.
attrs
)
real_size
=
np
.
array
([]).
astype
(
"float32"
)
out
=
Im2Sequence
(
x
,
real_size
,
self
.
attrs
)
self
.
inputs
=
{
'X'
:
x
}
self
.
inputs
=
{
'X'
:
x
}
self
.
outputs
=
{
'Out'
:
out
}
self
.
outputs
=
{
'Out'
:
out
}
...
@@ -133,20 +162,20 @@ class TestBlockExpandOpCase2(TestBlockExpandOp):
...
@@ -133,20 +162,20 @@ class TestBlockExpandOpCase2(TestBlockExpandOp):
self
.
attrs
=
{
self
.
attrs
=
{
'kernels'
:
[
2
,
1
],
'kernels'
:
[
2
,
1
],
'strides'
:
[
2
,
1
],
'strides'
:
[
2
,
1
],
'paddings'
:
[
2
,
1
,
2
,
1
]
'paddings'
:
[
2
,
1
,
2
,
1
]
,
}
}
class
TestBlockExpandOpCase3
(
TestBlockExpandOp
):
class
TestBlockExpandOpCase3
(
TestBlockExpandOp
):
def
config
(
self
):
def
config
(
self
):
self
.
batch_size
=
3
self
.
batch_size
=
2
self
.
img_channels
=
1
self
.
img_channels
=
1
self
.
img_height
=
4
self
.
img_height
=
4
self
.
img_width
=
5
self
.
img_width
=
5
self
.
attrs
=
{
self
.
attrs
=
{
'kernels'
:
[
2
,
1
],
'kernels'
:
[
2
,
1
],
'strides'
:
[
2
,
1
],
'strides'
:
[
2
,
1
],
'paddings'
:
[
2
,
0
,
2
,
0
]
'paddings'
:
[
2
,
0
,
2
,
0
]
,
}
}
...
@@ -159,9 +188,94 @@ class TestBlockExpandOpCase4(TestBlockExpandOp):
...
@@ -159,9 +188,94 @@ class TestBlockExpandOpCase4(TestBlockExpandOp):
self
.
attrs
=
{
self
.
attrs
=
{
'kernels'
:
[
2
,
2
],
'kernels'
:
[
2
,
2
],
'strides'
:
[
1
,
1
],
'strides'
:
[
1
,
1
],
'paddings'
:
[
0
,
0
,
0
,
0
]
'paddings'
:
[
0
,
0
,
0
,
0
],
}
class TestBlockExpandOpCase5(OpTest):
    """im2sequence test that also feeds the real image sizes as input 'Y'."""

    def config(self):
        """Set the input dimensions and operator attributes for this case."""
        self.batch_size = 1
        self.img_channels = 3
        self.img_height = 4
        self.img_width = 5
        self.attrs = dict([
            ('kernels', [2, 1]),
            ('strides', [2, 1]),
            ('paddings', [2, 1, 2, 1]),
            ('out_stride', [2, 2]),
        ])

    def setUp(self):
        """Build a random input and the expected reference output."""
        self.config()
        self.op_type = "im2sequence"

        input_shape = [
            self.batch_size, self.img_channels, self.img_height,
            self.img_width
        ]
        images = np.random.uniform(0.1, 1, input_shape).astype("float32")
        # NOTE(review): two size rows are supplied while batch_size is 1 --
        # confirm this mismatch is intentional.
        sizes = np.array([[8, 10], [5, 8]]).astype("float32")
        expected = np.array(Im2Sequence(images, sizes, self.attrs))

        self.inputs = {'X': images, 'Y': sizes}
        self.outputs = {'Out': expected}

    def test_check_output(self):
        """Compare the operator output with the reference output."""
        self.check_output()
class TestBlockExpandOpCase6(OpTest):
    """im2sequence test with three images and an out_stride of [1, 1]."""

    def config(self):
        """Set the input dimensions and operator attributes for this case."""
        self.batch_size, self.img_channels = 3, 1
        self.img_height, self.img_width = 4, 5
        self.attrs = {
            'kernels': [2, 1],
            'strides': [1, 1],
            'paddings': [0, 0, 0, 0],
            'out_stride': [1, 1],
        }

    def setUp(self):
        """Build a random input and the expected reference output."""
        self.config()
        self.op_type = "im2sequence"

        dims = [
            self.batch_size, self.img_channels, self.img_height,
            self.img_width
        ]
        batch = np.random.uniform(0.1, 1, dims).astype("float32")
        # One [height, width] row per image in the batch.
        real_sizes = np.array([[8, 10], [5, 8], [5, 8]]).astype("float32")
        reference = np.array(Im2Sequence(batch, real_sizes, self.attrs))

        self.inputs = {'X': batch, 'Y': real_sizes}
        self.outputs = {'Out': reference}

    def test_check_output(self):
        """Compare the operator output with the reference output."""
        self.check_output()
class TestBlockExpandOpCase7(OpTest):
    """im2sequence test with two channels, asymmetric padding and real sizes."""

    def config(self):
        """Set the input dimensions and operator attributes for this case."""
        self.batch_size = 2
        self.img_channels = 2
        self.img_height = 3
        self.img_width = 3

        op_attrs = {}
        op_attrs['kernels'] = [2, 2]
        op_attrs['strides'] = [1, 1]
        op_attrs['paddings'] = [1, 0, 1, 0]
        op_attrs['out_stride'] = [2, 2]
        self.attrs = op_attrs

    def setUp(self):
        """Build a random input and the expected reference output."""
        self.config()
        self.op_type = "im2sequence"

        data = np.random.uniform(0.1, 1, [
            self.batch_size, self.img_channels, self.img_height,
            self.img_width
        ]).astype("float32")
        # One [height, width] row per image in the batch.
        size_rows = np.array([[6, 6], [4, 4]]).astype("float32")
        result = np.array(Im2Sequence(data, size_rows, self.attrs))

        self.inputs = {'X': data, 'Y': size_rows}
        self.outputs = {'Out': result}

    def test_check_output(self):
        """Compare the operator output with the reference output."""
        self.check_output()
# Run the unit tests only when this file is executed directly.
if '__main__' == __name__:
    unittest.main()
#set shiftwidth=4 set expandtab set tabstop=4
python/paddle/fluid/tests/unittests/test_layers.py
浏览文件 @
9130a884
...
@@ -251,12 +251,16 @@ class TestBook(unittest.TestCase):
...
@@ -251,12 +251,16 @@ class TestBook(unittest.TestCase):
print
(
str
(
program
))
print
(
str
(
program
))
def test_im2sequence(self):
    """Build an im2sequence layer with a real-size input and print the program."""
    print("test_im2sequence")
    program = Program()
    with program_guard(program):
        image = layers.data(name='x', shape=[3, 128, 128], dtype='float32')
        real_size = layers.data(name='y', shape=[], dtype='float32')
        sequence = layers.im2sequence(
            input=image,
            input_image_size=real_size,
            stride=[1, 1],
            filter_size=[2, 2],
            out_stride=[1, 1])
        self.assertIsNotNone(sequence)
    print(str(program))
...
...
python/setup.py.in
浏览文件 @
9130a884
...
@@ -181,6 +181,14 @@ else:
...
@@ -181,6 +181,14 @@ else:
command = "patchelf --set-rpath '$ORIGIN/../libs/' ${PADDLE_BINARY_DIR}/python/paddle/fluid/core.so"
command = "patchelf --set-rpath '$ORIGIN/../libs/' ${PADDLE_BINARY_DIR}/python/paddle/fluid/core.so"
if os.system(command) != 0:
if os.system(command) != 0:
raise Exception("patch core.so failed, command: %s" % command)
raise Exception("patch core.so failed, command: %s" % command)
if '${WITH_FLUID_ONLY}'== 'OFF':
# change rpath of _swig_paddle.so.
if "@APPLE@" == "1":
command = "install_name_tool -id \"@loader_path/../paddle/libs/\" ${PADDLE_BINARY_DIR}/python/py_paddle/_swig_paddle.so"
else:
command = "patchelf --set-rpath '$ORIGIN/../paddle/libs/' ${PADDLE_BINARY_DIR}/python/py_paddle/_swig_paddle.so"
if os.system(command) != 0:
raise Exception("patch _swig_paddle.so failed, command: %s" % command)
setup(name='${PACKAGE_NAME}',
setup(name='${PACKAGE_NAME}',
version='${PADDLE_VERSION}',
version='${PADDLE_VERSION}',
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录