Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
机器未来
Paddle
提交
dbaaca78
P
Paddle
项目概览
机器未来
/
Paddle
与 Fork 源项目一致
Fork自
PaddlePaddle / Paddle
通知
1
Star
1
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
1
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
1
Issue
1
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
dbaaca78
编写于
7月 12, 2018
作者:
M
minqiyang
浏览文件
操作
浏览文件
下载
差异文件
Merge branch 'develop' of
https://github.com/PaddlePaddle/Paddle
into port_py3
上级
a1a1109c
3c4f04b7
变更
49
隐藏空白更改
内联
并排
Showing
49 changed file
with
1641 addition
and
321 deletion
+1641
-321
paddle/contrib/inference/CMakeLists.txt
paddle/contrib/inference/CMakeLists.txt
+16
-1
paddle/contrib/inference/check_symbol.sh
paddle/contrib/inference/check_symbol.sh
+12
-0
paddle/contrib/inference/demo/CMakeLists.txt
paddle/contrib/inference/demo/CMakeLists.txt
+0
-2
paddle/contrib/inference/demo_ci/CMakeLists.txt
paddle/contrib/inference/demo_ci/CMakeLists.txt
+77
-0
paddle/contrib/inference/demo_ci/run.sh
paddle/contrib/inference/demo_ci/run.sh
+34
-0
paddle/contrib/inference/demo_ci/simple_on_word2vec.cc
paddle/contrib/inference/demo_ci/simple_on_word2vec.cc
+39
-23
paddle/contrib/inference/paddle_inference_api.map
paddle/contrib/inference/paddle_inference_api.map
+6
-0
paddle/contrib/inference/paddle_inference_api.sym
paddle/contrib/inference/paddle_inference_api.sym
+1
-0
paddle/fluid/framework/details/scope_buffered_ssa_graph_executor.cc
...id/framework/details/scope_buffered_ssa_graph_executor.cc
+13
-2
paddle/fluid/framework/details/threaded_ssa_graph_executor.cc
...le/fluid/framework/details/threaded_ssa_graph_executor.cc
+11
-4
paddle/fluid/framework/details/threaded_ssa_graph_executor.h
paddle/fluid/framework/details/threaded_ssa_graph_executor.h
+3
-0
paddle/fluid/inference/CMakeLists.txt
paddle/fluid/inference/CMakeLists.txt
+6
-0
paddle/fluid/inference/analysis/data_flow_graph.cc
paddle/fluid/inference/analysis/data_flow_graph.cc
+85
-1
paddle/fluid/inference/analysis/data_flow_graph.h
paddle/fluid/inference/analysis/data_flow_graph.h
+36
-0
paddle/fluid/inference/analysis/data_flow_graph_tester.cc
paddle/fluid/inference/analysis/data_flow_graph_tester.cc
+67
-2
paddle/fluid/inference/paddle_fluid.sym
paddle/fluid/inference/paddle_fluid.sym
+1
-0
paddle/fluid/operators/CMakeLists.txt
paddle/fluid/operators/CMakeLists.txt
+2
-0
paddle/fluid/operators/conv_mkldnn_op.cc
paddle/fluid/operators/conv_mkldnn_op.cc
+120
-52
paddle/fluid/operators/detection/rpn_target_assign_op.cc
paddle/fluid/operators/detection/rpn_target_assign_op.cc
+3
-2
paddle/fluid/operators/im2sequence_op.cc
paddle/fluid/operators/im2sequence_op.cc
+18
-7
paddle/fluid/operators/im2sequence_op.h
paddle/fluid/operators/im2sequence_op.h
+91
-34
paddle/fluid/operators/math/im2col.cc
paddle/fluid/operators/math/im2col.cc
+0
-26
paddle/fluid/operators/math/im2col.cu
paddle/fluid/operators/math/im2col.cu
+0
-30
paddle/fluid/operators/reader/create_batch_reader_op.cc
paddle/fluid/operators/reader/create_batch_reader_op.cc
+5
-5
paddle/fluid/operators/squeeze_op.cc
paddle/fluid/operators/squeeze_op.cc
+202
-0
paddle/fluid/operators/unsqueeze_op.cc
paddle/fluid/operators/unsqueeze_op.cc
+191
-0
paddle/fluid/platform/mkldnn_helper.h
paddle/fluid/platform/mkldnn_helper.h
+9
-8
paddle/fluid/pybind/pybind.cc
paddle/fluid/pybind/pybind.cc
+9
-0
paddle/scripts/paddle_build.sh
paddle/scripts/paddle_build.sh
+14
-1
python/CMakeLists.txt
python/CMakeLists.txt
+12
-5
python/paddle/fluid/__init__.py
python/paddle/fluid/__init__.py
+3
-0
python/paddle/fluid/annotations.py
python/paddle/fluid/annotations.py
+38
-0
python/paddle/fluid/backward.py
python/paddle/fluid/backward.py
+40
-25
python/paddle/fluid/layers/device.py
python/paddle/fluid/layers/device.py
+3
-1
python/paddle/fluid/layers/nn.py
python/paddle/fluid/layers/nn.py
+40
-12
python/paddle/fluid/optimizer.py
python/paddle/fluid/optimizer.py
+12
-12
python/paddle/fluid/tests/book/notest_understand_sentiment.py
...on/paddle/fluid/tests/book/notest_understand_sentiment.py
+2
-2
python/paddle/fluid/tests/book/test_recognize_digits.py
python/paddle/fluid/tests/book/test_recognize_digits.py
+10
-8
python/paddle/fluid/tests/book/test_word2vec.py
python/paddle/fluid/tests/book/test_word2vec.py
+2
-1
python/paddle/fluid/tests/book_memory_optimization/test_memopt_fit_a_line.py
.../tests/book_memory_optimization/test_memopt_fit_a_line.py
+5
-4
python/paddle/fluid/tests/unittests/test_calc_gradient.py
python/paddle/fluid/tests/unittests/test_calc_gradient.py
+0
-2
python/paddle/fluid/tests/unittests/test_get_places_op.py
python/paddle/fluid/tests/unittests/test_get_places_op.py
+2
-1
python/paddle/fluid/tests/unittests/test_im2sequence_op.py
python/paddle/fluid/tests/unittests/test_im2sequence_op.py
+147
-33
python/paddle/fluid/tests/unittests/test_layers.py
python/paddle/fluid/tests/unittests/test_layers.py
+8
-3
python/paddle/fluid/tests/unittests/test_optimizer.py
python/paddle/fluid/tests/unittests/test_optimizer.py
+11
-11
python/paddle/fluid/tests/unittests/test_parallel_op.py
python/paddle/fluid/tests/unittests/test_parallel_op.py
+2
-1
python/paddle/fluid/tests/unittests/test_squeeze_op.py
python/paddle/fluid/tests/unittests/test_squeeze_op.py
+114
-0
python/paddle/fluid/tests/unittests/test_unsqueeze_op.py
python/paddle/fluid/tests/unittests/test_unsqueeze_op.py
+111
-0
python/setup.py.in
python/setup.py.in
+8
-0
未找到文件。
paddle/contrib/inference/CMakeLists.txt
浏览文件 @
dbaaca78
...
...
@@ -45,6 +45,10 @@ endfunction(inference_api_test)
cc_library
(
paddle_inference_api
SRCS paddle_inference_api.cc paddle_inference_api_impl.cc
DEPS
${
FLUID_CORE_MODULES
}
${
GLOB_OP_LIB
}
)
if
(
NOT APPLE
)
set
(
LINK_FLAGS
"-Wl,--retain-symbols-file
${
CMAKE_CURRENT_SOURCE_DIR
}
/paddle_inference_api.sym"
)
set_target_properties
(
paddle_inference_api PROPERTIES LINK_FLAGS
"
${
LINK_FLAGS
}
"
)
endif
()
# Here the shared library doesn't depend on other fluid libraries, or double free will occur.
cc_library
(
paddle_inference_api_shared SHARED
...
...
@@ -53,8 +57,19 @@ add_dependencies(paddle_inference_api_shared ${FLUID_CORE_MODULES} ${GLOB_OP_LIB
set_target_properties
(
paddle_inference_api_shared PROPERTIES OUTPUT_NAME paddle_inference_api
)
if
(
NOT APPLE
)
set
(
LINK_FLAGS
"-
fPIC -fvisibility=hidden
"
)
set
(
LINK_FLAGS
"-
Wl,--version-script
${
CMAKE_CURRENT_SOURCE_DIR
}
/paddle_inference_api.map
"
)
set_target_properties
(
paddle_inference_api_shared PROPERTIES LINK_FLAGS
"
${
LINK_FLAGS
}
"
)
FILE
(
WRITE
${
CMAKE_CURRENT_BINARY_DIR
}
/check_symbol.cmake
"execute_process(COMMAND bash -c
\"
${
CMAKE_CURRENT_SOURCE_DIR
}
/check_symbol.sh"
"
${
CMAKE_CURRENT_BINARY_DIR
}
/libpaddle_inference_api.so
\"
RESULT_VARIABLE symbol_res)
\n
"
"if(NOT
\"\$
{symbol_res}
\"
STREQUAL
\"
0
\"
)
\n
"
" message(FATAL_ERROR
\"
Check symbol failed.
\"
)
\n
"
"endif()
\n
"
)
add_custom_command
(
OUTPUT
"
${
CMAKE_CURRENT_BINARY_DIR
}
/.check_symbol"
COMMAND
${
CMAKE_COMMAND
}
-P
"
${
CMAKE_CURRENT_BINARY_DIR
}
/check_symbol.cmake"
DEPENDS paddle_inference_api_shared
)
add_custom_target
(
check_symbol ALL DEPENDS
"
${
CMAKE_CURRENT_BINARY_DIR
}
/.check_symbol"
)
endif
()
cc_test
(
test_paddle_inference_api
...
...
paddle/contrib/inference/check_symbol.sh
0 → 100755
浏览文件 @
dbaaca78
#!/bin/bash
lib
=
$1
if
[
$#
-ne
1
]
;
then
echo
"No input library"
;
exit
-1
;
fi
num_paddle_syms
=
$(
nm
-D
--defined-only
${
lib
}
|
grep
paddle |
wc
-l
)
num_google_syms
=
$(
nm
-D
--defined-only
${
lib
}
|
grep
google |
wc
-l
)
if
[
$num_paddle_syms
-le
0
]
;
then
echo
"Have no paddle symbols"
;
exit
-1
;
fi
if
[
$num_google_syms
-ge
1
]
;
then
echo
"Have some google symbols"
;
exit
-1
;
fi
exit
0
paddle/contrib/inference/demo/CMakeLists.txt
浏览文件 @
dbaaca78
...
...
@@ -13,8 +13,6 @@
# limitations under the License.
#
inference_api_test
(
simple_on_word2vec ARGS test_word2vec
)
option
(
WITH_INFERENCE_DEMO
"Compile with Inference demo"
OFF
)
if
(
NOT WITH_INFERENCE_DEMO
)
return
()
...
...
paddle/contrib/inference/demo_ci/CMakeLists.txt
0 → 100644
浏览文件 @
dbaaca78
cmake_minimum_required
(
VERSION 3.0
)
project
(
cpp_inference_demo CXX C
)
set
(
CMAKE_CXX_FLAGS
"
${
CMAKE_CXX_FLAGS
}
-std=c++11"
)
if
(
NOT DEFINED PADDLE_LIB
)
message
(
FATAL_ERROR
"please set PADDLE_LIB with -DPADDLE_LIB=/path/paddle/lib"
)
endif
()
if
(
NOT DEFINED DEMO_NAME
)
message
(
FATAL_ERROR
"please set DEMO_NAME with -DDEMO_NAME=demo_name"
)
endif
()
option
(
WITH_MKL
"Compile demo with MKL/OpenBlas support, default use MKL."
ON
)
option
(
WITH_GPU
"Compile demo with GPU/CPU, default use CPU."
OFF
)
option
(
WITH_STATIC_LIB
"Compile demo with static/shared library, default use static."
ON
)
if
(
WITH_GPU
)
set
(
CUDA_LIB
"/usr/local/cuda/lib64/"
CACHE STRING
"CUDA Library"
)
endif
()
include_directories
(
"
${
PADDLE_LIB
}
"
)
include_directories
(
"
${
PADDLE_LIB
}
/third_party/install/protobuf/include"
)
include_directories
(
"
${
PADDLE_LIB
}
/third_party/install/glog/include"
)
include_directories
(
"
${
PADDLE_LIB
}
/third_party/install/gflags/include"
)
include_directories
(
"
${
PADDLE_LIB
}
/third_party/install/snappy/include"
)
include_directories
(
"
${
PADDLE_LIB
}
/third_party/install/snappystream/include"
)
include_directories
(
"
${
PADDLE_LIB
}
/third_party/install/zlib/include"
)
include_directories
(
"
${
PADDLE_LIB
}
/third_party/boost"
)
include_directories
(
"
${
PADDLE_LIB
}
/third_party/eigen3"
)
link_directories
(
"
${
PADDLE_LIB
}
/third_party/install/snappy/lib"
)
link_directories
(
"
${
PADDLE_LIB
}
/third_party/install/snappystream/lib"
)
link_directories
(
"
${
PADDLE_LIB
}
/third_party/install/protobuf/lib"
)
link_directories
(
"
${
PADDLE_LIB
}
/third_party/install/glog/lib"
)
link_directories
(
"
${
PADDLE_LIB
}
/third_party/install/gflags/lib"
)
link_directories
(
"
${
PADDLE_LIB
}
/third_party/install/zlib/lib"
)
add_executable
(
${
DEMO_NAME
}
${
DEMO_NAME
}
.cc
)
if
(
WITH_MKL
)
include_directories
(
"
${
PADDLE_LIB
}
/third_party/install/mklml/include"
)
set
(
MATH_LIB
${
PADDLE_LIB
}
/third_party/install/mklml/lib/libmklml_intel.so
${
PADDLE_LIB
}
/third_party/install/mklml/lib/libiomp5.so
)
set
(
MKLDNN_PATH
"
${
PADDLE_LIB
}
/third_party/install/mkldnn"
)
if
(
EXISTS
${
MKLDNN_PATH
}
)
include_directories
(
"
${
MKLDNN_PATH
}
/include"
)
set
(
MKLDNN_LIB
${
MKLDNN_PATH
}
/lib/libmkldnn.so.0
)
endif
()
else
()
set
(
MATH_LIB
${
PADDLE_LIB
}
/third_party/install/openblas/lib/libopenblas.a
)
endif
()
if
(
WITH_STATIC_LIB
)
set
(
DEPS
"-Wl,--whole-archive"
${
PADDLE_LIB
}
/paddle/fluid/inference/libpaddle_fluid.a
"-Wl,--no-whole-archive"
${
PADDLE_LIB
}
/contrib/inference/libpaddle_inference_api.a
)
else
()
# Note: libpaddle_inference_api.so must be put before libpaddle_fluid.so
set
(
DEPS
${
PADDLE_LIB
}
/contrib/inference/libpaddle_inference_api.so
${
PADDLE_LIB
}
/paddle/fluid/inference/libpaddle_fluid.so
)
endif
()
set
(
EXTERNAL_LIB
"-lrt -ldl -lpthread"
)
set
(
DEPS
${
DEPS
}
${
MATH_LIB
}
${
MKLDNN_LIB
}
glog gflags protobuf snappystream snappy z
${
EXTERNAL_LIB
}
)
if
(
WITH_GPU
)
set
(
DEPS
${
DEPS
}
${
CUDA_LIB
}
/libcudart.so
)
endif
()
target_link_libraries
(
${
DEMO_NAME
}
${
DEPS
}
)
paddle/contrib/inference/demo_ci/run.sh
0 → 100755
浏览文件 @
dbaaca78
set
-x
PADDLE_ROOT
=
$1
WITH_MKL
=
$2
WITH_GPU
=
$3
if
[
$3
==
"ON"
]
;
then
use_gpu_list
=
'true false'
else
use_gpu_list
=
'false'
fi
mkdir
-p
build
cd
build
for
WITH_STATIC_LIB
in
false
;
do
rm
-rf
*
cmake ..
-DPADDLE_LIB
=
${
PADDLE_ROOT
}
/build/fluid_install_dir/
\
-DWITH_MKL
=
$WITH_MKL
\
-DDEMO_NAME
=
simple_on_word2vec
\
-DWITH_GPU
=
$WITH_GPU
\
-DWITH_STATIC_LIB
=
$WITH_STATIC_LIB
make
for
use_gpu
in
$use_gpu_list
;
do
./simple_on_word2vec
\
--dirname
=
${
PADDLE_ROOT
}
/build/python/paddle/fluid/tests/book/word2vec.inference.model
\
--use_gpu
=
$use_gpu
done
done
if
[
$?
-eq
0
]
;
then
exit
0
else
echo
"inference demo runs fail."
exit
1
fi
set
+x
paddle/contrib/inference/demo/simple_on_word2vec.cc
→
paddle/contrib/inference/demo
_ci
/simple_on_word2vec.cc
浏览文件 @
dbaaca78
...
...
@@ -16,21 +16,27 @@ limitations under the License. */
* This file contains a simple demo for how to take a model for inference.
*/
#include <gflags/gflags.h>
#include <glog/logging.h>
#include <gtest/gtest.h>
#include <memory>
#include <thread>
#include "paddle/contrib/inference/paddle_inference_api.h"
#include "contrib/inference/paddle_inference_api.h"
#include "paddle/fluid/platform/enforce.h"
DEFINE_string
(
dirname
,
""
,
"Directory of the inference model."
);
DEFINE_bool
(
use_gpu
,
false
,
"Whether use gpu."
);
namespace
paddle
{
namespace
demo
{
DEFINE_string
(
dirname
,
""
,
"Directory of the inference model."
);
void
Main
(
bool
use_gpu
)
{
//# 1. Create PaddlePredictor with a config.
NativeConfig
config
;
config
.
model_dir
=
FLAGS_dirname
+
"word2vec.inference.model"
;
if
(
FLAGS_dirname
.
empty
())
{
LOG
(
INFO
)
<<
"Usage: ./simple_on_word2vec --dirname=path/to/your/model"
;
exit
(
1
);
}
config
.
model_dir
=
FLAGS_dirname
;
config
.
use_gpu
=
use_gpu
;
config
.
fraction_of_gpu_memory
=
0.15
;
config
.
device
=
0
;
...
...
@@ -54,12 +60,16 @@ void Main(bool use_gpu) {
CHECK
(
predictor
->
Run
(
slots
,
&
outputs
));
//# 4. Get output.
ASSERT_EQ
(
outputs
.
size
(),
1UL
);
LOG
(
INFO
)
<<
"output buffer size: "
<<
outputs
.
front
().
data
.
length
();
PADDLE_ENFORCE
(
outputs
.
size
(),
1UL
);
// Check the output buffer size and result of each tid.
PADDLE_ENFORCE
(
outputs
.
front
().
data
.
length
(),
33168UL
);
float
result
[
5
]
=
{
0.00129761
,
0.00151112
,
0.000423564
,
0.00108815
,
0.000932706
};
const
size_t
num_elements
=
outputs
.
front
().
data
.
length
()
/
sizeof
(
float
);
// The outputs' buffers are in CPU memory.
for
(
size_t
i
=
0
;
i
<
std
::
min
(
5UL
,
num_elements
);
i
++
)
{
LOG
(
INFO
)
<<
static_cast
<
float
*>
(
outputs
.
front
().
data
.
data
())[
i
];
PADDLE_ENFORCE
(
static_cast
<
float
*>
(
outputs
.
front
().
data
.
data
())[
i
],
result
[
i
]);
}
}
}
...
...
@@ -68,7 +78,7 @@ void MainThreads(int num_threads, bool use_gpu) {
// Multi-threading is only supported on CPU
// 0. Create PaddlePredictor with a config.
NativeConfig
config
;
config
.
model_dir
=
FLAGS_dirname
+
"word2vec.inference.model"
;
config
.
model_dir
=
FLAGS_dirname
;
config
.
use_gpu
=
use_gpu
;
config
.
fraction_of_gpu_memory
=
0.15
;
config
.
device
=
0
;
...
...
@@ -94,14 +104,17 @@ void MainThreads(int num_threads, bool use_gpu) {
CHECK
(
predictor
->
Run
(
inputs
,
&
outputs
));
// 4. Get output.
ASSERT_EQ
(
outputs
.
size
(),
1UL
);
LOG
(
INFO
)
<<
"TID: "
<<
tid
<<
", "
<<
"output buffer size: "
<<
outputs
.
front
().
data
.
length
();
PADDLE_ENFORCE
(
outputs
.
size
(),
1UL
);
// Check the output buffer size and result of each tid.
PADDLE_ENFORCE
(
outputs
.
front
().
data
.
length
(),
33168UL
);
float
result
[
5
]
=
{
0.00129761
,
0.00151112
,
0.000423564
,
0.00108815
,
0.000932706
};
const
size_t
num_elements
=
outputs
.
front
().
data
.
length
()
/
sizeof
(
float
);
// The outputs' buffers are in CPU memory.
for
(
size_t
i
=
0
;
i
<
std
::
min
(
5UL
,
num_elements
);
i
++
)
{
LOG
(
INFO
)
<<
static_cast
<
float
*>
(
outputs
.
front
().
data
.
data
())[
i
];
PADDLE_ENFORCE
(
static_cast
<
float
*>
(
outputs
.
front
().
data
.
data
())[
i
],
result
[
i
]);
}
}
});
...
...
@@ -111,15 +124,18 @@ void MainThreads(int num_threads, bool use_gpu) {
}
}
TEST
(
demo
,
word2vec_cpu
)
{
Main
(
false
/*use_gpu*/
);
}
TEST
(
demo_multi_threads
,
word2vec_cpu_1
)
{
MainThreads
(
1
,
false
/*use_gpu*/
);
}
TEST
(
demo_multi_threads
,
word2vec_cpu_4
)
{
MainThreads
(
4
,
false
/*use_gpu*/
);
}
#ifdef PADDLE_WITH_CUDA
TEST
(
demo
,
word2vec_gpu
)
{
Main
(
true
/*use_gpu*/
);
}
TEST
(
demo_multi_threads
,
word2vec_gpu_1
)
{
MainThreads
(
1
,
true
/*use_gpu*/
);
}
TEST
(
demo_multi_threads
,
word2vec_gpu_4
)
{
MainThreads
(
4
,
true
/*use_gpu*/
);
}
#endif
}
// namespace demo
}
// namespace paddle
int
main
(
int
argc
,
char
**
argv
)
{
google
::
ParseCommandLineFlags
(
&
argc
,
&
argv
,
true
);
paddle
::
demo
::
Main
(
false
/* use_gpu*/
);
paddle
::
demo
::
MainThreads
(
1
,
false
/* use_gpu*/
);
paddle
::
demo
::
MainThreads
(
4
,
false
/* use_gpu*/
);
if
(
FLAGS_use_gpu
)
{
paddle
::
demo
::
Main
(
true
/*use_gpu*/
);
paddle
::
demo
::
MainThreads
(
1
,
true
/*use_gpu*/
);
paddle
::
demo
::
MainThreads
(
4
,
true
/*use_gpu*/
);
}
return
0
;
}
paddle/contrib/inference/paddle_inference_api.map
0 → 100644
浏览文件 @
dbaaca78
{
global:
*paddle*;
local:
*;
};
paddle/contrib/inference/paddle_inference_api.sym
0 → 100644
浏览文件 @
dbaaca78
*paddle*
paddle/fluid/framework/details/scope_buffered_ssa_graph_executor.cc
浏览文件 @
dbaaca78
...
...
@@ -13,6 +13,7 @@
// limitations under the License.
#include "paddle/fluid/framework/details/scope_buffered_ssa_graph_executor.h"
#include <stdexcept>
#include <string>
#include <vector>
#include "paddle/fluid/framework/executor.h"
...
...
@@ -53,8 +54,14 @@ FeedFetchList ScopeBufferedSSAGraphExecutor::Run(
}
}
}
std
::
vector
<
framework
::
LoDTensor
>
fetch_data
;
std
::
exception_ptr
eptr
;
try
{
fetch_data
=
underlying_executor_
->
Run
(
fetch_tensors
);
}
catch
(...)
{
eptr
=
std
::
current_exception
();
}
auto
fetch_data
=
underlying_executor_
->
Run
(
fetch_tensors
);
drop_scope_counter_
+=
1
;
if
(
!
fetch_tensors
.
empty
()
||
drop_scope_counter_
==
strategy_
.
num_iteration_per_drop_scope_
)
{
...
...
@@ -69,7 +76,11 @@ FeedFetchList ScopeBufferedSSAGraphExecutor::Run(
scope
->
DeleteScope
(
local_scope
);
}
}
return
fetch_data
;
if
(
eptr
)
{
std
::
rethrow_exception
(
eptr
);
}
else
{
return
fetch_data
;
}
}
}
// namespace details
}
// namespace framework
...
...
paddle/fluid/framework/details/threaded_ssa_graph_executor.cc
浏览文件 @
dbaaca78
...
...
@@ -78,6 +78,10 @@ FeedFetchList ThreadedSSAGraphExecutor::Run(
set
.
clear
();
};
// Clean run context
run_op_futures_
.
clear
();
exception_
.
reset
();
// Step 3. Execution
while
(
!
pending_vars
.
empty
())
{
// 1. Run All Ready ops
...
...
@@ -96,16 +100,19 @@ FeedFetchList ThreadedSSAGraphExecutor::Run(
auto
cur_ready_vars
=
ready_vars
.
PopAll
(
1
,
&
timeout
);
if
(
timeout
)
{
std
::
lock_guard
<
std
::
mutex
>
l
(
exception_mu_
);
std
::
unique_lock
<
std
::
mutex
>
l
(
exception_mu_
);
if
(
exception_
)
{
l
.
unlock
();
for
(
auto
&
run_op_future
:
run_op_futures_
)
{
run_op_future
.
wait
();
}
l
.
lock
();
std
::
exception
*
exp
=
exception_
.
get
();
if
(
dynamic_cast
<
platform
::
EOFException
*>
(
exp
))
{
auto
e
=
*
static_cast
<
platform
::
EOFException
*>
(
exp
);
exception_
.
reset
();
throw
e
;
}
else
if
(
dynamic_cast
<
platform
::
EnforceNotMet
*>
(
exp
))
{
auto
e
=
*
static_cast
<
platform
::
EnforceNotMet
*>
(
exp
);
exception_
.
reset
();
throw
e
;
}
else
{
LOG
(
FATAL
)
<<
"Unknown exception."
;
...
...
@@ -222,7 +229,7 @@ void ThreadedSSAGraphExecutor::RunOp(
}
};
if
(
pool_
)
{
pool_
->
enqueue
(
op_run
);
run_op_futures_
.
emplace_back
(
pool_
->
enqueue
(
op_run
)
);
}
else
{
op_run
();
}
...
...
paddle/fluid/framework/details/threaded_ssa_graph_executor.h
浏览文件 @
dbaaca78
...
...
@@ -15,6 +15,7 @@
#pragma once
#include <deque>
#include <list>
#include <string>
#include <unordered_set>
#include <utility>
...
...
@@ -77,6 +78,8 @@ class ThreadedSSAGraphExecutor : public SSAGraphExecutor {
private:
ExecutionStrategy
strategy_
;
// use std::list because clear(), push_back, and for_each are O(1)
std
::
list
<
std
::
future
<
void
>>
run_op_futures_
;
};
}
// namespace details
...
...
paddle/fluid/inference/CMakeLists.txt
浏览文件 @
dbaaca78
...
...
@@ -13,6 +13,12 @@ endif()
# Create static library
cc_library
(
paddle_fluid DEPS
${
fluid_modules
}
paddle_fluid_api
)
if
(
NOT APPLE
)
# TODO(liuyiqu): Temporarily disable the link flag because it is not supported on Mac.
set
(
LINK_FLAGS
"-Wl,--retain-symbols-file
${
CMAKE_CURRENT_SOURCE_DIR
}
/paddle_fluid.sym"
)
set_target_properties
(
paddle_fluid PROPERTIES LINK_FLAGS
"
${
LINK_FLAGS
}
"
)
endif
()
# Create shared library
cc_library
(
paddle_fluid_shared SHARED
SRCS io.cc
...
...
paddle/fluid/inference/analysis/data_flow_graph.cc
浏览文件 @
dbaaca78
...
...
@@ -90,6 +90,20 @@ std::string DataFlowGraph::DotString() const {
return
dot
.
Build
();
}
std
::
string
DataFlowGraph
::
HumanReadableInfo
(
bool
show_values
,
bool
show_functions
)
const
{
std
::
stringstream
values
,
functions
;
for
(
auto
&
n
:
nodes
.
nodes
())
{
if
(
show_values
&&
n
->
IsValue
())
{
values
<<
n
->
repr
()
<<
"
\n
"
;
}
if
(
show_functions
&&
n
->
IsFunction
())
{
functions
<<
n
->
repr
()
<<
"
\n
"
;
}
}
return
"Values:
\n
"
+
values
.
str
()
+
"
\n\n
"
+
"Functions:
\n
"
+
functions
.
str
();
}
//
// NodesBFSIterator
//
...
...
@@ -146,7 +160,7 @@ bool GraphTraits<DataFlowGraph>::NodesBFSIterator::operator==(
if
((
!
queue_
.
empty
())
&&
(
!
other
.
queue_
.
empty
()))
{
return
queue_
.
front
()
==
other
.
queue_
.
front
()
&&
visited_
.
size
()
==
other
.
visited_
.
size
();
// here need to check the
// equality of queue and
// equality of queue and
// visited. Just a light but weak implementation.
}
return
false
;
...
...
@@ -208,6 +222,76 @@ Node *GraphTraits<DataFlowGraph>::NodesDFSIterator::operator->() {
return
stack_
.
top
();
}
GraphTraits
<
DataFlowGraph
>::
NodesTSIterator
::
NodesTSIterator
(
const
std
::
vector
<
Node
*>
&
source
)
{
PADDLE_ENFORCE
(
!
source
.
empty
(),
"Start points of topological sorting should not be empty!"
);
std
::
unordered_set
<
Node
*>
visited
;
std
::
unordered_set
<
Node
*>
to_visit
{
source
.
begin
(),
source
.
end
()};
std
::
vector
<
Node
*>
inlink_visited
;
while
(
!
to_visit
.
empty
())
{
std
::
vector
<
Node
*>
queue
(
to_visit
.
begin
(),
to_visit
.
end
());
for
(
auto
*
p
:
queue
)
{
inlink_visited
.
clear
();
std
::
copy_if
(
p
->
inlinks
.
begin
(),
p
->
inlinks
.
end
(),
std
::
back_inserter
(
inlink_visited
),
[
&
](
Node
*
x
)
{
return
visited
.
count
(
x
);
});
if
(
inlink_visited
.
size
()
==
p
->
inlinks
.
size
())
{
sorted_
.
push_back
(
p
);
for
(
auto
*
_
:
p
->
outlinks
)
{
if
(
!
visited
.
count
(
_
))
{
to_visit
.
insert
(
_
);
}
}
to_visit
.
erase
(
p
);
visited
.
insert
(
p
);
}
}
}
}
GraphTraits
<
DataFlowGraph
>::
NodesTSIterator
::
NodesTSIterator
(
const
paddle
::
inference
::
analysis
::
GraphTraits
<
DataFlowGraph
>::
NodesTSIterator
&
other
)
:
sorted_
(
other
.
sorted_
),
cursor_
(
other
.
cursor_
)
{}
Node
&
GraphTraits
<
DataFlowGraph
>::
NodesTSIterator
::
operator
*
()
{
PADDLE_ENFORCE_LT
(
cursor_
,
sorted_
.
size
());
return
*
sorted_
[
cursor_
];
}
paddle
::
inference
::
analysis
::
GraphTraits
<
DataFlowGraph
>::
NodesTSIterator
&
GraphTraits
<
DataFlowGraph
>::
NodesTSIterator
::
operator
++
()
{
if
(
++
cursor_
>=
sorted_
.
size
())
{
sorted_
.
clear
();
cursor_
=
0
;
}
return
*
this
;
}
paddle
::
inference
::
analysis
::
GraphTraits
<
DataFlowGraph
>::
NodesTSIterator
&
GraphTraits
<
DataFlowGraph
>::
NodesTSIterator
::
operator
=
(
const
paddle
::
inference
::
analysis
::
GraphTraits
<
DataFlowGraph
>::
NodesTSIterator
&
other
)
{
cursor_
=
other
.
cursor_
;
sorted_
=
other
.
sorted_
;
return
*
this
;
}
bool
GraphTraits
<
DataFlowGraph
>::
NodesTSIterator
::
operator
==
(
const
paddle
::
inference
::
analysis
::
GraphTraits
<
DataFlowGraph
>::
NodesTSIterator
&
other
)
{
return
sorted_
==
other
.
sorted_
&&
cursor_
==
other
.
cursor_
;
}
Node
*
GraphTraits
<
DataFlowGraph
>::
NodesTSIterator
::
operator
->
()
{
PADDLE_ENFORCE_LT
(
cursor_
,
sorted_
.
size
());
return
sorted_
[
cursor_
];
}
}
// namespace analysis
}
// namespace inference
}
// namespace paddle
paddle/fluid/inference/analysis/data_flow_graph.h
浏览文件 @
dbaaca78
...
...
@@ -48,6 +48,9 @@ struct DataFlowGraph {
// Output a DOT graph file for debug.
std
::
string
DotString
()
const
;
std
::
string
HumanReadableInfo
(
bool
show_values
=
true
,
bool
show_functions
=
true
)
const
;
private:
// Remove duplicate edges and so on.
void
Clean
();
...
...
@@ -107,6 +110,32 @@ struct GraphTraits<DataFlowGraph> {
std
::
unordered_set
<
Node
*>
visited_
;
};
// Topological sorting iterator on nodes.
struct
NodesTSIterator
:
public
std
::
iterator
<
std
::
forward_iterator_tag
,
Node
*>
{
NodesTSIterator
()
=
default
;
explicit
NodesTSIterator
(
const
std
::
vector
<
Node
*>
&
source
);
NodesTSIterator
(
NodesTSIterator
&&
other
)
:
sorted_
(
std
::
move
(
other
.
sorted_
)),
cursor_
(
other
.
cursor_
)
{
other
.
cursor_
=
0
;
}
NodesTSIterator
(
const
NodesTSIterator
&
other
);
Node
&
operator
*
();
NodesTSIterator
&
operator
++
();
// TODO(Superjomn) current implementation just compare the first
// element, need to compare the graph and all the elements in the queue and
// set.
NodesTSIterator
&
operator
=
(
const
NodesTSIterator
&
other
);
bool
operator
==
(
const
NodesTSIterator
&
other
);
bool
operator
!=
(
const
NodesTSIterator
&
other
)
{
return
!
(
*
this
==
other
);
}
Node
*
operator
->
();
private:
std
::
vector
<
Node
*>
sorted_
;
int
cursor_
{
0
};
};
explicit
GraphTraits
(
DataFlowGraph
*
graph
)
:
graph_
(
graph
)
{}
// default use BFS to visit the nodes.
...
...
@@ -119,17 +148,24 @@ struct GraphTraits<DataFlowGraph> {
iterator_range
<
NodesDFSIterator
>
nodes_in_DFS
()
{
return
iterator_range
<
NodesDFSIterator
>
(
nodes_dfs_begin
(),
nodes_dfs_end
());
}
iterator_range
<
NodesTSIterator
>
nodes_in_TS
()
{
return
iterator_range
<
NodesTSIterator
>
(
nodes_ts_begin
(),
nodes_ts_end
());
}
private:
NodesBFSIterator
nodes_bfs_begin
()
{
return
NodesBFSIterator
(
graph_
->
inputs
);
}
NodesBFSIterator
nodes_bfs_end
()
{
return
NodesBFSIterator
();
}
NodesDFSIterator
nodes_dfs_begin
()
{
return
NodesDFSIterator
(
graph_
->
inputs
);
}
NodesDFSIterator
nodes_dfs_end
()
{
return
NodesDFSIterator
();
}
NodesTSIterator
nodes_ts_begin
()
{
return
NodesTSIterator
(
graph_
->
inputs
);
}
NodesTSIterator
nodes_ts_end
()
{
return
NodesTSIterator
();
}
private:
DataFlowGraph
*
graph_
;
};
...
...
paddle/fluid/inference/analysis/data_flow_graph_tester.cc
浏览文件 @
dbaaca78
...
...
@@ -24,11 +24,11 @@ TEST(DataFlowGraph, BFS) {
auto
dfg
=
ProgramDescToDFG
(
desc
);
dfg
.
Build
();
for
(
auto
*
in
:
dfg
.
inputs
)
{
for
(
auto
*
in
:
dfg
.
inputs
)
{
LOG
(
INFO
)
<<
"inputs: "
<<
in
->
name
()
<<
" "
<<
static_cast
<
int
>
(
in
->
type
());
}
for
(
auto
*
out
:
dfg
.
outputs
)
{
for
(
auto
*
out
:
dfg
.
outputs
)
{
LOG
(
INFO
)
<<
"outputs: "
<<
out
->
name
()
<<
" "
<<
static_cast
<
int
>
(
out
->
type
());
}
...
...
@@ -57,6 +57,71 @@ TEST(DataFlowGraph, DFS) {
ASSERT_EQ
(
count
,
dfg
.
nodes
.
size
());
}
// Topological sorting.
/*
* Graph topology
* inputs: 0, 1, 2
* 0 -> 4
* 0 -> 5
* 1 -> 6
* 2 -> 7
* 4 -> 5
* 4 -> 7
* 4 -> 3
* 7 -> 3
*/
TEST
(
DataFlowGraph
,
TS
)
{
DataFlowGraph
graph
;
for
(
int
i
=
0
;
i
<
8
;
i
++
)
{
auto
*
node
=
graph
.
nodes
.
Create
(
Node
::
Type
::
kValue
);
node
->
SetName
(
"node-"
+
std
::
to_string
(
i
));
}
auto
add_link
=
[
&
](
int
i
,
int
j
)
{
Node
*
source
=
graph
.
nodes
.
GetMutable
(
i
);
Node
*
target
=
graph
.
nodes
.
GetMutable
(
j
);
target
->
inlinks
.
push_back
(
source
);
source
->
outlinks
.
push_back
(
target
);
};
graph
.
inputs
.
push_back
(
graph
.
nodes
.
GetMutable
(
0
));
graph
.
inputs
.
push_back
(
graph
.
nodes
.
GetMutable
(
1
));
graph
.
inputs
.
push_back
(
graph
.
nodes
.
GetMutable
(
2
));
add_link
(
0
,
4
);
add_link
(
0
,
5
);
add_link
(
1
,
6
);
add_link
(
2
,
7
);
add_link
(
4
,
5
);
add_link
(
4
,
7
);
add_link
(
4
,
3
);
add_link
(
7
,
3
);
auto
its
=
GraphTraits
<
DataFlowGraph
>
(
&
graph
).
nodes_in_TS
();
std
::
vector
<
int
>
sorted_ids
;
for
(
auto
it
=
its
.
begin
();
it
!=
its
.
end
();
++
it
)
{
LOG
(
INFO
)
<<
it
->
name
();
sorted_ids
.
push_back
(
it
->
id
());
}
// Assert a occurs prior to b in the sorted_ids.
auto
assert_positive_sequence_pair
=
[
&
](
int
a
,
int
b
)
{
auto
a_offset
=
std
::
find
(
sorted_ids
.
begin
(),
sorted_ids
.
end
(),
a
);
auto
b_offset
=
std
::
find
(
sorted_ids
.
begin
(),
sorted_ids
.
end
(),
b
);
ASSERT_LT
(
a_offset
,
b_offset
);
};
assert_positive_sequence_pair
(
2
,
7
);
assert_positive_sequence_pair
(
7
,
3
);
assert_positive_sequence_pair
(
4
,
3
);
assert_positive_sequence_pair
(
0
,
4
);
assert_positive_sequence_pair
(
0
,
5
);
assert_positive_sequence_pair
(
1
,
6
);
assert_positive_sequence_pair
(
4
,
5
);
assert_positive_sequence_pair
(
4
,
7
);
}
}
// namespace analysis
}
// namespace inference
}
// namespace paddle
paddle/fluid/inference/paddle_fluid.sym
0 → 100644
浏览文件 @
dbaaca78
*paddle*
paddle/fluid/operators/CMakeLists.txt
浏览文件 @
dbaaca78
...
...
@@ -265,6 +265,8 @@ op_library(recurrent_op DEPS executor)
op_library
(
warpctc_op DEPS dynload_warpctc sequence_padding sequence_scale
)
op_library
(
cos_sim_op DEPS cos_sim_functor
)
op_library
(
parallel_do_op DEPS executor
)
op_library
(
unsqueeze_op DEPS reshape_op
)
op_library
(
squeeze_op DEPS reshape_op
)
if
(
WITH_GPU
)
op_library
(
conv_op DEPS vol2col depthwise_conv im2col
)
...
...
paddle/fluid/operators/conv_mkldnn_op.cc
浏览文件 @
dbaaca78
...
...
@@ -29,6 +29,79 @@ using mkldnn::stream;
using
platform
::
to_void_cast
;
using
platform
::
GetMKLDNNFormat
;
class
ConvMKLDNNHandler
:
public
platform
::
MKLDNNHandler
{
public:
ConvMKLDNNHandler
(
std
::
shared_ptr
<
mkldnn
::
convolution_forward
::
primitive_desc
>
conv_pd
,
const
platform
::
MKLDNNDeviceContext
&
dev_ctx
,
mkldnn
::
engine
engine
,
const
std
::
string
&
base_key
)
:
platform
::
MKLDNNHandler
(
dev_ctx
,
engine
,
base_key
)
{
conv_pd_
=
conv_pd
;
}
std
::
shared_ptr
<
mkldnn
::
memory
>
AcquireDstMemoryFromPrimitive
(
void
*
ptr
)
{
return
this
->
AcquireMemoryFromPrimitive
(
conv_pd_
->
dst_primitive_desc
(),
ptr
,
"@dst_mem_p"
);
}
std
::
shared_ptr
<
mkldnn
::
memory
>
AcquireSrcMemoryFromPrimitive
(
const
std
::
shared_ptr
<
mkldnn
::
memory
>
user_memory_p
,
std
::
vector
<
mkldnn
::
primitive
>&
pipeline
)
{
auto
src_pd
=
conv_pd_
->
src_primitive_desc
();
auto
user_pd
=
user_memory_p
->
get_primitive_desc
();
return
this
->
AcquireMemory
(
src_pd
,
user_pd
,
user_memory_p
,
"@src_mem_p"
,
pipeline
);
}
std
::
shared_ptr
<
mkldnn
::
memory
>
AcquireWeightsMemoryFromPrimitive
(
const
std
::
shared_ptr
<
mkldnn
::
memory
>
user_weights_memory_p
,
std
::
vector
<
mkldnn
::
primitive
>&
pipeline
)
{
auto
user_weights_pd
=
user_weights_memory_p
->
get_primitive_desc
();
auto
weights_pd
=
conv_pd_
->
weights_primitive_desc
();
return
this
->
AcquireMemory
(
weights_pd
,
user_weights_pd
,
user_weights_memory_p
,
"@weights_mem_p"
,
pipeline
);
}
std
::
shared_ptr
<
mkldnn
::
convolution_forward
>
AcquireConvolution
(
std
::
shared_ptr
<
mkldnn
::
memory
>
src_memory_p
,
std
::
shared_ptr
<
mkldnn
::
memory
>
weights_memory_p
,
std
::
shared_ptr
<
mkldnn
::
memory
>
dst_memory_p
)
{
auto
prim_key
=
key_
+
"@conv_p"
;
auto
prim_desc_key
=
key_
+
"@conv_pd"
;
auto
conv_p
=
std
::
static_pointer_cast
<
mkldnn
::
convolution_forward
>
(
dev_ctx_
.
GetBlob
(
prim_key
));
PADDLE_ENFORCE
((
conv_p
!=
nullptr
)
||
(
is_reusing_
==
false
),
"Fail to find convolution primitive in device context"
);
if
(
conv_p
==
nullptr
)
{
conv_p
=
std
::
make_shared
<
mkldnn
::
convolution_forward
>
(
*
conv_pd_
,
*
(
src_memory_p
),
*
(
weights_memory_p
.
get
()),
*
(
dst_memory_p
.
get
()));
dev_ctx_
.
SetBlob
(
prim_key
,
conv_p
);
}
else
{
is_reusing_
=
true
;
}
return
conv_p
;
}
// Generate keys for storing/retrieving primitives for this operator
// TODO(jczaja): Make hashing function more optimal
static
std
::
string
GetHash
(
memory
::
dims
&
input_dims
,
memory
::
dims
&
weights_dims
,
std
::
vector
<
int
>&
strides
,
std
::
vector
<
int
>&
paddings
,
std
::
vector
<
int
>&
dilations
,
int
groups
,
const
std
::
string
&
suffix
)
{
return
dims2str
(
input_dims
)
+
dims2str
(
weights_dims
)
+
dims2str
(
strides
)
+
dims2str
(
paddings
)
+
dims2str
(
dilations
)
+
std
::
to_string
(
groups
)
+
suffix
;
}
private:
std
::
shared_ptr
<
mkldnn
::
convolution_forward
::
primitive_desc
>
conv_pd_
;
};
template
<
typename
T
>
class
ConvMKLDNNOpKernel
:
public
paddle
::
framework
::
OpKernel
<
T
>
{
public:
...
...
@@ -36,10 +109,6 @@ class ConvMKLDNNOpKernel : public paddle::framework::OpKernel<T> {
PADDLE_ENFORCE
(
paddle
::
platform
::
is_cpu_place
(
ctx
.
GetPlace
()),
"It must use CPUPlace."
);
// Get unique name for index
const
std
::
string
key
=
ctx
.
op
().
Output
(
"Output"
);
const
std
::
string
key_conv_pd
=
key
+
"@conv_pd"
;
auto
&
dev_ctx
=
ctx
.
template
device_context
<
paddle
::
platform
::
MKLDNNDeviceContext
>();
const
auto
&
mkldnn_engine
=
dev_ctx
.
GetEngine
();
...
...
@@ -80,68 +149,62 @@ class ConvMKLDNNOpKernel : public paddle::framework::OpKernel<T> {
paddle
::
framework
::
vectorize2int
(
filter
->
dims
());
std
::
vector
<
int
>
dst_tz
=
paddle
::
framework
::
vectorize2int
(
output
->
dims
());
// create mkldnn memory from input tensors (data/weights)
auto
user_src_memory
=
memory
(
{{{
src_tz
},
memory
::
data_type
::
f32
,
input
->
format
()},
mkldnn_engine
},
to_void_cast
(
input_data
));
auto
user_weights_memory
=
memory
({{{
weights_tz
},
memory
::
data_type
::
f32
,
filter
->
format
()},
mkldnn_engine
},
to_void_cast
(
filter_data
));
// Get unique name for storing MKLDNN primitives
const
std
::
string
key
=
ConvMKLDNNHandler
::
GetHash
(
src_tz
,
weights_tz
,
strides
,
paddings
,
dilations
,
groups
,
ctx
.
op
().
Output
(
"Output"
));
const
std
::
string
key_conv_pd
=
key
+
"@conv_pd"
;
std
::
vector
<
primitive
>
pipeline
;
auto
user_src_md
=
platform
::
MKLDNNMemDesc
(
{
src_tz
},
platform
::
MKLDNNGetDataType
<
T
>
(),
input
->
format
());
auto
user_weights_md
=
platform
::
MKLDNNMemDesc
(
{
weights_tz
},
platform
::
MKLDNNGetDataType
<
T
>
(),
filter
->
format
());
/* create memory descriptor for convolution without specified format
* ('any') which lets a primitive (convolution in this case) choose
* the memory format preferred for best performance
*/
auto
src_md
=
platform
::
MKLDNNMemDesc
(
src_tz
,
memory
::
data_type
::
f32
,
memory
::
format
::
any
);
auto
src_md
=
platform
::
MKLDNNMemDesc
(
src_tz
,
platform
::
MKLDNNGetDataType
<
T
>
(),
memory
::
format
::
any
);
auto
weights_md
=
platform
::
MKLDNNMemDesc
(
weights_tz
,
memory
::
data_type
::
f32
,
memory
::
format
::
any
);
auto
dst_md
=
platform
::
MKLDNNMemDesc
(
dst_tz
,
memory
::
data_type
::
f32
,
memory
::
format
::
any
);
weights_tz
,
platform
::
MKLDNNGetDataType
<
T
>
()
,
memory
::
format
::
any
);
auto
dst_md
=
platform
::
MKLDNNMemDesc
(
dst_tz
,
platform
::
MKLDNNGetDataType
<
T
>
(),
memory
::
format
::
any
);
// create a conv primitive descriptor and save it for usage in backward
std
::
shared_ptr
<
conv_fwd
::
primitive_desc
>
conv_pd
=
ConvFwdPrimitiveDesc
(
src_md
,
weights_md
,
dst_md
,
strides
,
paddings
,
mkldnn_engine
);
// Save conv_pd/src_memory/weights_memory for backward pass
dev_ctx
.
SetBlob
(
key_conv_pd
,
conv_pd
);
// create reorder primitive if the input format is not the preferred one
auto
src_memory
=
user_src_memory
;
primitive
reorder_src
;
bool
is_src_reordered
=
false
;
if
(
memory
::
primitive_desc
(
conv_pd
->
src_primitive_desc
())
!=
user_src_memory
.
get_primitive_desc
())
{
src_memory
=
memory
(
conv_pd
->
src_primitive_desc
());
reorder_src
=
reorder
(
user_src_memory
,
src_memory
);
is_src_reordered
=
true
;
}
auto
weights_memory
=
user_weights_memory
;
primitive
reorder_weights
;
bool
is_weights_reordered
=
false
;
if
(
memory
::
primitive_desc
(
conv_pd
->
weights_primitive_desc
())
!=
user_weights_memory
.
get_primitive_desc
())
{
weights_memory
=
memory
(
conv_pd
->
weights_primitive_desc
());
reorder_weights
=
reorder
(
user_weights_memory
,
weights_memory
);
is_weights_reordered
=
true
;
}
ConvMKLDNNHandler
handler
(
conv_pd
,
dev_ctx
,
mkldnn_engine
,
key
);
// create memory primitive for conv dst
auto
dst_memory
=
memory
(
conv_pd
->
dst_primitive_desc
(),
output_data
);
// create mkldnn memory from input tensors (data/weights)
auto
user_src_memory_p
=
handler
.
AcquireSrcMemory
(
user_src_md
,
to_void_cast
<
T
>
(
input_data
));
auto
user_weights_memory_p
=
handler
.
AcquireWeightsMemory
(
user_weights_md
,
to_void_cast
<
T
>
(
filter_data
));
// create reorder primitive if the input format is not the preferred one
auto
src_memory_p
=
handler
.
AcquireSrcMemoryFromPrimitive
(
user_src_memory_p
,
pipeline
);
auto
weights_memory_p
=
handler
.
AcquireWeightsMemoryFromPrimitive
(
user_weights_memory_p
,
pipeline
);
auto
dst_memory_p
=
handler
.
AcquireDstMemoryFromPrimitive
(
to_void_cast
<
T
>
(
output_data
));
// create convolution op primitive
auto
conv_prim
=
conv_fwd
(
*
conv_pd
,
src_memory
,
weights_memory
,
dst_memory
);
auto
conv_p
=
handler
.
AcquireConvolution
(
src_memory_p
,
weights_memory_p
,
dst_memory_p
);
// push primitive to stream and wait until it's executed
std
::
vector
<
primitive
>
pipeline
;
if
(
is_src_reordered
)
pipeline
.
push_back
(
reorder_src
);
if
(
is_weights_reordered
)
pipeline
.
push_back
(
reorder_weights
);
pipeline
.
push_back
(
conv_prim
);
pipeline
.
push_back
(
*
conv_p
);
stream
(
stream
::
kind
::
eager
).
submit
(
pipeline
).
wait
();
// Save conv_pd/src_memory/weights_memory for backward pass
dev_ctx
.
SetBlob
(
key_conv_pd
,
conv_pd
);
output
->
set_layout
(
DataLayout
::
kMKLDNN
);
output
->
set_format
(
GetMKLDNNFormat
(
dst_memory
));
output
->
set_format
(
GetMKLDNNFormat
(
*
dst_memory_p
));
}
private:
...
...
@@ -197,13 +260,10 @@ class ConvMKLDNNGradOpKernel : public paddle::framework::OpKernel<T> {
if
(
!
input_grad
&&
!
filter_grad
)
return
;
// Get an unique name from "argument" name of "Output" variable
// This name will be used as key when saving info into device context
const
std
::
string
key
=
ctx
.
op
().
Input
(
"Output"
);
const
std
::
string
key_conv_pd
=
key
+
"@conv_pd"
;
std
::
vector
<
int
>
strides
=
ctx
.
Attr
<
std
::
vector
<
int
>>
(
"strides"
);
std
::
vector
<
int
>
paddings
=
ctx
.
Attr
<
std
::
vector
<
int
>>
(
"paddings"
);
std
::
vector
<
int
>
dilations
=
ctx
.
Attr
<
std
::
vector
<
int
>>
(
"dilations"
);
int
groups
=
ctx
.
Attr
<
int
>
(
"groups"
);
const
T
*
input_data
=
input
->
data
<
T
>
();
const
T
*
filter_data
=
filter
->
data
<
T
>
();
...
...
@@ -223,6 +283,14 @@ class ConvMKLDNNGradOpKernel : public paddle::framework::OpKernel<T> {
paddle
::
framework
::
vectorize2int
(
filter
->
dims
());
std
::
vector
<
int
>
dst_tz
=
paddle
::
framework
::
vectorize2int
(
output
->
dims
());
// Get an unique name from "argument" name of "Output" variable
// This name will be used as key when saving info into device context
const
std
::
string
key
=
ConvMKLDNNHandler
::
GetHash
(
src_tz
,
weights_tz
,
strides
,
paddings
,
dilations
,
groups
,
ctx
.
op
().
Input
(
"Output"
));
const
std
::
string
key_conv_pd
=
key
+
"@conv_pd"
;
// create mkldnn memory from input tensors (input/weights/output_grad)
auto
user_src_memory
=
memory
(
{{{
src_tz
},
memory
::
data_type
::
f32
,
input
->
format
()},
mkldnn_engine
},
...
...
paddle/fluid/operators/detection/rpn_target_assign_op.cc
浏览文件 @
dbaaca78
...
...
@@ -86,8 +86,9 @@ class RpnTargetAssignKernel : public framework::OpKernel<T> {
std
::
minstd_rand
engine
,
std
::
vector
<
int
>*
inds
)
const
{
std
::
uniform_real_distribution
<
float
>
uniform
(
0
,
1
);
if
(
inds
->
size
()
>
num
)
{
for
(
int
i
=
num
;
i
<
inds
->
size
();
++
i
)
{
const
int64_t
size
=
static_cast
<
int64_t
>
(
inds
->
size
());
if
(
size
>
num
)
{
for
(
int64_t
i
=
num
;
i
<
size
;
++
i
)
{
int
rng_ind
=
std
::
floor
(
uniform
(
engine
)
*
i
);
if
(
rng_ind
<
num
)
std
::
iter_swap
(
inds
->
begin
()
+
rng_ind
+
offset
,
...
...
paddle/fluid/operators/im2sequence_op.cc
浏览文件 @
dbaaca78
...
...
@@ -13,6 +13,7 @@ See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/fluid/operators/im2sequence_op.h"
#include <string>
#include <vector>
namespace
paddle
{
...
...
@@ -28,20 +29,19 @@ class Im2SequenceOp : public framework::OperatorWithKernel {
"Input(X) of Im2SequenceOp should not be null."
);
PADDLE_ENFORCE
(
ctx
->
HasOutput
(
"Out"
),
"Output(Out) of Im2SequenceOp op should not be null."
);
auto
in_dim
=
ctx
->
GetInputDim
(
"X"
);
PADDLE_ENFORCE_EQ
(
in_dim
.
size
(),
4
,
"Input(X) format must be 4D tensor, eg., NCHW."
);
auto
kernels
=
ctx
->
Attrs
().
Get
<
std
::
vector
<
int
>>
(
"kernels"
);
auto
strides
=
ctx
->
Attrs
().
Get
<
std
::
vector
<
int
>>
(
"strides"
);
auto
paddings
=
ctx
->
Attrs
().
Get
<
std
::
vector
<
int
>>
(
"paddings"
);
int
batch_size
=
in_dim
[
0
];
int
img_channels
=
in_dim
[
1
];
int
img_height
=
in_dim
[
2
];
int
img_width
=
in_dim
[
3
];
auto
kernels
=
ctx
->
Attrs
().
Get
<
std
::
vector
<
int
>>
(
"kernels"
);
auto
strides
=
ctx
->
Attrs
().
Get
<
std
::
vector
<
int
>>
(
"strides"
);
auto
paddings
=
ctx
->
Attrs
().
Get
<
std
::
vector
<
int
>>
(
"paddings"
);
int
output_height
=
Im2SeqOutputSize
(
img_height
,
kernels
[
0
],
paddings
[
0
],
paddings
[
2
],
strides
[
0
]);
int
output_width
=
Im2SeqOutputSize
(
img_width
,
kernels
[
1
],
paddings
[
1
],
...
...
@@ -61,6 +61,10 @@ class Im2SequenceOpMaker : public framework::OpProtoAndCheckerMaker {
"C: channels"
"H: height"
"W: width"
);
AddInput
(
"Y"
,
"(Tensor) The input tensor of image real size(H, W)."
"2-D with shape [batchsize, 2]"
)
.
AsDispensable
();
AddOutput
(
"Out"
,
"(LodTensor) The output data of im2sequence op,"
);
AddAttr
<
std
::
vector
<
int
>>
(
"kernels"
,
"(vector<int>), the "
...
...
@@ -73,6 +77,13 @@ class Im2SequenceOpMaker : public framework::OpProtoAndCheckerMaker {
"(vector<int> default:{0, 0, 0, 0}), the "
"paddings(up_pad, left_pad, down_pad, right_pad)"
)
.
SetDefault
({
0
,
0
,
0
,
0
});
AddAttr
<
std
::
vector
<
int
>>
(
"out_stride"
,
"the attribute is valid only when input(Y)"
"is not NULL.this attribute represents the"
"scaling of the pic through the CNN"
"(vector<int> dedault:{1,1}),the out_stride"
" (out_stride_height, out_stride_width)"
)
.
SetDefault
({
1
,
1
});
AddComment
(
R"DOC(
This op uses kernels to scan images and converts these images to sequences.
After expanding, The number of time steps are output_height * output_width
...
...
@@ -123,7 +134,7 @@ output.data = [[ 6. 2. 8. 3. 2. 4. 6. 3.]
[ 7. 1. 7. 9. 2. 1. 3. 5.]
[ 5. 7. 2. 4. 1. 3. 9. 0.]
[ 7. 9. 4. 8. 3. 5. 0. 8.]]
output.dims = {8,
9
}
output.dims = {8,
8
}
output.lod = [[0, 4, 8]]
)DOC"
);
...
...
paddle/fluid/operators/im2sequence_op.h
浏览文件 @
dbaaca78
...
...
@@ -13,6 +13,7 @@
limitations under the License. */
#pragma once
#include <string>
#include <vector>
#include "paddle/fluid/framework/data_layout.h"
#include "paddle/fluid/framework/eigen.h"
...
...
@@ -39,50 +40,106 @@ class Im2SequenceKernel : public framework::OpKernel<T> {
void
Compute
(
const
framework
::
ExecutionContext
&
ctx
)
const
override
{
const
Tensor
*
in
=
ctx
.
Input
<
Tensor
>
(
"X"
);
LoDTensor
*
out
=
ctx
.
Output
<
LoDTensor
>
(
"Out"
);
out
->
mutable_data
<
T
>
(
ctx
.
GetPlace
());
// TODO(wanghaoshuang): Add layout checker after 'set_layout'
// being available for python API
// PADDLE_ENFORCE_EQ(in->layout(), framework::DataLayout::kNCHW,
// "Input(X) layout must be NCHW");
auto
in_dim
=
in
->
dims
();
int
batch_size
=
in_dim
[
0
];
int
img_channels
=
in_dim
[
1
];
int
img_height
=
in_dim
[
2
];
int
img_width
=
in_dim
[
3
];
auto
kernels
=
ctx
.
Attr
<
std
::
vector
<
int
>>
(
"kernels"
);
auto
strides
=
ctx
.
Attr
<
std
::
vector
<
int
>>
(
"strides"
);
auto
paddings
=
ctx
.
Attr
<
std
::
vector
<
int
>>
(
"paddings"
);
int
output_height
=
Im2SeqOutputSize
(
img_height
,
kernels
[
0
],
paddings
[
0
],
paddings
[
2
],
strides
[
0
]);
int
output_width
=
Im2SeqOutputSize
(
img_width
,
kernels
[
1
],
paddings
[
1
],
paddings
[
3
],
strides
[
1
]);
const
std
::
vector
<
int
>
dilations
({
1
,
1
});
auto
out_dims
=
out
->
dims
();
out
->
Resize
({
batch_size
,
out
->
numel
()
/
batch_size
});
for
(
int
i
=
0
;
i
<
batch_size
;
i
++
)
{
const
Tensor
src
=
in
->
Slice
(
i
,
i
+
1
).
Resize
({
img_channels
,
img_height
,
img_width
});
Tensor
dst
=
out
->
Slice
(
i
,
i
+
1
).
Resize
(
{
output_height
,
output_width
,
img_channels
,
kernels
[
0
],
kernels
[
1
]});
math
::
Im2ColFunctor
<
math
::
ColFormat
::
kOCF
,
DeviceContext
,
T
>
f
;
auto
&
dev_ctx
=
ctx
.
template
device_context
<
DeviceContext
>();
f
(
dev_ctx
,
src
,
dilations
,
strides
,
paddings
,
&
dst
);
}
out
->
Resize
(
out_dims
);
// set lod information
// TODO(wanghaoshuang): Move this to InferShape
framework
::
LoD
lod
(
1
);
lod
[
0
].
reserve
(
batch_size
+
1
);
for
(
int
i
=
0
,
offset
=
0
;
i
<
batch_size
+
1
;
++
i
)
{
if
(
ctx
.
HasInput
(
"Y"
)
&&
batch_size
>
1
)
{
const
Tensor
*
imgrealsize
=
ctx
.
Input
<
Tensor
>
(
"Y"
);
auto
out_stride
=
ctx
.
Attr
<
std
::
vector
<
int
>>
(
"out_stride"
);
Tensor
cpu_shape_tensor
;
TensorCopySync
(
*
imgrealsize
,
platform
::
CPUPlace
(),
&
cpu_shape_tensor
);
std
::
vector
<
int
>
imgreal_h
;
std
::
vector
<
int
>
imgreal_w
;
std
::
vector
<
int
>
output_height
;
std
::
vector
<
int
>
output_width
;
int
result
=
0
;
for
(
int
i
=
0
;
i
<
batch_size
;
i
++
)
{
int
tmp_real_h
=
static_cast
<
int
>
((
cpu_shape_tensor
.
data
<
T
>
())[
2
*
i
]);
int
tmp_real_w
=
static_cast
<
int
>
((
cpu_shape_tensor
.
data
<
T
>
())[
2
*
i
+
1
]);
if
(
tmp_real_h
%
out_stride
[
0
]
==
0
)
{
tmp_real_h
=
tmp_real_h
/
out_stride
[
0
];
}
else
{
tmp_real_h
=
tmp_real_h
/
out_stride
[
0
]
+
1
;
}
if
(
tmp_real_w
%
out_stride
[
1
]
==
0
)
{
tmp_real_w
=
tmp_real_w
/
out_stride
[
1
];
}
else
{
tmp_real_w
=
tmp_real_w
/
out_stride
[
1
]
+
1
;
}
imgreal_h
.
push_back
(
tmp_real_h
);
imgreal_w
.
push_back
(
tmp_real_w
);
output_height
.
push_back
(
Im2SeqOutputSize
(
imgreal_h
[
i
],
kernels
[
0
],
paddings
[
0
],
paddings
[
2
],
strides
[
0
]));
output_width
.
push_back
(
Im2SeqOutputSize
(
imgreal_w
[
i
],
kernels
[
1
],
paddings
[
1
],
paddings
[
3
],
strides
[
1
]));
result
+=
output_height
[
i
]
*
output_width
[
i
];
}
out
->
mutable_data
<
T
>
({
result
,
img_channels
*
kernels
[
0
]
*
kernels
[
1
]},
ctx
.
GetPlace
());
const
std
::
vector
<
int
>
dilations
({
1
,
1
});
int
offset_out
=
0
;
for
(
int
i
=
0
;
i
<
batch_size
;
i
++
)
{
const
Tensor
src
=
in
->
Slice
(
i
,
i
+
1
).
Resize
({
img_channels
,
img_height
,
img_width
});
Tensor
dst
=
out
->
Slice
(
offset_out
,
offset_out
+
output_height
[
i
]
*
output_width
[
i
])
.
Resize
({
output_height
[
i
],
output_width
[
i
],
img_channels
,
kernels
[
0
],
kernels
[
1
]});
offset_out
+=
output_height
[
i
]
*
output_width
[
i
];
math
::
Im2ColFunctor
<
math
::
ColFormat
::
kOCF
,
DeviceContext
,
T
>
f
;
auto
&
dev_ctx
=
ctx
.
template
device_context
<
DeviceContext
>();
f
(
dev_ctx
,
src
,
dilations
,
strides
,
paddings
,
&
dst
);
}
framework
::
LoD
lod
(
1
);
lod
[
0
].
reserve
(
batch_size
+
1
);
int
offset
=
0
;
lod
[
0
].
push_back
(
offset
);
for
(
int
i
=
0
;
i
<
batch_size
;
++
i
)
{
offset
+=
output_height
[
i
]
*
output_width
[
i
];
lod
[
0
].
push_back
(
offset
);
}
out
->
set_lod
(
lod
);
}
else
{
out
->
mutable_data
<
T
>
(
ctx
.
GetPlace
());
int
output_height
=
Im2SeqOutputSize
(
img_height
,
kernels
[
0
],
paddings
[
0
],
paddings
[
2
],
strides
[
0
]);
int
output_width
=
Im2SeqOutputSize
(
img_width
,
kernels
[
1
],
paddings
[
1
],
paddings
[
3
],
strides
[
1
]);
const
std
::
vector
<
int
>
dilations
({
1
,
1
});
auto
out_dims
=
out
->
dims
();
out
->
Resize
({
batch_size
,
out
->
numel
()
/
batch_size
});
for
(
int
i
=
0
;
i
<
batch_size
;
i
++
)
{
const
Tensor
src
=
in
->
Slice
(
i
,
i
+
1
).
Resize
({
img_channels
,
img_height
,
img_width
});
Tensor
dst
=
out
->
Slice
(
i
,
i
+
1
).
Resize
({
output_height
,
output_width
,
img_channels
,
kernels
[
0
],
kernels
[
1
]});
math
::
Im2ColFunctor
<
math
::
ColFormat
::
kOCF
,
DeviceContext
,
T
>
f
;
auto
&
dev_ctx
=
ctx
.
template
device_context
<
DeviceContext
>();
f
(
dev_ctx
,
src
,
dilations
,
strides
,
paddings
,
&
dst
);
}
out
->
Resize
(
out_dims
);
framework
::
LoD
lod
(
1
);
lod
[
0
].
reserve
(
batch_size
+
1
);
int
offset
=
0
;
lod
[
0
].
push_back
(
offset
);
offset
+=
output_height
*
output_width
;
for
(
int
i
=
0
;
i
<
batch_size
;
++
i
)
{
offset
+=
output_height
*
output_width
;
lod
[
0
].
push_back
(
offset
);
}
out
->
set_lod
(
lod
);
}
out
->
set_lod
(
lod
);
}
};
...
...
paddle/fluid/operators/math/im2col.cc
浏览文件 @
dbaaca78
...
...
@@ -43,21 +43,6 @@ class Im2ColFunctor<paddle::operators::math::ColFormat::kCFO,
int
col_height
=
col
->
dims
()[
3
];
int
col_width
=
col
->
dims
()[
4
];
PADDLE_ENFORCE_EQ
((
im_height
+
padding
[
0
]
+
padding
[
2
]
-
((
dilation
[
0
]
*
(
filter_height
-
1
)
+
1
)))
/
stride
[
0
]
+
1
,
col_height
,
"Output_height and padding(padding_up, padding_down) are "
"inconsistent."
);
PADDLE_ENFORCE_EQ
((
im_width
+
padding
[
1
]
+
padding
[
3
]
-
((
dilation
[
1
]
*
(
filter_width
-
1
)
+
1
)))
/
stride
[
1
]
+
1
,
col_width
,
"Output_height and padding(padding_up, padding_down) are "
"inconsistent."
);
int
channels_col
=
im_channels
*
filter_height
*
filter_width
;
const
T
*
im_data
=
im
.
data
<
T
>
();
...
...
@@ -178,17 +163,6 @@ class Im2ColFunctor<paddle::operators::math::ColFormat::kOCF,
int
col_height
=
col
->
dims
()[
0
];
int
col_width
=
col
->
dims
()[
1
];
PADDLE_ENFORCE_EQ
(
(
im_height
+
padding
[
0
]
+
padding
[
2
]
-
filter_height
)
/
stride
[
0
]
+
1
,
col_height
,
"Output_height and padding(padding_up, padding_down) are "
"inconsistent."
);
PADDLE_ENFORCE_EQ
(
(
im_width
+
padding
[
1
]
+
padding
[
3
]
-
filter_width
)
/
stride
[
1
]
+
1
,
col_width
,
"col_width and padding(padding_left, padding_right) are "
"inconsistent."
);
const
T
*
im_data
=
im
.
data
<
T
>
();
T
*
col_data
=
col
->
data
<
T
>
();
...
...
paddle/fluid/operators/math/im2col.cu
浏览文件 @
dbaaca78
...
...
@@ -77,21 +77,6 @@ class Im2ColFunctor<paddle::operators::math::ColFormat::kCFO,
int
col_height
=
col
->
dims
()[
3
];
int
col_width
=
col
->
dims
()[
4
];
PADDLE_ENFORCE_EQ
((
im_height
+
padding
[
0
]
+
padding
[
2
]
-
(
dilation
[
0
]
*
(
filter_height
-
1
)
+
1
))
/
stride
[
0
]
+
1
,
col_height
,
"Output_height and padding(padding_up, padding_down) are "
"inconsistent."
);
PADDLE_ENFORCE_EQ
((
im_width
+
padding
[
1
]
+
padding
[
3
]
-
(
dilation
[
1
]
*
(
filter_width
-
1
)
+
1
))
/
stride
[
1
]
+
1
,
col_width
,
"col_width and padding(padding_left, padding_right) are "
"inconsistent."
);
int
num_outputs
=
im_channels
*
col_height
*
col_width
;
int
blocks
=
(
num_outputs
+
1024
-
1
)
/
1024
;
int
block_x
=
512
;
...
...
@@ -274,21 +259,6 @@ class Im2ColFunctor<paddle::operators::math::ColFormat::kOCF,
int
col_height
=
col
->
dims
()[
0
];
int
col_width
=
col
->
dims
()[
1
];
PADDLE_ENFORCE_EQ
((
im_height
+
padding
[
0
]
+
padding
[
2
]
-
(
dilation
[
0
]
*
(
filter_height
-
1
)
+
1
))
/
stride
[
0
]
+
1
,
col_height
,
"Output_height and padding(padding_up, padding_down) are "
"inconsistent."
);
PADDLE_ENFORCE_EQ
((
im_width
+
padding
[
1
]
+
padding
[
3
]
-
(
dilation
[
1
]
*
(
filter_width
-
1
)
+
1
))
/
stride
[
1
]
+
1
,
col_width
,
"col_width and padding(padding_left, padding_right) are "
"inconsistent."
);
int
block_dim_x
=
0
;
int
block_dim_y
=
0
;
if
(
filter_height
<=
4
&&
filter_width
<=
4
)
{
...
...
paddle/fluid/operators/reader/create_batch_reader_op.cc
浏览文件 @
dbaaca78
...
...
@@ -23,7 +23,7 @@ class BatchReader : public framework::DecoratedReader {
BatchReader
(
const
std
::
shared_ptr
<
ReaderBase
>&
reader
,
int
batch_size
,
bool
discard_leftover
)
:
DecoratedReader
(
reader
),
batch_size_
(
batch_size
),
batch_size_
(
static_cast
<
size_t
>
(
batch_size
)
),
discard_leftover_
(
discard_leftover
)
{
buffer_
.
reserve
(
batch_size_
);
}
...
...
@@ -31,7 +31,7 @@ class BatchReader : public framework::DecoratedReader {
void
ReadNextImpl
(
std
::
vector
<
framework
::
LoDTensor
>*
out
)
override
;
private:
in
t
batch_size_
;
size_
t
batch_size_
;
bool
discard_leftover_
;
std
::
vector
<
std
::
vector
<
framework
::
LoDTensor
>>
buffer_
;
};
...
...
@@ -78,7 +78,7 @@ class CreateBatchReaderOpMaker : public DecoratedReaderMakerBase {
void
BatchReader
::
ReadNextImpl
(
std
::
vector
<
framework
::
LoDTensor
>*
out
)
{
buffer_
.
clear
();
buffer_
.
reserve
(
batch_size_
);
for
(
in
t
i
=
0
;
i
<
batch_size_
;
++
i
)
{
for
(
size_
t
i
=
0
;
i
<
batch_size_
;
++
i
)
{
buffer_
.
push_back
(
std
::
vector
<
framework
::
LoDTensor
>
());
reader_
->
ReadNext
(
&
buffer_
.
back
());
if
(
buffer_
.
back
().
empty
())
{
...
...
@@ -95,9 +95,9 @@ void BatchReader::ReadNextImpl(std::vector<framework::LoDTensor>* out) {
// if buffer_ is empty, the 'out' will return as an empty vector.
return
;
}
in
t
out_num
=
buffer_
[
0
].
size
();
size_
t
out_num
=
buffer_
[
0
].
size
();
out
->
reserve
(
out_num
);
for
(
in
t
j
=
0
;
j
<
out_num
;
++
j
)
{
for
(
size_
t
j
=
0
;
j
<
out_num
;
++
j
)
{
// Merge shape and check data type
std
::
type_index
batch_type
=
buffer_
[
0
][
j
].
type
();
framework
::
DDim
batch_shape
=
buffer_
[
0
][
j
].
dims
();
...
...
paddle/fluid/operators/squeeze_op.cc
0 → 100644
浏览文件 @
dbaaca78
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include <string>
#include <vector>
#include "paddle/fluid/framework/op_registry.h"
namespace
paddle
{
namespace
operators
{
// Shape inference for the squeeze operator.
//
// operator() validates the op's input/output and the "axes" attribute, sets
// the dimensions of "Out", and shares the LoD of "X" with "Out" when their
// first dimensions are equal. GetOutputShape holds the actual shape
// computation and is also usable on its own.
class SqueezeOpInferShape : public framework::InferShapeBase {
 public:
  void operator()(framework::InferShapeContext *ctx) const override {
    PADDLE_ENFORCE(ctx->HasInput("X"),
                   "Input(X) of SqueezeOp should not be null.");
    PADDLE_ENFORCE(ctx->HasOutput("Out"),
                   "Output(Out) of SqueezeOp should not be null.");

    const auto &x_dims = ctx->GetInputDim("X");
    // The rank of the input may be at most 6 (Eigen limit).
    PADDLE_ENFORCE(x_dims.size() <= 6,
                   "Invalid dimnesions, the rank of Input(X) "
                   "should be in the range of [1, 6] (Eigen limit).");

    const auto &axes = ctx->Attrs().Get<std::vector<int>>("axes");
    for (int a : axes) {
      PADDLE_ENFORCE_LT(a, x_dims.size(),
                        "The squeeze axis should be less than input "
                        "tensor's rank.");
    }

    auto out_dims = GetOutputShape(axes, x_dims);
    ctx->SetOutputDim("Out", out_dims);
    // NOTE(review): when every dimension is squeezed, out_dims has rank 0 and
    // out_dims[0] is indexed anyway -- confirm how DDim handles that.
    if (x_dims[0] == out_dims[0]) {
      // Only pass LoD when the first dimension of output and Input(X)
      // are the same.
      ctx->ShareLoD("X", "Out");
    }
  }

  // Computes the shape that results from squeezing `in_dims`.
  //
  // squeeze_dims: axes to remove. Negative values count from the end
  //               (axis + rank). When empty, every dimension equal to 1 is
  //               removed.
  // in_dims:      shape of the input tensor.
  //
  // Returns the input shape with the selected dimensions dropped.
  // Fails via PADDLE_ENFORCE when an axis is outside [-rank, rank) or selects
  // a dimension whose extent is not 1. Repeated axes are counted once.
  static framework::DDim GetOutputShape(const std::vector<int> &squeeze_dims,
                                        const framework::DDim &in_dims) {
    const int rank = in_dims.size();
    int cnt_squeezed_dims = 0;
    // One flag per input dimension, sized from the actual rank so that no
    // fixed-capacity buffer can be overrun.
    std::vector<bool> should_squeeze(rank, false);

    // Mark and count the dimensions that need to be squeezed.
    if (squeeze_dims.empty()) {
      for (int idx = 0; idx < rank; ++idx) {
        if (in_dims[idx] == 1) {
          should_squeeze[idx] = true;
          ++cnt_squeezed_dims;
        }
      }
    } else {
      for (int axis : squeeze_dims) {
        const int current = axis < 0 ? axis + rank : axis;
        // Both bounds are checked here: this function is also called
        // directly, without the attribute validation done in operator().
        PADDLE_ENFORCE(current >= 0,
                       "Invalid axis, the negative axis is out of range.");
        PADDLE_ENFORCE(current < rank,
                       "Invalid axis, the axis should be less than input "
                       "tensor's rank.");
        PADDLE_ENFORCE(in_dims[current] == 1,
                       "Invalid axis index, the axis that will be squeezed "
                       "should be equal to 1.");

        if (!should_squeeze[current]) {
          ++cnt_squeezed_dims;
        }
        should_squeeze[current] = true;
      }
    }

    // Copy over the dimensions that were not marked.
    std::vector<int64_t> output_shape(rank - cnt_squeezed_dims, 0);
    for (int in_idx = 0, out_idx = 0; in_idx < rank; ++in_idx) {
      if (!should_squeeze[in_idx]) {
        output_shape[out_idx++] = in_dims[in_idx];
      }
    }

    return framework::make_ddim(output_shape);
  }
};
// Forward squeeze operator.
//
// The op does no data movement of its own: it computes the squeezed shape
// with SqueezeOpInferShape::GetOutputShape and then creates and runs a
// "reshape" op on the same input/output variables, forwarding the "inplace"
// attribute.
class SqueezeOp : public framework::OperatorBase {
 public:
  using OperatorBase::OperatorBase;

 private:
  void RunImpl(const framework::Scope &scope,
               const platform::Place &place) const override {
    auto &squeeze_axes = Attr<std::vector<int>>("axes");

    // Shape of the input tensor as currently stored in the scope.
    const auto &input_tensor =
        scope.FindVar(Input("X"))->Get<framework::LoDTensor>();
    auto input_shape = input_tensor.dims();
    auto squeezed_shape =
        SqueezeOpInferShape::GetOutputShape(squeeze_axes, input_shape);

    // Attributes handed to the delegated reshape op.
    framework::AttributeMap reshape_attrs;
    reshape_attrs["shape"] = framework::vectorize2int(squeezed_shape);
    reshape_attrs["inplace"] = Attr<bool>("inplace");

    // Invoke Reshape Op
    auto delegate = framework::OpRegistry::CreateOp(
        "reshape", {{"X", {Input("X")}}, {"Shape", {}}},
        {{"Out", {Output("Out")}}}, reshape_attrs);
    delegate->Run(scope, place);
  }
};
// Declares the interface of the squeeze operator: its input "X", its output
// "Out", the attributes "axes" and "inplace", and the user-facing
// documentation string.
class
SqueezeOpMaker
:
public
framework
::
OpProtoAndCheckerMaker
{
public:
// Registers inputs, outputs, attributes and the op comment.
void
Make
()
override
{
AddInput
(
"X"
,
"(Tensor). The input tensor of squeeze operator."
);
AddOutput
(
"Out"
,
"(Tensor). The output tensor of squeeze operator."
);
// "axes" defaults to an empty list.
AddAttr
<
std
::
vector
<
int
>>
(
"axes"
,
"(std::vector<int>). List of integers,"
" indicating the dimensions to squeeze."
)
.
SetDefault
({});
// "inplace" defaults to false.
AddAttr
<
bool
>
(
"inplace"
,
"(default: false) Squeeze the source tensor's shape without "
"memory copy. When Attr(inplace) is set true, the output "
"tensor shares memory with Input(X), otherwise, a new output "
"tensor is created, and its data are copied from Input(x)."
)
.
SetDefault
(
false
);
AddComment
(
R"DOC(
Squeeze Operator.
Remove single-dimensional entries from the shape of a tensor.
Takes a parameter axes with a list of axes to squeeze.
If axes is not provided, all the single dimensions will be removed from the shape.
If an axis is selected with shape entry not equal to one, an error is raised.
Examples:
Case 1:
Given
X.shape = (1, 3, 1, 5)
and
axes = [0]
we get:
Out.shape = (3, 1, 5)
Case 2:
Given
X.shape = (1, 3, 1, 5)
and
axes = []
we get:
Out.shape = (3, 5)
)DOC"
);
}
};
// Shape inference for the squeeze gradient: the gradient of "X" gets the
// dimensions of "X" and shares its LoD.
class SqueezeGradInferShape : public framework::InferShapeBase {
 public:
  void operator()(framework::InferShapeContext *context) const override {
    const auto x_grad_name = framework::GradVarName("X");
    context->SetOutputDim(x_grad_name, context->GetInputDim("X"));
    context->ShareLoD("X", x_grad_name);
  }
};
// Backward squeeze operator.
//
// Creates and runs a "reshape" op that takes the gradient of "Out" as input
// and writes the gradient of "X", using the dimensions of the forward input
// "X" as the target shape and forwarding the "inplace" attribute.
class SqueezeGradOp : public framework::OperatorBase {
 public:
  using OperatorBase::OperatorBase;

 private:
  void RunImpl(const framework::Scope &scope,
               const platform::Place &place) const override {
    // Variable names of the gradient output and the incoming gradient.
    const std::string x_grad_var = Output(framework::GradVarName("X"));
    const std::string out_grad_var = Input(framework::GradVarName("Out"));

    // The gradient of X takes the shape of the forward input X.
    const auto &forward_input =
        scope.FindVar(Input("X"))->Get<framework::LoDTensor>();
    auto forward_shape = forward_input.dims();

    framework::AttributeMap reshape_attrs;
    reshape_attrs["shape"] = framework::vectorize2int(forward_shape);
    reshape_attrs["inplace"] = Attr<bool>("inplace");

    auto delegate = framework::OpRegistry::CreateOp(
        "reshape", {{"X", {out_grad_var}}, {"Shape", {}}},
        {{"Out", {x_grad_var}}}, reshape_attrs);
    delegate->Run(scope, place);
  }
};
}
// namespace operators
}
// namespace paddle
// Tell linker to use reshape op
// (the squeeze and squeeze_grad operators in this file create a "reshape" op
// through OpRegistry::CreateOp when they run).
USE_OP
(
reshape
);
namespace
ops
=
paddle
::
operators
;
// Forward op: operator class, proto maker, shape inference, and the default
// grad-op description maker.
REGISTER_OPERATOR
(
squeeze
,
ops
::
SqueezeOp
,
ops
::
SqueezeOpMaker
,
ops
::
SqueezeOpInferShape
,
paddle
::
framework
::
DefaultGradOpDescMaker
<
true
>
);
// Backward op: operator class and its shape inference.
REGISTER_OPERATOR
(
squeeze_grad
,
ops
::
SqueezeGradOp
,
ops
::
SqueezeGradInferShape
);
paddle/fluid/operators/unsqueeze_op.cc
0 → 100644
浏览文件 @
dbaaca78
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include <string>
#include <vector>
#include "paddle/fluid/framework/op_registry.h"
namespace
paddle
{
namespace
operators
{
// Shape inference for the unsqueeze operator.
//
// operator() validates the op's input/output, sets the dimensions of "Out",
// and shares the LoD of "X" with "Out" when their first dimensions are equal.
// GetOutputShape holds the actual shape computation and is also usable on its
// own.
class UnsqueezeOpInferShape : public framework::InferShapeBase {
 public:
  void operator()(framework::InferShapeContext *ctx) const override {
    PADDLE_ENFORCE(ctx->HasInput("X"),
                   "Input(X) of UnsqueezeOp should not be null.");
    PADDLE_ENFORCE(ctx->HasOutput("Out"),
                   "Output(Out) of UnsqueezeOp should not be null.");

    const auto &axes = ctx->Attrs().Get<std::vector<int>>("axes");
    const auto &x_dims = ctx->GetInputDim("X");
    // Validity Check: the rank of the input may be at most 6.
    PADDLE_ENFORCE(x_dims.size() <= 6,
                   "Invalid dimensions, the rank of Input(X) "
                   "should be in the range of [1, 6] (Eigen limit)");

    auto out_dims = GetOutputShape(axes, x_dims);
    ctx->SetOutputDim("Out", out_dims);
    if (x_dims[0] == out_dims[0]) {
      // Only pass LoD when the first dimension of output and Input(X)
      // are the same.
      ctx->ShareLoD("X", "Out");
    }
  }

  // Computes the shape that results from inserting size-1 dimensions.
  //
  // unsqz_dims: positions at which to insert a dimension of extent 1. They
  //             are applied one after another, each relative to the shape
  //             produced so far; a negative value counts from the end of
  //             that intermediate shape (axis + current_rank + 1).
  // in_dims:    shape of the input tensor.
  //
  // Returns a shape of rank in_dims.size() + unsqz_dims.size().
  // Fails via PADDLE_ENFORCE when that rank exceeds 6 or when an axis falls
  // outside [0, current_rank] after normalization.
  static framework::DDim GetOutputShape(const std::vector<int> &unsqz_dims,
                                        const framework::DDim &in_dims) {
    const int output_size =
        in_dims.size() + static_cast<int>(unsqz_dims.size());

    // Validity Check: rank range. Checked before anything is allocated.
    PADDLE_ENFORCE(output_size <= 6,
                   "The output tensor's rank should not be greater than 6.");

    // Rank of the intermediate shape; grows by one per inserted axis.
    int cur_output_size = in_dims.size();
    // Working buffer. During the insertion pass an entry of 1 marks an
    // inserted axis and 0 marks a slot still to be filled from the input.
    std::vector<int64_t> output_shape(output_size, 0);

    for (int axis : unsqz_dims) {
      const int cur = axis < 0 ? axis + cur_output_size + 1 : axis;
      // Validity Check: the axis bound
      PADDLE_ENFORCE(
          cur >= 0 && cur <= cur_output_size,
          "The unsqueeze dims must be within range of current rank.");

      // Shift every previously inserted axis at or after `cur` one slot to
      // the right. Walking from the back ensures a marker is never moved
      // onto a slot that has not been processed yet.
      for (int i = cur_output_size; i >= cur; --i) {
        if (output_shape[i] == 1) {
          output_shape[i + 1] = 1;
          output_shape[i] = 0;
        }
      }
      output_shape[cur] = 1;
      // Add the output size.
      cur_output_size++;
    }

    // Fill the remaining slots, in order, with the input dimensions.
    for (int in_idx = 0, out_idx = 0; out_idx < output_size; ++out_idx) {
      if (output_shape[out_idx] == 0) {
        output_shape[out_idx] = in_dims[in_idx++];
      }
    }

    return framework::make_ddim(output_shape);
  }
};
// Forward unsqueeze operator.
//
// The op does no data movement of its own: it computes the expanded shape
// with UnsqueezeOpInferShape::GetOutputShape and then creates and runs a
// "reshape" op on the same input/output variables, forwarding the "inplace"
// attribute.
class UnsqueezeOp : public framework::OperatorBase {
 public:
  using OperatorBase::OperatorBase;

 private:
  void RunImpl(const framework::Scope &scope,
               const platform::Place &place) const override {
    auto &insert_axes = Attr<std::vector<int>>("axes");

    // Shape of the input tensor as currently stored in the scope.
    const auto &input_tensor =
        scope.FindVar(Input("X"))->Get<framework::LoDTensor>();
    auto input_shape = input_tensor.dims();
    auto expanded_shape =
        UnsqueezeOpInferShape::GetOutputShape(insert_axes, input_shape);

    // Attributes handed to the delegated reshape op.
    framework::AttributeMap reshape_attrs;
    reshape_attrs["shape"] = framework::vectorize2int(expanded_shape);
    reshape_attrs["inplace"] = Attr<bool>("inplace");

    // Invoke Reshape op.
    auto delegate = framework::OpRegistry::CreateOp(
        "reshape", {{"X", {Input("X")}}, {"Shape", {}}},
        {{"Out", {Output("Out")}}}, reshape_attrs);
    delegate->Run(scope, place);
  }
};
// Declares the interface of the unsqueeze operator: its input "X", its output
// "Out", the attributes "axes" (with a custom validity checker) and
// "inplace", and the user-facing documentation string.
class
UnsqueezeOpMaker
:
public
framework
::
OpProtoAndCheckerMaker
{
public:
// Registers inputs, outputs, attributes and the op comment.
void
Make
()
override
{
AddInput
(
"X"
,
"(Tensor). The input tensor of unsqueeze operator."
);
AddOutput
(
"Out"
,
"(Tensor). The output tensor of unsqueeze operator."
);
// "axes" has no default; the checker below rejects an empty list.
AddAttr
<
std
::
vector
<
int
>>
(
"axes"
,
"(std::vector<int>). List of integers,"
" indicating the dimensions to be inserted"
)
.
AddCustomChecker
([](
const
std
::
vector
<
int
>
&
axes
)
{
PADDLE_ENFORCE
(
!
axes
.
empty
(),
"Invalid axes, The unsqueeze axes is empty."
);
// Validity Check: the number of axes must be less than 6.
PADDLE_ENFORCE
(
static_cast
<
int
>
(
axes
.
size
())
<
6
,
"Invalid dimensions, dynamic dimensions should be "
"within [1, 6] dimensions (Eigen limit)."
);
// Validity Check: the range of each unsqueeze axis.
// Only the upper bound is enforced here; no lower bound is checked.
for
(
int
axis
:
axes
)
{
PADDLE_ENFORCE
(
axis
<
6
,
"Invalid dimensions, input axis should be"
" within [1, 6] dimensions (Eigen limit)."
);
}
});
// "inplace" defaults to false.
AddAttr
<
bool
>
(
"inplace"
,
"(default: false) Unsqueeze the source tensor's shape without "
"memory copy. When Attr(inplace) is set true, the output "
"tensor shares memory with Input(X), otherwise, a new output "
"tensor is created, and its data are copied from Input(x)."
)
.
SetDefault
(
false
);
AddComment
(
R"DOC(
Unsqueeze Operator.
Insert single-dimensional entries to the shape of a tensor.
Takes one required argument axes, a list of dimensions that will be inserted.
Dimension indices in axes are as seen in the output tensor.
For example:
Given a tensor such that tensor with shape [3, 4, 5],
then Unsqueeze(tensor, axes=[0, 4]) has shape [1, 3, 4, 5, 1]
)DOC"
);
}
};
// Shape inference for `unsqueeze_grad`: the gradient of X takes the same
// dimensions and LoD as X itself.
class UnsqueezeGradInferShape : public framework::InferShapeBase {
 public:
  void operator()(framework::InferShapeContext *ctx) const override {
    // Name of the gradient variable that corresponds to input "X".
    const auto x_grad_name = framework::GradVarName("X");
    ctx->SetOutputDim(x_grad_name, ctx->GetInputDim("X"));
    ctx->ShareLoD("X", x_grad_name);
  }
};
// Runtime operator for `unsqueeze_grad`. It reshapes the gradient of Out back
// to the dimensions of the forward input X by delegating to a `reshape` op.
class UnsqueezeGradOp : public framework::OperatorBase {
 public:
  using OperatorBase::OperatorBase;

 private:
  // Creates a `reshape` op that maps Out@GRAD to X@GRAD using the dimensions
  // of X found in `scope`, then runs it on the same scope and place.
  void RunImpl(const framework::Scope &scope,
               const platform::Place &place) const override {
    auto x_grad_name = Output(framework::GradVarName("X"));
    auto out_grad_name = Input(framework::GradVarName("Out"));
    auto forward_x_dims =
        scope.FindVar(Input("X"))->Get<framework::LoDTensor>().dims();

    // The target shape of the gradient is the shape of the forward input.
    framework::AttributeMap reshape_attrs;
    reshape_attrs["shape"] = framework::vectorize2int(forward_x_dims);
    reshape_attrs["inplace"] = Attr<bool>("inplace");

    // The "Shape" input is passed as an empty list; the target shape is
    // supplied through the "shape" attribute.
    auto delegated_reshape = framework::OpRegistry::CreateOp(
        "reshape", {{"X", {out_grad_name}}, {"Shape", {}}},
        {{"Out", {x_grad_name}}}, reshape_attrs);
    delegated_reshape->Run(scope, place);
  }
};
}
// namespace operators
}
// namespace paddle
// Tell linker to use reshape op.
// The unsqueeze operators in this file create a "reshape" op by name at run
// time through OpRegistry::CreateOp.
USE_OP(reshape);
namespace ops = paddle::operators;
// Forward op: kernel class, proto maker, shape inference and the default
// gradient op description maker.
REGISTER_OPERATOR(unsqueeze, ops::UnsqueezeOp, ops::UnsqueezeOpMaker,
                  ops::UnsqueezeOpInferShape,
                  paddle::framework::DefaultGradOpDescMaker<true>);
// Backward op: kernel class and its shape inference.
REGISTER_OPERATOR(unsqueeze_grad, ops::UnsqueezeGradOp,
                  ops::UnsqueezeGradInferShape);
paddle/fluid/platform/mkldnn_helper.h
浏览文件 @
dbaaca78
...
...
@@ -222,15 +222,16 @@ class MKLDNNHandler {
static
std
::
string
GetHash
(
mkldnn
::
memory
::
dims
&
operand_dims
,
// NOLINT
const
std
::
string
&
suffix
)
{
auto
dims2str
=
[](
const
mkldnn
::
memory
::
dims
&
operand_dims
)
{
std
::
string
dstr
=
""
;
for
(
size_t
i
=
0
;
i
<
operand_dims
.
size
();
++
i
)
{
dstr
+=
std
::
to_string
(
operand_dims
[
i
])
+
"-"
;
}
return
dstr
;
};
return
dims2str
(
operand_dims
)
+
suffix
;
};
protected:
static
std
::
string
dims2str
(
const
mkldnn
::
memory
::
dims
&
operand_dims
)
{
std
::
string
dstr
=
""
;
for
(
size_t
i
=
0
;
i
<
operand_dims
.
size
();
++
i
)
{
dstr
+=
std
::
to_string
(
operand_dims
[
i
])
+
"-"
;
}
return
dstr
;
}
protected:
...
...
paddle/fluid/pybind/pybind.cc
浏览文件 @
dbaaca78
...
...
@@ -66,6 +66,14 @@ bool IsCompiledWithCUDA() {
#endif
}
// Reports whether PADDLE_WITH_DIST was defined when this file was compiled.
bool IsCompiledWithDIST() {
#if defined(PADDLE_WITH_DIST)
  const bool with_dist = true;
#else
  const bool with_dist = false;
#endif
  return with_dist;
}
PYBIND11_PLUGIN
(
core
)
{
py
::
module
m
(
"core"
,
"C++ core of PaddlePaddle"
);
...
...
@@ -508,6 +516,7 @@ All parameter, weight, gradient are variables in Paddle.
[](
bool
init_p2p
)
{
framework
::
InitDevices
(
init_p2p
);
});
m
.
def
(
"is_compiled_with_cuda"
,
IsCompiledWithCUDA
);
m
.
def
(
"is_compiled_with_dist"
,
IsCompiledWithDIST
);
#ifdef PADDLE_WITH_CUDA
m
.
def
(
"is_float16_supported"
,
[](
const
platform
::
CUDAPlace
&
place
)
->
bool
{
// Only GPUs with Compute Capability >= 53 support float16
...
...
paddle/scripts/paddle_build.sh
浏览文件 @
dbaaca78
...
...
@@ -518,11 +518,23 @@ function gen_fluid_inference_lib() {
EOF
make
-j
`
nproc
`
inference_lib_dist
cd
${
PADDLE_ROOT
}
/build
mv
fluid_install_dir fluid
cp
-r
fluid_install_dir fluid
tar
-cf
fluid.tgz fluid
fi
}
# Runs the fluid inference demo (demo_ci/run.sh) as a smoke test of the
# generated inference library. Does nothing unless WITH_C_API is OFF
# (the default when the variable is unset or empty).
#
# Reads: PADDLE_ROOT, WITH_C_API (default OFF), WITH_MKL (default ON),
#        WITH_GPU (default OFF).
function test_fluid_inference_lib() {
    if [ "${WITH_C_API:-OFF}" == "OFF" ] ; then
        cat <<EOF
========================================
Testing fluid inference library ...
========================================
EOF
        # Quote every expansion so a PADDLE_ROOT containing spaces or glob
        # characters is passed through as a single, literal argument.
        cd "${PADDLE_ROOT}/paddle/contrib/inference/demo_ci"
        sh run.sh "${PADDLE_ROOT}" "${WITH_MKL:-ON}" "${WITH_GPU:-OFF}"
    fi
}
function
main
()
{
set
-e
local
CMD
=
$1
...
...
@@ -576,6 +588,7 @@ function main() {
run_test
gen_capi_package
gen_fluid_inference_lib
test_fluid_inference_lib
;;
*
)
print_usage
...
...
python/CMakeLists.txt
浏览文件 @
dbaaca78
...
...
@@ -92,8 +92,15 @@ install(DIRECTORY ${PADDLE_PYTHON_PACKAGE_DIR}
DESTINATION opt/paddle/share/wheels
)
find_program
(
PATCHELF_EXECUTABLE patchelf
)
if
(
NOT PATCHELF_EXECUTABLE
)
message
(
FATAL_ERROR
"patchelf not found, please install it.
\n
"
"For Ubuntu, the command is: apt-get install -y patchelf."
)
endif
()
if
(
APPLE
)
find_program
(
INSTALL_NAME_TOOL_EXECUTABLE install_name_tool
)
if
(
NOT INSTALL_NAME_TOOL_EXECUTABLE
)
message
(
FATAL_ERROR
"install_name_tool not found, please check.
\n
"
)
endif
()
else
(
APPLE
)
find_program
(
PATCHELF_EXECUTABLE patchelf
)
if
(
NOT PATCHELF_EXECUTABLE
)
message
(
FATAL_ERROR
"patchelf not found, please install it.
\n
"
"For Ubuntu, the command is: apt-get install -y patchelf."
)
endif
()
endif
(
APPLE
)
python/paddle/fluid/__init__.py
浏览文件 @
dbaaca78
...
...
@@ -121,6 +121,9 @@ def __bootstrap__():
'eager_delete_scope'
,
'use_mkldnn'
,
'initial_cpu_memory_in_mb'
,
'init_allocated_mem'
]
if
core
.
is_compiled_with_dist
():
read_env_flags
.
append
(
'rpc_deadline'
)
if
core
.
is_compiled_with_cuda
():
read_env_flags
+=
[
'fraction_of_gpu_memory_to_use'
,
'cudnn_deterministic'
...
...
python/paddle/fluid/annotations.py
0 → 100644
浏览文件 @
dbaaca78
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import
functools
import
sys
__all__
=
[
'deprecated'
]
def deprecated(since, instead, extra_message=""):
    """Build a decorator that marks an API as deprecated.

    The decorated function keeps its behavior, but every call first writes a
    deprecation notice to ``sys.stderr``. The same notice is appended to the
    function's docstring.

    Args:
        since (str): Version since which the API is deprecated.
        instead (str): Name of the API that should be used instead.
        extra_message (str): Optional text appended to the notice on its own
            line.

    Returns:
        callable: A decorator that wraps a function with the notice.
    """

    def decorator(func):
        err_msg = "API {0} is deprecated since {1}. Please use {2} instead.".format(
            func.__name__, since, instead)
        if extra_message:
            err_msg += "\n"
            err_msg += extra_message

        @functools.wraps(func)
        def wrapper(*args, **kwargs):
            # `print >> sys.stderr, msg` is Python-2-only syntax and raises
            # TypeError on Python 3; writing to the stream works on both.
            sys.stderr.write(err_msg + "\n")
            return func(*args, **kwargs)

        # functools.wraps copies func.__doc__, which is None when func has no
        # docstring; treat that case as an empty docstring.
        wrapper.__doc__ = (wrapper.__doc__ or "") + "\n" + err_msg
        return wrapper

    return decorator
python/paddle/fluid/backward.py
浏览文件 @
dbaaca78
...
...
@@ -18,10 +18,7 @@ import collections
import
copy
import
unique_name
__all__
=
[
'append_backward'
,
'calc_gradient'
,
]
__all__
=
[
'append_backward'
]
def
_rename_arg_
(
op_descs
,
old_name
,
new_name
,
begin_idx
=
None
,
end_idx
=
None
):
...
...
@@ -123,7 +120,8 @@ def _append_grad_suffix_(name):
def
_addup_repetitive_outputs_
(
op_descs
):
"""
In backward part, an variable may be the output of more than one ops.
In this case, the variable should be the accumulation of all the outputs.
And one op may yield its multiple outputs to the same variable.
In these cases, the variable should be the accumulation of all the outputs.
`sum_op`s are added to implement the accumulate.
"""
pending_sum_ops
=
[]
...
...
@@ -136,29 +134,46 @@ def _addup_repetitive_outputs_(op_descs):
"sum"
,
{
"X"
:
renamed_vars
[
var_name
]},
{
"Out"
:
[
var_name
]},
{
"use_mkldnn"
:
False
}),
idx
))
renamed_vars
[
var_name
]
=
[
var_name
]
for
var_name
in
op_desc
.
output_arg_names
():
if
var_name
==
core
.
empty_var_name
(
)
or
var_name
in
op_desc
.
input_arg_names
():
# empty variable or inplace op
continue
if
len
(
renamed_vars
[
var_name
])
==
0
:
# it's the first time we get the variable
renamed_vars
[
var_name
]
=
[
var_name
]
else
:
if
len
(
renamed_vars
[
var_name
])
==
1
:
for
param_idx
,
param_name
in
enumerate
(
op_desc
.
output_names
()):
arg_names
=
op_desc
.
output
(
param_name
)
for
arg_idx
,
var_name
in
enumerate
(
arg_names
):
if
var_name
==
core
.
empty_var_name
(
)
or
var_name
in
op_desc
.
input_arg_names
():
# empty variable or inplace op
continue
if
len
(
renamed_vars
[
var_name
])
==
0
:
# it's the first time we get the variable
renamed_vars
[
var_name
]
=
[
var_name
]
else
:
if
len
(
renamed_vars
[
var_name
])
==
1
:
new_name
=
var_name
+
"@RENAME@"
+
\
str
(
var_rename_count
[
var_name
])
var_rename_count
[
var_name
]
+=
1
# rename original var_name
renamed_vars
[
var_name
][
0
]
=
new_name
_rename_arg_
(
op_descs
,
var_name
,
new_name
,
0
,
idx
)
_rename_arg_
(
pending_sum_ops
,
var_name
,
new_name
)
for
p
in
op_desc
.
output_names
()[:
param_idx
]:
p_arg_names
=
op_desc
.
output
(
p
)
if
var_name
in
p_arg_names
:
op_desc
.
set_output
(
p
,
[
new_name
if
x
==
var_name
else
x
for
x
in
p_arg_names
])
arg_names
=
[
new_name
if
x
==
var_name
else
x
for
x
in
arg_names
[:
arg_idx
]
]
+
arg_names
[
arg_idx
:]
new_name
=
var_name
+
"@RENAME@"
+
\
str
(
var_rename_count
[
var_name
])
var_rename_count
[
var_name
]
+=
1
# rename original var_name
renamed_vars
[
var_name
][
0
]
=
new_name
_rename_arg_
(
op_descs
,
var_name
,
new_name
,
0
,
idx
)
_rename_arg_
(
pending_sum_ops
,
var_name
,
new_name
)
new_name
=
var_name
+
"@RENAME@"
+
\
str
(
var_rename_count
[
var_name
])
var_rename_count
[
var_name
]
+=
1
op_desc
.
rename_output
(
var_name
,
new_name
)
renamed_vars
[
var_name
].
append
(
new_name
)
arg_names
[
arg_idx
]
=
new_name
op_desc
.
set_output
(
param_name
,
arg_names
)
renamed_vars
[
var_name
].
append
(
new_name
)
for
var_name
,
inputs
in
renamed_vars
.
iteritems
():
if
len
(
inputs
)
>
1
:
pending_sum_ops
.
append
(
...
...
python/paddle/fluid/layers/device.py
浏览文件 @
dbaaca78
...
...
@@ -18,10 +18,12 @@ All util layers.
from
layer_function_generator
import
autodoc
from
..framework
import
unique_name
from
..layer_helper
import
LayerHelper
from
..annotations
import
deprecated
__all__
=
[
'get_places'
]
__all__
=
[]
@
deprecated
(
since
=
'0.15.0'
,
instead
=
"ParallelExecutor"
)
@
autodoc
()
def
get_places
(
device_count
=
None
,
device_type
=
None
):
helper
=
LayerHelper
(
'get_places'
,
**
locals
())
...
...
python/paddle/fluid/layers/nn.py
浏览文件 @
dbaaca78
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Copyright (c ) 2018 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
...
...
@@ -3900,7 +3914,13 @@ def transpose(x, perm, name=None):
return
out
def
im2sequence
(
input
,
filter_size
=
1
,
stride
=
1
,
padding
=
0
,
name
=
None
):
def
im2sequence
(
input
,
filter_size
=
1
,
stride
=
1
,
padding
=
0
,
input_image_size
=
None
,
out_stride
=
1
,
name
=
None
):
"""
Extracts image patches from the input tensor to form a tensor of shape
{input.batch_size * output_height * output_width, filter_size_H *
...
...
@@ -3937,6 +3957,15 @@ def im2sequence(input, filter_size=1, stride=1, padding=0, name=None):
padding_up = padding_down = padding_left = padding_right = padding
Default: padding = 0.
input_image_size(Variable): the input contains the real image sizes. Its dim
is [batchsize, 2]. It is dispensable. It is only used for batch inference.
out_stride(int|tuple): The scaling of the image through the CNN. It is
dispensable. It is valid only when input_image_size is not null.
If out_stride is a tuple, it must contain two integers,
(out_stride_H, out_stride_W). Otherwise,
out_stride_H = out_stride_W = out_stride.
name (int): The name of this layer. It is optional.
Returns:
...
...
@@ -3987,7 +4016,7 @@ def im2sequence(input, filter_size=1, stride=1, padding=0, name=None):
[ 5. 7. 2. 4. 1. 3. 9. 0.]
[ 7. 9. 4. 8. 3. 5. 0. 8.]]
output.dims = {8,
9
}
output.dims = {8,
8
}
output.lod = [[4, 4]]
...
...
@@ -4009,18 +4038,17 @@ def im2sequence(input, filter_size=1, stride=1, padding=0, name=None):
if
len
(
padding
)
==
2
:
padding
.
append
(
padding
[
0
])
padding
.
append
(
padding
[
1
])
inputs
=
{
"X"
:
input
}
attrs
=
{
"kernels"
:
filter_size
,
"strides"
:
stride
,
"padding"
:
padding
}
if
input_image_size
:
if
isinstance
(
out_stride
,
int
):
out_stride
=
[
out_stride
,
out_stride
]
inputs
[
"Y"
]
=
input_image_size
attrs
[
"out_stride"
]
=
out_stride
helper
=
LayerHelper
(
'im2sequence'
,
**
locals
())
out
=
helper
.
create_tmp_variable
(
dtype
=
helper
.
input_dtype
())
helper
.
append_op
(
type
=
'im2sequence'
,
inputs
=
{
'X'
:
input
},
outputs
=
{
'Out'
:
out
},
attrs
=
{
'kernels'
:
filter_size
,
'strides'
:
stride
,
'paddings'
:
padding
,
})
type
=
'im2sequence'
,
inputs
=
inputs
,
outputs
=
{
'Out'
:
out
},
attrs
=
attrs
)
return
out
...
...
python/paddle/fluid/optimizer.py
浏览文件 @
dbaaca78
...
...
@@ -29,7 +29,7 @@ __all__ = [
'SGD'
,
'Momentum'
,
'Adagrad'
,
'Adam'
,
'Adamax'
,
'DecayedAdagrad'
,
'Ftrl'
,
'SGDOptimizer'
,
'MomentumOptimizer'
,
'AdagradOptimizer'
,
'AdamOptimizer'
,
'AdamaxOptimizer'
,
'DecayedAdagradOptimizer'
,
'RMSPropOptimizer'
,
'FtrlOptimizer'
,
'Adadelta'
,
'ModelAverage'
,
'
Optimizer'
,
'
RMSPropOptimizer'
'FtrlOptimizer'
,
'Adadelta'
,
'ModelAverage'
,
'RMSPropOptimizer'
]
...
...
@@ -67,7 +67,7 @@ class Optimizer(object):
self
.
_LARS_weight_decay
=
LARS_weight_decay
def
_create_global_learning_rate
(
self
):
lr
=
self
.
global_learning_rate
()
lr
=
self
.
_
global_learning_rate
()
if
isinstance
(
lr
,
framework
.
Variable
):
return
...
...
@@ -86,7 +86,7 @@ class Optimizer(object):
dtype
=
'float32'
if
self
.
_dtype
==
None
else
self
.
_dtype
,
persistable
=
True
)
def
global_learning_rate
(
self
,
program
=
None
):
def
_
global_learning_rate
(
self
,
program
=
None
):
"""
get global decayed learning rate
:return:
...
...
@@ -110,9 +110,9 @@ class Optimizer(object):
return
param_lr
else
:
if
param_lr
==
1.0
:
return
self
.
global_learning_rate
()
return
self
.
_
global_learning_rate
()
else
:
return
self
.
global_learning_rate
()
*
param_lr
return
self
.
_
global_learning_rate
()
*
param_lr
def
_create_accumulators
(
self
,
block
,
parameters
):
"""Create all accumulators needed by the parameters
...
...
@@ -185,10 +185,10 @@ class Optimizer(object):
format
(
name
,
param
.
name
))
return
self
.
_accumulators
[
name
][
param
.
name
]
def
create_optimization_pass
(
self
,
parameters_and_grads
,
loss
,
startup_program
=
None
):
def
_
create_optimization_pass
(
self
,
parameters_and_grads
,
loss
,
startup_program
=
None
):
"""Add optimization operators to update gradients to variables.
Args:
...
...
@@ -221,7 +221,7 @@ class Optimizer(object):
self
.
_create_global_learning_rate
()
if
self
.
_LARS_weight_decay
>
0.0
:
layers
.
append_LARS
(
parameters_and_grads
,
self
.
global_learning_rate
(),
self
.
_
global_learning_rate
(),
self
.
_LARS_weight_decay
)
optimize_ops
=
[]
...
...
@@ -262,8 +262,8 @@ class Optimizer(object):
params_grads
=
append_regularization_ops
(
params_grads
,
self
.
regularization
)
optimize_ops
=
self
.
create_optimization_pass
(
params_grads
,
loss
,
startup_program
)
optimize_ops
=
self
.
_
create_optimization_pass
(
params_grads
,
loss
,
startup_program
)
return
optimize_ops
,
params_grads
...
...
python/paddle/fluid/tests/book/notest_understand_sentiment.py
浏览文件 @
dbaaca78
...
...
@@ -12,7 +12,7 @@
# See the License for the specific language governing permissions and
# limitations under the License.
from
__future__
import
print_function
from
paddle.fluid.layers.device
import
get_places
import
unittest
import
paddle.fluid
as
fluid
import
paddle
...
...
@@ -144,7 +144,7 @@ def train(word_dict,
cost
,
acc_out
,
prediction
=
net_method
(
data
,
label
,
input_dim
=
dict_dim
,
class_dim
=
class_dim
)
else
:
places
=
fluid
.
layers
.
get_places
()
places
=
get_places
()
pd
=
fluid
.
layers
.
ParallelDo
(
places
)
with
pd
.
do
():
cost
,
acc
,
_
=
net_method
(
...
...
python/paddle/fluid/tests/book/test_recognize_digits.py
浏览文件 @
dbaaca78
...
...
@@ -12,15 +12,17 @@
# See the License for the specific language governing permissions and
# limitations under the License.
from
__future__
import
print_function
import
argparse
import
paddle.fluid
as
fluid
import
paddle
import
sys
import
numpy
import
unittest
import
math
import
sys
import
os
import
sys
import
unittest
import
numpy
import
paddle
import
paddle.fluid
as
fluid
from
paddle.fluid.layers.device
import
get_places
BATCH_SIZE
=
64
...
...
@@ -76,7 +78,7 @@ def train(nn_type,
net_conf
=
conv_net
if
parallel
:
places
=
fluid
.
layers
.
get_places
()
places
=
get_places
()
pd
=
fluid
.
layers
.
ParallelDo
(
places
)
with
pd
.
do
():
img_
=
pd
.
read_input
(
img
)
...
...
python/paddle/fluid/tests/book/test_word2vec.py
浏览文件 @
dbaaca78
...
...
@@ -14,6 +14,7 @@
import
paddle
import
paddle.fluid
as
fluid
from
paddle.fluid.layers.device
import
get_places
import
unittest
import
os
import
numpy
as
np
...
...
@@ -80,7 +81,7 @@ def train(use_cuda, is_sparse, is_parallel, save_dirname, is_local=True):
avg_cost
,
predict_word
=
__network__
(
[
first_word
,
second_word
,
third_word
,
forth_word
,
next_word
])
else
:
places
=
fluid
.
layers
.
get_places
()
places
=
get_places
()
pd
=
fluid
.
layers
.
ParallelDo
(
places
)
with
pd
.
do
():
avg_cost
,
predict_word
=
__network__
(
...
...
python/paddle/fluid/tests/book_memory_optimization/test_memopt_fit_a_line.py
浏览文件 @
dbaaca78
...
...
@@ -12,12 +12,13 @@
# See the License for the specific language governing permissions and
# limitations under the License.
import
numpy
as
np
import
paddle
import
paddle.fluid
as
fluid
import
math
import
sys
import
paddle
import
paddle.fluid
as
fluid
from
paddle.fluid.layers.device
import
get_places
# need to fix random seed and training data to compare the loss
# value accurately calculated by the default and the memory optimization
# version.
...
...
@@ -34,7 +35,7 @@ if fluid.core.is_compiled_with_cuda():
use_nccl
=
False
place
=
fluid
.
CUDAPlace
(
0
)
places
=
fluid
.
layers
.
get_places
(
device_count
=
0
,
device_type
=
device_type
)
places
=
get_places
(
device_count
=
0
,
device_type
=
device_type
)
pd
=
fluid
.
layers
.
ParallelDo
(
places
,
use_nccl
=
use_nccl
)
with
pd
.
do
():
x_
=
pd
.
read_input
(
x
)
...
...
python/paddle/fluid/tests/unittests/test_calc_gradient.py
浏览文件 @
dbaaca78
...
...
@@ -16,8 +16,6 @@ import unittest
import
paddle.fluid
as
fluid
import
paddle.fluid.layers
as
layers
import
paddle.fluid.framework
as
framework
import
paddle.fluid.optimizer
as
optimizer
from
paddle.fluid.backward
import
calc_gradient
...
...
python/paddle/fluid/tests/unittests/test_get_places_op.py
浏览文件 @
dbaaca78
...
...
@@ -13,6 +13,7 @@
# limitations under the License.
import
paddle.fluid
as
fluid
from
paddle.fluid.layers.device
import
get_places
import
decorators
import
unittest
...
...
@@ -20,7 +21,7 @@ import unittest
class
TestGetPlaces
(
unittest
.
TestCase
):
@
decorators
.
prog_scope
()
def
test_get_places
(
self
):
places
=
fluid
.
layers
.
get_places
()
places
=
get_places
()
cpu
=
fluid
.
CPUPlace
()
exe
=
fluid
.
Executor
(
cpu
)
exe
.
run
(
fluid
.
default_main_program
())
...
...
python/paddle/fluid/tests/unittests/test_im2sequence_op.py
浏览文件 @
dbaaca78
...
...
@@ -16,23 +16,48 @@ import numpy as np
from
op_test
import
OpTest
def
get_output_shape
(
attrs
,
in_shape
):
def
get_output_shape
(
attrs
,
in_shape
,
img_real_size
):
batchsize
=
in_shape
[
0
]
img_height
=
in_shape
[
2
]
img_width
=
in_shape
[
3
]
paddings
=
np
.
array
(
attrs
[
'paddings'
]).
astype
(
"int32"
)
kernels
=
np
.
array
(
attrs
[
'kernels'
]).
astype
(
"int32"
)
strides
=
np
.
array
(
attrs
[
'strides'
]).
astype
(
"int32"
)
output_height
=
np
.
zeros
((
1
,
batchsize
)).
astype
(
"int32"
)
output_width
=
np
.
zeros
((
1
,
batchsize
)).
astype
(
"int32"
)
if
len
(
img_real_size
):
out_stride
=
np
.
array
(
attrs
[
'out_stride'
]).
astype
(
"int32"
)
imgreal_h
=
0
imgreal_w
=
0
for
index
in
range
(
batchsize
):
if
img_real_size
[
index
,
0
]
%
out_stride
[
0
]
==
0
:
imgreal_h
=
img_real_size
[
index
,
0
]
/
out_stride
[
0
]
else
:
imgreal_h
=
img_real_size
[
index
,
0
]
/
out_stride
[
0
]
+
1
if
img_real_size
[
index
,
0
]
%
out_stride
[
1
]
==
0
:
imgreal_w
=
img_real_size
[
index
,
1
]
/
out_stride
[
1
]
else
:
imgreal_w
=
img_real_size
[
index
,
0
]
/
out_stride
[
1
]
+
1
output_height
[
0
,
index
]
=
\
1
+
\
(
imgreal_h
+
paddings
[
0
]
+
paddings
[
2
]
-
kernels
[
0
]
+
strides
[
0
]
-
1
)
/
\
strides
[
0
]
paddings
=
attrs
[
'paddings'
]
kernels
=
attrs
[
'kernels'
]
strides
=
attrs
[
'strides'
]
output_width
[
0
,
index
]
=
\
1
+
\
(
imgreal_w
+
paddings
[
1
]
+
paddings
[
3
]
-
kernels
[
1
]
+
strides
[
1
]
-
1
)
/
\
strides
[
1
]
else
:
for
index
in
range
(
batchsize
):
output_height
[
0
,
index
]
=
\
1
+
\
(
img_height
+
paddings
[
0
]
+
paddings
[
2
]
-
kernels
[
0
]
+
strides
[
0
]
-
1
)
/
\
strides
[
0
]
output_height
=
\
1
+
\
(
img_height
+
paddings
[
0
]
+
paddings
[
2
]
-
kernels
[
0
]
+
strides
[
0
]
-
1
)
/
\
strides
[
0
]
output_width
=
\
1
+
\
(
img_width
+
paddings
[
1
]
+
paddings
[
3
]
-
kernels
[
1
]
+
strides
[
1
]
-
1
)
/
\
strides
[
1
]
output_width
[
0
,
index
]
=
\
1
+
\
(
img_width
+
paddings
[
1
]
+
paddings
[
3
]
-
kernels
[
1
]
+
strides
[
1
]
-
1
)
/
\
strides
[
1
]
return
output_height
,
output_width
...
...
@@ -75,22 +100,25 @@ def im2col(attrs, im, col):
im_row_offset
][
im_col_offset
]
def
Im2Sequence
(
inputs
,
attrs
):
output_height
,
output_width
=
get_output_shape
(
attrs
,
inputs
.
shape
)
def
Im2Sequence
(
inputs
,
img_real_size
,
attrs
):
output_height
,
output_width
=
get_output_shape
(
attrs
,
inputs
.
shape
,
img_real_size
)
img_channels
=
inputs
.
shape
[
1
]
batch_size
=
inputs
.
shape
[
0
]
out
=
np
.
zeros
([
batch_size
,
output_height
,
output_width
,
img_channels
,
attrs
[
'kernels'
][
0
],
attrs
[
'kernels'
][
1
]
]).
astype
(
"float32"
)
for
i
in
range
(
len
(
inputs
)):
im2col
(
attrs
,
inputs
[
i
],
out
[
i
])
out
=
out
.
reshape
([
batch_size
*
output_height
*
output_width
,
img_channels
*
attrs
[
'kernels'
][
0
]
*
attrs
[
'kernels'
][
1
]
])
out
=
[]
for
index
in
range
(
batch_size
):
tmp
=
np
.
zeros
([
output_height
[
0
,
index
],
output_width
[
0
,
index
],
img_channels
,
attrs
[
'kernels'
][
0
],
attrs
[
'kernels'
][
1
]
]).
astype
(
"float32"
)
out
.
append
(
tmp
)
for
index
in
range
(
len
(
inputs
)):
im2col
(
attrs
,
inputs
[
index
],
out
[
index
])
out
[
index
]
=
out
[
index
].
reshape
([
output_height
[
0
,
index
]
*
output_width
[
0
,
index
],
img_channels
*
attrs
[
'kernels'
][
0
]
*
attrs
[
'kernels'
][
1
]
])
out
=
np
.
concatenate
(
out
,
axis
=
0
)
return
out
...
...
@@ -103,7 +131,7 @@ class TestBlockExpandOp(OpTest):
self
.
attrs
=
{
'kernels'
:
[
2
,
2
],
'strides'
:
[
1
,
1
],
'paddings'
:
[
1
,
1
,
1
,
1
]
'paddings'
:
[
1
,
1
,
1
,
1
]
,
}
def
setUp
(
self
):
...
...
@@ -113,7 +141,8 @@ class TestBlockExpandOp(OpTest):
self
.
batch_size
,
self
.
img_channels
,
self
.
img_height
,
self
.
img_width
]).
astype
(
"float32"
)
out
=
Im2Sequence
(
x
,
self
.
attrs
)
real_size
=
np
.
array
([]).
astype
(
"float32"
)
out
=
Im2Sequence
(
x
,
real_size
,
self
.
attrs
)
self
.
inputs
=
{
'X'
:
x
}
self
.
outputs
=
{
'Out'
:
out
}
...
...
@@ -133,20 +162,20 @@ class TestBlockExpandOpCase2(TestBlockExpandOp):
self
.
attrs
=
{
'kernels'
:
[
2
,
1
],
'strides'
:
[
2
,
1
],
'paddings'
:
[
2
,
1
,
2
,
1
]
'paddings'
:
[
2
,
1
,
2
,
1
]
,
}
class
TestBlockExpandOpCase3
(
TestBlockExpandOp
):
def
config
(
self
):
self
.
batch_size
=
3
self
.
batch_size
=
2
self
.
img_channels
=
1
self
.
img_height
=
4
self
.
img_width
=
5
self
.
attrs
=
{
'kernels'
:
[
2
,
1
],
'strides'
:
[
2
,
1
],
'paddings'
:
[
2
,
0
,
2
,
0
]
'paddings'
:
[
2
,
0
,
2
,
0
]
,
}
...
...
@@ -159,9 +188,94 @@ class TestBlockExpandOpCase4(TestBlockExpandOp):
self
.
attrs
=
{
'kernels'
:
[
2
,
2
],
'strides'
:
[
1
,
1
],
'paddings'
:
[
0
,
0
,
0
,
0
]
'paddings'
:
[
0
,
0
,
0
,
0
],
}
class TestBlockExpandOpCase5(OpTest):
    """im2sequence case that also feeds real image sizes through input 'Y'."""

    def config(self):
        """Set the input geometry and the operator attributes."""
        self.batch_size, self.img_channels = 1, 3
        self.img_height, self.img_width = 4, 5
        self.attrs = {
            'kernels': [2, 1],
            'strides': [2, 1],
            'paddings': [2, 1, 2, 1],
            'out_stride': [2, 2],
        }

    def setUp(self):
        """Build a random input and the reference output from Im2Sequence."""
        self.config()
        self.op_type = "im2sequence"
        input_shape = [
            self.batch_size, self.img_channels, self.img_height,
            self.img_width
        ]
        x = np.random.uniform(0.1, 1, input_shape).astype("float32")
        # NOTE(review): real_size has two rows while batch_size is 1 --
        # confirm this is intended.
        real_size = np.array([[8, 10], [5, 8]]).astype("float32")
        expected = np.array(Im2Sequence(x, real_size, self.attrs))
        self.inputs = {'X': x, 'Y': real_size}
        self.outputs = {'Out': expected}

    def test_check_output(self):
        self.check_output()
class TestBlockExpandOpCase6(OpTest):
    """im2sequence case with batch size 3, unit strides and no padding."""

    def config(self):
        """Set the input geometry and the operator attributes."""
        self.batch_size, self.img_channels = 3, 1
        self.img_height, self.img_width = 4, 5
        self.attrs = {
            'kernels': [2, 1],
            'strides': [1, 1],
            'paddings': [0, 0, 0, 0],
            'out_stride': [1, 1],
        }

    def setUp(self):
        """Build a random input and the reference output from Im2Sequence."""
        self.config()
        self.op_type = "im2sequence"
        input_shape = [
            self.batch_size, self.img_channels, self.img_height,
            self.img_width
        ]
        x = np.random.uniform(0.1, 1, input_shape).astype("float32")
        # One (height, width) row per image in the batch.
        real_size = np.array([[8, 10], [5, 8], [5, 8]]).astype("float32")
        expected = np.array(Im2Sequence(x, real_size, self.attrs))
        self.inputs = {'X': x, 'Y': real_size}
        self.outputs = {'Out': expected}

    def test_check_output(self):
        self.check_output()
class TestBlockExpandOpCase7(OpTest):
    """im2sequence case with 2x2 kernels, asymmetric padding and out_stride 2."""

    def config(self):
        """Set the input geometry and the operator attributes."""
        self.batch_size, self.img_channels = 2, 2
        self.img_height, self.img_width = 3, 3
        self.attrs = {
            'kernels': [2, 2],
            'strides': [1, 1],
            'paddings': [1, 0, 1, 0],
            'out_stride': [2, 2],
        }

    def setUp(self):
        """Build a random input and the reference output from Im2Sequence."""
        self.config()
        self.op_type = "im2sequence"
        input_shape = [
            self.batch_size, self.img_channels, self.img_height,
            self.img_width
        ]
        x = np.random.uniform(0.1, 1, input_shape).astype("float32")
        # One (height, width) row per image in the batch.
        real_size = np.array([[6, 6], [4, 4]]).astype("float32")
        expected = np.array(Im2Sequence(x, real_size, self.attrs))
        self.inputs = {'X': x, 'Y': real_size}
        self.outputs = {'Out': expected}

    def test_check_output(self):
        self.check_output()
if
__name__
==
'__main__'
:
unittest
.
main
()
#set shiftwidth=4 set expandtab set tabstop=4
python/paddle/fluid/tests/unittests/test_layers.py
浏览文件 @
dbaaca78
...
...
@@ -16,6 +16,7 @@ from __future__ import print_function
import
unittest
import
paddle.fluid.layers
as
layers
from
paddle.fluid.layers.device
import
get_places
import
paddle.fluid.nets
as
nets
from
paddle.fluid.framework
import
Program
,
program_guard
,
default_main_program
from
paddle.fluid.param_attr
import
ParamAttr
...
...
@@ -238,7 +239,7 @@ class TestBook(unittest.TestCase):
def
test_get_places
(
self
):
program
=
Program
()
with
program_guard
(
program
):
x
=
layers
.
get_places
(
device_count
=
4
)
x
=
get_places
(
device_count
=
4
)
self
.
assertIsNotNone
(
x
)
print
(
str
(
program
))
...
...
@@ -251,12 +252,16 @@ class TestBook(unittest.TestCase):
print
(
str
(
program
))
def
test_im2sequence
(
self
):
print
(
"test_im2sequence"
)
program
=
Program
()
with
program_guard
(
program
):
x
=
layers
.
data
(
name
=
'x'
,
shape
=
[
3
,
128
,
128
],
dtype
=
'float32'
)
y
=
layers
.
data
(
name
=
'y'
,
shape
=
[],
dtype
=
'float32'
)
output
=
layers
.
im2sequence
(
input
=
x
,
stride
=
[
1
,
1
],
filter_size
=
[
2
,
2
])
input
=
x
,
input_image_size
=
y
,
stride
=
[
1
,
1
],
filter_size
=
[
2
,
2
],
out_stride
=
[
1
,
1
])
self
.
assertIsNotNone
(
output
)
print
(
str
(
program
))
...
...
python/paddle/fluid/tests/unittests/test_optimizer.py
浏览文件 @
dbaaca78
...
...
@@ -97,7 +97,7 @@ class TestMomentumOptimizer(unittest.TestCase):
params_grads
=
append_backward
(
mean_out
)
self
.
assertEqual
(
len
(
params_grads
),
1
)
self
.
assertEqual
(
len
(
momentum_optimizer
.
get_accumulators
()),
0
)
opts
=
momentum_optimizer
.
create_optimization_pass
(
opts
=
momentum_optimizer
.
_
create_optimization_pass
(
params_grads
,
mul_out
,
init_program
)
self
.
assertEqual
(
len
(
opts
),
3
)
sgd_op
=
opts
[
-
1
]
...
...
@@ -151,7 +151,7 @@ class TestMomentumOptimizer(unittest.TestCase):
params_grads
=
append_backward
(
mean_out
)
self
.
assertEqual
(
len
(
params_grads
),
1
)
self
.
assertEqual
(
len
(
momentum_optimizer
.
get_accumulators
()),
0
)
opts
=
momentum_optimizer
.
create_optimization_pass
(
opts
=
momentum_optimizer
.
_
create_optimization_pass
(
params_grads
,
mul_out
,
init_program
)
self
.
assertEqual
(
len
(
opts
),
3
)
sgd_op
=
opts
[
-
1
]
...
...
@@ -214,8 +214,8 @@ class TestAdagradOptimizer(unittest.TestCase):
params_grads
=
append_backward
(
mean_out
)
self
.
assertEqual
(
len
(
params_grads
),
1
)
self
.
assertEqual
(
len
(
adagrad_optimizer
.
get_accumulators
()),
0
)
opts
=
adagrad_optimizer
.
create_optimization_pass
(
params_grads
,
mul_out
,
init_program
)
opts
=
adagrad_optimizer
.
_create_optimization_pass
(
params_grads
,
mul_out
,
init_program
)
self
.
assertEqual
(
len
(
opts
),
3
)
self
.
assertEqual
([
op
.
type
for
op
in
opts
],
[
"fill_constant"
,
"elementwise_mul"
,
"adagrad"
])
...
...
@@ -278,8 +278,8 @@ class TestAdamOptimizer(unittest.TestCase):
params_grads
=
append_backward
(
mean_out
)
self
.
assertEqual
(
len
(
params_grads
),
1
)
self
.
assertEqual
(
len
(
adam_optimizer
.
get_accumulators
()),
0
)
opts
=
adam_optimizer
.
create_optimization_pass
(
params_grads
,
mul_out
,
init_program
)
opts
=
adam_optimizer
.
_
create_optimization_pass
(
params_grads
,
mul_out
,
init_program
)
self
.
assertEqual
(
len
(
opts
),
5
)
self
.
assertEqual
(
[
op
.
type
for
op
in
opts
],
...
...
@@ -345,8 +345,8 @@ class TestAdamaxOptimizer(unittest.TestCase):
params_grads
=
append_backward
(
mean_out
)
self
.
assertEqual
(
len
(
params_grads
),
1
)
self
.
assertEqual
(
len
(
adamax_optimizer
.
get_accumulators
()),
0
)
opts
=
adamax_optimizer
.
create_optimization_pass
(
params_grads
,
mul_out
,
init_program
)
opts
=
adamax_optimizer
.
_
create_optimization_pass
(
params_grads
,
mul_out
,
init_program
)
self
.
assertEqual
(
len
(
opts
),
4
)
self
.
assertEqual
(
[
op
.
type
for
op
in
opts
],
...
...
@@ -409,7 +409,7 @@ class TestDecayedAdagradOptimizer(unittest.TestCase):
params_grads
=
append_backward
(
mean_out
)
self
.
assertEqual
(
len
(
params_grads
),
1
)
self
.
assertEqual
(
len
(
decayed_adagrad_optimizer
.
get_accumulators
()),
0
)
opts
=
decayed_adagrad_optimizer
.
create_optimization_pass
(
opts
=
decayed_adagrad_optimizer
.
_
create_optimization_pass
(
params_grads
,
mul_out
,
init_program
)
self
.
assertEqual
(
len
(
opts
),
3
)
self
.
assertEqual
(
...
...
@@ -475,8 +475,8 @@ class TestFtrlOptimizer(unittest.TestCase):
params_grads
=
append_backward
(
mean_out
)
self
.
assertEqual
(
len
(
params_grads
),
1
)
self
.
assertEqual
(
len
(
ftrl_optimizer
.
get_accumulators
()),
0
)
opts
=
ftrl_optimizer
.
create_optimization_pass
(
params_grads
,
mul_out
,
init_program
)
opts
=
ftrl_optimizer
.
_
create_optimization_pass
(
params_grads
,
mul_out
,
init_program
)
self
.
assertEqual
(
len
(
opts
),
3
)
self
.
assertEqual
([
op
.
type
for
op
in
opts
],
[
"fill_constant"
,
"elementwise_mul"
,
"ftrl"
])
...
...
python/paddle/fluid/tests/unittests/test_parallel_op.py
浏览文件 @
dbaaca78
...
...
@@ -15,6 +15,7 @@
import
unittest
import
paddle.fluid
as
fluid
from
paddle.fluid.layers.device
import
get_places
import
paddle.fluid.profiler
as
profiler
import
numpy
...
...
@@ -115,7 +116,7 @@ class BaseParallelForTest(unittest.TestCase):
if
use_parallel
:
thread_num
=
fluid
.
core
.
get_cuda_device_count
(
)
if
use_gpu
else
8
places
=
fluid
.
layers
.
get_places
(
thread_num
)
places
=
get_places
(
thread_num
)
pd
=
fluid
.
layers
.
ParallelDo
(
places
,
use_nccl
=
use_nccl
)
data
=
next
(
generator
)
...
...
python/paddle/fluid/tests/unittests/test_squeeze_op.py
0 → 100644
浏览文件 @
dbaaca78
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import
unittest
import
numpy
as
np
from
op_test
import
OpTest
# Correct: General.
class TestSqueezeOp(OpTest):
    """Base case for the "squeeze" operator (not in place).

    A random float32 input of shape ``ori_shape`` is fed as ``X`` and the
    expected ``Out`` is that same data reshaped to ``new_shape``.  Subclasses
    override ``init_test_case`` and/or ``init_attrs`` to vary the shapes,
    the axes, and the ``inplace`` attribute.
    """

    def init_test_case(self):
        # Input shape, axes passed to the op, and the expected output shape.
        self.ori_shape, self.axes, self.new_shape = (1, 3, 1, 5), (0, 2), (3, 5)

    def init_attrs(self):
        self.attrs = dict(axes=self.axes, inplace=False)

    def setUp(self):
        self.op_type = "squeeze"
        self.init_test_case()
        source = np.random.random(self.ori_shape).astype("float32")
        self.inputs = dict(X=source)
        self.init_attrs()
        # Expected output: the input data reshaped to the target shape.
        self.outputs = dict(Out=self.inputs["X"].reshape(self.new_shape))

    def test_check_output(self):
        # Delegates to the check inherited from OpTest.
        self.check_output()

    def test_check_grad(self):
        # Gradient check of "Out" with respect to input "X".
        self.check_grad(["X"], "Out")
# Correct: There is a negative (minus) axis.
class TestSqueezeOp1(TestSqueezeOp):
    """Squeeze a (1, 3, 1, 5) input with axes (0, -2); expects (3, 5)."""

    def init_test_case(self):
        # One of the axes is given as a negative index.
        self.ori_shape, self.axes, self.new_shape = (1, 3, 1, 5), (0, -2), (3, 5)
# Correct: No axes input.
class TestSqueezeOp2(TestSqueezeOp):
    """Squeeze a (1, 3, 1, 5) input with an empty axes tuple; expects (3, 5)."""

    def init_test_case(self):
        # No axes are passed to the op in this case.
        self.ori_shape, self.axes, self.new_shape = (1, 3, 1, 5), (), (3, 5)
# Correct: Only some of the size-1 axes are squeezed.
class TestSqueezeOp3(TestSqueezeOp):
    """Squeeze only axes (1, -1) of a (3, 1, 5, 1, 4, 1) input.

    The expected shape (3, 5, 1, 4) still contains one size-1 dimension.
    """

    def init_test_case(self):
        self.ori_shape = (3, 1, 5, 1, 4, 1)
        self.new_shape = (3, 5, 1, 4)
        self.axes = (1, -1)
# Correct: Inplace.
class TestSqueezeOpInplace1(TestSqueezeOp):
    """Same shapes and axes as the base case, with ``inplace`` set to True."""

    def init_test_case(self):
        self.ori_shape, self.axes, self.new_shape = (1, 3, 1, 5), (0, 2), (3, 5)

    def init_attrs(self):
        self.attrs = dict(axes=self.axes, inplace=True)
# Correct: Inplace. There is a negative (minus) axis.
class TestSqueezeOpInplace2(TestSqueezeOp):
    """In-place squeeze of a (1, 3, 1, 5) input with axes (0, -2); expects (3, 5).

    The configuration hook must be spelled ``init_test_case`` so that it
    overrides the base-class hook called from ``setUp``.  It was previously
    misspelled ``inti_test_case`` and therefore never ran, so this case
    silently re-tested the base configuration with axes (0, 2).
    """

    def init_test_case(self):
        self.ori_shape = (1, 3, 1, 5)
        # One of the axes is given as a negative index.
        self.axes = (0, -2)
        self.new_shape = (3, 5)

    def init_attrs(self):
        self.attrs = {"axes": self.axes, "inplace": True}
# Correct: Inplace. No axes input.
class TestSqueezeOpInplace3(TestSqueezeOp):
    """In-place squeeze of a (1, 3, 1, 5) input with empty axes; expects (3, 5)."""

    def init_test_case(self):
        # No axes are passed to the op in this case.
        self.ori_shape, self.axes, self.new_shape = (1, 3, 1, 5), (), (3, 5)

    def init_attrs(self):
        self.attrs = dict(axes=self.axes, inplace=True)
# Correct: Inplace. Only some of the size-1 axes are squeezed.
class TestSqueezeOpInplace4(TestSqueezeOp):
    """In-place squeeze of only axes (1, -1) of a (3, 1, 5, 1, 4, 1) input.

    The expected shape (3, 5, 1, 4) still contains one size-1 dimension.
    """

    def init_test_case(self):
        self.ori_shape = (3, 1, 5, 1, 4, 1)
        self.new_shape = (3, 5, 1, 4)
        self.axes = (1, -1)

    def init_attrs(self):
        self.attrs = dict(axes=self.axes, inplace=True)
# Run the squeeze operator tests when this file is executed as a script.
if __name__ == "__main__":
    unittest.main()
python/paddle/fluid/tests/unittests/test_unsqueeze_op.py
0 → 100644
浏览文件 @
dbaaca78
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import
unittest
import
numpy
as
np
from
op_test
import
OpTest
# Correct: General.
class TestUnsqueezeOp(OpTest):
    """Base case for the "unsqueeze" operator (not in place).

    A random float32 input of shape ``ori_shape`` is fed as ``X`` and the
    expected ``Out`` is that same data reshaped to ``new_shape``.  Subclasses
    override ``init_test_case`` and/or ``init_attrs`` to vary the shapes,
    the axes, and the ``inplace`` attribute.
    """

    def init_test_case(self):
        # Input shape, axes passed to the op, and the expected output shape.
        self.ori_shape, self.axes, self.new_shape = (3, 5), (1, 2), (3, 1, 1, 5)

    def init_attrs(self):
        self.attrs = dict(axes=self.axes, inplace=False)

    def setUp(self):
        self.init_test_case()
        self.op_type = "unsqueeze"
        source = np.random.random(self.ori_shape).astype("float32")
        self.inputs = dict(X=source)
        self.init_attrs()
        # Expected output: the input data reshaped to the target shape.
        self.outputs = dict(Out=self.inputs["X"].reshape(self.new_shape))

    def test_check_output(self):
        # Delegates to the check inherited from OpTest.
        self.check_output()

    def test_check_grad(self):
        # Gradient check of "Out" with respect to input "X".
        self.check_grad(["X"], "Out")
# Correct: Single input index.
class TestUnsqueezeOp1(TestUnsqueezeOp):
    """Unsqueeze a (3, 5) input with the single axis (-1,); expects (3, 5, 1)."""

    def init_test_case(self):
        # axes is a one-element tuple holding a negative index.
        self.ori_shape, self.axes, self.new_shape = (3, 5), (-1,), (3, 5, 1)
# Correct: Mixed input axis.
class TestUnsqueezeOp2(TestUnsqueezeOp):
    """Unsqueeze a (3, 5) input with mixed-sign axes (0, -1); expects (1, 3, 5, 1)."""

    def init_test_case(self):
        self.ori_shape, self.axes, self.new_shape = (3, 5), (0, -1), (1, 3, 5, 1)
# Correct: There is duplicated axis.
class TestUnsqueezeOp3(TestUnsqueezeOp):
    """Unsqueeze a (3, 2, 5) input with a repeated axis: axes (0, 3, 3).

    The expected output shape is (1, 3, 2, 1, 1, 5).
    """

    def init_test_case(self):
        self.ori_shape = (3, 2, 5)
        self.new_shape = (1, 3, 2, 1, 1, 5)
        self.axes = (0, 3, 3)
# Correct: Reversed axes.
class TestUnsqueezeOp4(TestUnsqueezeOp):
    """Unsqueeze a (3, 2, 5) input with axes in descending order: (3, 1, 1).

    The expected output shape is (3, 1, 1, 2, 5, 1).
    """

    def init_test_case(self):
        self.ori_shape = (3, 2, 5)
        self.new_shape = (3, 1, 1, 2, 5, 1)
        self.axes = (3, 1, 1)
# Correct: Inplace.
class TestUnsqueezeOpInplace1(TestUnsqueezeOp):
    """In-place unsqueeze of a (3, 5) input with axes (0, 2); expects (1, 3, 1, 5)."""

    def init_test_case(self):
        self.ori_shape, self.axes, self.new_shape = (3, 5), (0, 2), (1, 3, 1, 5)

    def init_attrs(self):
        self.attrs = dict(axes=self.axes, inplace=True)
# Correct: Inplace. There is a negative (minus) index.
class TestUnsqueezeOpInplace2(TestUnsqueezeOp):
    """In-place unsqueeze of a (3, 5) input with axes (0, -2); expects (1, 3, 1, 5)."""

    def init_test_case(self):
        # One of the axes is given as a negative index.
        self.ori_shape, self.axes, self.new_shape = (3, 5), (0, -2), (1, 3, 1, 5)

    def init_attrs(self):
        self.attrs = dict(axes=self.axes, inplace=True)
# Correct: Inplace. There is duplicated axis.
class TestUnsqueezeOpInplace3(TestUnsqueezeOp):
    """In-place unsqueeze of a (3, 2, 5) input with a repeated axis: (0, 3, 3).

    The expected output shape is (1, 3, 2, 1, 1, 5).
    """

    def init_test_case(self):
        self.ori_shape = (3, 2, 5)
        self.new_shape = (1, 3, 2, 1, 1, 5)
        self.axes = (0, 3, 3)

    def init_attrs(self):
        self.attrs = dict(axes=self.axes, inplace=True)
# Run the unsqueeze operator tests when this file is executed as a script.
if __name__ == "__main__":
    unittest.main()
python/setup.py.in
浏览文件 @
dbaaca78
...
...
@@ -181,6 +181,14 @@ else:
command = "patchelf --set-rpath '$ORIGIN/../libs/' ${PADDLE_BINARY_DIR}/python/paddle/fluid/core.so"
if os.system(command) != 0:
raise Exception("patch core.so failed, command: %s" % command)
if '${WITH_FLUID_ONLY}'== 'OFF':
# change rpath of _swig_paddle.so.
if "@APPLE@" == "1":
command = "install_name_tool -id \"@loader_path/../paddle/libs/\" ${PADDLE_BINARY_DIR}/python/py_paddle/_swig_paddle.so"
else:
command = "patchelf --set-rpath '$ORIGIN/../paddle/libs/' ${PADDLE_BINARY_DIR}/python/py_paddle/_swig_paddle.so"
if os.system(command) != 0:
raise Exception("patch _swig_paddle.so failed, command: %s" % command)
setup(name='${PACKAGE_NAME}',
version='${PADDLE_VERSION}',
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录