机器未来 / Paddle (forked from PaddlePaddle/Paddle)

Commit 06180779
Author: tensor-tang
Date:   Sep 06, 2018

    Merge remote-tracking branch 'ups/develop' into refine/ut/lac

Parents: 6b104c90, 6de0a18d

Showing 63 changed files with 1,364 additions and 803 deletions (+1364, -803).
Changed files:

CMakeLists.txt  (+3, -1)
cmake/external/anakin.cmake  (+6, -13)
cmake/inference_lib.cmake  (+1, -1)
doc/fluid/api/layers.rst  (+16, -0)
doc/fluid/new_docs/user_guides/howto/debug/visualdl.md  (+1, -0)
paddle/fluid/API.spec  (+1, -0)
paddle/fluid/framework/ir/attention_lstm_fuse_pass.cc  (+0, -3)
paddle/fluid/framework/ir/fc_lstm_fuse_pass.cc  (+1, -0)
paddle/fluid/framework/ir/graph_pattern_detector.cc  (+1, -1)
paddle/fluid/framework/ir/graph_viz_pass.cc  (+42, -17)
paddle/fluid/inference/analysis/CMakeLists.txt  (+8, -2)
paddle/fluid/inference/analysis/analysis_pass.cc  (+1, -1)
paddle/fluid/inference/analysis/analysis_pass.h  (+8, -36)
paddle/fluid/inference/analysis/analyzer.cc  (+3, -3)
paddle/fluid/inference/analysis/analyzer.h  (+2, -1)
paddle/fluid/inference/analysis/analyzer_tester.cc  (+84, -80)
paddle/fluid/inference/analysis/analyzer_text_classification_tester.cc  (+50, -27)
paddle/fluid/inference/analysis/data_flow_graph_to_fluid_pass.cc  (+1, -1)
paddle/fluid/inference/analysis/data_flow_graph_to_fluid_pass.h  (+2, -2)
paddle/fluid/inference/analysis/dfg_graphviz_draw_pass.h  (+1, -1)
paddle/fluid/inference/analysis/fluid_to_data_flow_graph_pass.cc  (+1, -1)
paddle/fluid/inference/analysis/fluid_to_data_flow_graph_pass.h  (+2, -2)
paddle/fluid/inference/analysis/fluid_to_ir_pass.h  (+14, -10)
paddle/fluid/inference/analysis/model_store_pass.h  (+1, -1)
paddle/fluid/inference/analysis/pass_manager.cc  (+0, -11)
paddle/fluid/inference/analysis/pass_manager.h  (+2, -14)
paddle/fluid/inference/analysis/pass_manager_tester.cc  (+0, -35)
paddle/fluid/inference/analysis/tensorrt_subgraph_node_mark_pass.cc  (+1, -1)
paddle/fluid/inference/analysis/tensorrt_subgraph_node_mark_pass.h  (+2, -2)
paddle/fluid/inference/analysis/tensorrt_subgraph_pass.h  (+1, -1)
paddle/fluid/inference/api/CMakeLists.txt  (+18, -19)
paddle/fluid/inference/api/analysis_predictor.cc  (+0, -1)
paddle/fluid/inference/api/api_anakin_engine.cc  (+7, -0)
paddle/fluid/inference/api/helper.h  (+41, -0)
paddle/fluid/operators/fake_quantize_op.cu  (+2, -1)
paddle/fluid/operators/flatten_op.cc  (+115, -0)
paddle/fluid/operators/fusion_gru_op.cc  (+8, -10)
paddle/fluid/operators/fusion_lstm_op.cc  (+240, -280)
paddle/fluid/operators/layer_norm_op.cu  (+7, -7)
paddle/fluid/operators/reshape_op.cc  (+100, -0)
paddle/fluid/operators/squeeze_op.cc  (+119, -7)
paddle/fluid/operators/transpose_op.cc  (+103, -3)
paddle/fluid/operators/transpose_op.cu.cc  (+7, -0)
paddle/fluid/operators/unsqueeze_op.cc  (+117, -6)
paddle/fluid/platform/dynload/dynamic_loader.cc  (+6, -0)
paddle/scripts/paddle_build.sh  (+3, -1)
python/paddle/dataset/image.py  (+2, -4)
python/paddle/fluid/layers/nn.py  (+21, -11)
python/paddle/fluid/tests/book/high-level-api/fit_a_line/test_fit_a_line.py  (+46, -4)
python/paddle/fluid/tests/unittests/dist_transformer.py  (+20, -14)
python/paddle/fluid/tests/unittests/op_test.py  (+13, -5)
python/paddle/fluid/tests/unittests/test_dist_base.py  (+10, -1)
python/paddle/fluid/tests/unittests/test_flatten_op.py  (+6, -3)
python/paddle/fluid/tests/unittests/test_fusion_lstm_op.py  (+5, -39)
python/paddle/fluid/tests/unittests/test_parallel_executor_fetch_feed.py  (+2, -0)
python/paddle/fluid/tests/unittests/test_prelu_op.py  (+13, -9)
python/paddle/fluid/tests/unittests/test_reshape_op.py  (+30, -94)
python/paddle/fluid/tests/unittests/test_squeeze_op.py  (+6, -3)
python/paddle/fluid/tests/unittests/test_transpose_op.py  (+7, -4)
python/paddle/fluid/tests/unittests/test_unsqueeze_op.py  (+6, -3)
python/paddle/fluid/trainer.py  (+22, -0)
python/paddle/fluid/transpiler/details/program_utils.py  (+1, -1)
python/paddle/fluid/transpiler/distribute_transpiler.py  (+5, -5)
CMakeLists.txt

@@ -213,9 +213,11 @@ include(configure)  # add paddle env configuration
 if(WITH_GPU)
   include(cuda)
   include(tensorrt)
+endif()
+if(WITH_MKL OR WITH_MKLML)
   include(external/anakin)
 elseif()
-  set(WITH_ANAKIN OFF CACHE STRING "Anakin is used in GPU only now." FORCE)
+  set(WITH_ANAKIN OFF CACHE STRING "Anakin is used in MKL only now." FORCE)
 endif()
 
 include(generic)  # simplify cmake module
cmake/external/anakin.cmake

@@ -16,16 +16,6 @@ set(ANAKIN_LIBRARY ${ANAKIN_INSTALL_DIR})
 set(ANAKIN_SHARED_LIB ${ANAKIN_LIBRARY}/libanakin.so)
 set(ANAKIN_SABER_LIB ${ANAKIN_LIBRARY}/libanakin_saber_common.so)
 
-# TODO(luotao): ANAKIN_MODLE_URL etc will move to demo ci later.
-set(INFERENCE_URL "http://paddle-inference-dist.bj.bcebos.com")
-set(ANAKIN_MODLE_URL "${INFERENCE_URL}/mobilenet_v2.anakin.bin")
-set(ANAKIN_RNN_MODLE_URL "${INFERENCE_URL}/anakin_test%2Fditu_rnn.anakin2.model.bin")
-set(ANAKIN_RNN_DATA_URL "${INFERENCE_URL}/anakin_test%2Fditu_rnn_data.txt")
-execute_process(COMMAND bash -c "mkdir -p ${ANAKIN_SOURCE_DIR}")
-execute_process(COMMAND bash -c "cd ${ANAKIN_SOURCE_DIR}; wget -q --no-check-certificate ${ANAKIN_MODLE_URL} -N")
-execute_process(COMMAND bash -c "cd ${ANAKIN_SOURCE_DIR}; wget -q --no-check-certificate ${ANAKIN_RNN_MODLE_URL} -N")
-execute_process(COMMAND bash -c "cd ${ANAKIN_SOURCE_DIR}; wget -q --no-check-certificate ${ANAKIN_RNN_DATA_URL} -N")
-
 include_directories(${ANAKIN_INCLUDE})
 include_directories(${ANAKIN_INCLUDE}/saber/)
 include_directories(${ANAKIN_INCLUDE}/saber/core/)

@@ -48,6 +38,11 @@ set(ANAKIN_COMPILE_EXTRA_FLAGS
     -Wno-reorder
     -Wno-error=cpp)
 
+if(WITH_GPU)
+  set(CMAKE_ARGS_PREFIX -DUSE_GPU_PLACE=YES -DCUDNN_ROOT=${CUDNN_ROOT} -DCUDNN_INCLUDE_DIR=${CUDNN_INCLUDE_DIR})
+else()
+  set(CMAKE_ARGS_PREFIX -DUSE_GPU_PLACE=NO)
+endif()
 ExternalProject_Add(
     extern_anakin
     ${EXTERNAL_PROJECT_LOG_ARGS}

@@ -56,13 +51,11 @@ ExternalProject_Add(
     GIT_TAG             "9424277cf9ae180a14aff09560d3cd60a49c76d2"
     PREFIX              ${ANAKIN_SOURCE_DIR}
     UPDATE_COMMAND      ""
-    CMAKE_ARGS          -DUSE_GPU_PLACE=YES
+    CMAKE_ARGS          ${CMAKE_ARGS_PREFIX}
                         -DUSE_X86_PLACE=YES
                         -DBUILD_WITH_UNIT_TEST=NO
                         -DPROTOBUF_ROOT=${THIRD_PARTY_PATH}/install/protobuf
                         -DMKLML_ROOT=${THIRD_PARTY_PATH}/install/mklml
-                        -DCUDNN_ROOT=${CUDNN_ROOT}
-                        -DCUDNN_INCLUDE_DIR=${CUDNN_INCLUDE_DIR}
                         -DENABLE_OP_TIMER=${ANAKIN_ENABLE_OP_TIMER}
                         ${EXTERNAL_OPTIONAL_ARGS}
     CMAKE_CACHE_ARGS    -DCMAKE_INSTALL_PREFIX:PATH=${ANAKIN_INSTALL_DIR}
cmake/inference_lib.cmake

@@ -145,7 +145,7 @@ copy(memory_lib
 set(inference_deps paddle_fluid_shared paddle_fluid)
 
 set(module "inference/api")
-if (WITH_ANAKIN AND WITH_GPU)
+if (WITH_ANAKIN AND WITH_MKL)
     copy(anakin_inference_lib DEPS paddle_inference_api inference_anakin_api
         SRCS
         ${PADDLE_BINARY_DIR}/paddle/fluid/inference/api/libinference_anakin_api* # compiled anakin api
doc/fluid/api/layers.rst

@@ -822,6 +822,14 @@ pad
 .. autofunction:: paddle.fluid.layers.pad
     :noindex:
 
+.. _api_fluid_layers_pad_constant_like:
+
+pad_constant_like
+-----------------
+
+.. autofunction:: paddle.fluid.layers.pad_constant_like
+    :noindex:
+
 .. _api_fluid_layers_label_smooth:
 
 label_smooth

@@ -1145,6 +1153,14 @@ sigmoid
 .. autofunction:: paddle.fluid.layers.sigmoid
     :noindex:
 
+.. _api_fluid_layers_hsigmoid:
+
+hsigmoid
+--------
+
+.. autofunction:: paddle.fluid.layers.hsigmoid
+    :noindex:
+
 .. _api_fluid_layers_logsigmoid:
 
 logsigmoid
doc/fluid/new_docs/user_guides/howto/debug/visualdl.md

@@ -104,6 +104,7 @@ visualDL --logdir=scratch_log --port=8080
 # visit http://127.0.0.1:8080
 ```
 
+If `TypeError: __init__() got an unexpected keyword argument 'file'` appears, the installed protobuf is older than 3.5; running `pip install --upgrade protobuf` resolves it.
 
 If you still run into installation problems inside a virtual environment, try the following approaches.
paddle/fluid/API.spec

@@ -43,6 +43,7 @@ paddle.fluid.Executor.run ArgSpec(args=['self', 'program', 'feed', 'fetch_list',
 paddle.fluid.global_scope ArgSpec(args=[], varargs=None, keywords=None, defaults=None)
 paddle.fluid.scope_guard ArgSpec(args=[], varargs='args', keywords='kwds', defaults=None)
 paddle.fluid.Trainer.__init__ ArgSpec(args=['self', 'train_func', 'optimizer_func', 'param_path', 'place', 'parallel', 'checkpoint_config'], varargs=None, keywords=None, defaults=(None, None, False, None))
+paddle.fluid.Trainer.save_inference_model ArgSpec(args=['self', 'param_path', 'feeded_var_names', 'target_var_indexes'], varargs=None, keywords=None, defaults=None)
 paddle.fluid.Trainer.save_params ArgSpec(args=['self', 'param_path'], varargs=None, keywords=None, defaults=None)
 paddle.fluid.Trainer.stop ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None)
 paddle.fluid.Trainer.test ArgSpec(args=['self', 'reader', 'feed_order'], varargs=None, keywords=None, defaults=None)
paddle/fluid/framework/ir/attention_lstm_fuse_pass.cc

@@ -13,13 +13,10 @@
 // limitations under the License.
 
 #include "paddle/fluid/framework/ir/attention_lstm_fuse_pass.h"
 #include <string>
 #include "paddle/fluid/framework/ir/graph_pattern_detector.h"
-#include "paddle/fluid/framework/ir/graph_viz_pass.h"
 #include "paddle/fluid/framework/lod_tensor.h"
-#include "paddle/fluid/inference/api/helper.h"
-
 namespace paddle {
 namespace framework {
paddle/fluid/framework/ir/fc_lstm_fuse_pass.cc

@@ -11,6 +11,7 @@
 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 // See the License for the specific language governing permissions and
 // limitations under the License.
 
 #include "paddle/fluid/framework/ir/fc_lstm_fuse_pass.h"
+#include <string>
 #include "paddle/fluid/framework/lod_tensor.h"
paddle/fluid/framework/ir/graph_pattern_detector.cc

@@ -85,7 +85,7 @@ void GraphPatternDetector::operator()(Graph* graph,
   LOG(INFO) << "detect " << subgraphs.size() << " subgraph matches the pattern";
   int id = 0;
   for (auto& g : subgraphs) {
-    LOG(INFO) << "optimizing #" << id++ << " subgraph";
+    VLOG(3) << "optimizing #" << id++ << " subgraph";
     handler(g, graph);
   }
 }
paddle/fluid/framework/ir/graph_viz_pass.cc

@@ -50,20 +50,37 @@ std::unique_ptr<ir::Graph> GraphVizPass::ApplyImpl(
   Dot dot;
-  std::vector<Dot::Attr> op_attrs({Dot::Attr("style", "filled"),
-                                   Dot::Attr("shape", "box"),
-                                   Dot::Attr("fillcolor", "red")});
-  std::vector<Dot::Attr> var_attrs({Dot::Attr("style", "filled,rounded"),
-                                    // Dot::Attr("shape", "diamond"),
-                                    Dot::Attr("fillcolor", "yellow")});
-  std::vector<Dot::Attr> marked_op_attrs({Dot::Attr("style", "filled"),
-                                          Dot::Attr("shape", "box"),
-                                          Dot::Attr("fillcolor", "lightgray")});
-  std::vector<Dot::Attr> marked_var_attrs(
-      {Dot::Attr("style", "filled,rounded"),
-       // Dot::Attr("shape", "diamond"),
-       Dot::Attr("fillcolor", "lightgray")});
+  const std::vector<Dot::Attr> op_attrs({
+      Dot::Attr("style", "rounded,filled,bold"),  //
+      Dot::Attr("shape", "box"),                  //
+      Dot::Attr("color", "#303A3A"),              //
+      Dot::Attr("fontcolor", "#ffffff"),          //
+      Dot::Attr("width", "1.3"),                  //
+      Dot::Attr("height", "0.84"),                //
+      Dot::Attr("fontname", "Arial"),             //
+  });
+  const std::vector<Dot::Attr> arg_attrs({
+      Dot::Attr("shape", "box"),                  //
+      Dot::Attr("style", "rounded,filled,bold"),  //
+      Dot::Attr("fontname", "Arial"),             //
+      Dot::Attr("fillcolor", "#999999"),          //
+      Dot::Attr("color", "#dddddd"),              //
+  });
+  const std::vector<Dot::Attr> param_attrs({
+      Dot::Attr("shape", "box"),                  //
+      Dot::Attr("style", "rounded,filled,bold"),  //
+      Dot::Attr("fontname", "Arial"),             //
+      Dot::Attr("color", "#148b97"),              //
+      Dot::Attr("fontcolor", "#ffffff"),          //
+  });
+  const std::vector<Dot::Attr> marked_op_attrs(
+      {Dot::Attr("style", "rounded,filled,bold"), Dot::Attr("shape", "box"),
+       Dot::Attr("fillcolor", "yellow")});
+  const std::vector<Dot::Attr> marked_var_attrs(
+      {Dot::Attr("style", "filled,rounded"), Dot::Attr("shape", "box"),
+       Dot::Attr("fillcolor", "yellow")});
 
   auto marked_nodes = ConsumeMarkedNodes(graph.get());
   // Create nodes

@@ -74,9 +91,17 @@ std::unique_ptr<ir::Graph> GraphVizPass::ApplyImpl(
       decltype(op_attrs) attr =
           marked_nodes.count(n) ? marked_op_attrs : op_attrs;
       dot.AddNode(node_id, attr, node_id);
     } else if (n->IsVar()) {
-      decltype(op_attrs) attr =
-          marked_nodes.count(n) ? marked_var_attrs : var_attrs;
-      dot.AddNode(node_id, attr, node_id);
+      decltype(op_attrs)* attr;
+      if (marked_nodes.count(n)) {
+        attr = &marked_var_attrs;
+      } else if (const_cast<Node*>(n)->Var() &&
+                 const_cast<Node*>(n)->Var()->Persistable()) {
+        attr = &param_attrs;
+      } else {
+        attr = &arg_attrs;
+      }
+      dot.AddNode(node_id, *attr, node_id);
     }
     node2dot[n] = node_id;
   }
paddle/fluid/inference/analysis/CMakeLists.txt

@@ -6,6 +6,7 @@ cc_library(analysis SRCS pass_manager.cc node.cc data_flow_graph.cc graph_traits
   analyzer.cc
   helper.cc
   # passes
+  analysis_pass.cc
   fluid_to_data_flow_graph_pass.cc
   data_flow_graph_to_fluid_pass.cc
   dfg_graphviz_draw_pass.cc

@@ -99,12 +100,17 @@ inference_analysis_test(test_analyzer_lac SRCS analyzer_lac_tester.cc
 set(TEXT_CLASSIFICATION_MODEL_URL "http://paddle-inference-dist.bj.bcebos.com/text-classification-Senta.tar.gz")
+set(TEXT_CLASSIFICATION_DATA_URL "http://paddle-inference-dist.bj.bcebos.com/text_classification_data.txt.tar.gz")
 set(TEXT_CLASSIFICATION_INSTALL_DIR "${THIRD_PARTY_PATH}/inference_demo/text_classification" CACHE PATH "Text Classification model and data root." FORCE)
 
 if (NOT EXISTS ${TEXT_CLASSIFICATION_INSTALL_DIR} AND WITH_TESTING AND WITH_INFERENCE)
   inference_download_and_uncompress(${TEXT_CLASSIFICATION_INSTALL_DIR} ${TEXT_CLASSIFICATION_MODEL_URL} "text-classification-Senta.tar.gz")
+  inference_download_and_uncompress(${TEXT_CLASSIFICATION_INSTALL_DIR} ${TEXT_CLASSIFICATION_DATA_URL} "text_classification_data.txt.tar.gz")
 endif()
 
-inference_analysis_test(test_text_classification SRCS test_text_classification.cc
+inference_analysis_test(test_text_classification SRCS analyzer_text_classification_tester.cc
     EXTRA_DEPS paddle_inference_api paddle_fluid_api analysis_predictor
-    ARGS --infer_model=${TEXT_CLASSIFICATION_INSTALL_DIR}/text-classification-Senta)
+    ARGS --infer_model=${TEXT_CLASSIFICATION_INSTALL_DIR}/text-classification-Senta
+         --infer_data=${TEXT_CLASSIFICATION_INSTALL_DIR}/data.txt
+         --topn=1  # Just run top 1 batch.
+    )
paddle/fluid/inference/analysis/pass.cc → paddle/fluid/inference/analysis/analysis_pass.cc (renamed)

@@ -12,4 +12,4 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
 
-#include "paddle/fluid/inference/analysis/pass.h"
+#include "paddle/fluid/inference/analysis/analysis_pass.h"
paddle/fluid/inference/analysis/pass.h → paddle/fluid/inference/analysis/analysis_pass.h (renamed)

@@ -28,10 +28,10 @@ namespace paddle {
 namespace inference {
 namespace analysis {
 
-class Pass {
+class AnalysisPass {
  public:
-  Pass() = default;
-  virtual ~Pass() = default;
+  AnalysisPass() = default;
+  virtual ~AnalysisPass() = default;
   // Mutable Pass.
   virtual bool Initialize(Argument *argument) { return false; }
   // Readonly Pass.

@@ -42,23 +42,16 @@ class Pass {
   virtual bool Finalize() { return false; }
 
   // Get a Pass appropriate to print the Node this pass operates on.
-  virtual Pass *CreatePrinterPass(std::ostream &os,
-                                  const std::string &banner) const {
+  virtual AnalysisPass *CreatePrinterPass(std::ostream &os,
+                                          const std::string &banner) const {
     return nullptr;
   }
 
   // Create a debugger Pass that draw the DFG by graphviz toolkit.
-  virtual Pass *CreateGraphvizDebugerPass() const { return nullptr; }
+  virtual AnalysisPass *CreateGraphvizDebugerPass() const { return nullptr; }
 
-  virtual void Run() { LOG(FATAL) << "not valid"; }
-  // Run on a single Node.
-  virtual void Run(Node *x) { LOG(FATAL) << "not valid"; }
-  // Run on a single Function.
-  virtual void Run(Function *x) { LOG(FATAL) << "not valid"; }
-  // Run on a single FunctionBlock.
-  virtual void Run(FunctionBlock *x) { LOG(FATAL) << "not valid"; }
   // Run on a single DataFlowGraph.
-  virtual void Run(DataFlowGraph *x) { LOG(FATAL) << "not valid"; }
+  virtual void Run(DataFlowGraph *x) = 0;
 
   // Human-readable short representation.
   virtual std::string repr() const = 0;

@@ -66,29 +59,8 @@ class Pass {
   virtual std::string description() const { return "No DOC"; }
 };
 
-// NodePass process on any Node types.
-class NodePass : public Pass {
- public:
-  virtual void Run(Node *node) = 0;
-};
-
-// NodePass process on any Function node types.
-class FunctionPass : public Pass {
- public:
-  virtual void Run(Function *node) = 0;
-};
-
-// NodePass process on any FunctionBlock node types.
-class FunctionBlockPass : public Pass {
- public:
-  virtual void Run(FunctionBlock *node) = 0;
-};
-
 // GraphPass processes on any GraphType.
-class DataFlowGraphPass : public Pass {
- public:
-  virtual void Run(DataFlowGraph *graph) = 0;
-};
+class DataFlowGraphPass : public AnalysisPass {};
 
 }  // namespace analysis
 }  // namespace inference
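After this rename, Run(DataFlowGraph*) is the single pure-virtual hook; the per-Node, per-Function, and per-FunctionBlock overloads are gone. A minimal sketch of a concrete pass against the new interface, assuming only the signatures visible in this diff (Initialize, Run, repr, description); the class name and body here are hypothetical, not part of the commit:

    // Hypothetical example built on the AnalysisPass/DataFlowGraphPass
    // interface shown in the hunks above.
    class NoOpGraphPass : public DataFlowGraphPass {
     public:
      // Mutable initialization hook; returns whether setup succeeded.
      bool Initialize(Argument* argument) override { return argument != nullptr; }

      // The only Run overload that survives this commit.
      void Run(DataFlowGraph* graph) override {
        // inspect or mutate *graph here
      }

      std::string repr() const override { return "no-op-graph-pass"; }
      std::string description() const override {
        return "Example pass demonstrating the AnalysisPass contract.";
      }
    };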
paddle/fluid/inference/analysis/analyzer.cc

@@ -15,6 +15,7 @@
 #include "paddle/fluid/inference/analysis/analyzer.h"
+#include <string>
 #include <vector>
 
 #include "paddle/fluid/inference/analysis/data_flow_graph_to_fluid_pass.h"
 #include "paddle/fluid/inference/analysis/dfg_graphviz_draw_pass.h"
 #include "paddle/fluid/inference/analysis/fluid_to_data_flow_graph_pass.h"

@@ -58,7 +59,7 @@ class DfgPassManagerImpl final : public DfgPassManager {
   std::string description() const override { return "DFG pass manager."; }
 
  private:
-  void AddPass(const std::string& name, Pass* pass) {
+  void AddPass(const std::string& name, AnalysisPass* pass) {
     VLOG(3) << "Adding pass " << name;
     Register(name, pass);
     AddGraphvizDebugerPass(pass);

@@ -87,7 +88,7 @@ class DfgPassManagerImpl final : public DfgPassManager {
   }
 
   // Add the graphviz debuger pass if the parent pass has one.
-  void AddGraphvizDebugerPass(Pass* pass) {
+  void AddGraphvizDebugerPass(AnalysisPass* pass) {
    auto* debuger_pass = pass->CreateGraphvizDebugerPass();
    if (debuger_pass) {
      Register(debuger_pass->repr(), debuger_pass);

@@ -106,7 +107,6 @@ void Analyzer::Run(Argument* argument) {
     }
   }
-  passes.push_back("graph_viz_pass");
   // Ugly support fluid-to-ir-pass
   argument->Set(kFluidToIrPassesAttr, new std::vector<std::string>(passes));
 
   for (auto& x : data_) {
paddle/fluid/inference/analysis/analyzer.h

@@ -38,8 +38,9 @@ limitations under the License. */
 #include <gflags/gflags.h>
 #include <string>
 #include <vector>
+
+#include "paddle/fluid/inference/analysis/analysis_pass.h"
 #include "paddle/fluid/inference/analysis/flags.h"
-#include "paddle/fluid/inference/analysis/pass.h"
 #include "paddle/fluid/inference/analysis/pass_manager.h"
 
 namespace paddle {
paddle/fluid/inference/analysis/analyzer_tester.cc

@@ -16,6 +16,7 @@
 #include <google/protobuf/text_format.h>
 #include <gtest/gtest.h>
 #include <thread>  // NOLINT
+#include "paddle/fluid/framework/ir/fuse_pass_base.h"
 #include "paddle/fluid/framework/ir/pass.h"
 #include "paddle/fluid/inference/analysis/ut_helper.h"

@@ -24,12 +25,12 @@
 #include "paddle/fluid/inference/api/paddle_inference_api.h"
 #include "paddle/fluid/inference/api/paddle_inference_pass.h"
 #include "paddle/fluid/inference/utils/singleton.h"
-#include "paddle/fluid/platform/profiler.h"
 
 DEFINE_string(infer_ditu_rnn_model, "", "model path for ditu RNN");
 DEFINE_string(infer_ditu_rnn_data, "", "data path for ditu RNN");
 DEFINE_int32(batch_size, 10, "batch size.");
 DEFINE_int32(repeat, 1, "Running the inference program repeat times.");
+DEFINE_int32(num_threads, 1, "Running the inference program in multi-threads.");
 
 namespace paddle {
 namespace inference {

@@ -220,39 +221,6 @@ void PrepareInputs(std::vector<PaddleTensor> *input_slots, DataRecord *data,
   }
 }
 
-std::string DescribeTensor(const PaddleTensor &tensor) {
-  std::stringstream os;
-  os << "Tensor [" << tensor.name << "]\n";
-  os << " - type: ";
-  switch (tensor.dtype) {
-    case PaddleDType::FLOAT32:
-      os << "float32";
-      break;
-    case PaddleDType::INT64:
-      os << "int64";
-      break;
-    default:
-      os << "unset";
-  }
-  os << '\n';
-
-  os << " - shape: " << to_string(tensor.shape) << '\n';
-  os << " - lod: ";
-  for (auto &l : tensor.lod) {
-    os << to_string(l) << "; ";
-  }
-  os << "\n";
-  os << " - data: ";
-
-  int dim = std::accumulate(tensor.shape.begin(), tensor.shape.end(), 1,
-                            [](int a, int b) { return a * b; });
-  for (int i = 0; i < dim; i++) {
-    os << static_cast<float *>(tensor.data.data())[i] << " ";
-  }
-  os << '\n';
-  return os.str();
-}
-
 }  // namespace
 
 const float ditu_rnn_target_data[] = {

@@ -266,11 +234,29 @@ const float ditu_rnn_target_data[] = {
     10.7286, 12.0595, 10.6672, 0,       0,      0,       0,      0,
     93.5771, 3.84641, 0,       0,       0,      0,       0,      0,
     169.426, 0,       0,       0,       0,      0,       0,      0};
+void CompareResult(const std::vector<PaddleTensor> &outputs,
+                   const std::vector<PaddleTensor> &base_outputs) {
+  PADDLE_ENFORCE_GT(outputs.size(), 0);
+  PADDLE_ENFORCE_EQ(outputs.size(), base_outputs.size());
+  for (size_t i = 0; i < outputs.size(); i++) {
+    auto &out = outputs[i];
+    auto &base_out = base_outputs[i];
+    size_t size = std::accumulate(out.shape.begin(), out.shape.end(), 1,
+                                  [](int a, int b) { return a * b; });
+    size_t size1 = std::accumulate(base_out.shape.begin(), base_out.shape.end(),
+                                   1, [](int a, int b) { return a * b; });
+    PADDLE_ENFORCE_EQ(size, size1);
+    PADDLE_ENFORCE_GT(size, 0);
+    float *data = static_cast<float *>(out.data.data());
+    float *base_data = static_cast<float *>(base_out.data.data());
+    for (size_t i = 0; i < size; i++) {
+      EXPECT_NEAR(data[i], base_data[i], 1e-3);
+    }
+  }
+}
 // Test with a really complicate model.
-void TestDituRNNPrediction(const std::string &model_path,
-                           const std::string &data_path, int batch_size,
-                           bool use_analysis, bool activate_ir,
-                           int num_times = 1) {
+void TestDituRNNPrediction(bool use_analysis, bool activate_ir,
+                           int num_threads) {
   AnalysisConfig config;
   config.prog_file = FLAGS_infer_ditu_rnn_model + "/__model__";
   config.param_file = FLAGS_infer_ditu_rnn_model + "/param";

@@ -281,6 +267,8 @@ void TestDituRNNPrediction(const std::string &model_path,
   PADDLE_ENFORCE(config.ir_mode ==
                  AnalysisConfig::IrPassMode::kExclude);  // default
   config.ir_passes.clear();  // Do not exclude any pass.
+  int batch_size = FLAGS_batch_size;
+  int num_times = FLAGS_repeat;
 
   auto base_predictor =
       CreatePaddlePredictor<NativeConfig, PaddleEngineKind::kNative>(config);

@@ -288,40 +276,55 @@ void TestDituRNNPrediction(const std::string &model_path,
       CreatePaddlePredictor<AnalysisConfig, PaddleEngineKind::kAnalysis>(
           config);
   std::vector<PaddleTensor> input_slots;
-  DataRecord data(data_path, batch_size);
+  DataRecord data(FLAGS_infer_ditu_rnn_data, batch_size);
   // Prepare inputs.
   PrepareInputs(&input_slots, &data, batch_size);
   std::vector<PaddleTensor> outputs, base_outputs;
 
   base_predictor->Run(input_slots, &base_outputs);
 
-  Timer timer;
-  timer.tic();
-  for (int i = 0; i < num_times; i++) {
-    predictor->Run(input_slots, &outputs);
-  }
-  LOG(INFO) << "===========profile result===========";
-  LOG(INFO) << "batch_size: " << batch_size << ", repeat: " << num_times
-            << ", latency: " << timer.toc() / num_times << "ms";
-  LOG(INFO) << "=====================================";
-
-  PADDLE_ENFORCE_GT(outputs.size(), 0);
-  PADDLE_ENFORCE_EQ(outputs.size(), base_outputs.size());
-  for (size_t i = 0; i < outputs.size(); i++) {
-    auto &out = outputs[i];
-    auto &base_out = base_outputs[i];
-    size_t size = std::accumulate(out.shape.begin(), out.shape.end(), 1,
-                                  [](int a, int b) { return a * b; });
-    size_t size1 = std::accumulate(base_out.shape.begin(), base_out.shape.end(),
-                                   1, [](int a, int b) { return a * b; });
-    PADDLE_ENFORCE_EQ(size, size1);
-    PADDLE_ENFORCE_GT(size, 0);
-    float *data = static_cast<float *>(out.data.data());
-    float *base_data = static_cast<float *>(base_out.data.data());
-    for (size_t j = 0; j < size; j++) {
-      EXPECT_NEAR(data[j], base_data[j], 1e-3);
-    }
-  }
+  LOG(INFO) << "===========profile result===========";
+  if (num_threads == 1) {
+    // Prepare inputs.
+    Timer timer;
+    timer.tic();
+    for (int i = 0; i < num_times; i++) {
+      predictor->Run(input_slots, &outputs);
+    }
+    PrintTime(batch_size, num_times, 1, 0, timer.toc() / num_times);
+    CompareResult(outputs, base_outputs);
+  } else {
+    std::vector<std::thread> threads;
+    std::vector<std::unique_ptr<PaddlePredictor>> predictors;
+    // TODO(yanchunwei): Bug here, the analyzer phase can't be parallelled
+    // because AttentionLSTM's hard code nodeid will be damanged.
+    for (int tid = 0; tid < num_threads; ++tid) {
+      predictors.emplace_back(
+          CreatePaddlePredictor<AnalysisConfig, PaddleEngineKind::kAnalysis>(
+              config));
+    }
+    for (int tid = 0; tid < num_threads; ++tid) {
+      threads.emplace_back([&, tid]() {
+        // Each thread should have local input_slots and outputs.
+        std::vector<PaddleTensor> input_slots;
+        DataRecord data(FLAGS_infer_ditu_rnn_data, batch_size);
+        PrepareInputs(&input_slots, &data, batch_size);
+        std::vector<PaddleTensor> outputs;
+        Timer timer;
+        timer.tic();
+        for (int i = 0; i < num_times; i++) {
+          predictors[tid]->Run(input_slots, &outputs);
+        }
+        PrintTime(batch_size, num_times, num_threads, tid,
+                  timer.toc() / num_times);
+        CompareResult(outputs, base_outputs);
+      });
+    }
+    for (int i = 0; i < num_threads; ++i) {
+      threads[i].join();
+    }
+  }
+  LOG(INFO) << "=====================================";
 
   if (use_analysis && activate_ir) {
     AnalysisPredictor *analysis_predictor =

@@ -350,25 +353,26 @@ void TestDituRNNPrediction(const std::string &model_path,
   }
 }
 
-// Directly infer with the original model.
-TEST(Analyzer, DituRNN_without_analysis) {
-  TestDituRNNPrediction(FLAGS_infer_ditu_rnn_model, FLAGS_infer_ditu_rnn_data,
-                        FLAGS_batch_size, false, false, FLAGS_repeat);
-}
-
-// Inference with the original model with the analysis turned on, the analysis
-// module will transform the program to a data flow graph.
-TEST(Analyzer, DituRNN_with_analysis) {
-  LOG(INFO) << "ditu rnn with analysis";
-  TestDituRNNPrediction(FLAGS_infer_ditu_rnn_model, FLAGS_infer_ditu_rnn_data,
-                        FLAGS_batch_size, true, false, FLAGS_repeat);
-}
-
-// Inference with analysis and IR. The IR module will fuse some large kernels.
-TEST(Analyzer, DituRNN_with_analysis_with_IR) {
-  LOG(INFO) << "ditu rnn with analysis and IR fuse";
-  TestDituRNNPrediction(FLAGS_infer_ditu_rnn_model, FLAGS_infer_ditu_rnn_data,
-                        FLAGS_batch_size, true, true, FLAGS_repeat);
+// Inference with analysis and IR, easy for profiling independently.
+TEST(Analyzer, DituRNN) {
+  TestDituRNNPrediction(true, true, FLAGS_num_threads);
+}
+
+// Other unit-tests of DituRNN, test different options of use_analysis,
+// activate_ir and multi-threads.
+TEST(Analyzer, DituRNN_tests) {
+  int num_threads[2] = {1, 4};
+  for (auto i : num_threads) {
+    // Directly infer with the original model.
+    TestDituRNNPrediction(false, false, i);
+    // Inference with the original model with the analysis turned on, the
+    // analysis module will transform the program to a data flow graph.
+    TestDituRNNPrediction(true, false, i);
+    // Inference with analysis and IR. The IR module will fuse some large
+    // kernels.
+    TestDituRNNPrediction(true, true, i);
+  }
 }
 
 }  // namespace analysis
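The multi-threaded branch above follows a thread-per-predictor pattern: each thread gets its own predictor, input slots, and outputs, so only the read-only config and baseline outputs are shared. A stripped-down, self-contained sketch of that pattern, with a fake workload standing in for the Paddle-specific predictor->Run call (everything here is illustrative, not part of the commit):

    // Hypothetical illustration of the thread-per-worker benchmarking loop
    // used by TestDituRNNPrediction above.
    #include <chrono>
    #include <cstdio>
    #include <thread>
    #include <vector>

    // Stand-in for one predictor->Run(...) call; returns latency in ms.
    static double RunOnce() {
      auto start = std::chrono::steady_clock::now();
      volatile double acc = 0;
      for (int i = 0; i < 1000000; ++i) acc += i * 1e-9;  // fake work
      return std::chrono::duration<double, std::milli>(
                 std::chrono::steady_clock::now() - start).count();
    }

    int main() {
      const int num_threads = 4;
      const int num_times = 10;
      std::vector<std::thread> threads;
      for (int tid = 0; tid < num_threads; ++tid) {
        threads.emplace_back([tid, num_times] {
          // Each thread owns its own mutable state, mirroring the
          // per-thread input_slots/outputs in the test; nothing is shared.
          double total = 0;
          for (int i = 0; i < num_times; ++i) total += RunOnce();
          std::printf("thread %d latency: %.3f ms\n", tid, total / num_times);
        });
      }
      for (auto& t : threads) t.join();
    }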
paddle/fluid/inference/analysis/test_text_classification.cc → paddle/fluid/inference/analysis/analyzer_text_classification_tester.cc (renamed)

@@ -12,19 +12,23 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
 
+#include "paddle/fluid/inference/analysis/analyzer.h"
 #include <gflags/gflags.h>
 #include <glog/logging.h>  // use glog instead of PADDLE_ENFORCE to avoid importing other paddle header files.
 #include <gtest/gtest.h>
+#include <fstream>
 #include "paddle/fluid/framework/ir/pass.h"
-#include "paddle/fluid/inference/analysis/analyzer.h"
 #include "paddle/fluid/inference/analysis/ut_helper.h"
+#include "paddle/fluid/inference/api/helper.h"
 #include "paddle/fluid/inference/api/paddle_inference_api.h"
 #include "paddle/fluid/inference/api/paddle_inference_pass.h"
 #include "paddle/fluid/inference/api/timer.h"
 
 DEFINE_string(infer_model, "", "Directory of the inference model.");
+DEFINE_string(infer_data, "", "Path of the dataset.");
 DEFINE_int32(batch_size, 1, "batch size.");
 DEFINE_int32(repeat, 1, "How many times to repeat run.");
+DEFINE_int32(topn, -1, "Run top n batches of data to save time");
 
 namespace paddle {

@@ -44,41 +48,67 @@ void PrintTime(const double latency, const int bs, const int repeat) {
   LOG(INFO) << "=====================================";
 }
 
-void Main(int batch_size) {
-  // Three sequence inputs.
-  std::vector<PaddleTensor> input_slots(1);
-  // one batch starts
-  // data --
-  int64_t data0[] = {0, 1, 2};
-  for (auto &input : input_slots) {
-    input.data.Reset(data0, sizeof(data0));
-    input.shape = std::vector<int>({3, 1});
-    // dtype --
-    input.dtype = PaddleDType::INT64;
-    // LoD --
-    input.lod = std::vector<std::vector<size_t>>({{0, 3}});
-  }
-
+struct DataReader {
+  DataReader(const std::string &path) : file(new std::ifstream(path)) {}
+
+  bool NextBatch(PaddleTensor *tensor, int batch_size) {
+    PADDLE_ENFORCE_EQ(batch_size, 1);
+    std::string line;
+    tensor->lod.clear();
+    tensor->lod.emplace_back(std::vector<size_t>({0}));
+    std::vector<int64_t> data;
+
+    for (int i = 0; i < batch_size; i++) {
+      if (!std::getline(*file, line)) return false;
+      inference::split_to_int64(line, ' ', &data);
+    }
+
+    tensor->lod.front().push_back(data.size());
+    tensor->data.Resize(data.size() * sizeof(int64_t));
+    memcpy(tensor->data.data(), data.data(), data.size() * sizeof(int64_t));
+    tensor->shape.clear();
+    tensor->shape.push_back(data.size());
+    tensor->shape.push_back(1);
+    return true;
+  }
+
+  std::unique_ptr<std::ifstream> file;
+};
+
+void Main(int batch_size) {
   // Create Predictor --
   AnalysisConfig config;
   config.model_dir = FLAGS_infer_model;
   config.use_gpu = false;
   config.enable_ir_optim = true;
   config.ir_passes.push_back("fc_lstm_fuse_pass");
   auto predictor =
       CreatePaddlePredictor<AnalysisConfig, PaddleEngineKind::kAnalysis>(
           config);
 
+  std::vector<PaddleTensor> input_slots(1);
+  // one batch starts
+  // data --
+  auto &input = input_slots[0];
+  input.dtype = PaddleDType::INT64;
+
   inference::Timer timer;
   double sum = 0;
   std::vector<PaddleTensor> output_slots;
-  for (int i = 0; i < FLAGS_repeat; i++) {
-    timer.tic();
-    CHECK(predictor->Run(input_slots, &output_slots));
-    sum += timer.toc();
-  }
-  PrintTime(sum, batch_size, FLAGS_repeat);
+
+  int num_batches = 0;
+  for (int t = 0; t < FLAGS_repeat; t++) {
+    DataReader reader(FLAGS_infer_data);
+    while (reader.NextBatch(&input, FLAGS_batch_size)) {
+      if (FLAGS_topn > 0 && num_batches > FLAGS_topn) break;
+      timer.tic();
+      CHECK(predictor->Run(input_slots, &output_slots));
+      sum += timer.toc();
+      ++num_batches;
+    }
+  }
+  PrintTime(sum, batch_size, num_batches);
 
   // Get output
   LOG(INFO) << "get outputs " << output_slots.size();

@@ -100,10 +130,3 @@ void Main(int batch_size) {
 TEST(text_classification, basic) { Main(FLAGS_batch_size); }
 
 }  // namespace paddle
-
-USE_PASS(fc_fuse_pass);
-USE_PASS(seq_concat_fc_fuse_pass);
-USE_PASS(fc_lstm_fuse_pass);
-USE_PASS(graph_viz_pass);
-USE_PASS(infer_clean_graph_pass);
-USE_PASS(attention_lstm_fuse_pass);
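The DataReader above packs one whole line of token ids into a single LoD (level-of-detail) sequence: lod = {{0, n}} marks one sequence spanning rows [0, n) of an [n, 1] int64 tensor. A hedged, self-contained sketch of that packing, using a plain struct in place of Paddle's PaddleTensor (names here are made up for illustration):

    // Illustration only: how one line of token ids becomes a
    // single-sequence LoD batch, as in DataReader::NextBatch.
    #include <cstdint>
    #include <cstdio>
    #include <vector>

    struct FakeTensor {
      std::vector<int> shape;
      std::vector<std::vector<size_t>> lod;  // lod[0] holds sequence boundaries
      std::vector<int64_t> data;
    };

    int main() {
      std::vector<int64_t> ids = {0, 1, 2};  // one sentence, three token ids
      FakeTensor t;
      t.data = ids;
      t.shape = {static_cast<int>(ids.size()), 1};  // [3, 1]
      t.lod = {{0, ids.size()}};  // one sequence covering rows [0, 3)
      std::printf("rows=%d, sequences=%zu\n", t.shape[0], t.lod[0].size() - 1);
    }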
paddle/fluid/inference/analysis/data_flow_graph_to_fluid_pass.cc

@@ -263,7 +263,7 @@ class DFG_DebuggerPass : public DFG_GraphvizDrawPass {
 };
 }  // namespace
 
-Pass *DataFlowGraphToFluidPass::CreateGraphvizDebugerPass() const {
+AnalysisPass *DataFlowGraphToFluidPass::CreateGraphvizDebugerPass() const {
   return new DFG_DebuggerPass(DFG_GraphvizDrawPass::Config(
       FLAGS_IA_graphviz_log_root,
       "data_flow_graph_to_fluid_graphviz_debugger"));
paddle/fluid/inference/analysis/data_flow_graph_to_fluid_pass.h

@@ -21,8 +21,8 @@
 #include <string>
 
 #include "paddle/fluid/framework/program_desc.h"
+#include "paddle/fluid/inference/analysis/analysis_pass.h"
 #include "paddle/fluid/inference/analysis/data_flow_graph.h"
-#include "paddle/fluid/inference/analysis/pass.h"
 
 namespace paddle {
 namespace inference {

@@ -42,7 +42,7 @@ class DataFlowGraphToFluidPass final : public DataFlowGraphPass {
     return "Transform a DFG to a Fluid ProgramDesc";
   }
 
-  Pass *CreateGraphvizDebugerPass() const override;
+  AnalysisPass *CreateGraphvizDebugerPass() const override;
 
  protected:
   // Add a Fluid Op into the ProgramDesc.
paddle/fluid/inference/analysis/dfg_graphviz_draw_pass.h

@@ -21,8 +21,8 @@ limitations under the License. */
 #include <fstream>
 #include <string>
 
+#include "paddle/fluid/inference/analysis/analysis_pass.h"
 #include "paddle/fluid/inference/analysis/dot.h"
-#include "paddle/fluid/inference/analysis/pass.h"
 
 namespace paddle {
 namespace inference {
paddle/fluid/inference/analysis/fluid_to_data_flow_graph_pass.cc

@@ -66,7 +66,7 @@ class DFG_DebuggerPass : public DFG_GraphvizDrawPass {
 };
 }
 
-Pass *FluidToDataFlowGraphPass::CreateGraphvizDebugerPass() const {
+AnalysisPass *FluidToDataFlowGraphPass::CreateGraphvizDebugerPass() const {
   return new DFG_DebuggerPass(DFG_GraphvizDrawPass::Config(
       FLAGS_IA_graphviz_log_root, "fluid-to-dfg-debuger"));
 }
paddle/fluid/inference/analysis/fluid_to_data_flow_graph_pass.h

@@ -22,8 +22,8 @@
 #include <string>
 
 #include "paddle/fluid/framework/program_desc.h"
+#include "paddle/fluid/inference/analysis/analysis_pass.h"
 #include "paddle/fluid/inference/analysis/data_flow_graph.h"
-#include "paddle/fluid/inference/analysis/pass.h"
 
 namespace paddle {
 namespace inference {

@@ -46,7 +46,7 @@ class FluidToDataFlowGraphPass final : public DataFlowGraphPass {
     return "transform a fluid ProgramDesc to a data flow graph.";
   }
 
-  Pass *CreateGraphvizDebugerPass() const override;
+  AnalysisPass *CreateGraphvizDebugerPass() const override;
 
  private:
   framework::proto::ProgramDesc const *desc_;
paddle/fluid/inference/analysis/fluid_to_ir_pass.h

@@ -14,15 +14,17 @@
 #pragma once
 
 #include <string>
+
+#include "paddle/fluid/framework/ir/fuse_pass_base.h"
+#include "paddle/fluid/inference/analysis/analysis_pass.h"
 #include "paddle/fluid/inference/analysis/flags.h"
 #include "paddle/fluid/inference/analysis/ir_pass_manager.h"
-#include "paddle/fluid/inference/analysis/pass.h"
 
 namespace paddle {
 namespace inference {
 namespace analysis {
 
-using namespace framework;
-
 static const char kFluidToIrPassesAttr[] = "__fluid_to_ir_passes__";

@@ -48,7 +50,8 @@ class FluidToIrPass final : public DataFlowGraphPass {
     ANALYSIS_ARGUMENT_CHECK_FIELD(argument->fluid_model_program_path);
     // Load program.
     auto program = LoadProgramDesc(*argument->fluid_model_program_path);
-    argument->origin_program_desc.reset(new proto::ProgramDesc(program));
+    argument->origin_program_desc.reset(
+        new framework::proto::ProgramDesc(program));
     // Create main data flow graph.
     if (!argument->main_dfg) {
       argument->main_dfg.reset(new DataFlowGraph);

@@ -78,12 +81,13 @@ class FluidToIrPass final : public DataFlowGraphPass {
     IRPassManager ir_passes(argument_->Get<ProgramDesc>("ir_program_desc"),
                             nullptr);
     // Pass the scope from analysis to IR if needed.
-    if (argument_->Has(ir::kParamScopeAttr)) {
+    if (argument_->Has(framework::ir::kParamScopeAttr)) {
       // Here the address is passed, attention that IR doesn't own the scope, so
       // the real scope in analysis should live during the IR phase.
       ir_passes.graph().Set(
-          ir::kParamScopeAttr,
-          new Scope *(&argument_->Get<Scope>(ir::kParamScopeAttr)));
+          framework::ir::kParamScopeAttr,
+          new framework::Scope *(&argument_->Get<framework::Scope>(
+              framework::ir::kParamScopeAttr)));
     }
 
     if (FLAGS_IA_enable_ir) {

@@ -95,12 +99,12 @@ class FluidToIrPass final : public DataFlowGraphPass {
     PADDLE_ENFORCE(argument_->main_dfg.get());
     argument_->main_dfg->Build(ir_passes.graph());
     // inherit the arguments from ir.
-    if (ir_passes.graph().Has(ir::kFuseStatisAttr)) {
+    if (ir_passes.graph().Has(framework::ir::kFuseStatisAttr)) {
       argument_->Set(
-          ir::kFuseStatisAttr,
+          framework::ir::kFuseStatisAttr,
           new std::unordered_map<std::string, int>(
               ir_passes.graph().Get<std::unordered_map<std::string, int>>(
-                  ir::kFuseStatisAttr)));
+                  framework::ir::kFuseStatisAttr)));
     }
 

@@ -112,7 +116,7 @@ class FluidToIrPass final : public DataFlowGraphPass {
 
  private:
   // Load parameters from a single file or from a directory.
-  bool LoadParams(Scope *scope, const std::string &dir,
+  bool LoadParams(framework::Scope *scope, const std::string &dir,
                   const std::string &prog_file, const std::string &param_file);
 
  private:
paddle/fluid/inference/analysis/model_store_pass.h

@@ -19,7 +19,7 @@
 #pragma once
 
 #include <string>
-#include "paddle/fluid/inference/analysis/pass.h"
+#include "paddle/fluid/inference/analysis/analysis_pass.h"
 
 namespace paddle {
 namespace inference {
paddle/fluid/inference/analysis/pass_manager.cc

@@ -40,17 +40,6 @@ void DfgPassManager::RunAll() {
   }
 }
 
-void NodePassManager::RunAll() {
-  PADDLE_ENFORCE(argument_);
-  PADDLE_ENFORCE(argument_->main_dfg.get());
-  auto trait = GraphTraits<DataFlowGraph>(*argument_->main_dfg).nodes_in_DFS();
-  for (auto& node : trait) {
-    for (auto& pass : data_) {
-      pass->Run(&node);
-    }
-  }
-}
-
 }  // namespace analysis
 }  // namespace inference
 }  // namespace paddle
paddle/fluid/inference/analysis/pass_manager.h

@@ -33,7 +33,7 @@ limitations under the License. */
 #include <string>
 
 #include "paddle/fluid/framework/program_desc.h"
-#include "paddle/fluid/inference/analysis/pass.h"
+#include "paddle/fluid/inference/analysis/analysis_pass.h"
 
 namespace paddle {
 namespace inference {

@@ -43,7 +43,7 @@ namespace analysis {
  * PassManager is the base class for all pass managers, a pass manager has
  * several Pass-es registered, and execute them in the linear order.
  */
-class PassManager : public OrderedRegistry<Pass> {
+class PassManager : public OrderedRegistry<AnalysisPass> {
  public:
   PassManager() = default;
   // Call all the passes' Initialize methods. The desc and data_flow_graph are

@@ -89,18 +89,6 @@ class DfgPassManager : public PassManager {
   virtual ~DfgPassManager() = default;
 };
 
-/*
- * A pass manager that process a Node each time.
- */
-class NodePassManager : public PassManager {
- public:
-  NodePassManager() = default;
-
-  void RunAll() override;
-
-  virtual ~NodePassManager() = default;
-};
-
 }  // namespace analysis
 }  // namespace inference
 }  // namespace paddle
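As the doc comment above says, a PassManager is an ordered registry of passes run linearly. A hedged sketch of the intended call sequence, using only calls that appear elsewhere in this diff (Register, Initialize, RunAll); the driver function and the choice of a tester-style manager are hypothetical:

    // Hypothetical driver, mirroring how pass_manager_tester.cc exercises a
    // DfgPassManager: register passes, initialize against an Argument, run.
    void RunAnalysisPipeline(Argument* argument) {
      TestDfgPassManager manager;  // any concrete DfgPassManager subclass
      // Passes later execute in exactly this registration order.
      manager.Register("fluid-to-data-flow-graph", new FluidToDataFlowGraphPass);
      manager.Register("data-flow-graph-to-fluid", new DataFlowGraphToFluidPass);
      if (manager.Initialize(argument)) {
        manager.RunAll();
      }
    }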
paddle/fluid/inference/analysis/pass_manager_tester.cc

@@ -34,28 +34,6 @@ class TestDfgPassManager final : public DfgPassManager {
   std::string description() const override { return "test doc"; }
 };
 
-class TestNodePassManager final : public NodePassManager {
- public:
-  virtual ~TestNodePassManager() = default;
-
-  std::string repr() const override { return "test-node-pass-manager"; }
-  std::string description() const override { return "test doc"; }
-};
-
-class TestNodePass final : public NodePass {
- public:
-  virtual ~TestNodePass() = default;
-
-  bool Initialize(Argument* argument) override { return true; }
-
-  void Run(Node* node) override {
-    LOG(INFO) << "- Processing node " << node->repr();
-  }
-
-  std::string repr() const override { return "test-node"; }
-  std::string description() const override { return "some doc"; }
-};
-
 TEST(PassManager, DFG_pass_manager) {
   TestDfgPassManager manager;
   DFG_GraphvizDrawPass::Config config("./", "dfg.dot");

@@ -71,19 +49,6 @@ TEST(PassManager, DFG_pass_manager) {
   manager.RunAll();
 }
 
-TEST(PassManager, Node_pass_manager) {
-  Argument argument(FLAGS_inference_model_dir);
-  // Pre-process: initialize the DFG with the ProgramDesc first.
-  FluidToDataFlowGraphPass pass0;
-  pass0.Initialize(&argument);
-  pass0.Run(argument.main_dfg.get());
-
-  TestNodePassManager manager;
-  manager.Register("test-node-pass", new TestNodePass);
-  ASSERT_TRUE(manager.Initialize(&argument));
-  manager.RunAll();
-}
-
 }  // namespace analysis
 }  // namespace inference
 }  // namespace paddle
paddle/fluid/inference/analysis/tensorrt_subgraph_node_mark_pass.cc

@@ -68,7 +68,7 @@ class DfgDebuggerPass : public DFG_GraphvizDrawPass {
   }
 };
 
-Pass *TensorRTSubgraphNodeMarkPass::CreateGraphvizDebugerPass() const {
+AnalysisPass *TensorRTSubgraphNodeMarkPass::CreateGraphvizDebugerPass() const {
   DFG_GraphvizDrawPass::Config config(FLAGS_IA_graphviz_log_root,
                                       "tensorrt_marked_node");
   return new DfgDebuggerPass(config);
paddle/fluid/inference/analysis/tensorrt_subgraph_node_mark_pass.h

@@ -20,7 +20,7 @@
 #pragma once
 
 #include <string>
-#include "paddle/fluid/inference/analysis/pass.h"
+#include "paddle/fluid/inference/analysis/analysis_pass.h"
 #include "paddle/fluid/inference/analysis/subgraph_splitter.h"
 
 namespace paddle {

@@ -48,7 +48,7 @@ class TensorRTSubgraphNodeMarkPass : public DataFlowGraphPass {
     return "tensorrt sub-graph mark pass";
   }
 
-  Pass* CreateGraphvizDebugerPass() const override;
+  AnalysisPass* CreateGraphvizDebugerPass() const override;
   bool Finalize() override;
 
  private:
paddle/fluid/inference/analysis/tensorrt_subgraph_pass.h

@@ -15,8 +15,8 @@ limitations under the License. */
 #pragma once
 
 #include <string>
+#include "paddle/fluid/inference/analysis/analysis_pass.h"
 #include "paddle/fluid/inference/analysis/node.h"
-#include "paddle/fluid/inference/analysis/pass.h"
 #include "paddle/fluid/inference/analysis/subgraph_splitter.h"
 
 namespace paddle {
paddle/fluid/inference/api/CMakeLists.txt

@@ -44,20 +44,7 @@ function(inference_api_test TARGET_NAME)
 endfunction(inference_api_test)
 
 cc_library(paddle_inference_api SRCS api.cc api_impl.cc helper.cc DEPS lod_tensor)
-cc_library(analysis_predictor SRCS analysis_predictor.cc DEPS paddle_inference_api
-  analysis
-  ir_pass_manager
-  pass
-  fc_fuse_pass
-  fc_lstm_fuse_pass
-  fc_gru_fuse_pass
-  seq_concat_fc_fuse_pass
-  graph_viz_pass
-  infer_clean_graph_pass
-  graph_pattern_detector
-  infer_clean_graph_pass
-  attention_lstm_fuse_pass
-  )
+cc_library(analysis_predictor SRCS analysis_predictor.cc DEPS paddle_inference_api analysis)
 
 cc_test(test_paddle_inference_api
     SRCS api_tester.cc

@@ -74,7 +61,7 @@ cc_library(paddle_inference_tensorrt_subgraph_engine
   inference_api_test(test_api_tensorrt_subgraph_engine SRC api_tensorrt_subgraph_engine_tester.cc ARGS test_word2vec)
 endif()
 
-if (WITH_ANAKIN AND WITH_GPU) # only needed in CI
+if (WITH_ANAKIN AND WITH_MKL) # only needed in CI
   # compile the libinference_anakin_api.a and anakin.so.
   cc_library(inference_anakin_api SRCS api.cc api_anakin_engine.cc DEPS anakin_shared anakin_saber mklml)
   cc_library(inference_anakin_api_shared SHARED SRCS api.cc api_anakin_engine.cc DEPS anakin_shared anakin_saber)

@@ -84,12 +71,24 @@ if (WITH_ANAKIN AND WITH_GPU) # only needed in CI
   anakin_target(inference_anakin_api)
   anakin_target(inference_anakin_api_shared)
   if (WITH_TESTING)
-    cc_test(api_anakin_engine_tester SRCS api_anakin_engine_tester.cc
-            ARGS --model=${ANAKIN_SOURCE_DIR}/mobilenet_v2.anakin.bin
-            DEPS inference_anakin_api_shared dynload_cuda SERIAL)
+    # TODO(luotao): ANAKIN_MODLE_URL etc will move to demo ci later.
+    set(INFERENCE_URL "http://paddle-inference-dist.bj.bcebos.com")
+    set(ANAKIN_RNN_MODLE_URL "${INFERENCE_URL}/anakin_test%2Fditu_rnn.anakin2.model.bin")
+    set(ANAKIN_RNN_DATA_URL "${INFERENCE_URL}/anakin_test%2Fditu_rnn_data.txt")
+    execute_process(COMMAND bash -c "mkdir -p ${ANAKIN_SOURCE_DIR}")
+    execute_process(COMMAND bash -c "cd ${ANAKIN_SOURCE_DIR}; wget -q --no-check-certificate ${ANAKIN_RNN_MODLE_URL} -N")
+    execute_process(COMMAND bash -c "cd ${ANAKIN_SOURCE_DIR}; wget -q --no-check-certificate ${ANAKIN_RNN_DATA_URL} -N")
+    if(WITH_GPU)
+      set(anakin_test_extra_deps dynload_cuda)
+      set(ANAKIN_MODLE_URL "${INFERENCE_URL}/mobilenet_v2.anakin.bin")
+      execute_process(COMMAND bash -c "cd ${ANAKIN_SOURCE_DIR}; wget -q --no-check-certificate ${ANAKIN_MODLE_URL} -N")
+      cc_test(api_anakin_engine_tester SRCS api_anakin_engine_tester.cc
+              ARGS --model=${ANAKIN_SOURCE_DIR}/mobilenet_v2.anakin.bin
+              DEPS inference_anakin_api_shared ${anakin_test_extra_deps} SERIAL)
+    endif()
     cc_test(api_anakin_engine_rnn_tester SRCS api_anakin_engine_rnn_tester.cc
             ARGS --model=${ANAKIN_SOURCE_DIR}/anakin_test%2Fditu_rnn.anakin2.model.bin
                  --datapath=${ANAKIN_SOURCE_DIR}/anakin_test%2Fditu_rnn_data.txt
-            DEPS inference_anakin_api_shared dynload_cuda SERIAL)
+            DEPS inference_anakin_api_shared ${anakin_test_extra_deps} SERIAL)
   endif(WITH_TESTING)
 endif()
paddle/fluid/inference/api/analysis_predictor.cc

@@ -48,7 +48,6 @@ bool AnalysisPredictor::Init(
   } else {
     place_ = paddle::platform::CPUPlace();
   }
-  PADDLE_ENFORCE(!parent_scope);
   if (parent_scope) {
     scope_ = parent_scope;
     sub_scope_ = &(parent_scope->NewScope());
paddle/fluid/inference/api/api_anakin_engine.cc

@@ -193,7 +193,9 @@ PaddleInferenceAnakinPredictor<Target>::Clone() {
   return std::move(cls);
 }
 
+#ifdef PADDLE_WITH_CUDA
 template class PaddleInferenceAnakinPredictor<anakin::NV>;
+#endif
 template class PaddleInferenceAnakinPredictor<anakin::X86>;
 
 // A factory to help create difference predictor.

@@ -202,10 +204,15 @@ std::unique_ptr<PaddlePredictor> CreatePaddlePredictor<
     AnakinConfig, PaddleEngineKind::kAnakin>(const AnakinConfig &config) {
   VLOG(3) << "Anakin Predictor create.";
   if (config.target_type == AnakinConfig::NVGPU) {
+#ifdef PADDLE_WITH_CUDA
     VLOG(3) << "Anakin Predictor create on [ NVIDIA GPU ].";
     std::unique_ptr<PaddlePredictor> x(
         new PaddleInferenceAnakinPredictor<anakin::NV>(config));
     return x;
+#else
+    LOG(ERROR) << "AnakinConfig::NVGPU could not used in ONLY-CPU environment";
+    return nullptr;
+#endif
   } else if (config.target_type == AnakinConfig::X86) {
     VLOG(3) << "Anakin Predictor create on [ Intel X86 ].";
     std::unique_ptr<PaddlePredictor> x(
paddle/fluid/inference/api/helper.h

@@ -14,6 +14,7 @@
 
 #pragma once
 
+#include <glog/logging.h>
 #include <sys/time.h>
 #include <algorithm>
 #include <numeric>

@@ -88,5 +89,45 @@ static void TensorAssignData(PaddleTensor *tensor,
   }
 }
 
+std::string DescribeTensor(const PaddleTensor &tensor) {
+  std::stringstream os;
+  os << "Tensor [" << tensor.name << "]\n";
+  os << " - type: ";
+  switch (tensor.dtype) {
+    case PaddleDType::FLOAT32:
+      os << "float32";
+      break;
+    case PaddleDType::INT64:
+      os << "int64";
+      break;
+    default:
+      os << "unset";
+  }
+  os << '\n';
+
+  os << " - shape: " << to_string(tensor.shape) << '\n';
+  os << " - lod: ";
+  for (auto &l : tensor.lod) {
+    os << to_string(l) << "; ";
+  }
+  os << "\n";
+  os << " - data: ";
+
+  int dim = std::accumulate(tensor.shape.begin(), tensor.shape.end(), 1,
+                            [](int a, int b) { return a * b; });
+  for (int i = 0; i < dim; i++) {
+    os << static_cast<float *>(tensor.data.data())[i] << " ";
+  }
+  os << '\n';
+  return os.str();
+}
+
+void PrintTime(int batch_size, int repeat, int num_threads, int tid,
+               double latency) {
+  LOG(INFO) << "batch_size: " << batch_size << ", repeat: " << repeat
+            << ", threads: " << num_threads << ", thread id: " << tid
+            << ", latency: " << latency << "ms";
+}
+
 }  // namespace inference
 }  // namespace paddle
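A small usage sketch of the two helpers this hunk adds (DescribeTensor was moved here from analyzer_tester.cc; PrintTime is new). The caller function and its arguments are hypothetical; only the two helper signatures come from the diff:

    // Hypothetical caller: dump one output tensor and report single-thread
    // timing averaged over `repeat` runs.
    void ReportRun(const paddle::PaddleTensor &output, double total_ms,
                   int batch_size, int repeat) {
      // DescribeTensor prints name, dtype, shape, lod and raw float data.
      LOG(INFO) << paddle::inference::DescribeTensor(output);
      // 1 thread total, thread id 0.
      paddle::inference::PrintTime(batch_size, repeat, 1, 0, total_ms / repeat);
    }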
paddle/fluid/operators/fake_quantize_op.cu

@@ -119,7 +119,8 @@ struct FindRangeAbsMaxFunctor<platform::CUDADeviceContext, T> {
                   const framework::Tensor& last_scale,
                   const framework::Tensor& iter, const int window_size,
                   framework::Tensor* scales_arr, framework::Tensor* out_scale) {
-    auto& gpu_place = boost::get<platform::CUDAPlace>(ctx.GetPlace());
+    const auto gpu_place = boost::get<platform::CUDAPlace>(ctx.GetPlace());
+
     T* scale_arr = scales_arr->mutable_data<T>(gpu_place);
     T* out_scale_data = out_scale->mutable_data<T>(gpu_place);
paddle/fluid/operators/flatten_op.cc
浏览文件 @
06180779
...
...
@@ -157,6 +157,116 @@ class FlattenGradOp : public framework::OperatorBase {
}
};
// FIXME(zcd): flatten2 adds an intermediate output(XShape) based on flatten,
// the XShape is used to carry the shape and lod of X which will be used in
// flatten_grad, in this way, the framework can reuse the memory of X
// immediately the flatten2_op is finished.
// Considering compatibility issues, we could not fix flatten2_op
class
Flatten2OpInferShape
:
public
FlattenOpInferShape
{
public:
void
operator
()(
framework
::
InferShapeContext
*
ctx
)
const
override
{
FlattenOpInferShape
::
operator
()(
ctx
);
PADDLE_ENFORCE
(
ctx
->
HasOutput
(
"XShape"
),
"Output (XShape) of Flatten op should not be null."
);
const
auto
&
in_dims
=
ctx
->
GetInputDim
(
"X"
);
std
::
vector
<
int64_t
>
xshape_dims
(
in_dims
.
size
()
+
1
);
xshape_dims
[
0
]
=
0
;
for
(
int
i
=
0
;
i
<
in_dims
.
size
();
++
i
)
{
xshape_dims
[
i
+
1
]
=
in_dims
[
i
];
}
ctx
->
SetOutputDim
(
"XShape"
,
framework
::
make_ddim
(
xshape_dims
));
ctx
->
ShareLoD
(
"X"
,
"XShape"
);
}
};
class
Flatten2Op
:
public
framework
::
OperatorBase
{
public:
using
OperatorBase
::
OperatorBase
;
private:
void
RunImpl
(
const
framework
::
Scope
&
scope
,
const
platform
::
Place
&
place
)
const
override
{
auto
&
axis
=
Attr
<
int
>
(
"axis"
);
auto
in_dims
=
scope
.
FindVar
(
Input
(
"X"
))
->
Get
<
framework
::
LoDTensor
>
().
dims
();
const
auto
&
out_dims
=
FlattenOpInferShape
::
GetOutputShape
(
axis
,
in_dims
);
framework
::
AttributeMap
attrs
;
attrs
[
"shape"
]
=
out_dims
;
attrs
[
"inplace"
]
=
false
;
// Invoke Reshape Op
auto
reshape_op
=
framework
::
OpRegistry
::
CreateOp
(
"reshape2"
,
{{
"X"
,
{
Input
(
"X"
)}},
{
"Shape"
,
{}}},
{{
"Out"
,
{
Output
(
"Out"
)}},
{
"XShape"
,
{
Output
(
"XShape"
)}}},
attrs
);
reshape_op
->
Run
(
scope
,
place
);
}
};
class
Flatten2OpMaker
:
public
FlattenOpMaker
{
public:
void
Make
()
override
{
FlattenOpMaker
::
Make
();
AddOutput
(
"XShape"
,
"XShape is just used to store the shape and lod of X, which will "
"be used in FlattenGradOp."
)
.
AsIntermediate
();
}
};
class
Flatten2GradOpMaker
:
public
framework
::
SingleGradOpDescMaker
{
public:
using
framework
::
SingleGradOpDescMaker
::
SingleGradOpDescMaker
;
std
::
unique_ptr
<
framework
::
OpDesc
>
Apply
()
const
override
{
auto
*
grad_op
=
new
framework
::
OpDesc
();
grad_op
->
SetType
(
"flatten2_grad"
);
grad_op
->
SetInput
(
"XShape"
,
Output
(
"XShape"
));
grad_op
->
SetInput
(
framework
::
GradVarName
(
"Out"
),
OutputGrad
(
"Out"
));
grad_op
->
SetOutput
(
framework
::
GradVarName
(
"X"
),
InputGrad
(
"X"
));
grad_op
->
SetAttrMap
(
Attrs
());
return
std
::
unique_ptr
<
framework
::
OpDesc
>
(
grad_op
);
}
};
class
Flatten2GradInferShape
:
public
framework
::
InferShapeBase
{
public:
void
operator
()(
framework
::
InferShapeContext
*
context
)
const
override
{
PADDLE_ENFORCE
(
context
->
HasInput
(
"XShape"
),
"Input(XShape) shouldn't be null."
);
PADDLE_ENFORCE
(
context
->
HasInput
(
framework
::
GradVarName
(
"Out"
)),
"Input(Out@GRAD) shouldn't be null."
);
auto
xshape_dims
=
context
->
GetInputDim
(
"XShape"
);
auto
x_dims
=
framework
::
slice_ddim
(
xshape_dims
,
1
,
xshape_dims
.
size
());
context
->
SetOutputDim
(
framework
::
GradVarName
(
"X"
),
x_dims
);
context
->
ShareLoD
(
"XShape"
,
framework
::
GradVarName
(
"X"
));
}
};
class
Flatten2GradOp
:
public
framework
::
OperatorBase
{
public:
using
OperatorBase
::
OperatorBase
;
private:
void
RunImpl
(
const
framework
::
Scope
&
scope
,
const
platform
::
Place
&
place
)
const
override
{
auto
dx_name
=
Output
(
framework
::
GradVarName
(
"X"
));
auto
dout_name
=
Input
(
framework
::
GradVarName
(
"Out"
));
auto
xshape_name
=
Input
(
"XShape"
);
auto
xshape_dims
=
scope
.
FindVar
(
xshape_name
)
->
Get
<
framework
::
LoDTensor
>
().
dims
();
auto
x_dims
=
framework
::
slice_ddim
(
xshape_dims
,
1
,
xshape_dims
.
size
());
framework
::
AttributeMap
attrs
;
attrs
[
"shape"
]
=
framework
::
vectorize2int
(
x_dims
);
attrs
[
"inplace"
]
=
false
;
auto
reshape_op
=
framework
::
OpRegistry
::
CreateOp
(
"reshape2"
,
{{
"X"
,
{
dout_name
}},
{
"Shape"
,
{}}},
{{
"Out"
,
{
dx_name
}},
{
"XShape"
,
{
xshape_name
}}},
attrs
);
reshape_op
->
Run
(
scope
,
place
);
}
};
}
// namespace operators
}
// namespace paddle
...
...
@@ -167,3 +277,8 @@ REGISTER_OPERATOR(flatten, ops::FlattenOp, ops::FlattenOpMaker,
                  ops::FlattenOpInferShape,
                  paddle::framework::DefaultGradOpDescMaker<true>);
REGISTER_OPERATOR(flatten_grad, ops::FlattenGradOp, ops::FlattenGradInferShape);
REGISTER_OPERATOR(flatten2, ops::Flatten2Op, ops::Flatten2OpMaker,
                  ops::Flatten2OpInferShape, ops::Flatten2GradOpMaker);
REGISTER_OPERATOR(flatten2_grad, ops::Flatten2GradOp,
                  ops::Flatten2GradInferShape);
paddle/fluid/operators/fusion_gru_op.cc
...
...
@@ -30,14 +30,7 @@ void FusionGRUOp::InferShape(framework::InferShapeContext* ctx) const {
                 "Input(WeightX) of GRU should not be null.");
  PADDLE_ENFORCE(ctx->HasInput("WeightH"),
                 "Input(WeightH) of GRU should not be null.");
  PADDLE_ENFORCE(ctx->HasOutput("XX"), "Output(XX) of GRU should not be null.");
- PADDLE_ENFORCE(ctx->HasOutput("ReorderedH0"),
-                "Output(ReorderedH0) of GRU should not be null.");
- PADDLE_ENFORCE(ctx->HasOutput("BatchedInput"),
-                "Output(BatchedInput) of GRU should not be null.");
- PADDLE_ENFORCE(ctx->HasOutput("BatchedOut"),
-                "Output(BatchedOut) of GRU should not be null.");
  PADDLE_ENFORCE(ctx->HasOutput("Hidden"),
                 "Output(Hidden) of GRU should not be null.");
...
...
@@ -80,15 +73,20 @@ void FusionGRUOp::InferShape(framework::InferShapeContext* ctx) const {
  }
  framework::DDim out_dims({x_dims[0], frame_size});
  ctx->SetOutputDim("Hidden", out_dims);
- ctx->SetOutputDim("BatchedInput", {x_dims[0], wx_dims[1]});
- ctx->SetOutputDim("BatchedOut", out_dims);
  ctx->ShareLoD("X", "Hidden");
  int xx_width;
  if (ctx->Attrs().Get<bool>("use_seq")) {
    xx_width = wx_dims[1];
  } else {
    xx_width = x_dims[1] > wx_dims[1] ? wx_dims[1] : x_dims[1];
+   PADDLE_ENFORCE(ctx->HasOutput("ReorderedH0"),
+                  "Output(ReorderedH0) of GRU should not be null.");
+   PADDLE_ENFORCE(ctx->HasOutput("BatchedInput"),
+                  "Output(BatchedInput) of GRU should not be null.");
+   PADDLE_ENFORCE(ctx->HasOutput("BatchedOut"),
+                  "Output(BatchedOut) of GRU should not be null.");
+   ctx->SetOutputDim("BatchedInput", {x_dims[0], wx_dims[1]});
+   ctx->SetOutputDim("BatchedOut", out_dims);
  }
  ctx->SetOutputDim("XX", {x_dims[0], xx_width});
  ctx->ShareLoD("X", "XX");
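The width picked for XX above can be stated compactly. In the batched branch XX doubles as scratch space, so it only needs the smaller of the raw input width and the projected gate width (taking wx_dims[1] as the projected width):

\text{xx\_width} =
\begin{cases}
wx\_dims[1], & \text{use\_seq}, \\
\min\bigl(x\_dims[1],\; wx\_dims[1]\bigr), & \text{otherwise.}
\end{cases}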
...
...
paddle/fluid/operators/fusion_lstm_op.cc
...
...
@@ -38,16 +38,6 @@ void FusionLSTMOp::InferShape(framework::InferShapeContext* ctx) const {
                 "Output(Hidden) of LSTM should not be null.");
  PADDLE_ENFORCE(ctx->HasOutput("Cell"),
                 "Output(Cell) of LSTM should not be null.");
- PADDLE_ENFORCE(ctx->HasOutput("BatchedInput"),
-                "Output(BatchedInput) of LSTM should not be null.");
- PADDLE_ENFORCE(ctx->HasOutput("BatchedHidden"),
-                "Output(BatchedHidden) of LSTM should not be null.");
- PADDLE_ENFORCE(ctx->HasOutput("BatchedCell"),
-                "Output(BatchedCell) of LSTM should not be null.");
- PADDLE_ENFORCE(ctx->HasOutput("ReorderedH0"),
-                "Output(ReorderedH0) of LSTM should not be null.");
- PADDLE_ENFORCE(ctx->HasOutput("ReorderedC0"),
-                "Output(ReorderedC0) of LSTM should not be null.");
  auto x_dims = ctx->GetInputDim("X");
  PADDLE_ENFORCE_EQ(x_dims.size(), 2, "Input(X)'s rank must be 2.");
...
...
@@ -88,9 +78,8 @@ void FusionLSTMOp::InferShape(framework::InferShapeContext* ctx) const {
  PADDLE_ENFORCE_EQ(b_dims.size(), 2, "The rank of Input(Bias) should be 2.");
  PADDLE_ENFORCE_EQ(b_dims[0], 1,
                    "The first dimension of Input(Bias) should be 1.");
- auto use_peepholes = ctx->Attrs().Get<bool>("use_peepholes");
- PADDLE_ENFORCE_EQ(b_dims[1], (use_peepholes ? 7 : 4) * frame_size,
+ PADDLE_ENFORCE_EQ(
+     b_dims[1], (ctx->Attrs().Get<bool>("use_peepholes") ? 7 : 4) * frame_size,
      "The second dimension of Input(Bias) should be "
      "7 * %d if enable peepholes connection or"
      "4 * %d if disable peepholes",
...
...
@@ -99,17 +88,26 @@ void FusionLSTMOp::InferShape(framework::InferShapeContext* ctx) const {
  framework::DDim out_dims({x_dims[0], frame_size});
  ctx->SetOutputDim("Hidden", out_dims);
  ctx->SetOutputDim("Cell", out_dims);
- ctx->SetOutputDim("BatchedInput", {x_dims[0], wx_dims[1]});
- ctx->SetOutputDim("BatchedHidden", out_dims);
- ctx->SetOutputDim("BatchedCell", out_dims);
  ctx->ShareLoD("X", "Hidden");
  ctx->ShareLoD("X", "Cell");
  int xx_width;
  if (ctx->Attrs().Get<bool>("use_seq")) {
    xx_width = wx_dims[1];
  } else {
    xx_width = x_dims[1] > wx_dims[1] ? wx_dims[1] : x_dims[1];
+   PADDLE_ENFORCE(ctx->HasOutput("BatchedInput"),
+                  "Output(BatchedInput) of LSTM should not be null.");
+   PADDLE_ENFORCE(ctx->HasOutput("BatchedHidden"),
+                  "Output(BatchedHidden) of LSTM should not be null.");
+   PADDLE_ENFORCE(ctx->HasOutput("BatchedCell"),
+                  "Output(BatchedCell) of LSTM should not be null.");
+   PADDLE_ENFORCE(ctx->HasOutput("ReorderedH0"),
+                  "Output(ReorderedH0) of LSTM should not be null.");
+   PADDLE_ENFORCE(ctx->HasOutput("ReorderedC0"),
+                  "Output(ReorderedC0) of LSTM should not be null.");
+   ctx->SetOutputDim("BatchedInput", {x_dims[0], wx_dims[1]});
+   ctx->SetOutputDim("BatchedHidden", out_dims);
+   ctx->SetOutputDim("BatchedCell", out_dims);
  }
  ctx->SetOutputDim("XX", {x_dims[0], xx_width});
  ctx->ShareLoD("X", "XX");
...
...
@@ -242,8 +240,8 @@ class FuisonLSTMKernel : public framework::OpKernel<T> {
  auto* xx = ctx.Output<LoDTensor>("XX");                \
  auto* hidden_out = ctx.Output<LoDTensor>("Hidden");    \
  auto* cell_out = ctx.Output<LoDTensor>("Cell");        \
- bool use_peepholes = ctx.Attr<bool>("use_peepholes");  \
- bool is_reverse = ctx.Attr<bool>("is_reverse");
+ bool is_reverse = ctx.Attr<bool>("is_reverse");        \
+ bool use_peepholes = ctx.Attr<bool>("use_peepholes");

#define INIT_BASE_SIZES                \
  auto x_dims = x->dims(); /* T x M*/  \
...
...
@@ -254,172 +252,183 @@ class FuisonLSTMKernel : public framework::OpKernel<T> {
  const int D3 = D * 3;      \
  const int D4 = wh_dims[1];

#define INIT_BASE_INPUT_DATAS                                        \
  const T* x_data = x->data<T>();                                    \
  const T* wx_data = wx->data<T>();                                  \
  const T* wh_data = wh->data<T>();                                  \
  /* diagonal weight*/                                               \
  const T* wc_data = bias->data<T>() + D4;                           \
  /* for peephole only*/                                             \
  Tensor checked_cell;                                               \
  T* checked_cell_data = nullptr;                                    \
  auto place = ctx.GetPlace();                                       \
  if (use_peepholes) {                                               \
    /* w_ic * Ct-1, w_fc * Ct-1 ; w_oc * Ct => ih*/                  \
    checked_cell_data = checked_cell.mutable_data<T>({2, D}, place); \
  }

/// Compute LSTM
#define GEMM_WH_ADDON(bs, prev, out)                                          \
  blas.GEMM(CblasNoTrans, CblasNoTrans, bs, D4, D, static_cast<T>(1), prev, D, \
            wh_data, D4, static_cast<T>(1), out, D4)

// gates: W_ch, W_ih, W_fh, W_oh
#define GET_Ct(ct_1, gates, ct)                   \
  /* C_t = C_t-1 * fgated + cand_gated * igated*/ \
  act_cand(D, gates, gates);                      \
  blas.VMUL(D, gates, gates + D, gates + D);      \
  blas.VMUL(D, ct_1, gates + D2, gates + D2);     \
  blas.VADD(D, gates + D, gates + D2, ct)

#define GET_Ht(ct, gates, ht)        \
  /* H_t = act_cell(C_t) * ogated */ \
  act_cell(D, ct, gates + D2);       \
  blas.VMUL(D, gates + D2, gates + D3, ht)

#define GET_Ct_NOH0C0(gates, ct)     \
  /* C_t = igated * cgated*/         \
  act_gate(D, gates + D, gates + D); \
  act_cand(D, gates, gates);         \
  blas.VMUL(D, gates, gates + D, ct)

#define COMPUTE_CtHt_NOH0C0(gates, ct, ht) \
  GET_Ct_NOH0C0(gates, ct);                \
  act_gate(D, gates + D3, gates + D3);     \
  GET_Ht(ct, gates, ht)

#define COMPUTE_CtHt_PEEPHOLE_NOH0C0(gates, ct, ht) \
  GET_Ct_NOH0C0(gates, ct);                         \
  /* get outgated, put W_oc * C_t on igated */      \
  blas.VMUL(D, wc_data + D2, ct, gates + D);        \
  blas.VADD(D, gates + D, gates + D3, gates + D3);  \
  act_gate(D, gates + D3, gates + D3);              \
  GET_Ht(ct, gates, ht)

#define COMPUTE_CtHt(gates, ct_1, ct, ht) \
  act_gate(D3, gates + D, gates + D);     \
  GET_Ct(ct_1, gates, ct);                \
  GET_Ht(ct, gates, ht)

#define COMPUTE_CtHt_PEEPHOLE(gates, ct_1, ct, ht)        \
  /* get fgated and igated*/                              \
  blas.VMUL(D, wc_data, ct_1, checked_cell_data);         \
  blas.VMUL(D, wc_data + D, ct_1, checked_cell_data + D); \
  blas.VADD(D2, checked_cell_data, gates + D, gates + D); \
  act_gate(D2, gates + D, gates + D);                     \
  GET_Ct(ct_1, gates, ct);                                \
  /* get ogated*/                                         \
  blas.VMUL(D, wc_data + D2, ct, gates + D);              \
  blas.VADD(D, gates + D, gates + D3, gates + D3);        \
  act_gate(D, gates + D3, gates + D3);                    \
  GET_Ht(ct, gates, ht)
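Read together, the gate macros transcribe the standard peephole LSTM cell. With $\odot$ for the elementwise product and the gates buffer laid out as $[\tilde{C}, i, f, o]$ in strides of $D$, a sketch of what they compute:

\begin{aligned}
C_t &= f_t \odot C_{t-1} + i_t \odot \tilde{C}_t && \text{(GET\_Ct)}\\
H_t &= o_t \odot \operatorname{act\_cell}(C_t) && \text{(GET\_Ht)}\\
i_t, f_t &\mathrel{+}= W_{ic} \odot C_{t-1},\; W_{fc} \odot C_{t-1} \text{ before act\_gate} && \text{(peephole, via checked\_cell)}\\
o_t &\mathrel{+}= W_{oc} \odot C_t \text{ before act\_gate} && \text{(peephole)}
\end{aligned}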
  void SeqCompute(const framework::ExecutionContext& ctx) const {
    using DeviceContext = paddle::platform::CPUDeviceContext;
    INIT_BASE_INPUT_OUTPUT
    INIT_BASE_SIZES
    INIT_VEC_FUNC
+   INIT_BASE_INPUT_DATAS

    auto x_lod = x->lod();
    const int total_T = x_dims[0];
+   const int N = x_lod[0].size() - 1;  // batch size
-   const T* x_data = x->data<T>();
-   const int N = x_lod[0].size() - 1;
    const T* h0_data = h0 ? h0->data<T>() : nullptr;
    const T* c0_data = c0 ? c0->data<T>() : nullptr;
-   const T* bias_data = bias->data<T>();
-   const T* wc_data = bias_data + D4;  // w_ic, w_fc, w_oc
-   const T* wx_data = wx->data<T>();
-   const T* wh_data = wh->data<T>();
-   T* xx_data = xx->mutable_data<T>(ctx.GetPlace());
-   T* hidden_out_data = hidden_out->mutable_data<T>(ctx.GetPlace());
-   T* cell_out_data = cell_out->mutable_data<T>(ctx.GetPlace());
-   // use local variable
-   framework::DDim check_dims({3, D});
-   Tensor checked_cell;  // w_ic * Ct-1, w_fc * Ct-1, w_oc * Ct
-   auto checked_cell_data = checked_cell.mutable_data<T>(check_dims, ctx.GetPlace());
+   T* xx_data = xx->mutable_data<T>(place);
+   T* h_out_data = hidden_out->mutable_data<T>(place);
+   T* c_out_data = cell_out->mutable_data<T>(place);
    auto blas = math::GetBlas<DeviceContext, T>(ctx);
    math::FCCompute<DeviceContext, T>(blas, total_T, D4, M, x_data, wx_data,
                                      xx_data, bias->data<T>());
    int xx_offset = D4;
    int gate_offset = D;
    if (is_reverse) {
      const int offset = (total_T - 1) * D;
      xx_data = xx_data + offset * 4;
-     hidden_out_data = hidden_out_data + offset;
-     cell_out_data = cell_out_data + offset;
+     h_out_data = h_out_data + offset;
+     c_out_data = c_out_data + offset;
      xx_offset = -D4;
      gate_offset = -D;
    }
-   auto move_step = [&]() {
-     xx_data = xx_data + xx_offset;
-     hidden_out_data = hidden_out_data + gate_offset;
-     cell_out_data = cell_out_data + gate_offset;
-   };
-   for (int i = 0; i < N; ++i) {
-     int bid = is_reverse ? N - 1 - i : i;
-     int seq_len = x_lod[0][bid + 1] - x_lod[0][bid];
-     const T* prev_c_data = nullptr;
-     const T* prev_h_data = nullptr;
-     int tstart = 0;
-     if (h0_data) {
-       prev_h_data = h0_data + bid * D;
-       prev_c_data = c0_data + bid * D;
-     } else {
-       // If step == 0 and there is no initialized hidden state, that is to say
-       // the H0 is zeros. Then W_h * H_t-1 can be skipped

+#define MOVE_ONE_STEP                    \
+  prev_h_data = h_out_data;              \
+  prev_c_data = c_out_data;              \
+  xx_data = xx_data + xx_offset;         \
+  h_out_data = h_out_data + gate_offset; \
+  c_out_data = c_out_data + gate_offset

+#define PROCESS_H0C0_DEFINES                       \
+  int bid = is_reverse ? N - 1 - i : i;            \
+  int seq_len = x_lod[0][bid + 1] - x_lod[0][bid]; \
+  const T* prev_c_data = nullptr;                  \
+  const T* prev_h_data = nullptr;                  \
+  int tstart = 0

+#define PROCESS_H0C0_PEEPHOLE                                      \
+  PROCESS_H0C0_DEFINES;                                            \
+  if (h0_data) {                                                   \
+    prev_h_data = h0_data + bid * D;                               \
+    prev_c_data = c0_data + bid * D;                               \
+  } else {                                                         \
+    COMPUTE_CtHt_PEEPHOLE_NOH0C0(xx_data, c_out_data, h_out_data); \
+    MOVE_ONE_STEP;                                                 \
+    tstart = 1;                                                    \
+  }

-       // ~C_t
-       act_cand(D, xx_data, xx_data);
-       if (use_peepholes) {
-         // I_t, F_t
-         act_gate(D2, xx_data + D, xx_data + D);
-       } else {
-         // I_t, F_t, O_t
-         act_gate(D3, xx_data + D, xx_data + D);
-       }

+#define PROCESS_H0C0                                      \
+  PROCESS_H0C0_DEFINES;                                   \
+  if (h0_data) {                                          \
+    prev_h_data = h0_data + bid * D;                      \
+    prev_c_data = c0_data + bid * D;                      \
+  } else {                                                \
+    COMPUTE_CtHt_NOH0C0(xx_data, c_out_data, h_out_data); \
+    MOVE_ONE_STEP;                                        \
+    tstart = 1;                                           \
+  }

-       // C_t = I_t * ~C_t
-       blas.VMUL(D, xx_data, xx_data + D, cell_out_data);
-       if (use_peepholes) {
-         // + W_oc * C_t for peephole connection
-         blas.VMUL(D, wc_data + D2, cell_out_data, checked_cell_data + D2);
-         blas.VADD(D, xx_data + D3, checked_cell_data + D2, xx_data + D3);
-         // O_t
-         act_gate(D, xx_data + D3, xx_data + D3);
+   if (use_peepholes) {
+     for (int i = 0; i < N; ++i) {
+       PROCESS_H0C0_PEEPHOLE
+       for (int step = tstart; step < seq_len; ++step) {
+         GEMM_WH_ADDON(1, prev_h_data, xx_data);
+         COMPUTE_CtHt_PEEPHOLE(xx_data, prev_c_data, c_out_data, h_out_data);
+         MOVE_ONE_STEP;
        }
-       // hidden out= act_state(cellout) * outgate
-       act_cell(D, cell_out_data, xx_data + D2);
-       // H_t = O_t * act_state(C_t)
-       blas.VMUL(D, xx_data + D2, xx_data + D3, hidden_out_data);
-       // prev
-       prev_h_data = hidden_out_data;
-       prev_c_data = cell_out_data;
-       tstart = 1;
-       move_step();
      }
-     for (int step = tstart; step < seq_len; ++step) {
-       // + W_h * H_t-1
-       blas.GEMM(CblasNoTrans, CblasNoTrans, 1, D4, D, static_cast<T>(1),
-                 prev_h_data, D, wh_data, D4, static_cast<T>(1), xx_data, D4);
-       // ~C_t
-       act_cand(D, xx_data, xx_data);
-       if (use_peepholes) {
-         // + W_ic|W_fc * C_t-1 for peephole connection
-         blas.VMUL(D, wc_data, prev_c_data, checked_cell_data);
-         blas.VMUL(D, wc_data + D, prev_c_data, checked_cell_data + D);
-         blas.VADD(D2, xx_data + D, checked_cell_data, xx_data + D);
-         // I_t, F_t
-         act_gate(D2, xx_data + D, xx_data + D);
-       } else {
-         // I_t, F_t, O_t
-         act_gate(D3, xx_data + D, xx_data + D);
+   } else {
+     for (int i = 0; i < N; ++i) {
+       PROCESS_H0C0
+       for (int step = tstart; step < seq_len; ++step) {
+         GEMM_WH_ADDON(1, prev_h_data, xx_data);
+         COMPUTE_CtHt(xx_data, prev_c_data, c_out_data, h_out_data);
+         MOVE_ONE_STEP;
        }
-       // F_t * C_t-1
-       blas.VMUL(D, xx_data + D2, prev_c_data, xx_data + D2);
-       // I_t * ~C_t
-       blas.VMUL(D, xx_data, xx_data + D, xx_data + D);
-       // C_t = F_t * C_t-1 + I_t * ~C_t
-       blas.VADD(D, xx_data + D, xx_data + D2, cell_out_data);
-       if (use_peepholes) {
-         // + W_oc * C_t for peephole connection
-         blas.VMUL(D, wc_data + D2, cell_out_data, checked_cell_data + D2);
-         blas.VADD(D, xx_data + D3, checked_cell_data + D2, xx_data + D3);
-         // O_t
-         act_gate(D, xx_data + D3, xx_data + D3);
-       }
-       // hidden out= act_state(cellout) * outgate
-       act_cell(D, cell_out_data, xx_data + D2);
-       // H_t = O_t * act_state(C_t)
-       blas.VMUL(D, xx_data + D2, xx_data + D3, hidden_out_data);
-       // prev
-       prev_h_data = hidden_out_data;
-       prev_c_data = cell_out_data;
-       move_step();
-     }  // for each step in batch
-   }  // for each batch
+     }
+   }
+#undef PROCESS_H0C0_DEFINES
+#undef PROCESS_H0C0_PEEPHOLE
+#undef PROCESS_H0C0
+#undef MOVE_ONE_STEP
  }
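The is_reverse handling above is just a negative-stride pointer walk over the per-timestep rows. The same idiom on a plain array, stripped of the LSTM math (all names here are illustrative):

#include <cstdio>

int main() {
  const int T = 4, D = 2;   // timesteps, hidden width
  float h[T * D] = {};      // output buffer: T rows of D floats
  bool is_reverse = true;

  float* h_out = h;
  int gate_offset = D;      // forward: walk the rows upward
  if (is_reverse) {
    h_out += (T - 1) * D;   // start at the last row
    gate_offset = -D;       // and walk downward instead
  }
  for (int step = 0; step < T; ++step) {
    for (int d = 0; d < D; ++d) h_out[d] = static_cast<float>(step);
    h_out += gate_offset;   // same role as MOVE_ONE_STEP
  }
  // Prints "3 3 2 2 1 1 0 0": step 0 landed in the last row.
  for (int i = 0; i < T * D; ++i) std::printf("%.0f ", h[i]);
  return 0;
}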
  void BatchCompute(const framework::ExecutionContext& ctx) const {
    using DeviceContext = platform::CPUDeviceContext;
    INIT_BASE_INPUT_OUTPUT
-   if (x->lod()[0].size() == 2) {
+   // batch size == 1
+   if (x->lod()[0].size() == 2) {
      SeqCompute(ctx);
      return;
    }
    INIT_BASE_SIZES
    INIT_VEC_FUNC
+   INIT_BASE_INPUT_DATAS

    auto* reordered_h0 = ctx.Output<Tensor>("ReorderedH0");
    auto* reordered_c0 = ctx.Output<Tensor>("ReorderedC0");
    auto* batched_input = ctx.Output<LoDTensor>("BatchedInput");
    auto* batched_c_out = ctx.Output<LoDTensor>("BatchedCell");
    auto* batched_h_out = ctx.Output<LoDTensor>("BatchedHidden");
-   const T* x_data = x->data<T>();
-   const T* wx_data = wx->data<T>();
-   const T* wh_data = wh->data<T>();
-   const T* bias_data = bias->data<T>();
-   const T* wc_data = bias_data + D4;  // w_ic, w_fc, w_oc
-   auto place = ctx.GetPlace();
    T* xx_data = xx->mutable_data<T>(place);
    T* batched_input_data = batched_input->mutable_data<T>(place);
    T* batched_c_out_data = batched_c_out->mutable_data<T>(place);
...
...
@@ -427,12 +436,6 @@ class FuisonLSTMKernel : public framework::OpKernel<T> {
    hidden_out->mutable_data<T>(place);
    cell_out->mutable_data<T>(place);
-   // use local variable
-   framework::DDim check_dims({3, D});
-   Tensor checked_cell;  // w_ic * Ct-1, w_fc * Ct-1, w_oc * Ct
-   auto checked_cell_data = checked_cell.mutable_data<T>(check_dims, ctx.GetPlace());
    math::LoDTensor2BatchFunctor<DeviceContext, T> to_batch;
    auto& dev_ctx = ctx.template device_context<DeviceContext>();
    auto blas = math::GetBlas<DeviceContext, T>(dev_ctx);
...
...
@@ -454,27 +457,17 @@ class FuisonLSTMKernel : public framework::OpKernel<T> {
    reordered_h0->Resize({max_bs, D});
    reordered_c0->Resize({max_bs, D});
-   T* prev_batch_h_data = nullptr;
-   T* prev_batch_c_data = nullptr;
-   T* cur_batch_in_data = batched_input_data;
-   T* cur_batch_h_out_data = batched_h_out_data;
-   T* cur_batch_c_out_data = batched_c_out_data;
-   auto move_step = [&](int bs) {
-     cur_batch_in_data += bs * D4;
-     cur_batch_c_out_data += bs * D;
-     cur_batch_h_out_data += bs * D;
-   };
+   int tstart = 0;
+   T* prev_h_data = nullptr;
+   T* prev_c_data = nullptr;
    if (h0) {
      // reorder h0, c0
      T* reordered_h0_data = reordered_h0->mutable_data<T>(place);
      T* reordered_c0_data = reordered_c0->mutable_data<T>(place);
      const T* h0_data = h0->data<T>();
      const T* c0_data = c0->data<T>();
-     prev_batch_h_data = reordered_h0_data;
-     prev_batch_c_data = reordered_c0_data;
+     prev_h_data = reordered_h0_data;
+     prev_c_data = reordered_c0_data;
      size_t sz = sizeof(T) * D;
      for (int i = 0; i < max_bs; ++i) {
        std::memcpy(reordered_h0_data, h0_data + seq_order[i] * D, sz);
...
...
@@ -483,123 +476,80 @@ class FuisonLSTMKernel : public framework::OpKernel<T> {
        reordered_c0_data += D;
      }
    } else {
-     // Compute with no H0/C0
-     T* cur_in_data = cur_batch_in_data;
-     T* cur_c_out_data = cur_batch_c_out_data;
-     T* cur_h_out_data = cur_batch_h_out_data;
-     // If step == 0 and there is no initialized hidden state, that is to say
-     // the H0 is zeros. Then W_h * H_t-1 can be skiped
-     for (int i = 0; i < max_bs; ++i) {  // iterate each data in 1st batch
-       // ~C_t
-       act_cand(D, cur_in_data, cur_in_data);
+     // compute without h0, c0
+     T* cur_in_data = batched_input_data;
+     T* cur_h_out_data = batched_h_out_data;
+     T* cur_c_out_data = batched_c_out_data;
+     for (int i = 0; i < max_bs; ++i) {
+       GET_Ct_NOH0C0(cur_in_data, cur_c_out_data);
        if (use_peepholes) {
-         // I_t, F_t
-         act_gate(D2, cur_in_data + D, cur_in_data + D);
-       } else {
-         // I_t, F_t, O_t
-         act_gate(D3, cur_in_data + D, cur_in_data + D);
+         blas.VMUL(D, wc_data + D2, cur_c_out_data, cur_in_data + D);
+         blas.VADD(D, cur_in_data + D, cur_in_data + D3, cur_in_data + D3);
        }
-       // C_t = I_t * ~C_t
-       blas.VMUL(D, cur_in_data, cur_in_data + D, cur_c_out_data);
-       if (use_peepholes) {
-         // + W_oc * C_t for peephole connection
-         blas.VMUL(D, wc_data + D2, cur_c_out_data, checked_cell_data + D2);
-         blas.VADD(D, cur_in_data + D3, checked_cell_data + D2, cur_in_data + D3);
-         // O_t
-         act_gate(D, cur_in_data + D3, cur_in_data + D3);
-       }
-       // hidden out= act_state(cellout) * outgate
-       act_cell(D, cur_c_out_data, cur_in_data + D2);
-       // H_t = O_t * act_state(C_t)
-       blas.VMUL(D, cur_in_data + D2, cur_in_data + D3, cur_h_out_data);
-       // move to next data in the same batch
+       // O_t
+       act_gate(D, cur_in_data + D3, cur_in_data + D3);
+       GET_Ht(cur_c_out_data, cur_in_data, cur_h_out_data);
        cur_in_data += D4;
        cur_c_out_data += D;
        cur_h_out_data += D;
      }
-     // move to data for next timestep
-     prev_batch_h_data = cur_batch_h_out_data;
-     prev_batch_c_data = cur_batch_c_out_data;
-     move_step(max_bs);
+     tstart = 1;
+     prev_h_data = batched_h_out_data;
+     prev_c_data = batched_c_out_data;
    }
    const auto& batch_starts = batched_lod[0];
    const int max_seq_len = batch_starts.size() - 1;
-   for (int step = tstart; step < max_seq_len; ++step) {
-     const int cur_bs = batch_starts[step + 1] - batch_starts[step];
-     // + W_h * H_t-1
-     blas.GEMM(CblasNoTrans, CblasNoTrans, cur_bs, D4, D, static_cast<T>(1),
-               prev_batch_h_data, D, wh_data, D4, static_cast<T>(1),
-               cur_batch_in_data, D4);
-     T* cur_in_data = cur_batch_in_data;
-     T* cur_c_out_data = cur_batch_c_out_data;
-     T* cur_h_out_data = cur_batch_h_out_data;
-     T* prev_c_data = prev_batch_c_data;  // NULL if no C0 in step0
-     T* prev_h_data = prev_batch_h_data;  // NULL if no H0 in step0
-     auto next_data_in_batch = [&]() {
-       cur_in_data += D4;
-       cur_c_out_data += D;
-       cur_h_out_data += D;
-       prev_c_data = prev_c_data ? prev_c_data + D : nullptr;
-       prev_h_data = prev_h_data ? prev_h_data + D : nullptr;
-     };
-     for (int i = 0; i < cur_bs; ++i) {  // iterate each data in same batch
-       // ~C_t
-       act_cand(D, cur_in_data, cur_in_data);
+   const int offset = tstart * max_bs * D;
+   batched_input_data = batched_input_data + offset * 4;
+   batched_h_out_data = batched_h_out_data + offset;
+   batched_c_out_data = batched_c_out_data + offset;

+#define DEFINE_CUR                        \
+  T* cur_in_data = batched_input_data;    \
+  T* cur_prev_c_data = prev_c_data;       \
+  T* cur_c_out_data = batched_c_out_data; \
+  T* cur_h_out_data = batched_h_out_data

+#define MOVE_ONE_BATCH  \
+  cur_in_data += D4;    \
+  cur_prev_c_data += D; \
+  cur_c_out_data += D;  \
+  cur_h_out_data += D

+#define MOVE_ONE_STEP                  \
+  prev_c_data = batched_c_out_data;    \
+  prev_h_data = batched_h_out_data;    \
+  batched_c_out_data = cur_c_out_data; \
+  batched_h_out_data = cur_h_out_data; \
+  batched_input_data = cur_in_data

    if (use_peepholes) {
-     // + W_ic|W_fc * C_t-1 for peephole connection
-     blas.VMUL(D, wc_data, prev_c_data, checked_cell_data);
-     blas.VMUL(D, wc_data + D, prev_c_data, checked_cell_data + D);
-     blas.VADD(D2, cur_in_data + D, checked_cell_data, cur_in_data + D);
-     // I_t, F_t
-     act_gate(D2, cur_in_data + D, cur_in_data + D);
-   } else {
-     // I_t, F_t, O_t
-     act_gate(D3, cur_in_data + D, cur_in_data + D);
+     for (int step = tstart; step < max_seq_len; ++step) {
+       const int cur_bs = batch_starts[step + 1] - batch_starts[step];
+       GEMM_WH_ADDON(cur_bs, prev_h_data, batched_input_data);
+       DEFINE_CUR;
+       for (int i = 0; i < cur_bs; ++i) {
+         COMPUTE_CtHt_PEEPHOLE(cur_in_data, cur_prev_c_data, cur_c_out_data,
+                               cur_h_out_data);
+         MOVE_ONE_BATCH;
+       }
+       MOVE_ONE_STEP;
      }
-     // F_t * C_t-1
-     blas.VMUL(D, cur_in_data + D2, prev_c_data, cur_in_data + D2);
-     // I_t * ~C_t
-     blas.VMUL(D, cur_in_data, cur_in_data + D, cur_in_data + D);
-     // C_t = F_t * C_t-1 + I_t * ~C_t
-     blas.VADD(D, cur_in_data + D, cur_in_data + D2, cur_c_out_data);
-     if (use_peepholes) {
-       // + W_oc * C_t for peephole connection
-       blas.VMUL(D, wc_data + D2, cur_c_out_data, checked_cell_data + D2);
-       blas.VADD(D, cur_in_data + D3, checked_cell_data + D2, cur_in_data + D3);
-       // O_t
-       act_gate(D, cur_in_data + D3, cur_in_data + D3);
-     }
-     // hidden out= act_state(cellout) * outgate
-     act_cell(D, cur_c_out_data, cur_in_data + D2);
-     // H_t = O_t * act_state(C_t)
-     blas.VMUL(D, cur_in_data + D2, cur_in_data + D3, cur_h_out_data);
-     // move to next data in same batch
-     next_data_in_batch();
-   }
-   // move to data for next timestep
-   prev_batch_h_data = cur_batch_h_out_data;
-   prev_batch_c_data = cur_batch_c_out_data;
-   move_step(cur_bs);
-  }
+   } else {
+     for (int step = tstart; step < max_seq_len; ++step) {
+       const int cur_bs = batch_starts[step + 1] - batch_starts[step];
+       GEMM_WH_ADDON(cur_bs, prev_h_data, batched_input_data);
+       DEFINE_CUR;
+       for (int i = 0; i < cur_bs; ++i) {
+         COMPUTE_CtHt(cur_in_data, cur_prev_c_data, cur_c_out_data,
+                      cur_h_out_data);
+         MOVE_ONE_BATCH;
+       }
+       MOVE_ONE_STEP;
+     }
+   }
+#undef MOVE_ONE_STEP
+#undef MOVE_ONE_BATCH
+#undef DEFINE_CUR

    math::Batch2LoDTensorFunctor<DeviceContext, T> to_seq;
    batched_h_out->set_lod(batched_lod);
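The DEFINE_CUR / MOVE_ONE_BATCH / MOVE_ONE_STEP trio in the hunk above encodes a two-level cursor: within one timestep the cur_* pointers advance one sequence at a time, and at the end of the step the whole window becomes prev_* for the next timestep. A minimal sketch of that rotation with hypothetical sizes and a stand-in for the cell math (illustrative only):

#include <vector>

int main() {
  const int steps = 3, bs = 2, D = 4;  // timesteps, batch size, width
  std::vector<float> out(steps * bs * D, 0.f);

  float* batched_out = out.data();
  float* prev = nullptr;               // no H0: the first step has no prev
  for (int step = 0; step < steps; ++step) {
    float* cur = batched_out;          // DEFINE_CUR
    for (int i = 0; i < bs; ++i) {
      // Stand-in for COMPUTE_CtHt: depend on the previous step's i-th row.
      cur[0] = prev ? prev[i * D] + 1.f : 1.f;
      cur += D;                        // MOVE_ONE_BATCH
    }
    prev = batched_out;                // MOVE_ONE_STEP: the window rolls
    batched_out = cur;
  }
  return 0;
}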
...
...
@@ -615,6 +565,16 @@ class FuisonLSTMKernel : public framework::OpKernel<T> {
      BatchCompute(ctx);
    }
  }

#undef COMPUTE_CtHt_PEEPHOLE
#undef COMPUTE_CtHt
#undef GET_Ct_NOH0C0
#undef COMPUTE_CtHt_NOH0C0
#undef COMPUTE_CtHt_PEEPHOLE_NOH0C0
#undef GET_Ht
#undef GET_Ct
#undef GEMM_WH_ADDON
#undef INIT_BASE_INPUT_DATAS
#undef INIT_BASE_SIZES
#undef INIT_BASE_INPUT_OUTPUT
#undef INIT_VEC_FUNC
...
...
paddle/fluid/operators/layer_norm_op.cu
...
...
@@ -67,27 +67,27 @@ template <typename T, int BlockDim>
__global__ void LayerNormForward(const T* x, const T* scale, const T* bias,
                                 T* y, T* mean, T* var, float epsilon,
                                 int feature_size) {
- using BlockReduce = cub::BlockReduce<PairForLayerNorm<T>, BlockDim>;
+ using BlockReduce = cub::BlockReduce<PairForLayerNorm<double>, BlockDim>;
  __shared__ typename BlockReduce::TempStorage temp_storage;

  int beg_idx = blockIdx.x * feature_size + threadIdx.x;
  int end_idx = (blockIdx.x + 1) * feature_size;

  // Step 1: Reduce to calculate mean and var
- T mean_val = static_cast<T>(0);
- T var_val = static_cast<T>(0);
+ double mean_val = 0;
+ double var_val = 0;
  for (int i = beg_idx; i < end_idx; i += BlockDim) {
    T tmp = x[i];
    mean_val += tmp;
    var_val += (tmp * tmp);
  }
  auto pair = BlockReduce(temp_storage)
-                 .Reduce(PairForLayerNorm<T>(mean_val, var_val),
-                         PairForLayerNormAddFunctor<T>());
+                 .Reduce(PairForLayerNorm<double>(mean_val, var_val),
+                         PairForLayerNormAddFunctor<double>());
  if (threadIdx.x == 0) {
    auto tmp = pair.first_ / feature_size;
-   mean[blockIdx.x] = tmp;
-   var[blockIdx.x] = pair.second_ / feature_size - tmp * tmp;
+   mean[blockIdx.x] = static_cast<T>(tmp);
+   var[blockIdx.x] = static_cast<T>(pair.second_ / feature_size - tmp * tmp);
  }
  __syncthreads();
  mean_val = mean[blockIdx.x];
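The kernel accumulates the first two moments once and converts at the end; the reason the accumulators were widened to double is that the variance comes from the cancellation-prone identity

\mu = \frac{1}{n}\sum_{i=1}^{n} x_i, \qquad
\sigma^2 = \frac{1}{n}\sum_{i=1}^{n} x_i^2 \;-\; \mu^2 .

For float inputs with a large mean, $\tfrac{1}{n}\sum x_i^2$ and $\mu^2$ agree in their leading digits, so accumulating in double before the final static_cast<T> preserves the significant bits of their difference.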
...
...
paddle/fluid/operators/reshape_op.cc
...
...
@@ -246,6 +246,88 @@ class ReshapeGradKernel {
  }
};

// FIXME(zcd): reshape2 adds an intermediate output(XShape) based on reshape,
// the XShape is used to carry the shape and lod of X which will be used in
// reshape_grad, in this way, the framework can reuse the memory of X
// immediately the reshape_op is finished.
// Considering compatibility issues, we could not fix reshape_op
class Reshape2Op : public ReshapeOp {
 public:
  Reshape2Op(const std::string &type, const framework::VariableNameMap &inputs,
             const framework::VariableNameMap &outputs,
             const framework::AttributeMap &attrs)
      : ReshapeOp(type, inputs, outputs, attrs) {}

  void InferShape(framework::InferShapeContext *ctx) const override {
    ReshapeOp::InferShape(ctx);
    PADDLE_ENFORCE(ctx->HasOutput("XShape"),
                   "Output(XShape) of ReshapeOp should not be null.");
    const auto &x_dims = ctx->GetInputDim("X");
    std::vector<int64_t> xshape_dims(x_dims.size() + 1);
    xshape_dims[0] = 0;
    for (int i = 0; i < x_dims.size(); ++i) {
      xshape_dims[i + 1] = x_dims[i];
    }
    ctx->SetOutputDim("XShape", framework::make_ddim(xshape_dims));
    ctx->ShareLoD("X", /*->*/ "XShape");
  }
};

class Reshape2OpMaker : public ReshapeOpMaker {
 public:
  void Make() override {
    ReshapeOpMaker::Make();
    AddOutput("XShape",
              "XShape is just used to store the shape and lod of X, which will "
              "be used in FlattenGradOp.")
        .AsIntermediate();
  }
};

class Reshape2GradMaker : public framework::SingleGradOpDescMaker {
 public:
  using framework::SingleGradOpDescMaker::SingleGradOpDescMaker;

  std::unique_ptr<framework::OpDesc> Apply() const override {
    auto *grad_op = new framework::OpDesc();
    grad_op->SetType("reshape2_grad");
    grad_op->SetInput("XShape", Output("XShape"));
    grad_op->SetInput(framework::GradVarName("Out"), OutputGrad("Out"));
    grad_op->SetOutput(framework::GradVarName("X"), InputGrad("X"));
    grad_op->SetAttrMap(Attrs());
    return std::unique_ptr<framework::OpDesc>(grad_op);
  }
};

class Reshape2GradOp : public framework::OperatorWithKernel {
 public:
  Reshape2GradOp(const std::string &type,
                 const framework::VariableNameMap &inputs,
                 const framework::VariableNameMap &outputs,
                 const framework::AttributeMap &attrs)
      : OperatorWithKernel(type, inputs, outputs, attrs) {}

  void InferShape(framework::InferShapeContext *ctx) const override {
    PADDLE_ENFORCE(ctx->HasInput("XShape"), "Input(XShape) shouldn't be null.");
    PADDLE_ENFORCE(ctx->HasInput(framework::GradVarName("Out")),
                   "Input(Out@GRAD) shouldn't be null.");
    auto xshape_dims = ctx->GetInputDim("XShape");
    auto x_dims = framework::slice_ddim(xshape_dims, 1, xshape_dims.size());
    ctx->SetOutputDim(framework::GradVarName("X"), x_dims);
    ctx->ShareLoD("XShape", framework::GradVarName("X"));
  }

 protected:
  framework::OpKernelType GetExpectedKernelType(
      const framework::ExecutionContext &ctx) const override {
    return framework::OpKernelType(
        framework::ToDataType(
            ctx.Input<framework::LoDTensor>(framework::GradVarName("Out"))
                ->type()),
        ctx.device_context());
  }
};

}  // namespace operators
}  // namespace paddle

namespace ops = paddle::operators;
...
...
@@ -261,6 +343,17 @@ REGISTER_OP_CPU_KERNEL_FUNCTOR(reshape_grad, float, ops::ReshapeGradKernel,
                               ops::ReshapeGradKernel, int64_t,
                               ops::ReshapeGradKernel);
REGISTER_OPERATOR(reshape2, ops::Reshape2Op, ops::Reshape2OpMaker,
                  ops::Reshape2GradMaker);
REGISTER_OPERATOR(reshape2_grad, ops::Reshape2GradOp);
REGISTER_OP_CPU_KERNEL_FUNCTOR(reshape2, float, ops::ReshapeKernel, double,
                               ops::ReshapeKernel, int, ops::ReshapeKernel,
                               int64_t, ops::ReshapeKernel);
REGISTER_OP_CPU_KERNEL_FUNCTOR(reshape2_grad, float, ops::ReshapeGradKernel,
                               double, ops::ReshapeGradKernel, int,
                               ops::ReshapeGradKernel, int64_t,
                               ops::ReshapeGradKernel);

#ifdef PADDLE_WITH_CUDA
REGISTER_OP_CUDA_KERNEL_FUNCTOR(reshape, float, ops::ReshapeKernel, double,
                                ops::ReshapeKernel, int, ops::ReshapeKernel,
...
...
@@ -269,4 +362,11 @@ REGISTER_OP_CUDA_KERNEL_FUNCTOR(reshape_grad, float, ops::ReshapeGradKernel,
                                double, ops::ReshapeGradKernel, int,
                                ops::ReshapeGradKernel, int64_t,
                                ops::ReshapeGradKernel);
REGISTER_OP_CUDA_KERNEL_FUNCTOR(reshape2, float, ops::ReshapeKernel, double,
                                ops::ReshapeKernel, int, ops::ReshapeKernel,
                                int64_t, ops::ReshapeKernel);
REGISTER_OP_CUDA_KERNEL_FUNCTOR(reshape2_grad, float, ops::ReshapeGradKernel,
                                double, ops::ReshapeGradKernel, int,
                                ops::ReshapeGradKernel, int64_t,
                                ops::ReshapeGradKernel);
#endif
paddle/fluid/operators/squeeze_op.cc
...
...
@@ -181,6 +181,113 @@ class SqueezeGradOp : public framework::OperatorBase {
  }
};

// FIXME(zcd): squeeze2 adds an intermediate output(XShape) based on squeeze,
// the XShape is used to carry the shape and lod of X which will be used in
// squeeze_grad, in this way, the framework can reuse the memory of X
// immediately the squeeze2_op is finished.
// Considering compatibility issues, we could not fix squeeze2_op
class Squeeze2OpMaker : public SqueezeOpMaker {
 public:
  void Make() override {
    SqueezeOpMaker::Make();
    AddOutput("XShape",
              "XShape is just used to store the shape and lod of X, which will "
              "be used in SqueezeGradOp.")
        .AsIntermediate();
  }
};

class Squeeze2OpInferShape : public SqueezeOpInferShape {
 public:
  void operator()(framework::InferShapeContext *ctx) const override {
    SqueezeOpInferShape::operator()(ctx);
    PADDLE_ENFORCE(ctx->HasOutput("XShape"),
                   "Output(XShape) of Squeeze operator should not be null.");
    const auto &x_dims = ctx->GetInputDim("X");
    std::vector<int64_t> xshape_dims(x_dims.size() + 1);
    xshape_dims[0] = 0;
    for (int i = 0; i < x_dims.size(); ++i) {
      xshape_dims[i + 1] = x_dims[i];
    }
    ctx->SetOutputDim("XShape", framework::make_ddim(xshape_dims));
    ctx->ShareLoD("X", /*->*/ "XShape");
  }
};

class Squeeze2Op : public framework::OperatorBase {
 public:
  using OperatorBase::OperatorBase;

 private:
  void RunImpl(const framework::Scope &scope,
               const platform::Place &place) const override {
    auto &axes = Attr<std::vector<int>>("axes");
    auto x_dims = scope.FindVar(Input("X"))->Get<framework::LoDTensor>().dims();
    auto out_dims = Squeeze2OpInferShape::GetOutputShape(axes, x_dims);

    framework::AttributeMap attrs;
    attrs["shape"] = framework::vectorize2int(out_dims);
    // Invoke Reshape Op
    auto reshape_op = framework::OpRegistry::CreateOp(
        "reshape2", {{"X", {Input("X")}}, {"Shape", {}}},
        {{"Out", {Output("Out")}}, {"XShape", {Output("XShape")}}}, attrs);
    reshape_op->Run(scope, place);
  }
};
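Squeeze2OpInferShape::GetOutputShape is inherited from SqueezeOpInferShape and not shown in this hunk; a hedged sketch of the conventional squeeze rule, dropping the listed axes (which must have size 1) or every size-1 axis when the list is empty (the helper name is hypothetical):

#include <algorithm>
#include <cstdint>
#include <vector>

std::vector<int64_t> SqueezeShape(const std::vector<int>& axes,
                                  const std::vector<int64_t>& in) {
  std::vector<int64_t> out;
  for (int i = 0; i < static_cast<int>(in.size()); ++i) {
    bool listed = std::find(axes.begin(), axes.end(), i) != axes.end();
    // With no explicit axes, drop every size-1 dim; otherwise drop the
    // listed ones (which are expected to have size 1).
    bool drop = axes.empty() ? (in[i] == 1) : listed;
    if (!drop) out.push_back(in[i]);
  }
  return out;  // e.g. axes={}, {1, 3, 1, 5} -> {3, 5}
}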
class Squeeze2GradOpMaker : public framework::SingleGradOpDescMaker {
 public:
  using framework::SingleGradOpDescMaker::SingleGradOpDescMaker;

  std::unique_ptr<framework::OpDesc> Apply() const override {
    auto *grad_op = new framework::OpDesc();
    grad_op->SetType("squeeze2_grad");
    grad_op->SetInput("XShape", Output("XShape"));
    grad_op->SetInput(framework::GradVarName("Out"), OutputGrad("Out"));
    grad_op->SetOutput(framework::GradVarName("X"), InputGrad("X"));
    grad_op->SetAttrMap(Attrs());
    return std::unique_ptr<framework::OpDesc>(grad_op);
  }
};

class Squeeze2GradInferShape : public framework::InferShapeBase {
 public:
  void operator()(framework::InferShapeContext *context) const override {
    PADDLE_ENFORCE(context->HasInput("XShape"),
                   "Input(XShape) shouldn't be null.");
    PADDLE_ENFORCE(context->HasInput(framework::GradVarName("Out")),
                   "Input(Out@GRAD) shouldn't be null.");
    auto xshape_dims = context->GetInputDim("XShape");
    auto x_dims = framework::slice_ddim(xshape_dims, 1, xshape_dims.size());
    context->SetOutputDim(framework::GradVarName("X"), x_dims);
    context->ShareLoD("XShape", framework::GradVarName("X"));
  }
};

class Squeeze2GradOp : public framework::OperatorBase {
 public:
  using OperatorBase::OperatorBase;

 private:
  void RunImpl(const framework::Scope &scope,
               const platform::Place &place) const override {
    auto dx_name = Output(framework::GradVarName("X"));
    auto dout_name = Input(framework::GradVarName("Out"));
    auto xshape_name = Input("XShape");
    auto xshape_dims =
        scope.FindVar(xshape_name)->Get<framework::LoDTensor>().dims();
    auto x_dims = framework::slice_ddim(xshape_dims, 1, xshape_dims.size());

    framework::AttributeMap attrs;
    attrs["shape"] = framework::vectorize2int(x_dims);

    auto reshape_op = framework::OpRegistry::CreateOp(
        "reshape2", {{"X", {dout_name}}, {"Shape", {}}},
        {{"Out", {dx_name}}, {"XShape", {xshape_name}}}, attrs);
    reshape_op->Run(scope, place);
  }
};

}  // namespace operators
}  // namespace paddle
...
...
@@ -192,3 +299,8 @@ REGISTER_OPERATOR(squeeze, ops::SqueezeOp, ops::SqueezeOpMaker,
                  ops::SqueezeOpInferShape,
                  paddle::framework::DefaultGradOpDescMaker<true>);
REGISTER_OPERATOR(squeeze_grad, ops::SqueezeGradOp, ops::SqueezeGradInferShape);
REGISTER_OPERATOR(squeeze2, ops::Squeeze2Op, ops::Squeeze2OpMaker,
                  ops::Squeeze2OpInferShape, ops::Squeeze2GradOpMaker);
REGISTER_OPERATOR(squeeze2_grad, ops::Squeeze2GradOp,
                  ops::Squeeze2GradInferShape);
paddle/fluid/operators/transpose_op.cc
...
...
@@ -13,6 +13,7 @@ See the License for the specific language governing permissions and
limitations under the License. */

#include "paddle/fluid/operators/transpose_op.h"
#include <string>
#include <vector>

namespace paddle {
...
...
@@ -24,7 +25,7 @@ class TransposeOp : public framework::OperatorWithKernel {
 public:
  using framework::OperatorWithKernel::OperatorWithKernel;

  void InferShape(framework::InferShapeContext* ctx) const override {
    PADDLE_ENFORCE(ctx->HasInput("X"), "Input(X) should not be null");
    PADDLE_ENFORCE(ctx->HasOutput("Out"), "Output(Out) should not be null");
    auto x_dims = ctx->GetInputDim("X");
...
...
@@ -101,7 +102,7 @@ class TransposeOpGrad : public framework::OperatorWithKernel {
 public:
  using framework::OperatorWithKernel::OperatorWithKernel;

  void InferShape(framework::InferShapeContext* ctx) const override {
    PADDLE_ENFORCE(ctx->HasInput("X"), "Input(X) should not be null");
    PADDLE_ENFORCE(ctx->HasInput(framework::GradVarName("Out")),
                   "Input(Out@GRAD) should not be null");
...
@@ -113,6 +114,93 @@ class TransposeOpGrad : public framework::OperatorWithKernel {
}
};
// FIXME(zcd): transpose2 adds an intermediate output(XShape) based on
// transpose, the XShape is used to carry the shape and lod of X which
// will be used in transpose_grad, in this way, the framework can reuse
// the memory of X immediately the transpose2_op is finished.
// Considering compatibility issues, we could not fix transpose2_op
class
Transpose2Op
:
public
TransposeOp
{
public:
Transpose2Op
(
const
std
::
string
&
type
,
const
framework
::
VariableNameMap
&
inputs
,
const
framework
::
VariableNameMap
&
outputs
,
const
framework
::
AttributeMap
&
attrs
)
:
TransposeOp
(
type
,
inputs
,
outputs
,
attrs
)
{}
void
InferShape
(
framework
::
InferShapeContext
*
ctx
)
const
override
{
TransposeOp
::
InferShape
(
ctx
);
PADDLE_ENFORCE
(
ctx
->
HasOutput
(
"XShape"
),
"Output(XShape) should not be null"
);
const
auto
&
in_dims
=
ctx
->
GetInputDim
(
"X"
);
std
::
vector
<
int64_t
>
x_shape_dim
(
in_dims
.
size
()
+
1
);
x_shape_dim
[
0
]
=
0
;
for
(
int
i
=
0
;
i
<
in_dims
.
size
();
++
i
)
{
x_shape_dim
[
i
+
1
]
=
in_dims
[
i
];
}
ctx
->
SetOutputDim
(
"XShape"
,
framework
::
make_ddim
(
x_shape_dim
));
ctx
->
ShareLoD
(
"X"
,
/*->*/
"XShape"
);
}
protected:
framework
::
OpKernelType
GetExpectedKernelType
(
const
framework
::
ExecutionContext
&
ctx
)
const
override
{
return
framework
::
OpKernelType
(
framework
::
ToDataType
(
ctx
.
Input
<
framework
::
LoDTensor
>
(
"X"
)
->
type
()),
ctx
.
device_context
());
}
};
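TransposeOp::InferShape, reused by Transpose2Op above, permutes dimensions according to the axis attribute; the rule itself, sketched standalone (the helper name is illustrative):

#include <cstdint>
#include <vector>

// out_dims[i] = in_dims[perm[i]]; perm must be a permutation of 0..n-1.
std::vector<int64_t> TransposeShape(const std::vector<int>& perm,
                                    const std::vector<int64_t>& in) {
  std::vector<int64_t> out(in.size());
  for (size_t i = 0; i < perm.size(); ++i) {
    out[i] = in[perm[i]];  // e.g. perm={0, 2, 1}, {8, 3, 5} -> {8, 5, 3}
  }
  return out;
}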
class Transpose2OpMaker : public TransposeOpMaker {
 public:
  void Make() override {
    TransposeOpMaker::Make();
    AddOutput("XShape", "(Tensor)The output tensor.").AsIntermediate();
  }
};

class Transpose2GradMaker : public framework::SingleGradOpDescMaker {
 public:
  using framework::SingleGradOpDescMaker::SingleGradOpDescMaker;

  std::unique_ptr<framework::OpDesc> Apply() const override {
    auto *grad_op = new framework::OpDesc();
    grad_op->SetType("transpose2_grad");
    grad_op->SetInput("XShape", Output("XShape"));
    grad_op->SetInput(framework::GradVarName("Out"), OutputGrad("Out"));
    grad_op->SetOutput(framework::GradVarName("X"), InputGrad("X"));
    grad_op->SetAttrMap(Attrs());
    return std::unique_ptr<framework::OpDesc>(grad_op);
  }
};

class Transpose2OpGrad : public framework::OperatorWithKernel {
 public:
  using framework::OperatorWithKernel::OperatorWithKernel;

  void InferShape(framework::InferShapeContext *ctx) const override {
    PADDLE_ENFORCE(ctx->HasInput("XShape"),
                   "Input(XShape) should not be null");
    PADDLE_ENFORCE(ctx->HasInput(framework::GradVarName("Out")),
                   "Input(Out@GRAD) should not be null");
    if (ctx->HasOutput(framework::GradVarName("X"))) {
      auto xshape_dim = ctx->GetInputDim("XShape");
      auto x_shape_dim =
          framework::slice_ddim(xshape_dim, 1, xshape_dim.size());
      ctx->SetOutputDim(framework::GradVarName("X"), x_shape_dim);
      ctx->ShareLoD("XShape", framework::GradVarName("X"));
    }
  }

 protected:
  framework::OpKernelType GetExpectedKernelType(
      const framework::ExecutionContext &ctx) const override {
    return framework::OpKernelType(
        framework::ToDataType(
            ctx.Input<framework::LoDTensor>(framework::GradVarName("Out"))
                ->type()),
        ctx.device_context());
  }
};

}  // namespace operators
}  // namespace paddle
...
...
@@ -120,8 +208,20 @@ namespace ops = paddle::operators;
REGISTER_OPERATOR(transpose, ops::TransposeOp, ops::TransposeOpMaker,
                  paddle::framework::DefaultGradOpDescMaker<true>);
REGISTER_OPERATOR(transpose_grad, ops::TransposeOpGrad);
REGISTER_OP_CPU_KERNEL(
    transpose, ops::TransposeKernel<paddle::platform::CPUDeviceContext, float>);
REGISTER_OP_CPU_KERNEL(
    transpose_grad,
    ops::TransposeGradKernel<paddle::platform::CPUDeviceContext, float>);

REGISTER_OPERATOR(transpose2, ops::Transpose2Op, ops::Transpose2OpMaker,
                  ops::Transpose2GradMaker);
REGISTER_OPERATOR(transpose2_grad, ops::Transpose2OpGrad);
REGISTER_OP_CPU_KERNEL(
    transpose2,
    ops::TransposeKernel<paddle::platform::CPUDeviceContext, float>);
REGISTER_OP_CPU_KERNEL(
    transpose2_grad,
    ops::TransposeGradKernel<paddle::platform::CPUDeviceContext, float>);
paddle/fluid/operators/transpose_op.cu.cc
...
...
@@ -21,3 +21,10 @@ REGISTER_OP_CUDA_KERNEL(
REGISTER_OP_CUDA_KERNEL(
    transpose_grad,
    ops::TransposeGradKernel<paddle::platform::CUDADeviceContext, float>);

REGISTER_OP_CUDA_KERNEL(
    transpose2,
    ops::TransposeKernel<paddle::platform::CUDADeviceContext, float>);
REGISTER_OP_CUDA_KERNEL(
    transpose2_grad,
    ops::TransposeGradKernel<paddle::platform::CUDADeviceContext, float>);
paddle/fluid/operators/unsqueeze_op.cc
...
...
@@ -168,6 +168,112 @@ class UnsqueezeGradOp : public framework::OperatorBase {
  }
};

// FIXME(zcd): unsqueeze2 adds an intermediate output(XShape) based on
// unsqueeze, the XShape is used to carry the shape and lod of X which
// will be used in unsqueeze_grad, in this way, the framework can reuse
// the memory of X immediately the unsqueeze2_op is finished.
// Considering compatibility issues, we could not fix unsqueeze2_op
class Unsqueeze2OpInferShape : public UnsqueezeOpInferShape {
 public:
  void operator()(framework::InferShapeContext *ctx) const override {
    UnsqueezeOpInferShape::operator()(ctx);
    PADDLE_ENFORCE(ctx->HasOutput("XShape"),
                   "Output(XShape) of Unsqueeze operator should not be null.");
    const auto &x_dims = ctx->GetInputDim("X");
    std::vector<int64_t> xshape_dims(x_dims.size() + 1);
    xshape_dims[0] = 0;
    for (int i = 0; i < x_dims.size(); ++i) {
      xshape_dims[i + 1] = x_dims[i];
    }
    ctx->SetOutputDim("XShape", framework::make_ddim(xshape_dims));
    ctx->ShareLoD("X", /*->*/ "XShape");
  }
};

class Unsqueeze2OpMaker : public UnsqueezeOpMaker {
 public:
  void Make() override {
    UnsqueezeOpMaker::Make();
    AddOutput("XShape",
              "XShape is just used to store the shape and lod of X, which will "
              "be used in UnsqueezeGradOp.")
        .AsIntermediate();
  }
};

class Unsqueeze2Op : public framework::OperatorBase {
 public:
  using OperatorBase::OperatorBase;

 private:
  void RunImpl(const framework::Scope &scope,
               const platform::Place &place) const override {
    auto &axes = Attr<std::vector<int>>("axes");
    auto x_dims = scope.FindVar(Input("X"))->Get<framework::LoDTensor>().dims();
    auto out_dims = Unsqueeze2OpInferShape::GetOutputShape(axes, x_dims);

    framework::AttributeMap attrs;
    attrs["shape"] = framework::vectorize2int(out_dims);
    // Invoke Reshape op.
    auto reshape_op = framework::OpRegistry::CreateOp(
        "reshape2", {{"X", {Input("X")}}, {"Shape", {}}},
        {{"Out", {Output("Out")}}, {"XShape", {Output("XShape")}}}, attrs);
    reshape_op->Run(scope, place);
  }
};

class Unsqueeze2GradOpMaker : public framework::SingleGradOpDescMaker {
 public:
  using framework::SingleGradOpDescMaker::SingleGradOpDescMaker;

  std::unique_ptr<framework::OpDesc> Apply() const override {
    auto *grad_op = new framework::OpDesc();
    grad_op->SetType("unsqueeze2_grad");
    grad_op->SetInput("XShape", Output("XShape"));
    grad_op->SetInput(framework::GradVarName("Out"), OutputGrad("Out"));
    grad_op->SetOutput(framework::GradVarName("X"), InputGrad("X"));
    grad_op->SetAttrMap(Attrs());
    return std::unique_ptr<framework::OpDesc>(grad_op);
  }
};

class Unsqueeze2GradInferShape : public framework::InferShapeBase {
 public:
  void operator()(framework::InferShapeContext *context) const override {
    PADDLE_ENFORCE(context->HasInput("XShape"),
                   "Input(XShape) shouldn't be null.");
    PADDLE_ENFORCE(context->HasInput(framework::GradVarName("Out")),
                   "Input(Out@GRAD) shouldn't be null.");
    auto xshape_dims = context->GetInputDim("XShape");
    auto x_dims = framework::slice_ddim(xshape_dims, 1, xshape_dims.size());
    context->SetOutputDim(framework::GradVarName("X"), x_dims);
    context->ShareLoD("XShape", framework::GradVarName("X"));
  }
};

class Unsqueeze2GradOp : public framework::OperatorBase {
 public:
  using OperatorBase::OperatorBase;

 private:
  void RunImpl(const framework::Scope &scope,
               const platform::Place &place) const override {
    auto dx_name = Output(framework::GradVarName("X"));
    auto dout_name = Input(framework::GradVarName("Out"));
    auto xshape_name = Input("XShape");
    auto xshape_dims =
        scope.FindVar(xshape_name)->Get<framework::LoDTensor>().dims();
    auto x_dims = framework::slice_ddim(xshape_dims, 1, xshape_dims.size());

    framework::AttributeMap attrs;
    attrs["shape"] = framework::vectorize2int(x_dims);

    auto reshape_op = framework::OpRegistry::CreateOp(
        "reshape2", {{"X", {dout_name}}, {"Shape", {}}},
        {{"Out", {dx_name}}, {"XShape", {xshape_name}}}, attrs);
    reshape_op->Run(scope, place);
  }
};

}  // namespace operators
}  // namespace paddle
...
...
@@ -180,3 +286,8 @@ REGISTER_OPERATOR(unsqueeze, ops::UnsqueezeOp, ops::UnsqueezeOpMaker,
                  paddle::framework::DefaultGradOpDescMaker<true>);
REGISTER_OPERATOR(unsqueeze_grad, ops::UnsqueezeGradOp,
                  ops::UnsqueezeGradInferShape);
REGISTER_OPERATOR(unsqueeze2, ops::Unsqueeze2Op, ops::Unsqueeze2OpMaker,
                  ops::Unsqueeze2OpInferShape, ops::Unsqueeze2GradOpMaker);
REGISTER_OPERATOR(unsqueeze2_grad, ops::Unsqueeze2GradOp,
                  ops::Unsqueeze2GradInferShape);
paddle/fluid/platform/dynload/dynamic_loader.cc
...
...
@@ -121,6 +121,12 @@ static inline void* GetDsoHandleFromSearchPath(const std::string& search_root,
  if (nullptr == dso_handle) {
    LOG(WARNING) << "Failed to find dynamic library: " << dlPath << " ("
                 << dlerror() << ")";
+   if (dlPath.find("nccl") != std::string::npos) {
+     std::cout
+         << "You may need to install 'nccl2' from NVIDIA official website: "
+         << "https://developer.nvidia.com/nccl/nccl-download"
+         << "before install PaddlePaddle" << std::endl;
+   }
    dlPath = dso_name;
    dso_handle = GetDsoHandleFromDefaultPath(dlPath, dynload_flags);
  }
...
...
paddle/scripts/paddle_build.sh
...
...
@@ -115,6 +115,7 @@ function cmake_gen() {
        -DWITH_FLUID_ONLY=${WITH_FLUID_ONLY:-OFF}
        -DCMAKE_EXPORT_COMPILE_COMMANDS=ON
        -DWITH_CONTRIB=${WITH_CONTRIB:-ON}
        -DWITH_INFERENCE=${WITH_INFERENCE:-ON}
        -DWITH_ANAKIN=${WITH_ANAKIN:-OFF}
+       -DPY_VERSION=${PY_VERSION:-2.7}
    ========================================
...
...
@@ -144,6 +145,7 @@ EOF
        -DWITH_FLUID_ONLY=${WITH_FLUID_ONLY:-OFF} \
        -DCMAKE_EXPORT_COMPILE_COMMANDS=ON \
        -DWITH_CONTRIB=${WITH_CONTRIB:-ON} \
        -DWITH_INFERENCE=${WITH_INFERENCE:-ON} \
        -DWITH_ANAKIN=${WITH_ANAKIN:-OFF} \
+       -DPY_VERSION=${PY_VERSION:-2.7}
}
...
...
@@ -498,7 +500,7 @@ EOF
EOF
    if [[ ${WITH_GPU} == "ON" ]]; then
-       NCCL_DEPS="apt-get install -y --allow-downgrades libnccl2=2.1.2-1+cuda${CUDA_MAJOR} libnccl-dev=2.1.2-1+cuda${CUDA_MAJOR} &&"
+       NCCL_DEPS="apt-get install -y --allow-downgrades libnccl2=2.2.13-1+cuda${CUDA_MAJOR} libnccl-dev=2.2.13-1+cuda${CUDA_MAJOR} &&"
    else
        NCCL_DEPS=""
    fi
...
...
python/paddle/dataset/image.py
...
...
@@ -104,7 +104,7 @@ def batch_images_from_tar(data_file,
            pickle.dump(
                output,
                open('%s/batch_%d' % (out_path, file_id), 'wb'),
-               protocol=pickle.HIGHEST_PROTOCOL)
+               protocol=2)
            file_id += 1
            data = []
            labels = []
...
...
@@ -113,9 +113,7 @@ def batch_images_from_tar(data_file,
    output['label'] = labels
    output['data'] = data
-   pickle.dump(
-       output,
-       open('%s/batch_%d' % (out_path, file_id), 'wb'),
-       protocol=pickle.HIGHEST_PROTOCOL)
+   pickle.dump(
+       output, open('%s/batch_%d' % (out_path, file_id), 'wb'), protocol=2)

    with open(meta_file, 'a') as meta:
        for file in os.listdir(out_path):
...
...
python/paddle/fluid/layers/nn.py
...
...
@@ -4025,10 +4025,12 @@ def transpose(x, perm, name=None):
    helper = LayerHelper('transpose', **locals())
    out = helper.create_tmp_variable(x.dtype)
+   x_shape = helper.create_tmp_variable(x.dtype)
    helper.append_op(
-       type='transpose',
+       type='transpose2',
        inputs={'X': [x]},
-       outputs={'Out': [out]},
+       outputs={'Out': [out],
+                'XShape': [x_shape]},
        attrs={'axis': perm})
    return out
...
...
@@ -4520,13 +4522,15 @@ def reshape(x, shape, actual_shape=None, act=None, inplace=True, name=None):
            "Each dimension size given in shape must not be negtive "
            "except one unknown dimension.")
-   helper = LayerHelper("reshape", **locals())
+   helper = LayerHelper("reshape2", **locals())
    out = helper.create_tmp_variable(dtype=x.dtype)
+   x_shape = helper.create_tmp_variable(dtype=x.dtype)
    helper.append_op(
-       type="reshape",
+       type="reshape2",
        inputs=inputs,
        attrs={"shape": shape},
-       outputs={"Out": out})
+       outputs={"Out": out,
+                "XShape": x_shape})

    return helper.append_activation(out)
...
...
@@ -4570,11 +4574,13 @@ def squeeze(input, axes, name=None):
    """
    helper = LayerHelper("squeeze", **locals())
    out = helper.create_tmp_variable(dtype=input.dtype)
+   x_shape = helper.create_tmp_variable(dtype=input.dtype)
    helper.append_op(
-       type="squeeze",
+       type="squeeze2",
        inputs={"X": input},
        attrs={"axes": axes},
-       outputs={"Out": out})
+       outputs={"Out": out,
+                "XShape": x_shape})

    return out
...
...
@@ -4605,11 +4611,13 @@ def unsqueeze(input, axes, name=None):
    """
    helper = LayerHelper("unsqueeze", **locals())
    out = helper.create_tmp_variable(dtype=input.dtype)
+   x_shape = helper.create_tmp_variable(dtype=input.dtype)
    helper.append_op(
-       type="unsqueeze",
+       type="unsqueeze2",
        inputs={"X": input},
        attrs={"axes": axes},
-       outputs={"Out": out})
+       outputs={"Out": out,
+                "XShape": x_shape})

    return out
...
...
@@ -5811,10 +5819,12 @@ def flatten(x, axis=1, name=None):
        raise ValueError("The axis should be a int, and in range [0, rank(x)]")

    out = helper.create_tmp_variable(x.dtype)
+   x_shape = helper.create_tmp_variable(x.dtype)
    helper.append_op(
-       type='flatten',
+       type='flatten2',
        inputs={"X": x},
-       outputs={'Out': out},
+       outputs={'Out': out,
+                'XShape': x_shape},
        attrs={"axis": axis})
    return out
...
...
python/paddle/fluid/tests/book/high-level-api/fit_a_line/test_fit_a_line.py
...
...
@@ -47,14 +47,14 @@ def train_program():
    loss = fluid.layers.square_error_cost(input=y_predict, label=y)
    avg_loss = fluid.layers.mean(loss)
-   return avg_loss
+   return [avg_loss, y_predict]


def optimizer_func():
    return fluid.optimizer.SGD(learning_rate=0.001)


-def train(use_cuda, train_program, params_dirname):
+def train(use_cuda, train_program, params_dirname, inference_model_dirname):
    place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace()

    trainer = fluid.Trainer(
...
...
@@ -74,6 +74,8 @@ def train(use_cuda, train_program, params_dirname):
            '''
            if params_dirname is not None:
                trainer.save_params(params_dirname)
+               trainer.save_inference_model(inference_model_dirname,
+                                            ['x'], [1])
                trainer.stop()

    trainer.train(
...
...
@@ -99,15 +101,55 @@ def infer(use_cuda, inference_program, params_dirname=None):
    print("infer results: ", results[0])


+def infer_by_saved_model(use_cuda, save_dirname=None):
+    if save_dirname is None:
+        return
+
+    place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace()
+    exe = fluid.Executor(place)
+
+    inference_scope = fluid.core.Scope()
+    with fluid.scope_guard(inference_scope):
+        # Use fluid.io.load_inference_model to obtain the inference program desc,
+        # the feed_target_names (the names of variables that will be feeded
+        # data using feed operators), and the fetch_targets (variables that
+        # we want to obtain data from using fetch operators).
+        [inference_program, feed_target_names,
+         fetch_targets] = fluid.io.load_inference_model(save_dirname, exe)
+
+        # The input's dimension should be 2-D and the second dim is 13
+        # The input data should be >= 0
+        batch_size = 10
+        test_reader = paddle.batch(
+            paddle.dataset.uci_housing.test(), batch_size=batch_size)
+        test_data = next(test_reader())
+        test_feat = numpy.array(
+            [data[0] for data in test_data]).astype("float32")
+        test_label = numpy.array(
+            [data[1] for data in test_data]).astype("float32")
+
+        assert feed_target_names[0] == 'x'
+        results = exe.run(inference_program,
+                          feed={feed_target_names[0]: numpy.array(test_feat)},
+                          fetch_list=fetch_targets)
+        print("infer shape: ", results[0].shape)
+        print("infer results: ", results[0])
+        print("ground truth: ", test_label)
+
+
def main(use_cuda):
    if use_cuda and not fluid.core.is_compiled_with_cuda():
        return

    # Directory for saving the trained model
-   params_dirname = "fit_a_line.inference.model"
+   params_dirname = "fit_a_line.model"
+   inference_model_dirname = "fit_a_line.inference_model"

-   train(use_cuda, train_program, params_dirname)
+   train(use_cuda, train_program, params_dirname, inference_model_dirname)
    infer(use_cuda, inference_program, params_dirname)
+   infer_by_saved_model(use_cuda, inference_model_dirname)


class TestFitALine(unittest.TestCase):
...
...
python/paddle/fluid/tests/unittests/dist_transformer.py
...
...
@@ -36,6 +36,7 @@ import paddle.fluid as fluid
import paddle.fluid.layers as layers
from paddle.fluid import core
from test_dist_base import TestDistRunnerBase, runtime_main
+import paddle.compat as cpt
from paddle.compat import long_type

import hashlib
...
...
@@ -315,7 +316,8 @@ def pad_batch_data(insts,
    """
    return_list = []
    max_len = max(len(inst) for inst in insts)
-   num_token = reduce(lambda x, y: x + y,
+   num_token = six.moves.reduce(lambda x, y: x + y,
                       [len(inst) for inst in insts]) if return_num_token else 0
    # Any token included in dict can be used to pad, since the paddings' loss
    # will be masked out by weights and make no effect on parameter gradients.
...
...
@@ -328,7 +330,7 @@ def pad_batch_data(insts,
        return_list += [inst_weight.astype("float32").reshape([-1, 1])]
    else:  # position data
        inst_pos = np.array([
-           range(1, len(inst) + 1) + [0] * (max_len - len(inst))
+           list(range(1, len(inst) + 1)) + [0] * (max_len - len(inst))
            for inst in insts
        ])
        return_list += [inst_pos.astype("int64").reshape([-1, 1])]
...
...
@@ -385,10 +387,11 @@ def prepare_batch_input(insts, data_input_names, src_pad_idx, trg_pad_idx,
        return_num_token=True)

    data_input_dict = dict(
-       zip(data_input_names, [
-           src_word, src_pos, src_slf_attn_bias, trg_word, trg_pos,
-           trg_slf_attn_bias, trg_src_attn_bias, lbl_word, lbl_weight
-       ]))
+       list(
+           zip(data_input_names, [
+               src_word, src_pos, src_slf_attn_bias, trg_word, trg_pos,
+               trg_slf_attn_bias, trg_src_attn_bias, lbl_word, lbl_weight
+           ])))

    return data_input_dict, np.asarray([num_token], dtype="float32")
...
...
@@ -561,7 +564,7 @@ def train_loop(exe, train_progm, dev_count, sum_cost, avg_cost, lr_scheduler,
            np.log(TrainTaskConfig.label_smooth_eps / (
                ModelHyperParams.trg_vocab_size - 1) + 1e-20))
    init = False
-   for pass_id in xrange(TrainTaskConfig.pass_num):
+   for pass_id in six.moves.xrange(TrainTaskConfig.pass_num):
        pass_start_time = time.time()
        for batch_id, data in enumerate(train_data()):
            if batch_id >= 5:
...
...
@@ -587,11 +590,11 @@ def train_loop(exe, train_progm, dev_count, sum_cost, avg_cost, lr_scheduler,
                ModelHyperParams.eos_idx, ModelHyperParams.n_head,
                ModelHyperParams.d_model)
            total_num_token += num_token
-           feed_kv_pairs = data_input_dict.items()
+           feed_kv_pairs = list(data_input_dict.items())
            if TrainTaskConfig.local:
-               feed_kv_pairs += {
-                   lr_scheduler.learning_rate.name: lr_rate
-               }.items()
+               feed_kv_pairs += list({
+                   lr_scheduler.learning_rate.name: lr_rate
+               }.items())
            feed_list.append(dict(feed_kv_pairs))

            if not init:
...
...
@@ -873,6 +876,7 @@ class DataReader(object):
            f = tarfile.open(fpaths[0], "r")
            for line in f.extractfile(tar_fname):
+               line = cpt.to_text(line)
                fields = line.strip("\n").split(self._field_delimiter)
                if (not self._only_src and len(fields) == 2) or (
                        self._only_src and len(fields) == 1):
...
...
@@ -882,8 +886,9 @@ class DataReader(object):
                if not os.path.isfile(fpath):
                    raise IOError("Invalid file: %s" % fpath)

-               with open(fpath, "r") as f:
+               with open(fpath, "rb") as f:
                    for line in f:
+                       line = cpt.to_text(line)
                        fields = line.strip("\n").split(self._field_delimiter)
                        if (not self._only_src and len(fields) == 2) or (
                                self._only_src and len(fields) == 1):
...
...
@@ -892,8 +897,9 @@ class DataReader(object):
@
staticmethod
def
load_dict
(
dict_path
,
reverse
=
False
):
word_dict
=
{}
with
open
(
dict_path
,
"r"
)
as
fdict
:
with
open
(
dict_path
,
"r
b
"
)
as
fdict
:
for
idx
,
line
in
enumerate
(
fdict
):
line
=
cpt
.
to_text
(
line
)
if
reverse
:
word_dict
[
idx
]
=
line
.
strip
(
"
\n
"
)
else
:
...
...
@@ -1034,7 +1040,7 @@ def multi_head_attention(queries,
# size of the input as the output dimension size.
return
layers
.
reshape
(
x
=
trans_x
,
shape
=
map
(
int
,
[
0
,
0
,
trans_x
.
shape
[
2
]
*
trans_x
.
shape
[
3
]]
))
shape
=
list
(
map
(
int
,
[
0
,
0
,
trans_x
.
shape
[
2
]
*
trans_x
.
shape
[
3
]])
))
def
scaled_dot_product_attention
(
q
,
k
,
v
,
attn_bias
,
d_model
,
dropout_rate
):
"""
...
...
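The dist_transformer.py changes above all follow one Python 2/3 compatibility pattern: builtins that returned lists under Python 2 (`range`, `map`, `zip`, `dict.items`) return iterators under Python 3, `reduce` and `xrange` moved or disappeared, and bytes read from files must be decoded to text. A minimal standalone sketch of the pattern (not Paddle-specific; `insts` is an illustrative variable):

```python
import six

insts = [[1, 2, 3], [4, 5]]

# reduce is a builtin only on Python 2; six.moves.reduce works on both.
num_token = six.moves.reduce(lambda x, y: x + y,
                             [len(inst) for inst in insts])

# range() returns an iterator on Python 3, so it must be materialized
# before being concatenated with a plain list.
max_len = max(len(inst) for inst in insts)
positions = [list(range(1, len(inst) + 1)) + [0] * (max_len - len(inst))
             for inst in insts]

# xrange exists only on Python 2; six.moves.xrange maps to range on 3.
for pass_id in six.moves.xrange(2):
    pass

# dict.items() is a view on Python 3; wrap it in list() before
# concatenating with other key/value pairs.
feed_kv_pairs = list({"lr": 0.001}.items()) + list({"step": 1}.items())
```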
python/paddle/fluid/tests/unittests/op_test.py (View file @ 06180779)
@@ -249,7 +249,7 @@ class OpTest(unittest.TestCase):
             outs, _ = self._calc_output(place)
             return outs

-    def _calc_output(self, place, parallel=False):
+    def _calc_output(self, place, parallel=False, no_check_set=None):
         program = Program()
         block = program.global_block()

@@ -273,6 +273,8 @@ class OpTest(unittest.TestCase):
         # if not, fill the fetch_list by the user configured outputs in test.
         if len(fetch_list) == 0:
             for var_name, var in six.iteritems(outputs):
+                if no_check_set is not None and var_name in no_check_set:
+                    continue
                 if isinstance(var, list):
                     for v in var:
                         fetch_list.append(v)

@@ -291,11 +293,17 @@ class OpTest(unittest.TestCase):
                            return_numpy=False)
         return outs, fetch_list

-    def check_output_with_place(self, place, atol, equal_nan=False):
-        outs, fetch_list = self._calc_output(place)
+    def check_output_with_place(self,
+                                place,
+                                atol,
+                                no_check_set=None,
+                                equal_nan=False):
+        outs, fetch_list = self._calc_output(place, no_check_set=no_check_set)
         for out_name, out_dup in Operator.get_op_outputs(self.op_type):
             if out_name not in self.outputs:
                 continue
+            if no_check_set is not None and out_name in no_check_set:
+                continue

             def find_actual(target_name, fetch_list):
                 found = [

@@ -360,10 +368,10 @@ class OpTest(unittest.TestCase):
                 places.append(core.CUDAPlace(0))
         return places

-    def check_output(self, atol=1e-5, equal_nan=False):
+    def check_output(self, atol=1e-5, no_check_set=None, equal_nan=False):
         places = self._get_places()
         for place in places:
-            self.check_output_with_place(place, atol, equal_nan)
+            self.check_output_with_place(place, atol, no_check_set, equal_nan)

     def check_output_customized(self, checker):
         places = self._get_places()
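The new `no_check_set` argument threads from `check_output` through `check_output_with_place` into `_calc_output`, so the named outputs are neither fetched nor compared. It exists because the `*2` ops touched below (`flatten2`, `reshape2`, `transpose2`, ...) emit an auxiliary `XShape` output that only carries shape metadata for the grad op and holds no comparable data. A simplified sketch of the filtering, not the real OpTest internals:

```python
def build_fetch_list(outputs, no_check_set=None):
    # Mirror of the loop added in _calc_output: outputs named in
    # no_check_set are dropped before anything is fetched or compared.
    fetch_list = []
    for var_name, var in outputs.items():
        if no_check_set is not None and var_name in no_check_set:
            continue
        if isinstance(var, list):
            fetch_list.extend(var)
        else:
            fetch_list.append(var)
    return fetch_list


# 'XShape' carries only metadata, so it is excluded from the check.
assert build_fetch_list({"Out": "out_var", "XShape": "xshape_var"},
                        no_check_set=["XShape"]) == ["out_var"]
```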
python/paddle/fluid/tests/unittests/test_dist_base.py (View file @ 06180779)
@@ -55,6 +55,7 @@ class TestDistRunnerBase(object):
             pserver_prog = t.get_pserver_program(args.current_endpoint)
             startup_prog = t.get_startup_program(args.current_endpoint,
                                                  pserver_prog)
+
             place = fluid.CPUPlace()
             exe = fluid.Executor(place)
             exe.run(startup_prog)

@@ -147,6 +148,8 @@ def runtime_main(test_class):
 import paddle.compat as cpt
+import socket
+from contextlib import closing


 class TestDistBase(unittest.TestCase):

@@ -156,13 +159,19 @@ class TestDistBase(unittest.TestCase):
     def setUp(self):
         self._trainers = 2
         self._pservers = 2
-        self._ps_endpoints = "127.0.0.1:9123,127.0.0.1:9124"
+        self._ps_endpoints = "127.0.0.1:%s,127.0.0.1:%s" % (
+            self._find_free_port(), self._find_free_port())
         self._python_interp = "python"
         self._sync_mode = True
         self._mem_opt = False
         self._use_reduce = False
         self._setup_config()

+    def _find_free_port(self):
+        with closing(socket.socket(socket.AF_INET, socket.SOCK_STREAM)) as s:
+            s.bind(('', 0))
+            return s.getsockname()[1]
+
     def start_pserver(self, model_file, check_error_log):
         ps0_ep, ps1_ep = self._ps_endpoints.split(",")
         ps_cmd = "%s %s --role pserver --endpoints %s --trainer_id 0 --current_endpoint %s --trainers %d --is_dist"
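Replacing the hard-coded endpoints `127.0.0.1:9123,127.0.0.1:9124` with `_find_free_port` keeps parallel CI jobs from colliding: binding a socket to port 0 makes the OS pick an unused ephemeral port, which the test then reads back. The same trick in isolation:

```python
import socket
from contextlib import closing


def find_free_port():
    # Port 0 asks the OS for any unused ephemeral port; getsockname()
    # reports which one was assigned before the socket is closed.
    with closing(socket.socket(socket.AF_INET, socket.SOCK_STREAM)) as s:
        s.bind(('', 0))
        return s.getsockname()[1]


ps_endpoints = "127.0.0.1:%s,127.0.0.1:%s" % (find_free_port(),
                                              find_free_port())
```

There is still a small race (the port is released before the pserver rebinds it), but in practice this is far more robust than fixed port numbers.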
python/paddle/fluid/tests/unittests/test_flatten_op.py (View file @ 06180779)
@@ -22,14 +22,17 @@ from op_test import OpTest

 class TestFlattenOp(OpTest):
     def setUp(self):
-        self.op_type = "flatten"
+        self.op_type = "flatten2"
         self.init_test_case()
         self.inputs = {"X": np.random.random(self.in_shape).astype("float32")}
         self.init_attrs()
-        self.outputs = {"Out": self.inputs["X"].reshape(self.new_shape)}
+        self.outputs = {
+            "Out": self.inputs["X"].reshape(self.new_shape),
+            "XShape": np.random.random(self.in_shape).astype("float32")
+        }

     def test_check_output(self):
-        self.check_output()
+        self.check_output(no_check_set=["XShape"])

     def test_check_grad(self):
         self.check_grad(["X"], "Out")
python/paddle/fluid/tests/unittests/test_fusion_lstm_op.py (View file @ 06180779)
@@ -53,12 +53,11 @@ class TestFusionLSTMOp(OpTest):
         self.M = 8
         self.D = 16
         self.has_initial_state = False
+        self.use_peepholes = False
         self.is_reverse = False
         self.act_gate = 'sigmoid'
         self.act_cell = 'tanh'
         self.act_cand = 'tanh'
-        self.use_peepholes = False
-        self.use_seq = False
         self.set_conf()
         T = sum(self.lod[0])

@@ -108,7 +107,6 @@ class TestFusionLSTMOp(OpTest):
         }
         self.attrs = {
             'use_peepholes': self.use_peepholes,
-            'use_seq': self.use_seq,
             'is_reverse': self.is_reverse,
             'gate_activation': self.act_gate,
             'cell_activation': self.act_cell,

@@ -178,50 +176,18 @@ class TestFusionLSTMOpPeepholesReverse(TestFusionLSTMOp):
         self.is_reverse = True


-class TestFusionLSTMOpPoopholesBS1(TestFusionLSTMOp):
+class TestFusionLSTMOpPeepholesInitReverse(TestFusionLSTMOp):
     def set_conf(self):
         self.use_peepholes = True
-        self.lod = [[3]]
-        self.D = 16
-
-
-class TestFusionLSTMOpSeqInit(TestFusionLSTMOp):
-    def set_conf(self):
-        self.use_seq = True
-        self.has_initial_state = True
-
-
-class TestFusionLSTMOpSeqReverse(TestFusionLSTMOp):
-    def set_conf(self):
-        self.use_seq = True
-        self.is_reverse = True
-
-
-class TestFusionLSTMOpSeqInitReverse(TestFusionLSTMOp):
-    def set_conf(self):
-        self.use_seq = True
-        self.has_initial_state = True
-        self.is_reverse = True
+        self.has_initial_state = True
+        self.is_reverse = True


-class TestFusionLSTMOpSeqPeepholes(TestFusionLSTMOp):
+class TestFusionLSTMOpPeepholesBS1(TestFusionLSTMOp):
     def set_conf(self):
-        self.use_seq = True
         self.use_peepholes = True
-
-
-class TestFusionLSTMOpSeqPeepholesInit(TestFusionLSTMOp):
-    def set_conf(self):
-        self.use_seq = True
-        self.use_peepholes = True
-        self.has_initial_state = True
-
-
-class TestFusionLSTMOpSeqPeepholesReverse(TestFusionLSTMOp):
-    def set_conf(self):
-        self.use_seq = True
-        self.use_peepholes = True
-        self.is_reverse = True
+        self.lod = [[2]]
+        self.D = 8


 if __name__ == '__main__':
python/paddle/fluid/tests/unittests/test_parallel_executor_fetch_feed.py (View file @ 06180779)
@@ -85,6 +85,7 @@ class TestFetchOp(unittest.TestCase):
                 assert not math.isnan(np.sum(ret[i])) and \
                     not math.isinf(np.sum(ret[i]))

+    @unittest.skip(reason="CI timeout")
     def test_fetch_op(self):
         tst_reader = paddle.batch(flowers.test(use_xmap=False), batch_size=16)
         tst_reader_iter = tst_reader()

@@ -139,6 +140,7 @@ class TestFeedParallel(unittest.TestCase):
             if batch_id == 2:
                 break

+    @unittest.skip(reason="CI timeout")
     def test_feed_op(self):
         os.environ['CPU_NUM'] = str(4)
         if core.is_compiled_with_cuda():
python/paddle/fluid/tests/unittests/test_prelu_op.py (View file @ 06180779)
@@ -16,6 +16,7 @@ from __future__ import print_function

 import unittest
 import numpy as np
+import six
 from op_test import OpTest

@@ -62,17 +63,20 @@ class PReluTest(OpTest):
 # TODO(minqiyang): Resume these test cases after fixing Python3 CI job issues
-# class TestCase1(PReluTest):
-#     def initTestCase(self):
-#         self.attrs = {'mode': "all"}
-
-# class TestCase2(PReluTest):
-#     def initTestCase(self):
-#         self.attrs = {'mode': "channel"}
-
-# class TestCase3(PReluTest):
-#     def initTestCase(self):
-#         self.attrs = {'mode': "element"}
+if six.PY2:
+
+    class TestCase1(PReluTest):
+        def initTestCase(self):
+            self.attrs = {'mode': "all"}
+
+    class TestCase2(PReluTest):
+        def initTestCase(self):
+            self.attrs = {'mode': "channel"}
+
+    class TestCase3(PReluTest):
+        def initTestCase(self):
+            self.attrs = {'mode': "element"}

 if __name__ == "__main__":
     unittest.main()
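Two disabling idioms appear in this commit: `@unittest.skip(reason=...)` above turns a single test off everywhere, while the `if six.PY2:` block here defines the PRelu test classes only under Python 2, so the Python 3 runner never even collects them. A minimal sketch of the class-gating idiom:

```python
import unittest

import six


class Base(unittest.TestCase):
    def test_trivial(self):
        self.assertTrue(True)


if six.PY2:
    # Collected and run only under Python 2; invisible to Python 3.
    class OnlyOnPy2(Base):
        pass
```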
python/paddle/fluid/tests/unittests/test_reshape_op.py (View file @ 06180779)
@@ -22,106 +22,39 @@ from op_test import OpTest

 class TestReshapeOp(OpTest):
     def setUp(self):
-        ori_shape = (2, 25)
-        new_shape = (5, 10)
-
-        self.op_type = "reshape"
-        self.inputs = {"X": np.random.random(ori_shape).astype("float32")}
-        self.attrs = {"shape": new_shape}
-        self.outputs = {"Out": self.inputs["X"].reshape(new_shape)}
-
-    def test_check_output(self):
-        self.check_output()
-
-    def test_check_grad(self):
-        self.check_grad(["X"], "Out")
-
-
-class TestReshapeOpDimInfer1(OpTest):
-    def setUp(self):
-        ori_shape = (5, 10)
-        new_shape = (5, -1, 5)
-
-        self.op_type = "reshape"
-        self.inputs = {"X": np.random.random(ori_shape).astype("float32")}
-        self.attrs = {"shape": new_shape}
-        self.outputs = {"Out": self.inputs["X"].reshape(self.attrs["shape"])}
-
-    def test_check_output(self):
-        self.check_output()
-
-    def test_check_grad(self):
-        self.check_grad(["X"], "Out")
-
-
-class TestReshapeOpDimInfer2(OpTest):
-    def setUp(self):
-        ori_shape = (2, 2, 6)
-        new_shape = (2, 0, 3, -1)
-        infered_shape = (2, 2, 3, -1)
-
-        self.op_type = "reshape"
-        self.inputs = {"X": np.random.random(ori_shape).astype("float32")}
-        self.attrs = {"shape": new_shape}
-        self.outputs = {"Out": self.inputs["X"].reshape(infered_shape)}
-
-    def test_check_output(self):
-        self.check_output()
-
-    def test_check_grad(self):
-        self.check_grad(["X"], "Out")
-
-
-class TestReshapeOpInplace(OpTest):
-    def setUp(self):
-        ori_shape = (2, 25)
-        new_shape = (5, 10)
-
-        self.op_type = "reshape"
-        self.inputs = {"X": np.random.random(ori_shape).astype("float32")}
-        self.attrs = {"shape": new_shape}
-        self.outputs = {"Out": self.inputs["X"].reshape(new_shape)}
-
-    def test_check_output(self):
-        self.check_output()
-
-    def test_check_grad(self):
-        self.check_grad(["X"], "Out")
-
-
-class TestReshapeOpDimInferInplace1(OpTest):
-    def setUp(self):
-        ori_shape = (5, 10)
-        new_shape = (5, -1, 5)
-
-        self.op_type = "reshape"
-        self.inputs = {"X": np.random.random(ori_shape).astype("float32")}
-        self.attrs = {"shape": new_shape}
-        self.outputs = {"Out": self.inputs["X"].reshape(new_shape)}
-
-
-class TestReshapeOpDimInferInplace2(OpTest):
-    def setUp(self):
-        ori_shape = (2, 2, 6)
-        new_shape = (2, 0, 3, -1)
-        infered_shape = (2, 2, 3, -1)
-
-        self.op_type = "reshape"
-        self.inputs = {"X": np.random.random(ori_shape).astype("float32")}
-        self.attrs = {"shape": new_shape}
-        self.outputs = {"Out": self.inputs["X"].reshape(infered_shape)}
+        self.init_data()
+        self.op_type = "reshape2"
+        self.inputs = {"X": np.random.random(self.ori_shape).astype("float32")}
+        self.attrs = {"shape": self.new_shape}
+        self.outputs = {
+            "Out": self.inputs["X"].reshape(self.infered_shape),
+            'XShape': np.random.random(self.ori_shape).astype("float32")
+        }
+
+    def init_data(self):
+        self.ori_shape = (2, 25)
+        self.new_shape = (5, 10)
+        self.infered_shape = (5, 10)
+
+    def test_check_output(self):
+        self.check_output(no_check_set=['XShape'])
+
+    def test_check_grad(self):
+        self.check_grad(["X"], "Out")
+
+
+class TestReshapeOpDimInfer1(TestReshapeOp):
+    def init_data(self):
+        self.ori_shape = (5, 10)
+        self.new_shape = (5, -1, 5)
+        self.infered_shape = (5, -1, 5)
+
+
+class TestReshapeOpDimInfer2(TestReshapeOp):
+    def init_data(self):
+        self.ori_shape = (2, 2, 6)
+        self.new_shape = (2, 0, 3, -1)
+        self.infered_shape = (2, 2, 3, -1)


 class TestReshapeOpWithInputShape(OpTest):

@@ -130,20 +63,23 @@ class TestReshapeOpWithInputShape(OpTest):
         new_shape = (0, -1, 5)
         actual_shape = (2, 3, 5)

-        self.op_type = "reshape"
+        self.op_type = "reshape2"
         self.inputs = {
             "X": np.random.random(ori_shape).astype("float32"),
             "Shape": np.array(
                 actual_shape, dtype="int32")
         }
         self.attrs = {"shape": new_shape}
-        self.outputs = {"Out": self.inputs["X"].reshape(actual_shape)}
+        self.outputs = {
+            "Out": self.inputs["X"].reshape(actual_shape),
+            'XShape': np.random.random(ori_shape).astype("float32")
+        }

     def test_check_output(self):
-        self.check_output()
+        self.check_output(no_check_set=['XShape'])

     def test_check_grad(self):
-        self.check_grad(["X"], "Out")
+        self.check_grad(["X"], "Out", sum_outputs=["Out"])


 if __name__ == "__main__":
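The rewrite above collapses six near-duplicate reshape test classes into one `TestReshapeOp` whose shape constants live in an overridable `init_data`, so each variant shrinks to a three-line subclass. The structure in miniature (a sketch, not the Paddle classes):

```python
import unittest

import numpy as np


class ReshapeCase(unittest.TestCase):
    def setUp(self):
        self.init_data()
        self.x = np.random.random(self.ori_shape).astype("float32")

    def init_data(self):
        self.ori_shape = (2, 25)
        self.new_shape = (5, 10)

    def test_element_count_preserved(self):
        # reshape never changes the element count; -1 dims are inferred.
        self.assertEqual(self.x.reshape(self.new_shape).size, self.x.size)


class ReshapeDimInferCase(ReshapeCase):
    def init_data(self):
        self.ori_shape = (5, 10)
        self.new_shape = (5, -1, 5)  # numpy infers the -1 dimension as 2
```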
python/paddle/fluid/tests/unittests/test_squeeze_op.py (View file @ 06180779)
@@ -23,14 +23,17 @@ from op_test import OpTest

 # Correct: General.
 class TestSqueezeOp(OpTest):
     def setUp(self):
-        self.op_type = "squeeze"
+        self.op_type = "squeeze2"
         self.init_test_case()
         self.inputs = {"X": np.random.random(self.ori_shape).astype("float32")}
         self.init_attrs()
-        self.outputs = {"Out": self.inputs["X"].reshape(self.new_shape)}
+        self.outputs = {
+            "Out": self.inputs["X"].reshape(self.new_shape),
+            "XShape": np.random.random(self.ori_shape).astype("float32")
+        }

     def test_check_output(self):
-        self.check_output()
+        self.check_output(no_check_set=['XShape'])

     def test_check_grad(self):
         self.check_grad(["X"], "Out")
python/paddle/fluid/tests/unittests/test_transpose_op.py (View file @ 06180779)
@@ -22,16 +22,19 @@ from op_test import OpTest

 class TestTransposeOp(OpTest):
     def setUp(self):
         self.initTestCase()
-        self.op_type = "transpose"
+        self.op_type = "transpose2"
         self.inputs = {'X': np.random.random(self.shape).astype("float32")}
         self.attrs = {'axis': list(self.axis)}
-        self.outputs = {'Out': self.inputs['X'].transpose(self.axis)}
+        self.outputs = {
+            'XShape': np.random.random(self.shape).astype("float32"),
+            'Out': self.inputs['X'].transpose(self.axis)
+        }

     def test_check_output(self):
-        self.check_output()
+        self.check_output(no_check_set=['XShape'])

     def test_check_grad(self):
-        self.check_grad(['X'], 'Out')
+        self.check_grad(['X'], 'Out', sum_outputs=['Out'])

     def initTestCase(self):
         self.shape = (3, 4)
python/paddle/fluid/tests/unittests/test_unsqueeze_op.py (View file @ 06180779)
@@ -24,13 +24,16 @@ from op_test import OpTest

 class TestUnsqueezeOp(OpTest):
     def setUp(self):
         self.init_test_case()
-        self.op_type = "unsqueeze"
+        self.op_type = "unsqueeze2"
         self.inputs = {"X": np.random.random(self.ori_shape).astype("float32")}
         self.init_attrs()
-        self.outputs = {"Out": self.inputs["X"].reshape(self.new_shape)}
+        self.outputs = {
+            "Out": self.inputs["X"].reshape(self.new_shape),
+            "XShape": np.random.random(self.ori_shape).astype("float32")
+        }

     def test_check_output(self):
-        self.check_output()
+        self.check_output(no_check_set=["XShape"])

     def test_check_grad(self):
         self.check_grad(["X"], "Out")
python/paddle/fluid/trainer.py (View file @ 06180779)
@@ -431,6 +431,28 @@ class Trainer(object):
             exe = executor.Executor(self.place)
             io.save_persistables(exe, dirname=param_path)

+    def save_inference_model(self, param_path, feeded_var_names,
+                             target_var_indexes):
+        """
+        Save model for cpp inference into :code:`param_path`.
+
+        Args:
+            param_path(str): The path to save parameters.
+            feeded_var_names(list(str)): The name of the vars that you
+                need to feed in before run program.
+            target_var_indexes(list(int)): the index of target var that
+                you need to return in trainer.train_func.
+
+        Returns:
+            None
+        """
+        with self._prog_and_scope_guard():
+            exe = executor.Executor(self.place)
+            target_vars = [
+                self.train_func_outputs[index] for index in target_var_indexes
+            ]
+            io.save_inference_model(param_path, feeded_var_names, target_vars,
+                                    exe)
+
     @contextlib.contextmanager
     def _prog_and_scope_guard(self):
         with framework.program_guard(
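A hypothetical call site for the new `Trainer.save_inference_model` (the `trainer` object and the `"x"` feed name are illustrative, not from the commit): after training finishes, the chosen outputs of the train function are exported together with their feed list, matching the `fit_a_line.inference_model` directory used by the test at the top of this diff.

```python
# Hypothetical usage sketch; assumes an existing fluid.Trainer whose
# train_func returns the prediction as its first output.
trainer.save_inference_model(
    param_path="fit_a_line.inference_model",
    feeded_var_names=["x"],      # vars to feed before running the program
    target_var_indexes=[0])      # indexes into trainer.train_func_outputs
```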
python/paddle/fluid/transpiler/details/program_utils.py (View file @ 06180779)
@@ -153,7 +153,7 @@ def block_to_code(block, block_idx):
     indent += 1
     # sort all vars
-    all_vars = sorted(block.vars.iteritems(), key=lambda x: x[0])
+    all_vars = sorted(six.iteritems(block.vars), key=lambda x: x[0])

     for var in all_vars:
         print("{}{}".format(get_indent_space(indent), variable_to_code(var[1])))
python/paddle/fluid/transpiler/distribute_transpiler.py (View file @ 06180779)
@@ -300,7 +300,7 @@ class DistributeTranspiler(object):
             input_deps = grad_name_to_send_dummy_out.values()

             program.global_block().append_op(
                 type="send_barrier",
-                inputs={"X": input_deps},
+                inputs={"X": list(input_deps)},
                 outputs={"Out": send_barrier_out},
                 attrs={
                     "endpoints": pserver_endpoints,

@@ -455,7 +455,7 @@ class DistributeTranspiler(object):
             if len(splited_var) <= 1:
                 continue
             # NOTE: if enable memory optimization, origin vars maybe removed.
-            if startup_program.global_block().vars.has_key(varname):
+            if varname in startup_program.global_block().vars:
                 orig_param = startup_program.global_block().vars[varname]
             else:
                 origin_param_var = self.origin_program.global_block().vars[
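Both transpiler fixes are Python 3 casualties: `dict.has_key` was removed (the `in` operator works on both versions), and `dict.values()` now returns a lazy view, so it is materialized with `list(...)` before being passed as an op input. For example:

```python
grad_name_to_send_dummy_out = {"w0@GRAD": "dummy_0", "w1@GRAD": "dummy_1"}

# Python 3 removed dict.has_key(); membership tests use `in` instead.
assert "w0@GRAD" in grad_name_to_send_dummy_out

# .values() is a view on Python 3; wrap it in list() when a concrete
# sequence is required downstream (e.g. as the "X" input of send_barrier).
input_deps = list(grad_name_to_send_dummy_out.values())
```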