s920243400 / PaddleDetection (forked from PaddlePaddle / PaddleDetection)
Commit 9e3155e0
Authored on Dec 27, 2018 by minqiyang

    Merge branch 'develop' of https://github.com/PaddlePaddle/Paddle into imperative_mnist

    test=develop

Parents: 6bb84490, 3e840842

Showing 78 changed files with 2230 additions and 1127 deletions (+2230 -1127)
Changed files:

Dockerfile  +38 -38
cmake/external/mkldnn.cmake  +2 -2
cmake/inference_lib.cmake  +1 -1
paddle/fluid/framework/CMakeLists.txt  +9 -4
paddle/fluid/framework/data_device_transform_test.cu  +1 -0
paddle/fluid/framework/details/eager_deletion_op_handle.cc  +1 -1
paddle/fluid/framework/details/multi_devices_graph_pass.cc  +198 -207
paddle/fluid/framework/details/multi_devices_graph_pass.h  +11 -8
paddle/fluid/framework/details/variable_visitor.cc  +2 -2
paddle/fluid/framework/executor.cc  +1 -1
paddle/fluid/framework/ir/CMakeLists.txt  +1 -0
paddle/fluid/framework/ir/conv_affine_channel_fuse_pass.cc  +222 -0
paddle/fluid/framework/ir/conv_affine_channel_fuse_pass.h  +49 -0
paddle/fluid/framework/ir/graph.cc  +0 -58
paddle/fluid/framework/ir/graph_pattern_detector.cc  +72 -0
paddle/fluid/framework/ir/graph_pattern_detector.h  +32 -0
paddle/fluid/framework/ir/multi_batch_merge_pass.cc  +25 -4
paddle/fluid/framework/operator.cc  +14 -17
paddle/fluid/framework/operator.h  +6 -1
paddle/fluid/framework/parallel_executor.cc  +2 -3
paddle/fluid/framework/scope.cc  +1 -3
paddle/fluid/framework/var_type.h  +20 -22
paddle/fluid/framework/var_type_inference_test.cc  +1 -1
paddle/fluid/framework/var_type_traits.cc  +119 -0
paddle/fluid/framework/var_type_traits.h  +195 -0
paddle/fluid/framework/var_type_traits_test.cc  +120 -0
paddle/fluid/framework/variable.h  +35 -31
paddle/fluid/framework/variable_test.cc  +12 -11
paddle/fluid/imperative/layer.cc  +76 -139
paddle/fluid/imperative/layer.h  +77 -29
paddle/fluid/imperative/tracer.h  +98 -42
paddle/fluid/inference/analysis/argument.h  +1 -0
paddle/fluid/inference/analysis/ir_pass_manager.cc  +2 -0
paddle/fluid/inference/analysis/ir_passes/tensorrt_subgraph_pass.cc  +24 -4
paddle/fluid/inference/api/analysis_config.cc  +7 -3
paddle/fluid/inference/api/analysis_predictor.cc  +1 -0
paddle/fluid/inference/api/details/reset_tensor_array.cc  +1 -1
paddle/fluid/inference/api/details/reset_tensor_array.h  +6 -3
paddle/fluid/inference/api/helper.h  +10 -0
paddle/fluid/inference/api/paddle_analysis_config.h  +12 -1
paddle/fluid/inference/api/paddle_pass_builder.h  +7 -5
paddle/fluid/inference/tests/api/analyzer_lac_tester.cc  +1 -3
paddle/fluid/inference/tests/api/analyzer_mm_dnn_tester.cc  +9 -30
paddle/fluid/inference/tests/api/analyzer_ner_tester.cc  +11 -27
paddle/fluid/inference/tests/api/analyzer_seq_conv1_tester.cc  +15 -61
paddle/fluid/inference/tests/api/tester_helper.h  +12 -0
paddle/fluid/operators/clip_by_norm_op.h  +1 -1
paddle/fluid/operators/controlflow/while_op.cc  +3 -4
paddle/fluid/operators/conv_fusion_op.cu.cc  +1 -3
paddle/fluid/operators/cudnn_lstm_op.cu.cc  +4 -237
paddle/fluid/operators/cudnn_rnn_cache.h  +255 -0
paddle/fluid/operators/distributed_ops/split_ids_op.h  +1 -1
paddle/fluid/operators/elementwise/elementwise_mul_op.h  +1 -1
paddle/fluid/operators/fill_constant_op.cc  +29 -51
paddle/fluid/operators/fill_constant_op.cu.cc  +22 -0
paddle/fluid/operators/fill_constant_op.h  +64 -0
paddle/fluid/operators/optimizers/adadelta_op.h  +4 -2
paddle/fluid/operators/optimizers/adagrad_op.h  +2 -1
paddle/fluid/operators/optimizers/adam_op.h  +2 -1
paddle/fluid/operators/optimizers/adamax_op.h  +4 -2
paddle/fluid/operators/optimizers/decayed_adagrad_op.h  +4 -2
paddle/fluid/operators/optimizers/ftrl_op.h  +4 -2
paddle/fluid/operators/optimizers/momentum_op.h  +1 -1
paddle/fluid/operators/optimizers/sgd_op.cu  +2 -1
paddle/fluid/operators/sum_mkldnn_op.cc  +1 -1
paddle/fluid/operators/sum_op.cc  +1 -1
paddle/fluid/operators/sum_op.h  +1 -1
paddle/fluid/pybind/imperative.cc  +1 -4
paddle/fluid/pybind/pybind.cc  +3 -5
paddle/scripts/paddle_build.sh  +10 -8
python/paddle/fluid/framework.py  +16 -18
python/paddle/fluid/imperative/base.py  +1 -2
python/paddle/fluid/layer_helper.py  +17 -6
python/paddle/fluid/parallel_executor.py  +2 -2
python/paddle/fluid/tests/book/high-level-api/image_classification/test_image_classification_resnet.py  +7 -5
python/paddle/fluid/tests/unittests/test_dist_se_resnext.py  +15 -0
python/paddle/fluid/tests/unittests/test_imperative.py  +3 -1
python/paddle/fluid/tests/unittests/test_weight_decay.py  +188 -0
Dockerfile
@@ -94,52 +94,52 @@ RUN localedef -i en_US -f UTF-8 en_US.UTF-8
 # specify sphinx version as 1.5.6 and remove -U option for [pip install -U
 # sphinx-rtd-theme] since -U option will cause sphinx being updated to newest
 # version(1.7.1 for now), which causes building documentation failed.
-RUN pip3 install -U wheel && \
-    pip3 install -U docopt PyYAML sphinx==1.5.6 && \
-    pip3 install sphinx-rtd-theme==0.1.9 recommonmark && \
-    pip3.6 install -U wheel && \
-    pip3.6 install -U docopt PyYAML sphinx==1.5.6 && \
-    pip3.6 install sphinx-rtd-theme==0.1.9 recommonmark && \
-    pip3.7 install -U wheel && \
-    pip3.7 install -U docopt PyYAML sphinx==1.5.6 && \
-    pip3.7 install sphinx-rtd-theme==0.1.9 recommonmark && \
+RUN pip3 --no-cache-dir install -U wheel && \
+    pip3 --no-cache-dir install -U docopt PyYAML sphinx==1.5.6 && \
+    pip3 --no-cache-dir install sphinx-rtd-theme==0.1.9 recommonmark && \
+    pip3.6 --no-cache-dir install -U wheel && \
+    pip3.6 --no-cache-dir install -U docopt PyYAML sphinx==1.5.6 && \
+    pip3.6 --no-cache-dir install sphinx-rtd-theme==0.1.9 recommonmark && \
+    pip3.7 --no-cache-dir install -U wheel && \
+    pip3.7 --no-cache-dir install -U docopt PyYAML sphinx==1.5.6 && \
+    pip3.7 --no-cache-dir install sphinx-rtd-theme==0.1.9 recommonmark && \
     easy_install -U pip && \
-    pip install -U pip setuptools wheel && \
-    pip install -U docopt PyYAML sphinx==1.5.6 && \
-    pip install sphinx-rtd-theme==0.1.9 recommonmark
+    pip --no-cache-dir install -U pip setuptools wheel && \
+    pip --no-cache-dir install -U docopt PyYAML sphinx==1.5.6 && \
+    pip --no-cache-dir install sphinx-rtd-theme==0.1.9 recommonmark

-RUN pip3 install 'pre-commit==1.10.4' 'ipython==5.3.0' && \
-    pip3 install 'ipykernel==4.6.0' 'jupyter==1.0.0' && \
-    pip3 install opencv-python && \
-    pip3.6 install 'pre-commit==1.10.4' 'ipython==5.3.0' && \
-    pip3.6 install 'ipykernel==4.6.0' 'jupyter==1.0.0' && \
-    pip3.6 install opencv-python && \
-    pip3.7 install 'pre-commit==1.10.4' 'ipython==5.3.0' && \
-    pip3.7 install 'ipykernel==4.6.0' 'jupyter==1.0.0' && \
-    pip3.7 install opencv-python && \
-    pip install 'pre-commit==1.10.4' 'ipython==5.3.0' && \
-    pip install 'ipykernel==4.6.0' 'jupyter==1.0.0' && \
-    pip install opencv-python
+RUN pip3 --no-cache-dir install 'pre-commit==1.10.4' 'ipython==5.3.0' && \
+    pip3 --no-cache-dir install 'ipykernel==4.6.0' 'jupyter==1.0.0' && \
+    pip3 --no-cache-dir install opencv-python && \
+    pip3.6 --no-cache-dir install 'pre-commit==1.10.4' 'ipython==5.3.0' && \
+    pip3.6 --no-cache-dir install 'ipykernel==4.6.0' 'jupyter==1.0.0' && \
+    pip3.6 --no-cache-dir install opencv-python && \
+    pip3.7 --no-cache-dir install 'pre-commit==1.10.4' 'ipython==5.3.0' && \
+    pip3.7 --no-cache-dir install 'ipykernel==4.6.0' 'jupyter==1.0.0' && \
+    pip3.7 --no-cache-dir install opencv-python && \
+    pip --no-cache-dir install 'pre-commit==1.10.4' 'ipython==5.3.0' && \
+    pip --no-cache-dir install 'ipykernel==4.6.0' 'jupyter==1.0.0' && \
+    pip --no-cache-dir install opencv-python

 #For docstring checker
-RUN pip3 install pylint pytest astroid isort
-RUN pip3.6 install pylint pytest astroid isort
-RUN pip3.7 install pylint pytest astroid isort
-RUN pip install pylint pytest astroid isort LinkChecker
+RUN pip3 --no-cache-dir install pylint pytest astroid isort
+RUN pip3.6 --no-cache-dir install pylint pytest astroid isort
+RUN pip3.7 --no-cache-dir install pylint pytest astroid isort
+RUN pip --no-cache-dir install pylint pytest astroid isort LinkChecker

 COPY ./python/requirements.txt /root/
-RUN pip3 install -r /root/requirements.txt
-RUN pip3.6 install -r /root/requirements.txt
-RUN pip3.7 install -r /root/requirements.txt
-RUN pip install -r /root/requirements.txt
+RUN pip3 --no-cache-dir install -r /root/requirements.txt
+RUN pip3.6 --no-cache-dir install -r /root/requirements.txt
+RUN pip3.7 --no-cache-dir install -r /root/requirements.txt
+RUN pip --no-cache-dir install -r /root/requirements.txt

 # To fix https://github.com/PaddlePaddle/Paddle/issues/1954, we use
 # the solution in https://urllib3.readthedocs.io/en/latest/user-guide.html#ssl-py2
-RUN apt-get install -y libssl-dev libffi-dev
-RUN pip3 install certifi urllib3[secure]
-RUN pip3.6 install certifi urllib3[secure]
-RUN pip3.7 install certifi urllib3[secure]
-RUN pip install certifi urllib3[secure]
+RUN apt-get install -y libssl-dev libffi-dev && apt-get clean -y
+RUN pip3 --no-cache-dir install certifi urllib3[secure]
+RUN pip3.6 --no-cache-dir install certifi urllib3[secure]
+RUN pip3.7 --no-cache-dir install certifi urllib3[secure]
+RUN pip --no-cache-dir install certifi urllib3[secure]

 # Install woboq_codebrowser to /woboq
cmake/external/mkldnn.cmake
@@ -106,10 +106,10 @@ else(WIN32)
     SET(MKLDNN_SHARED_LIB ${MKLDNN_INSTALL_DIR}/libmkldnn.so.0)
     ADD_CUSTOM_COMMAND(OUTPUT ${MKLDNN_SHARED_LIB}
             COMMAND ${CMAKE_COMMAND} -E copy ${MKLDNN_LIB} ${MKLDNN_SHARED_LIB}
-            DEPENDS mkldnn)
+            DEPENDS mkldnn shared_mkldnn)
 endif(WIN32)
 ADD_CUSTOM_TARGET(mkldnn_shared_lib ALL DEPENDS ${MKLDNN_SHARED_LIB})
+ADD_DEPENDENCIES(mkldnn_shared_lib ${MKLDNN_PROJECT} mkldnn)
 IF(WITH_C_API)
   INSTALL(FILES ${MKLDNN_SHARED_LIB} DESTINATION lib)
 ENDIF()
cmake/inference_lib.cmake
@@ -136,7 +136,7 @@ if (WITH_MKLDNN)
         copy(mkldnn_lib
           SRCS ${MKLDNN_INC_DIR} ${MKLDNN_SHARED_LIB}
           DSTS ${dst_dir} ${dst_dir}/lib
-          DEPS mkldnn)
+          DEPS mkldnn_shared_lib)
 endif ()
paddle/fluid/framework/CMakeLists.txt
@@ -68,18 +68,23 @@ cc_library(garbage_collector SRCS garbage_collector.cc DEPS device_context memor
 cc_library(reader SRCS reader.cc DEPS lod_tensor ddim)
 cc_test(reader_test SRCS reader_test.cc DEPS reader)

-cc_test(variable_test SRCS variable_test.cc)
-
 cc_library(threadpool SRCS threadpool.cc DEPS enforce)
 cc_test(threadpool_test SRCS threadpool_test.cc DEPS threadpool)

-cc_library(scope SRCS scope.cc DEPS glog threadpool)
+cc_library(var_type_traits SRCS var_type_traits DEPS lod_tensor selected_rows framework_proto)
+if (WITH_GPU)
+  target_link_libraries(var_type_traits dynload_cuda)
+endif()
+cc_test(var_type_traits_test SRCS var_type_traits_test.cc DEPS var_type_traits)
+
+cc_library(scope SRCS scope.cc DEPS glog threadpool var_type_traits)
 cc_library(scope_pool SRCS scope_pool.cc DEPS scope)
 cc_test(scope_test SRCS scope_test.cc DEPS scope)
+cc_test(variable_test SRCS variable_test.cc DEPS tensor var_type_traits)

 cc_library(data_device_transform SRCS data_device_transform.cc DEPS tensor)
 nv_test(data_device_transform_test SRCS data_device_transform_test.cu
-        DEPS operator op_registry device_context math_function)
+        DEPS operator op_registry device_context math_function scope)

 if(WITH_GPU)
   if(WIN32)
paddle/fluid/framework/data_device_transform_test.cu
@@ -17,6 +17,7 @@ limitations under the License. */
 #include "paddle/fluid/framework/lod_tensor.h"
 #include "paddle/fluid/framework/op_info.h"
 #include "paddle/fluid/framework/op_registry.h"
+#include "paddle/fluid/framework/scope.h"
 #include "paddle/fluid/operators/elementwise/elementwise_op_function.h"
 #include "paddle/fluid/operators/math/math_function.h"
 #include "paddle/fluid/platform/device_context.h"
paddle/fluid/framework/details/eager_deletion_op_handle.cc
@@ -88,7 +88,7 @@ void EagerDeletionOpHandle::RunImpl() {
     }
   } else {
     PADDLE_THROW("Type %s of %s is not supported eager deletion",
-                 var->Type().name(), name);
+                 framework::ToTypeName(var->Type()), name);
   }
 }
paddle/fluid/framework/details/multi_devices_graph_pass.cc
(diff collapsed, not expanded in this capture)
paddle/fluid/framework/details/multi_devices_graph_pass.h
@@ -45,7 +45,7 @@ class MultiDevSSAGraphBuilder : public ir::Pass {
 #endif

   int GetVarDeviceID(
-      const ir::Graph &graph, const std::string &varname,
+      const std::string &varname,
       const std::unordered_map<std::string, int> &sharded_var_device) const;

   bool IsScaleLossOp(ir::Node *node) const;
@@ -57,12 +57,6 @@ class MultiDevSSAGraphBuilder : public ir::Pass {
       ir::Graph *result, ir::Node *node,
       std::unordered_map<std::string, int> *sharded_var_device) const;

-  std::vector<std::string> FindDistTrainSendVars(
-      const std::vector<ir::Node *> &nodes) const;
-
-  std::vector<std::string> FindDistTrainRecvVars(
-      const std::vector<ir::Node *> &nodes) const;
-
   void CreateComputationalOps(ir::Graph *result, ir::Node *node,
                               size_t num_places) const;
@@ -77,7 +71,7 @@ class MultiDevSSAGraphBuilder : public ir::Pass {
                                 int dev_id) const;

-  int GetOpDeviceID(const ir::Graph &graph, ir::Node *node,
+  int GetOpDeviceID(ir::Node *node,
                     const std::unordered_map<std::string, int> &sharded_var_device) const;

   void InsertAllReduceOp(ir::Graph *result, const std::string &og) const;
@@ -100,6 +94,15 @@ class MultiDevSSAGraphBuilder : public ir::Pass {
   void SetCommunicationContext(OpHandleBase *op_handle,
                                const platform::Place &p) const;

+  std::vector<ir::Node *> SortForReduceMode(
+      const std::vector<ir::Node *> &) const;
+
+  int GetOpDeviceID(
+      ir::Node *node,
+      const std::unordered_map<std::string, int> &shared_var_device,
+      std::unordered_map<std::string, std::vector<ir::Node *>> *delay_ops) const;
+
   mutable std::string loss_var_name_;
   mutable std::vector<platform::Place> places_;
   mutable std::vector<Scope *> local_scopes_;
paddle/fluid/framework/details/variable_visitor.cc
@@ -24,7 +24,7 @@ static void VisitVariable(Variable* var, Func* func) {
   } else if (var->IsType<SelectedRows>()) {
     (*func)(var->GetMutable<SelectedRows>());
   } else {
-    PADDLE_THROW("Not supported type %s", var->Type().name());
+    PADDLE_THROW("Not supported type %s", ToTypeName(var->Type()));
   }
 }
@@ -35,7 +35,7 @@ static void VisitVariable(const Variable& var, Func* func) {
   } else if (var.IsType<SelectedRows>()) {
     (*func)(var.Get<SelectedRows>());
   } else {
-    PADDLE_THROW("Not supported type %s", var.Type().name());
+    PADDLE_THROW("Not supported type %s", ToTypeName(var.Type()));
   }
 }
paddle/fluid/framework/executor.cc
@@ -119,7 +119,7 @@ static void DeleteUnusedTensors(
     }
   } else {
     PADDLE_THROW("Type %s of %s is not supported eager deletion",
-                 var->Type().name(), name);
+                 framework::ToTypeName(var->Type()), name);
   }
 }
paddle/fluid/framework/ir/CMakeLists.txt
@@ -45,6 +45,7 @@ pass_library(is_test_pass base)
 pass_library(conv_elementwise_add_act_fuse_pass inference)
 pass_library(conv_elementwise_add2_act_fuse_pass inference)
 pass_library(conv_elementwise_add_fuse_pass inference)
+pass_library(conv_affine_channel_fuse_pass inference)
 if(WITH_MKLDNN)
     pass_library(mkldnn_placement_pass base)
     pass_library(depthwise_conv_mkldnn_pass base)
paddle/fluid/framework/ir/conv_affine_channel_fuse_pass.cc
new file mode 100644

// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "paddle/fluid/framework/ir/conv_affine_channel_fuse_pass.h"
#include <functional>
#include <string>
#include <vector>
#include "paddle/fluid/framework/lod_tensor.h"
#include "paddle/fluid/operators/math/cpu_vec.h"
#include "paddle/fluid/platform/enforce.h"

namespace paddle {
namespace framework {
namespace ir {

#define GET_CONV_BN_NODES(pattern_name)                                     \
  /* OPERATORS */                                                           \
  GET_IR_NODE_FROM_SUBGRAPH(conv, conv, pattern_name);                      \
  GET_IR_NODE_FROM_SUBGRAPH(affine_channel, affine_channel, pattern_name);  \
  /* CONV inputs */                                                         \
  GET_IR_NODE_FROM_SUBGRAPH(conv_weight, conv_weight, pattern_name);        \
  /* CONV outputs */                                                        \
  GET_IR_NODE_FROM_SUBGRAPH(conv_out, conv_out, pattern_name);              \
  /* Affine Channel inputs */                                               \
  GET_IR_NODE_FROM_SUBGRAPH(ac_scale, ac_scale, pattern_name);              \
  GET_IR_NODE_FROM_SUBGRAPH(ac_bias, ac_bias, pattern_name);                \
  /* Affine channel outputs */                                              \
  GET_IR_NODE_FROM_SUBGRAPH(ac_out, ac_out, pattern_name); /* Out */

void recompute_bias_and_weights(const Scope* scope, ir::Node* conv_weight,
                                const ir::Node& ac_scale,
                                const LoDTensor& ac_bias_tensor,
                                LoDTensor* eltwise_y_in_tensor) {
  using EigenVectorArrayMap =
      Eigen::Map<Eigen::Array<float, Eigen::Dynamic, 1>>;
  using ConstEigenVectorArrayMap =
      Eigen::Map<const Eigen::Array<float, Eigen::Dynamic, 1>>;
  using EigenMatrixArrayMap = Eigen::Map<
      Eigen::Array<float, Eigen::Dynamic, Eigen::Dynamic, Eigen::RowMajor>>;

  // Re-compute bias of conv2d from AffineChannel
  PADDLE_ENFORCE_EQ(eltwise_y_in_tensor->dims(), ac_bias_tensor.dims());

  auto* scale_tensor = scope->FindVar(ac_scale.Name())->GetMutable<LoDTensor>();

  ConstEigenVectorArrayMap scale_array(scale_tensor->data<float>(),
                                       scale_tensor->numel(), 1);
  ConstEigenVectorArrayMap ac_bias_array(ac_bias_tensor.data<float>(),
                                         ac_bias_tensor.numel(), 1);

  EigenVectorArrayMap eltwise_y_in_array(
      eltwise_y_in_tensor->mutable_data<float>(platform::CPUPlace()),
      eltwise_y_in_tensor->numel(), 1);

  eltwise_y_in_array = (eltwise_y_in_array * scale_array) + ac_bias_array;

  // Re-compute weight of conv2d from AffineChannel
  auto* weights = scope->FindVar(conv_weight->Name())->GetMutable<LoDTensor>();
  auto weights_shape = weights->dims();
  auto weights_shape_2d = flatten_to_2d(weights_shape, 1);

  EigenMatrixArrayMap weights_array_2d(
      weights->mutable_data<float>(platform::CPUPlace()), weights_shape_2d[0],
      weights_shape_2d[1]);

  weights_array_2d.colwise() *= scale_array;
}

std::unique_ptr<ir::Graph> ConvAffineChannelFusePass::ApplyImpl(
    std::unique_ptr<ir::Graph> graph) const {
  PADDLE_ENFORCE(graph.get());
  FusePassBase::Init(name_scope_, graph.get());

  auto* scope = param_scope();
  PADDLE_ENFORCE(scope);

  GraphPatternDetector gpd;
  auto* conv_input =
      gpd.mutable_pattern()
          ->NewNode(patterns::PDNodeName(name_scope_, "conv_input"))
          ->AsInput()
          ->assert_is_op_input("conv2d", "Input");
  patterns::ConvAffineChannel conv_ac_pattern(gpd.mutable_pattern(),
                                              name_scope_);
  conv_ac_pattern(conv_input, false /*with_eltwise_add*/);

  int found_conv_ac_count = 0;
  auto handler = [&](const GraphPatternDetector::subgraph_t& subgraph,
                     Graph* g) {
    VLOG(4) << "handle ConvAffineChannel fuse";

    GET_CONV_BN_NODES(conv_ac_pattern);

    // check if fuse can be done and if MKL-DNN should be used
    FuseOptions fuse_option = FindFuseOption(*conv, *affine_channel);
    if (fuse_option == DO_NOT_FUSE) {
      VLOG(3) << "do not perform conv+affinechannel fuse";
      return;
    }

    // Create eltwise_y (conv bias) variable
    VarDesc eltwise_y_in_desc(
        patterns::PDNodeName(name_scope_, "eltwise_y_in"));
    eltwise_y_in_desc.SetPersistable(true);
    auto* eltwise_y_in_node = g->CreateVarNode(&eltwise_y_in_desc);
    auto* eltwise_y_in_tensor =
        scope->Var(eltwise_y_in_node->Name())->GetMutable<LoDTensor>();

    // Get affine_channel bias
    auto* ac_bias_tensor =
        scope->FindVar(ac_bias->Name())->GetMutable<LoDTensor>();

    // Initialize eltwise_y
    eltwise_y_in_tensor->Resize(ac_bias_tensor->dims());
    std::fill_n(eltwise_y_in_tensor->mutable_data<float>(platform::CPUPlace()),
                eltwise_y_in_tensor->numel(), 0.0f);

    // update weights and biases
    recompute_bias_and_weights(scope, conv_weight, *ac_scale, *ac_bias_tensor,
                               eltwise_y_in_tensor);

    // create an elementwise add node.
    OpDesc desc;
    desc.SetInput("X", std::vector<std::string>({conv_out->Name()}));
    desc.SetInput("Y", std::vector<std::string>({eltwise_y_in_node->Name()}));
    desc.SetOutput("Out", std::vector<std::string>({ac_out->Name()}));
    desc.SetType("elementwise_add");
    desc.SetAttr("axis", 1);
    auto eltwise_op = g->CreateOpNode(&desc);  // OpDesc will be copied.

    GraphSafeRemoveNodes(graph.get(), {ac_scale, ac_bias, affine_channel});

    IR_NODE_LINK_TO(conv_out, eltwise_op);
    IR_NODE_LINK_TO(eltwise_y_in_node, eltwise_op);
    IR_NODE_LINK_TO(eltwise_op, ac_out);
    found_conv_ac_count++;
  };

  gpd(graph.get(), handler);

  AddStatis(found_conv_ac_count);
  return graph;
}

std::unique_ptr<ir::Graph> ConvEltwiseAddAffineChannelFusePass::ApplyImpl(
    std::unique_ptr<ir::Graph> graph) const {
  PADDLE_ENFORCE(graph.get());
  FusePassBase::Init(name_scope_, graph.get());

  auto* scope = param_scope();
  PADDLE_ENFORCE(scope);

  GraphPatternDetector gpd;
  auto* conv_input =
      gpd.mutable_pattern()
          ->NewNode(patterns::PDNodeName(name_scope_, "conv_input"))
          ->AsInput()
          ->assert_is_op_input("conv2d", "Input");
  patterns::ConvAffineChannel conv_ac_pattern(gpd.mutable_pattern(),
                                              name_scope_);
  conv_ac_pattern(conv_input, true /*with_eltwise_add*/);

  int found_conv_ac_count = 0;
  auto handler = [&](const GraphPatternDetector::subgraph_t& subgraph,
                     Graph* g) {
    VLOG(4) << "handle ConvBN fuse";

    GET_CONV_BN_NODES(conv_ac_pattern);
    // OPERATORS
    GET_IR_NODE_FROM_SUBGRAPH(eltwise, eltwise, conv_ac_pattern);
    // BIAS inputs
    GET_IR_NODE_FROM_SUBGRAPH(eltwise_y_in, eltwise_y_in, conv_ac_pattern);
    // BIAS outputs
    GET_IR_NODE_FROM_SUBGRAPH(eltwise_out, eltwise_out, conv_ac_pattern);

    // Get eltwise_y (conv bias) variable
    auto* eltwise_y_in_tensor =
        scope->FindVar(eltwise_y_in->Name())->GetMutable<LoDTensor>();

    // Get batch norm bias
    auto* ac_bias_tensor =
        scope->FindVar(ac_bias->Name())->GetMutable<LoDTensor>();

    recompute_bias_and_weights(scope, conv_weight, *ac_scale, *ac_bias_tensor,
                               eltwise_y_in_tensor);

    // Update the elementwise_add node
    eltwise->Op()->SetAttr("axis", 1);
    eltwise->Op()->SetOutput("Out",
                             std::vector<std::string>({ac_out->Name()}));

    GraphSafeRemoveNodes(graph.get(),
                         {ac_scale, ac_bias, affine_channel, eltwise_out});

    IR_NODE_LINK_TO(eltwise, ac_out);

    found_conv_ac_count++;
  };

  gpd(graph.get(), handler);
  AddStatis(found_conv_ac_count);
  return graph;
}

}  // namespace ir
}  // namespace framework
}  // namespace paddle

REGISTER_PASS(conv_affine_channel_fuse_pass,
              paddle::framework::ir::ConvAffineChannelFusePass);
REGISTER_PASS(conv_eltwiseadd_affine_channel_fuse_pass,
              paddle::framework::ir::ConvEltwiseAddAffineChannelFusePass);
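Note (not part of the commit): the pass above folds affine_channel into the preceding conv2d by scaling each output-channel slice of the conv filter by the channel scale and turning the channel bias into the Y input of a new elementwise_add. A minimal standalone C++ sketch of that folding rule, with purely illustrative names and data:

// Standalone illustration of the fold applied by recompute_bias_and_weights:
// affine_channel(conv_out) = scale[c] * conv_out[c] + bias[c], which equals a
// conv with per-channel-scaled weights followed by an elementwise_add of bias.
#include <cstdio>
#include <vector>

int main() {
  const int out_channels = 2, weights_per_channel = 3;
  std::vector<float> weights = {1, 2, 3, 4, 5, 6};  // flattened [C_out, ...]
  std::vector<float> scale = {0.5f, 2.0f};          // affine_channel Scale
  std::vector<float> bias = {1.0f, -1.0f};          // affine_channel Bias

  // Fold the per-channel scale into the conv weights.
  for (int c = 0; c < out_channels; ++c) {
    for (int k = 0; k < weights_per_channel; ++k) {
      weights[c * weights_per_channel + k] *= scale[c];
    }
  }
  // The bias vector is what becomes the Y input of the new elementwise_add.
  for (int c = 0; c < out_channels; ++c) {
    std::printf("channel %d: folded bias %.1f\n", c, bias[c]);
  }
  return 0;
}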
paddle/fluid/framework/ir/conv_affine_channel_fuse_pass.h
new file mode 100644

// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#pragma once

#include <string>
#include "paddle/fluid/framework/ir/fuse_pass_base.h"
#include "paddle/fluid/framework/ir/graph.h"
#include "paddle/fluid/framework/ir/graph_pattern_detector.h"

namespace paddle {
namespace framework {
namespace ir {

/*
 * Fuse the Conv and ConvAffineChannel.
 */
class ConvAffineChannelFusePass : public FusePassBase {
 public:
  virtual ~ConvAffineChannelFusePass() {}

 protected:
  std::unique_ptr<ir::Graph> ApplyImpl(std::unique_ptr<ir::Graph> graph) const;
  const std::string name_scope_{"conv_affine_channel_fuse"};
};

class ConvEltwiseAddAffineChannelFusePass : public FusePassBase {
 public:
  virtual ~ConvEltwiseAddAffineChannelFusePass() {}

 protected:
  std::unique_ptr<ir::Graph> ApplyImpl(std::unique_ptr<ir::Graph> graph) const;
  const std::string name_scope_{"conv_eltwiseadd_affine_channel_fuse"};
};

}  // namespace ir
}  // namespace framework
}  // namespace paddle
paddle/fluid/framework/ir/graph.cc
@@ -23,66 +23,8 @@ limitations under the License. */
 namespace paddle {
 namespace framework {
 namespace ir {
-namespace {
-
-void CheckProgram(const ProgramDesc &program) {
-#define _INT(role) static_cast<int>(role)
-
-  std::map<int, bool> visit;
-  for (OpDesc *op : program.Block(0).AllOps()) {
-    // For backward compatibility, some program doesn't have role added.
-    if (!op->HasAttr(OpProtoAndCheckerMaker::OpRoleAttrName())) continue;
-    int role_id =
-        boost::get<int>(op->GetAttr(OpProtoAndCheckerMaker::OpRoleAttrName()));
-    visit[role_id] = true;
-    switch (role_id) {
-      case _INT(OpRole::kForward):
-        if (visit.find(_INT(OpRole::kBackward)) != visit.end()) {
-          LOG(ERROR)
-              << "Cannot add backward operator before forward operator "
-              << op->Type();
-        }
-        break;
-      case _INT(OpRole::kBackward):
-      case _INT(OpRole::kBackward) | _INT(OpRole::kLoss):
-        PADDLE_ENFORCE(
-            visit.find(_INT(OpRole::kOptimize)) == visit.end(),
-            "Cannot add backward operator %s after optimize operator.",
-            op->Type());
-        break;
-      case _INT(OpRole::kForward) | _INT(OpRole::kLoss):
-        PADDLE_ENFORCE(visit.find(_INT(OpRole::kBackward) |
-                                  _INT(OpRole::kLoss)) == visit.end(),
-                       "Cannot add backward|loss operator before "
-                       "forward|loss operator %s.",
-                       op->Type());
-        PADDLE_ENFORCE(
-            visit.find(_INT(OpRole::kOptimize)) == visit.end(),
-            "Cannot add forward|loss operator %s after optimize operator.",
-            op->Type());
-        break;
-      case _INT(OpRole::kOptimize):
-      case _INT(OpRole::kOptimize) | _INT(OpRole::kLRSched):
-        PADDLE_ENFORCE(visit.find(_INT(OpRole::kBackward)) != visit.end(),
-                       "Optimize operators %s must follow backward operator.",
-                       op->Type());
-        break;
-      case _INT(OpRole::kLRSched):
-      case _INT(OpRole::kDist):
-      case _INT(OpRole::kRPC):
-      case _INT(OpRole::kNotSpecified):
-        break;
-      default:
-        LOG(FATAL) << "Unknown operator role. Don't add new role because "
-                      "you don't know what you are doing.";
-    }
-  }
-#undef _INT
-}
-}  // namespace

 Graph::Graph(const ProgramDesc &program) : program_(program) {
-  CheckProgram(program_);
   auto var_nodes = InitFromProgram(program_);
   ResolveHazard(var_nodes);
 }
paddle/fluid/framework/ir/graph_pattern_detector.cc
@@ -1234,6 +1234,78 @@ PDNode *patterns::ConvElementwiseadd::operator()(PDNode *conv_in) {
   return elementwise_add_out;
 }

+PDNode *patterns::ConvAffineChannel::operator()(
+    paddle::framework::ir::PDNode *conv_input, bool with_eltwise_add) {
+  // Create Operators
+  conv_input->assert_is_op_input("conv2d", "Input");
+  auto *conv_op = pattern->NewNode(conv_repr())->assert_is_op("conv2d");
+
+  PDNode *eltwise_op = nullptr;
+  if (with_eltwise_add) {
+    eltwise_op =
+        pattern->NewNode(eltwise_repr())->assert_is_op("elementwise_add");
+  }
+
+  auto *affine_channel_op =
+      pattern->NewNode(affine_channel_repr())->assert_is_op("affine_channel");
+  // Create variables
+  // Conv Filter
+  auto *conv_weight_var = pattern->NewNode(conv_weight_repr())
+                              ->AsInput()
+                              ->assert_is_persistable_var()
+                              ->assert_is_op_input("conv2d", "Filter");
+
+  auto *conv_out_var = pattern->NewNode(conv_out_repr())
+                           ->AsIntermediate()
+                           ->assert_is_only_output_of_op("conv2d");
+
+  PDNode *eltwise_y_in_var = nullptr;
+  PDNode *eltwise_out_var = nullptr;
+  if (with_eltwise_add) {
+    // Conv output as Bias input
+    conv_out_var->assert_is_op_input("elementwise_add", "X");
+    // Bias
+    eltwise_y_in_var = pattern->NewNode(eltwise_y_in_repr())
+                           ->assert_is_op_input("elementwise_add", "Y")
+                           ->AsInput();
+    eltwise_out_var = pattern->NewNode(eltwise_out_repr())
+                          ->AsIntermediate()
+                          ->assert_is_only_output_of_op("elementwise_add");
+  } else {
+    // Conv output as AffineChannel input
+    conv_out_var->assert_is_op_input("affine_channel", "X");
+  }
+
+  // AC Scale
+  auto *ac_scale_var = pattern->NewNode(ac_scale_repr())
+                           ->AsInput()
+                           ->assert_is_persistable_var()
+                           ->assert_is_op_input("affine_channel", "Scale");
+  // AC Bias
+  auto *ac_bias_var = pattern->NewNode(ac_bias_repr())
+                          ->AsInput()
+                          ->assert_is_persistable_var()
+                          ->assert_is_op_input("affine_channel", "Bias");
+
+  // AC output
+  auto *ac_out_var = pattern->NewNode(ac_out_repr())
+                         ->AsOutput()
+                         ->assert_is_op_output("affine_channel");
+
+  conv_op->LinksFrom({conv_input, conv_weight_var}).LinksTo({conv_out_var});
+
+  if (with_eltwise_add) {
+    eltwise_op->LinksFrom({conv_out_var, eltwise_y_in_var})
+        .LinksTo({eltwise_out_var});
+    affine_channel_op->LinksFrom({eltwise_out_var, ac_scale_var, ac_bias_var})
+        .LinksTo({ac_out_var});
+  } else {
+    affine_channel_op->LinksFrom({conv_out_var, ac_scale_var, ac_bias_var})
+        .LinksTo({ac_out_var});
+  }
+  return ac_out_var;
+}
+
 }  // namespace ir
 }  // namespace framework
 }  // namespace paddle
paddle/fluid/framework/ir/graph_pattern_detector.h
@@ -734,6 +734,38 @@ struct ConvElementwiseadd : public PatternBase {
   PATTERN_DECL_NODE(elementwise_add_out);
 };

+// Conv with affine_channel
+// op: conv + (elementwise_add +) affine_channel
+// named nodes:
+// conv_weight, conv_out, conv,
+// ac_x, ac_scale, ac_bias
+// affine_channel, ac_out
+struct ConvAffineChannel : public PatternBase {
+  ConvAffineChannel(PDPattern* pattern, const std::string& name_scope)
+      : PatternBase(pattern, name_scope, "conv_affine_channel") {}
+
+  PDNode* operator()(PDNode* conv_input, bool with_eltwise_add);
+
+  // declare operator node's name
+  PATTERN_DECL_NODE(conv);
+  PATTERN_DECL_NODE(affine_channel);
+  PATTERN_DECL_NODE(eltwise);  // ELEMENTWISE_ADD
+  // CONV inputs
+  PATTERN_DECL_NODE(conv_weight);  // Filter
+  // CONV outputs
+  PATTERN_DECL_NODE(conv_out);  // tmp
+  // ELTWISE inputs
+  PATTERN_DECL_NODE(eltwise_y_in);
+  // ELTWISE outputs
+  PATTERN_DECL_NODE(eltwise_out);  // tmp
+  // AC(Affine_Channel) inputs
+  PATTERN_DECL_NODE(ac_scale);
+  PATTERN_DECL_NODE(ac_bias);
+  // AC outputs
+  PATTERN_DECL_NODE(ac_out);  // Out
+};
+
 }  // namespace patterns

 // Link two ir::Nodes from each other.
paddle/fluid/framework/ir/multi_batch_merge_pass.cc
@@ -75,6 +75,7 @@ std::unique_ptr<Graph> BatchMergePass::ApplyImpl(
   std::vector<Node*> optimize_ops;
   std::vector<Node*> lr_ops;  // ops other than forward/backward/optimize
   std::unordered_set<std::string> grad_names;
+  std::unordered_map<std::string, std::string> gradname2paramname;

   std::vector<ir::Node*> nodes = TopologySortOperations(*graph);
   auto origin_nodes = graph->ReleaseNodes();
@@ -99,6 +100,7 @@ std::unique_ptr<Graph> BatchMergePass::ApplyImpl(
       auto op_role_vars = boost::get<std::vector<std::string>>(op_role_var);
       for (size_t i = 0; i < op_role_vars.size(); i += 2) {
         grad_names.insert(op_role_vars[i + 1]);
+        gradname2paramname[op_role_vars[i + 1]] = op_role_vars[i];
       }
     } else if (op_role & static_cast<int>(framework::OpRole::kLRSched)) {
       lr_ops.push_back(node);
@@ -109,7 +111,7 @@ std::unique_ptr<Graph> BatchMergePass::ApplyImpl(
   // 2. copy forward backward
   ir::Node* prev_repeat_last_op_node = nullptr;
-  // record origin_grad -> repeated grad list map.
+  // record origin_grad -> repeated_grad_list map.
   std::map<ir::Node*, std::vector<ir::Node*>> grad_repeated_map;
   std::map<std::string, std::vector<ir::Node*>> created;
   std::unordered_set<std::string> bn_vars_need_rename;
@@ -124,10 +126,16 @@ std::unique_ptr<Graph> BatchMergePass::ApplyImpl(
         if (grad_names.find(outname) != grad_names.end()) {
           std::string new_gname = string::Sprintf("%s.repeat.%d", outname, i);
           repeated_op.RenameOutput(outname, new_gname);
+          // remove op_role_var for backward ops that outputs grad for a
+          // parameter.
+          repeated_op.SetAttr(OpProtoAndCheckerMaker::OpRoleVarAttrName(),
+                              std::vector<std::string>());
         }
       }
       // 3.5 let batch_norm ops use independent vars, note batch_norm_grad do
-      // not need this update
+      // not need this update, because only moving mean and variance should be
+      // differ, trainable parameter scale and bias is the same as other
+      // parameters.
       if (node->Name() == "batch_norm") {
         // NOTE: assume bn op created by layers use save var as output mean and
         // variance
@@ -224,16 +232,25 @@ std::unique_ptr<Graph> BatchMergePass::ApplyImpl(
           var->inputs.push_back(repeated_node);
         }
       }
     }
   }  // end copy forward backward

-  // 5. create GRAD merge op node
+  // 5. create GRAD merge op node: sum(repeat.0...repeat.n) ->
+  // scale(1/num_repeats)
   for (auto kv : grad_repeated_map) {
     OpDesc sum_op;
     sum_op.SetType("sum");
     std::vector<std::string> repeated_grad_names;
+    std::vector<std::string> param_grad_op_role_var;
     for (auto r : kv.second) {
       repeated_grad_names.push_back(r->Var()->Name());
     }
+    // NOTE: use op_role_var to control allreduce op appending in
+    //       multi_devices_graph_pass, we want to append op_role_var
+    //       only once for the merged gradient, so break after first call.
+    param_grad_op_role_var.push_back(
+        gradname2paramname.at(kv.first->Var()->Name()));  // param
+    param_grad_op_role_var.push_back(kv.first->Var()->Name());  // grad
+
     sum_op.SetInput("X", repeated_grad_names);
     sum_op.SetOutput("Out", {kv.first->Var()->Name()});
     sum_op.SetAttr(OpProtoAndCheckerMaker::OpRoleAttrName(),
@@ -256,6 +273,10 @@ std::unique_ptr<Graph> BatchMergePass::ApplyImpl(
     scale_op.SetAttr("scale", static_cast<float>(1.0f / num_repeats));
     scale_op.SetAttr(OpProtoAndCheckerMaker::OpRoleAttrName(),
                      static_cast<int>(OpRole::kBackward));
+
+    scale_op.SetAttr(OpProtoAndCheckerMaker::OpRoleVarAttrName(),
+                     param_grad_op_role_var);
+
     auto scale_op_node = result.CreateOpNode(&scale_op);
     scale_op_node->inputs.push_back(sum_out_var_node);
     sum_out_var_node->outputs.push_back(scale_op_node);
paddle/fluid/framework/operator.cc
@@ -16,7 +16,6 @@ limitations under the License. */
 #include <glog/logging.h>
 #include <algorithm>
 #include "paddle/fluid/framework/data_transform.h"
-#include "paddle/fluid/framework/executor.h"
 #include "paddle/fluid/framework/lod_tensor.h"
@@ -380,7 +379,7 @@ const Tensor* GetLoDTensorOrSelectedRowsValueFromVar(const Variable& var) {
     return &(var.Get<SelectedRows>().value());
   } else {
     PADDLE_THROW("Variable type_id %s, expect LoDTensor/SelectedRows.",
-                 var.Type().name());
+                 ToTypeName(var.Type()));
   }
 }
@@ -391,7 +390,7 @@ Tensor* GetMutableLoDTensorOrSelectedRowsValueFromVar(Variable* var) {
     return var->GetMutable<SelectedRows>()->mutable_value();
   } else {
     PADDLE_THROW("Variable type_id %s, expect LoDTensor/SelectedRows.",
-                 var->Type().name());
+                 ToTypeName(var->Type()));
   }
 }
@@ -485,7 +484,7 @@ const std::vector<const Tensor*> ExecutionContext::MultiInput<Tensor>(
                    PADDLE_ENFORCE(
                        var->IsType<LoDTensor>(),
                        "should be LoDTensor, but the received type is %s",
-                       var->Type().name());
+                       ToTypeName(var->Type()));
                    return &(var->Get<LoDTensor>());
                  });
   return res;
@@ -504,7 +503,7 @@ const std::vector<const Tensor*> ExecutionContext::LegacyMultiInput<Tensor>(
                    PADDLE_ENFORCE(
                        var->IsType<LoDTensor>(),
                        "%s should be LoDTensor, but the received type is %s",
-                       sub_name, var->Type().name());
+                       sub_name, ToTypeName(var->Type()));
                    return &(var->Get<LoDTensor>());
                  });
   return res;
@@ -533,7 +532,7 @@ std::vector<Tensor*> ExecutionContext::MultiOutput<Tensor>(
                    PADDLE_ENFORCE(
                        var->IsType<LoDTensor>(),
                        "%s should be LoDTensor, but the received type is %s",
-                       sub_name, var->Type().name());
+                       sub_name, ToTypeName(var->Type()));
                    return var->GetMutable<LoDTensor>();
                  });
   return res;
@@ -775,7 +774,7 @@ class RuntimeInferShapeContext : public InferShapeContext {
       PADDLE_THROW(
           "Only LoDTensor/SelectedRows support 'GetDim', but Variables "
          "type_id is %s.",
-          var->Type().name());
+          ToTypeName(var->Type()));
     }
   }
@@ -798,7 +797,7 @@ class RuntimeInferShapeContext : public InferShapeContext {
       var->GetMutable<SelectedRows>()->set_height(dim[0]);
     } else {
       PADDLE_THROW("Variable type_id %s, expect LoDTensor/SelectedRows.",
-                   var->Type().name());
+                   ToTypeName(var->Type()));
     }
   }
@@ -1041,12 +1040,11 @@ Scope* OperatorWithKernel::PrepareData(
 proto::VarType::Type OperatorWithKernel::IndicateDataType(
     const ExecutionContext& ctx) const {
-  auto& scope = ctx.scope();
   int data_type = -1;
-  std::string last_input_name;
   for (auto& input : this->inputs_) {
-    for (auto& ipt_name : input.second) {
-      auto* var = scope.FindVar(ipt_name);
+    const std::vector<const Variable*> vars = ctx.MultiInputVar(input.first);
+    for (size_t i = 0; i < vars.size(); ++i) {
+      const Variable* var = vars[i];
       if (var != nullptr) {
         const Tensor* t = nullptr;
         if (var->IsType<Tensor>()) {
@@ -1057,15 +1055,14 @@ proto::VarType::Type OperatorWithKernel::IndicateDataType(
           t = &(var->Get<SelectedRows>().value());
         }
         if (t != nullptr) {
-          PADDLE_ENFORCE(t->IsInitialized(), "Input %s is not initialized",
-                         ipt_name);
+          PADDLE_ENFORCE(t->IsInitialized(), "Input %s(%lu) is not initialized",
+                         input.first, i);
           int tmp = static_cast<int>(t->type());
           PADDLE_ENFORCE(
               tmp == data_type || data_type == -1,
-              "DataType of Paddle Op %s must be the same. Get %s(%d) != %s(%d)",
-              Type(), last_input_name, data_type, ipt_name, tmp);
+              "DataType of Paddle Op %s must be the same. Get (%d) != (%d)",
+              Type(), data_type, tmp);
           data_type = tmp;
-          last_input_name = ipt_name;
         }
       }
     }
paddle/fluid/framework/operator.h
@@ -81,6 +81,10 @@ class RuntimeContext {
   RuntimeContext(const VariableNameMap& innames,
                  const VariableNameMap& outnames, const Scope& scope);

+  RuntimeContext(const VariableValueMap& invars,
+                 const VariableValueMap& outvars)
+      : inputs(invars), outputs(outvars) {}
+
   VariableValueMap inputs;
   VariableValueMap outputs;
 };
@@ -447,8 +451,9 @@ class OperatorWithKernel : public OperatorBase {
   void RuntimeInferShape(const Scope& scope, const platform::Place& place,
                          const RuntimeContext& ctx) const override;

- protected:
   virtual OpKernelType GetExpectedKernelType(const ExecutionContext& ctx) const;
+
+ protected:
   virtual OpKernelType GetKernelTypeForVar(
       const std::string& var_name, const Tensor& tensor,
       const OpKernelType& expected_kernel_type) const;
paddle/fluid/framework/parallel_executor.cc
@@ -320,6 +320,7 @@ void ParallelExecutor::BCastParamsToDevices(
     if (paddle::platform::is_gpu_place(main_tensor.place())) {
 #if defined(PADDLE_WITH_CUDA) && !defined(_WIN32)
       std::vector<void *> buffers;
+      buffers.reserve(member_->places_.size());
       size_t numel = main_tensor.numel();
       ncclDataType_t data_type = platform::ToNCCLDataType(main_tensor.type());
       for (size_t i = 0; i < member_->places_.size(); ++i) {
@@ -353,9 +354,7 @@ void ParallelExecutor::BCastParamsToDevices(
 #endif
     } else {
       platform::CPUPlace cpu;
-      for (size_t i = 0; i < member_->places_.size(); ++i) {
-        if (i == 0) continue;
-
+      for (size_t i = 1; i < member_->places_.size(); ++i) {
         auto local_scope = member_->local_scopes_[i];
         auto *t = local_scope->Var(var)->GetMutable<LoDTensor>();
paddle/fluid/framework/scope.cc
@@ -165,11 +165,9 @@ std::string Scope::Rename(const std::string& origin_name) const {
 Variable* Scope::VarInternal(const std::string& name) {
   auto* v = FindVarLocally(name);
   if (v != nullptr) return v;
-
   v = new Variable();
-  vars_[name].reset(v);
+  vars_.emplace(name, std::unique_ptr<Variable>(v));
   VLOG(3) << "Create variable " << name;
   v->name_ = &(vars_.find(name)->first);
   return v;
 }
paddle/fluid/framework/var_type.h
@@ -19,52 +19,50 @@ limitations under the License. */
 #include "paddle/fluid/framework/lod_tensor_array.h"
 #include "paddle/fluid/framework/reader.h"
 #include "paddle/fluid/framework/selected_rows.h"
+#include "paddle/fluid/framework/var_type_traits.h"
 #include "paddle/fluid/framework/variable.h"

 namespace paddle {
 namespace framework {

 template <typename T>
-inline bool IsType(const std::type_index& type_index) {
-  return type_index == std::type_index(typeid(T));
+inline bool IsType(const std::type_index& type) {
+  return type == typeid(T);
 }

-inline proto::VarType::Type ToVarType(std::type_index type) {
-  if (IsType<LoDTensor>(type)) {
-    return proto::VarType_Type_LOD_TENSOR;
-  } else if (IsType<LoDRankTable>(type)) {
-    return proto::VarType_Type_LOD_RANK_TABLE;
-  } else if (IsType<LoDTensorArray>(type)) {
-    return proto::VarType_Type_LOD_TENSOR_ARRAY;
-  } else if (IsType<SelectedRows>(type)) {
-    return proto::VarType_Type_SELECTED_ROWS;
-  } else if (IsType<ReaderHolder>(type)) {
-    return proto::VarType_Type_READER;
-  } else {
-    PADDLE_THROW("ToVarType:Unsupported type %s", type.name());
+inline proto::VarType::Type ToVarType(int type) {
+  switch (type) {
+    case proto::VarType::LOD_TENSOR:
+    case proto::VarType::SELECTED_ROWS:
+    case proto::VarType::LOD_RANK_TABLE:
+    case proto::VarType::LOD_TENSOR_ARRAY:
+    case proto::VarType::READER:
+      return static_cast<proto::VarType::Type>(type);
+    default:
+      PADDLE_THROW("ToVarType:Unsupported type %d", type);
   }
 }

 template <typename Visitor>
 inline void VisitVarType(const framework::Variable& var, Visitor visitor) {
-  switch (ToVarType(var.Type())) {
-    case proto::VarType_Type_LOD_TENSOR:
+  switch (var.Type()) {
+    case proto::VarType::LOD_TENSOR:
       visitor(var.Get<LoDTensor>());
       return;
-    case proto::VarType_Type_LOD_RANK_TABLE:
+    case proto::VarType::LOD_RANK_TABLE:
       visitor(var.Get<LoDRankTable>());
       return;
-    case proto::VarType_Type_LOD_TENSOR_ARRAY:
+    case proto::VarType::LOD_TENSOR_ARRAY:
       visitor(var.Get<LoDTensorArray>());
       return;
-    case proto::VarType_Type_SELECTED_ROWS:
+    case proto::VarType::SELECTED_ROWS:
       visitor(var.Get<SelectedRows>());
       return;
-    case proto::VarType_Type_READER:
+    case proto::VarType::READER:
       visitor(var.Get<ReaderHolder>());
       return;
     default:
-      PADDLE_THROW("Not supported visit type, %d", ToVarType(var.Type()));
+      PADDLE_THROW("Not supported visit type, %s", ToTypeName(var.Type()));
   }
 }
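Note (not part of the commit): a hedged sketch of how the reworked VisitVarType above is typically driven. The switch now dispatches on the integer type id stored in Variable and calls the visitor overload matching the held type; the visitor name below is illustrative only.

// Illustrative visitor: a templated fallback plus one concrete overload.
#include <iostream>
#include "paddle/fluid/framework/var_type.h"

struct SizeVisitor {
  // Fallback for held types we do not care about here.
  template <typename T>
  void operator()(const T&) const {}
  // Picked by overload resolution when the variable holds a LoDTensor.
  void operator()(const paddle::framework::LoDTensor& t) const {
    std::cout << "LoDTensor with " << t.numel() << " elements\n";
  }
};

// Usage, given a framework::Variable var holding a registered type:
//   paddle::framework::VisitVarType(var, SizeVisitor());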
paddle/fluid/framework/var_type_inference_test.cc
@@ -108,7 +108,7 @@ TEST(InferVarType, sum_op_without_infer_var_type) {
   op->InferVarType(prog.MutableBlock(0));

-  ASSERT_EQ(proto::VarType_Type_LOD_TENSOR,
+  ASSERT_EQ(proto::VarType::LOD_TENSOR,
             prog.MutableBlock(0)->Var("test2_out")->GetType());
 }
paddle/fluid/framework/var_type_traits.cc
new file mode 100644

// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "paddle/fluid/framework/var_type_traits.h"
#include "paddle/fluid/framework/lod_rank_table.h"
#include "paddle/fluid/framework/reader.h"
#include "paddle/fluid/framework/scope.h"
#include "paddle/fluid/framework/selected_rows.h"
#include "paddle/fluid/operators/reader/lod_tensor_blocking_queue.h"
#include "paddle/fluid/platform/macros.h"

#ifdef PADDLE_WITH_CUDA
#ifndef _WIN32
#include "paddle/fluid/operators/nccl/nccl_gpu_common.h"
#endif
#include <cudnn.h>
#include "paddle/fluid/operators/conv_cudnn_op_cache.h"
#include "paddle/fluid/operators/cudnn_rnn_cache.h"
#endif

namespace paddle {
namespace framework {

// Besides registering variable type id, it is helpful to register a
// var_id -> std::type_index map (for example, get type names according to id)
namespace detail {

template <int kStart, int kEnd, bool kStop>
struct VarIdToTypeIndexMapInitializerImpl {
  template <typename MapType1, typename MapType2>
  static void Init(MapType1 *id_to_type, MapType2 *type_to_id) {
    using Type =
        typename std::tuple_element<kStart, VarTypeRegistry::ArgTuple>::type;
    static_assert(!std::is_same<Type, void>::value, "Type cannot be void");
    constexpr int kId = VarTypeTrait<Type>::kId;
    auto type = std::type_index(typeid(Type));
    PADDLE_ENFORCE(id_to_type->count(kId) == 0,
                   "Registered duplicate type id %d for type %s", kId,
                   type.name());
    PADDLE_ENFORCE(type_to_id->count(type) == 0,
                   "Registered duplicate type_index %s for id %d", type.name(),
                   kId);
    id_to_type->emplace(kId, type);
    type_to_id->emplace(type, kId);
    VarIdToTypeIndexMapInitializerImpl<kStart + 1, kEnd,
                                       kStart + 1 == kEnd>::Init(id_to_type,
                                                                 type_to_id);
  }
};

template <int kStart, int kEnd>
struct VarIdToTypeIndexMapInitializerImpl<kStart, kEnd, true> {
  template <typename MapType1, typename MapType2>
  static void Init(MapType1 *, MapType2 *) {}
};

// VarIdToTypeIndexMapInitializer is designed to initialize var_id ->
// std::type_index map and std::type_index -> var_id map
using VarIdToTypeIndexMapInitializer =
    VarIdToTypeIndexMapInitializerImpl<0, VarTypeRegistry::kRegisteredTypeNum,
                                       VarTypeRegistry::kRegisteredTypeNum ==
                                           0>;

struct VarIdToTypeIndexMapHolder {
  DISABLE_COPY_AND_ASSIGN(VarIdToTypeIndexMapHolder);

 public:
  static const std::type_index &ToTypeIndex(int var_id) {
    auto it = Instance().id_to_type_map_.find(var_id);
    PADDLE_ENFORCE(it != Instance().id_to_type_map_.end(),
                   "VarId %d is not registered.", var_id);
    return it->second;
  }

  static int ToTypeId(const std::type_index &type) {
    auto it = Instance().type_to_id_map_.find(type);
    PADDLE_ENFORCE(it != Instance().type_to_id_map_.end(),
                   "VarType %s is not registered.", type.name());
    return it->second;
  }

 private:
  VarIdToTypeIndexMapHolder() {
    VarIdToTypeIndexMapInitializer::Init(&id_to_type_map_, &type_to_id_map_);
  }

  static const VarIdToTypeIndexMapHolder &Instance() {
    static const VarIdToTypeIndexMapHolder instance;
    return instance;
  }

  std::unordered_map<int, std::type_index> id_to_type_map_;
  std::unordered_map<std::type_index, int> type_to_id_map_;
};

}  // namespace detail

const std::type_index &ToTypeIndex(int var_id) {
  return detail::VarIdToTypeIndexMapHolder::ToTypeIndex(var_id);
}

const char *ToTypeName(int var_id) { return ToTypeIndex(var_id).name(); }

int ToTypeId(const std::type_index &type) {
  return detail::VarIdToTypeIndexMapHolder::ToTypeId(type);
}

}  // namespace framework
}  // namespace paddle
paddle/fluid/framework/var_type_traits.h
0 → 100644
浏览文件 @
9e3155e0
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <map>
#include <string>
#include <tuple>
#include <typeindex>
#include <vector>
#include "paddle/fluid/framework/framework.pb.h"
#include "paddle/fluid/framework/lod_tensor_array.h"
#include "paddle/fluid/platform/place.h"
#ifdef PADDLE_WITH_CUDA
#include <cudnn.h>
#ifndef _WIN32
#include <nccl.h>
#endif
#endif
// Users should add forward declarations here
namespace
paddle
{
namespace
platform
{
#ifdef PADDLE_WITH_CUDA
#ifndef _WIN32
class
Communicator
;
#endif
#endif
}
// namespace platform
namespace
framework
{
class
Tensor
;
class
LoDTensor
;
class
SelectedRows
;
class
LoDRankTable
;
class
ReaderHolder
;
class
Scope
;
}
// namespace framework
namespace
operators
{
template
<
typename
T
>
class
AlgorithmsCache
;
class
CudnnRNNCache
;
namespace
reader
{
class
LoDTensorBlockingQueueHolder
;
}
// namespace reader
}
// namespace operators
}
// namespace paddle
namespace
paddle
{
namespace
framework
{
const
char
*
ToTypeName
(
int
var_id
);
const
std
::
type_index
&
ToTypeIndex
(
int
var_id
);
int
ToTypeId
(
const
std
::
type_index
&
type
);
namespace
detail
{
template
<
bool
kStop
,
int
kStart
,
int
kEnd
,
typename
T1
,
typename
T2
,
typename
...
Args
>
struct
TypePosFinderImpl
{
static
constexpr
int
kPos
=
std
::
is_same
<
T1
,
T2
>::
value
?
kStart
:
TypePosFinderImpl
<
kStart
+
2
==
kEnd
,
kStart
+
1
,
kEnd
,
T1
,
Args
...
>::
kPos
;
};
template
<
int
kStart
,
int
kEnd
,
typename
T1
,
typename
T2
>
struct
TypePosFinderImpl
<
true
,
kStart
,
kEnd
,
T1
,
T2
>
{
static
constexpr
int
kPos
=
std
::
is_same
<
T1
,
T2
>::
value
?
kStart
:
-
1
;
};
// TypePosFinder helps to find the position in which T is inside Args...
// If T is not inside Args..., kPos would be -1
template
<
typename
T
,
typename
...
Args
>
struct
TypePosFinder
{
static
constexpr
int
kPos
=
TypePosFinderImpl
<
sizeof
...(
Args
)
==
1
,
0
,
sizeof
...(
Args
),
T
,
Args
...
>::
kPos
;
};
template
<
typename
...
Args
>
struct
VarTypeRegistryImpl
{
static
constexpr
size_t
kRegisteredTypeNum
=
sizeof
...(
Args
);
using
ArgTuple
=
std
::
tuple
<
Args
...
>
;
// TypePos() returns the position in which T is inside Args...
// If T is not inside Args..., return -1
template
<
typename
T
>
static
constexpr
int
TypePos
()
{
return
TypePosFinder
<
T
,
Args
...
>::
kPos
;
}
// IsRegistered() returns whether T is registered inside RegistryImpl
template
<
typename
T
>
static
constexpr
bool
IsRegistered
()
{
return
TypePos
<
T
>
()
>=
0
;
}
};
}
// namespace detail
#define REG_PROTO_VAR_TYPE_TRAIT(type, proto_id) \
template <> \
struct VarTypeTrait<type> { \
static_assert(VarTypeRegistry::IsRegistered<type>(), \
"Must be registered type"); \
using Type = type; \
static constexpr int kId = static_cast<int>(proto_id); \
}
/**
* The following codes are designed to register variable types.
* Only registered types can be stored in Variable.
* This registry mechanism is designed to speed up Variable.
*
* Caution: If you want to add more var types, please consider carefully
* whether you really need to add it.
*/
// Users should add other variable types below.
// Paddle would generate unique Ids for each registered variable types.
using VarTypeRegistry = detail::VarTypeRegistryImpl<
    Tensor, LoDTensor, SelectedRows, std::vector<Scope *>, LoDRankTable,
    LoDTensorArray, platform::PlaceList, ReaderHolder, std::string, Scope *,
    std::map<size_t, Tensor>, operators::reader::LoDTensorBlockingQueueHolder,
#ifdef PADDLE_WITH_CUDA
#ifndef _WIN32
    ncclUniqueId, platform::Communicator,
#endif
    operators::AlgorithmsCache<cudnnConvolutionFwdAlgo_t>,
    operators::AlgorithmsCache<cudnnConvolutionBwdDataAlgo_t>,
    operators::AlgorithmsCache<cudnnConvolutionBwdFilterAlgo_t>,
    operators::CudnnRNNCache,
#endif
    int, float>;
template <typename T>
struct VarTypeTrait {
  static_assert(VarTypeRegistry::IsRegistered<T>(), "Must be registered type");
  using Type = T;
  /**
   * Unique VarType Id generation.
   *
   * The auto-generated id should not be the same as any protobuf id defined in
   * framework.proto. Therefore, we generate id by adding the type pos and
   * maximum protobuf id (i.e., proto::VarType::TUPLE).
   *
   * However, we may need more protobuf id in the future.
   * To avoid changing this auto id generation algorithm frequently, we
   * generate id by adding the type pos and twice of maximum protobuf id (i.e.,
   * proto::VarType::TUPLE).
   */
  static constexpr int kId = VarTypeRegistry::TypePos<T>() +
                             static_cast<int>(proto::VarType::TUPLE) * 2;
};
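(Annotation.) In other words, any type without a protobuf id borrows a slot above 2 * proto::VarType::TUPLE, so the auto-generated id space and the protobuf id space cannot collide even if a few protobuf ids are added later. A hedged sketch of the arithmetic; the concrete values depend on framework.proto and the registration order above:

// Auto id: registry position shifted past twice the largest protobuf id.
constexpr int auto_id =
    paddle::framework::VarTypeRegistry::TypePos<std::map<size_t, paddle::framework::Tensor>>() +
    2 * static_cast<int>(paddle::framework::proto::VarType::TUPLE);
// Types that do appear in framework.proto get their kId overridden by the
// REG_PROTO_VAR_TYPE_TRAIT specializations that follow, e.g.
// VarTypeTrait<LoDTensor>::kId == proto::VarType::LOD_TENSOR.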
// Users should set some of variable type ids to be what is defined in
// framework.proto below
REG_PROTO_VAR_TYPE_TRAIT(LoDTensor, proto::VarType::LOD_TENSOR);
REG_PROTO_VAR_TYPE_TRAIT(SelectedRows, proto::VarType::SELECTED_ROWS);
REG_PROTO_VAR_TYPE_TRAIT(std::vector<Scope *>, proto::VarType::STEP_SCOPES);
REG_PROTO_VAR_TYPE_TRAIT(LoDRankTable, proto::VarType::LOD_RANK_TABLE);
REG_PROTO_VAR_TYPE_TRAIT(LoDTensorArray, proto::VarType::LOD_TENSOR_ARRAY);
REG_PROTO_VAR_TYPE_TRAIT(platform::PlaceList, proto::VarType::PLACE_LIST);
REG_PROTO_VAR_TYPE_TRAIT(ReaderHolder, proto::VarType::READER);
REG_PROTO_VAR_TYPE_TRAIT(int, proto::VarType::INT32);
REG_PROTO_VAR_TYPE_TRAIT(float, proto::VarType::FP32);

/** End of variable type registration */

template <typename T>
inline constexpr bool IsRegisteredVarType() {
  return VarTypeRegistry::IsRegistered<T>();
}

#undef REG_PROTO_VAR_TYPE_TRAIT
}  // namespace framework
}  // namespace paddle
paddle/fluid/framework/var_type_traits_test.cc
0 → 100644
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include <gtest/gtest.h>
#include <cstdint>
#include <iostream>
#include <unordered_set>
#include "paddle/fluid/framework/lod_rank_table.h"
#include "paddle/fluid/framework/reader.h"
#include "paddle/fluid/framework/scope.h"
#include "paddle/fluid/framework/selected_rows.h"
#include "paddle/fluid/framework/var_type_traits.h"
#include "paddle/fluid/operators/reader/lod_tensor_blocking_queue.h"
#ifdef PADDLE_WITH_CUDA
#ifndef _WIN32
#include "paddle/fluid/operators/nccl/nccl_gpu_common.h"
#endif
#include "paddle/fluid/operators/conv_cudnn_op_cache.h"
#include "paddle/fluid/operators/cudnn_rnn_cache.h"
#endif
namespace paddle {
namespace framework {

template <int kPos, int kEnd, bool kStop>
struct TypeIndexChecker {
  template <typename SetType1, typename SetType2>
  static void Check(SetType1 *var_id_set, SetType2 *type_index_set) {
    using Type =
        typename std::tuple_element<kPos, VarTypeRegistry::ArgTuple>::type;
    static_assert(std::is_same<typename VarTypeTrait<Type>::Type, Type>::value,
                  "Type must be the same");
    constexpr auto kId = VarTypeTrait<Type>::kId;
    std::type_index actual_type(typeid(Type));
    EXPECT_EQ(std::string(ToTypeName(kId)), std::string(actual_type.name()));
    EXPECT_EQ(ToTypeIndex(kId), actual_type);
    EXPECT_EQ(ToTypeId(actual_type), kId);
    EXPECT_EQ(ToTypeIndex(ToTypeId(actual_type)), actual_type);
    EXPECT_EQ(ToTypeId(ToTypeIndex(kId)), kId);

    EXPECT_TRUE(var_id_set->count(kId) == 0);               // NOLINT
    EXPECT_TRUE(type_index_set->count(actual_type) == 0);   // NOLINT
    var_id_set->insert(kId);
    type_index_set->insert(std::type_index(typeid(Type)));

    TypeIndexChecker<kPos + 1, kEnd, kPos + 1 == kEnd>::Check(var_id_set,
                                                              type_index_set);
  }
};

template <int kPos, int kEnd>
struct TypeIndexChecker<kPos, kEnd, true> {
  template <typename SetType1, typename SetType2>
  static void Check(SetType1 *, SetType2 *) {}
};

TEST(var_type_traits, check_no_duplicate_registry) {
  constexpr size_t kRegisteredNum = VarTypeRegistry::kRegisteredTypeNum;
  std::unordered_set<int> var_id_set;
  std::unordered_set<std::type_index> type_index_set;
  TypeIndexChecker<0, kRegisteredNum, kRegisteredNum == 0>::Check(
      &var_id_set, &type_index_set);
}

template <typename T>
bool CheckVarId(int proto_id) {
  static_assert(std::is_same<typename VarTypeTrait<T>::Type, T>::value,
                "Type must be the same");
  return VarTypeTrait<T>::kId == proto_id;
}

TEST(var_type_traits, check_proto_type_id) {
  ASSERT_TRUE(CheckVarId<LoDTensor>(proto::VarType::LOD_TENSOR));
  ASSERT_TRUE(CheckVarId<SelectedRows>(proto::VarType::SELECTED_ROWS));
  ASSERT_TRUE(CheckVarId<std::vector<Scope *>>(proto::VarType::STEP_SCOPES));
  ASSERT_TRUE(CheckVarId<LoDRankTable>(proto::VarType::LOD_RANK_TABLE));
  ASSERT_TRUE(CheckVarId<LoDTensorArray>(proto::VarType::LOD_TENSOR_ARRAY));
  ASSERT_TRUE(CheckVarId<platform::PlaceList>(proto::VarType::PLACE_LIST));
  ASSERT_TRUE(CheckVarId<ReaderHolder>(proto::VarType::READER));
  ASSERT_TRUE(CheckVarId<int>(proto::VarType::INT32));
  ASSERT_TRUE(CheckVarId<float>(proto::VarType::FP32));

  ASSERT_EQ(proto::VarType_Type_LOD_TENSOR, proto::VarType::LOD_TENSOR);
  ASSERT_EQ(proto::VarType_Type_SELECTED_ROWS, proto::VarType::SELECTED_ROWS);
  ASSERT_EQ(proto::VarType_Type_STEP_SCOPES, proto::VarType::STEP_SCOPES);
  ASSERT_EQ(proto::VarType_Type_LOD_RANK_TABLE, proto::VarType::LOD_RANK_TABLE);
  ASSERT_EQ(proto::VarType_Type_LOD_TENSOR_ARRAY,
            proto::VarType::LOD_TENSOR_ARRAY);
  ASSERT_EQ(proto::VarType_Type_PLACE_LIST, proto::VarType::PLACE_LIST);
  ASSERT_EQ(proto::VarType_Type_READER, proto::VarType::READER);
  ASSERT_EQ(proto::VarType_Type_FEED_MINIBATCH, proto::VarType::FEED_MINIBATCH);
  ASSERT_EQ(proto::VarType_Type_FETCH_LIST, proto::VarType::FETCH_LIST);
  ASSERT_EQ(proto::VarType_Type_RAW, proto::VarType::RAW);
  ASSERT_EQ(proto::VarType_Type_TUPLE, proto::VarType::TUPLE);
  ASSERT_EQ(proto::VarType_Type_INT32, proto::VarType::INT32);
  ASSERT_EQ(proto::VarType_Type_FP32, proto::VarType::FP32);
}

TEST(var_type_traits, test_registry) {
  using Registry = detail::VarTypeRegistryImpl<int8_t, int32_t, size_t, double>;
  ASSERT_TRUE(Registry::TypePos<int8_t>() == 0);
  ASSERT_TRUE(Registry::TypePos<int32_t>() == 1);
  ASSERT_TRUE(Registry::TypePos<size_t>() == 2);
  ASSERT_TRUE(Registry::TypePos<double>() == 3);
  ASSERT_TRUE(Registry::TypePos<float>() == -1);
}

}  // namespace framework
}  // namespace paddle
paddle/fluid/framework/variable.h
...
...
@@ -18,7 +18,7 @@
 #include <typeindex>
 #include <typeinfo>

-#include "paddle/fluid/platform/enforce.h"
+#include "paddle/fluid/framework/var_type_traits.h"

 namespace paddle {
 namespace framework {
@@ -27,10 +27,14 @@ class Variable {
  public:
   template <typename T>
   const T& Get() const {
+    static_assert(
+        IsRegisteredVarType<T>(),
+        "Not registered type. Please register T inside var_type_traits.h");
     PADDLE_ENFORCE(holder_ != nullptr, "Variable must hold some thing");
-    PADDLE_ENFORCE(IsType<T>(),
+    PADDLE_ENFORCE(holder_->Type() == VarTypeTrait<T>::kId,
                    "Variable must be type %s, the holding type is %s",
-                   typeid(T).name(), holder_->Type().name());
+                   ToTypeName(VarTypeTrait<T>::kId), ToTypeName(holder_->Type()));
     return *static_cast<const T*>(holder_->Ptr());
   }
@@ -39,61 +43,61 @@ class Variable {
   template <typename T>
   T* GetMutable() {
     if (!holder_) {
-      holder_.reset(new PlaceholderImpl<T>(new T()));
+      holder_.reset(new PlaceholderImpl<T>());
     } else {
-      PADDLE_ENFORCE(IsType<T>(),
+      PADDLE_ENFORCE(holder_->Type() == VarTypeTrait<T>::kId,
                      "Variable must be type %s, the holding type is %s",
-                     typeid(T).name(), holder_->Type().name());
+                     ToTypeName(VarTypeTrait<T>::kId), ToTypeName(holder_->Type()));
     }
     return static_cast<T*>(holder_->Ptr());
   }

   template <typename T>
   bool IsType() const {
-    return holder_ != nullptr &&
-           std::type_index(typeid(T)) == std::type_index(holder_->Type());
+    return holder_ && holder_->Type() == VarTypeTrait<T>::kId;
   }

   void Clear() { holder_.reset(); }

-  std::type_index Type() const {
+  int Type() const {
     PADDLE_ENFORCE(holder_ != nullptr, "Must hold memory");
     return holder_->Type();
   }

  private:
   struct Placeholder {
-    virtual ~Placeholder() {}
-    virtual const std::type_info& Type() const = 0;
-    virtual void* Ptr() const = 0;
+    virtual ~Placeholder() = default;
+
+    inline int Type() const { return type_; }
+    inline const void* Ptr() const { return ptr_; }
+    inline void* Ptr() { return ptr_; }
+
+   protected:
+    inline void Init(void* p, int type) {
+      ptr_ = p;
+      type_ = type;
+    }
+
+    void* ptr_;
+    int type_;
   };

   // Placeholder hides type T, so it doesn't appear as a template
   // parameter of Variable.
   template <typename T>
   struct PlaceholderImpl : public Placeholder {
-    explicit PlaceholderImpl(T* ptr) : ptr_(ptr), type_(typeid(T)) {}
-
-    virtual const std::type_info& Type() const { return type_; }
-    virtual void* Ptr() const { return static_cast<void*>(ptr_.get()); }
-
-    std::unique_ptr<T> ptr_;
-    const std::type_info& type_;
+    static_assert(
+        IsRegisteredVarType<T>(),
+        "Not registered type. Please register T inside var_type_traits.h");
+    PlaceholderImpl() { this->Init(&obj_, VarTypeTrait<T>::kId); }
+
+   private:
+    T obj_;
   };

-  std::unique_ptr<Placeholder>
-      holder_;  // pointers to a PlaceholderImpl object indeed.
-
   // name_ is only meaningful with a Scope and accessible by it.
   //
   // NOTE: Please don't expose name_ by adding methods like
   // Variable::Name or Scope::VarName! A variable could have a human
   // readable name or an auto-generated scope-unique name. In the
   // former case, the caller knows the name and doesn't need to access
   // the name; in the latter case, the variable should be identified
   // by its address but not the unreadable name.
   friend class Scope;
   const std::string* name_;
+
+  // pointers to a PlaceholderImpl object indeed.
+  std::unique_ptr<Placeholder> holder_;
 };

 }  // namespace framework
...
...
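(Annotation.) A minimal usage sketch of the new contract, assuming a registered type such as LoDTensor: GetMutable<T>() now default-constructs the payload in place on first use, and mixing types on the same Variable fails with the readable names from ToTypeName instead of mangled typeid names.

// Hedged sketch; mirrors the pattern in variable_test.cc below.
std::unique_ptr<paddle::framework::Variable> v(new paddle::framework::Variable());
auto* t = v->GetMutable<paddle::framework::LoDTensor>();  // creates the LoDTensor in-place
bool is_lod = v->IsType<paddle::framework::LoDTensor>();  // true: plain integer id comparison
// v->GetMutable<paddle::framework::SelectedRows>();      // would trip the type enforce above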
paddle/fluid/framework/variable_test.cc
...
...
@@ -16,27 +16,28 @@
 #include <string>
 #include "gtest/gtest.h"
 #include "paddle/fluid/framework/tensor.h"
 #include "paddle/fluid/framework/variable.h"

-TEST(Variable, GetMutable) {
-  using paddle::framework::Variable;
-
-  struct Tensor {
-    int content_;
-  };
+namespace paddle {
+namespace framework {

+TEST(Variable, GetMutable) {
   std::unique_ptr<Variable> v(new Variable());

-  Tensor* t = v->GetMutable<Tensor>();
-  t->content_ = 1234;
+  auto* t = v->GetMutable<std::string>();
+  *t = "1234";

-  const Tensor& tt = v->Get<Tensor>();
-  EXPECT_EQ(1234, tt.content_);
+  const auto& tt = v->Get<std::string>();
+  EXPECT_EQ("1234", tt);

   try {
-    v->GetMutable<std::string>();
+    v->GetMutable<Tensor>();
   } catch (std::exception& e) {
     return;
   }
   EXPECT_TRUE(false);
 }

+}  // namespace framework
+}  // namespace paddle
paddle/fluid/imperative/layer.cc
...
...
@@ -42,12 +42,12 @@ void AddTo(Variable* src, Variable* dst) {
 class Autograd {
  public:
-  explicit Autograd(framework::Scope* scope) : scope_(scope) {}
+  Autograd() {}

   void RunBackward(VarBase* var) {
-    PADDLE_ENFORCE(var->pre_op_->op_desc_);
-    // TODO(panyx0718): Only create for vars that "require_grad"
-    (*var->pre_op_->output_vars_)[var->pre_op_out_idx_]->grads_ = var->grads_;
     if (var->stop_gradient_) {
       return;
     }

     std::deque<OpBase*> ready;
     ready.push_back(var->pre_op_);
@@ -57,21 +57,25 @@ class Autograd {
     while (!ready.empty()) {
       OpBase* ready_op = ready.front();
       ready.pop_front();
-      std::vector<Variable*> input_grads = ready_op->ApplyGrad(scope_);
-
-      for (size_t i = 0; i < input_grads.size(); ++i) {
-        if (!input_grads[i]) continue;
-        if (ready_op->input_vars_->at(i)->stop_gradient_) {
-          continue;
-        }
-        OpBase* pre_op = ready_op->pre_ops_->at(i);
-        if (!pre_op) continue;
-
-        dep_counts[pre_op] -= 1;
-        PADDLE_ENFORCE(dep_counts[pre_op] >= 0);
-        bool pre_op_ready = dep_counts[pre_op] == 0;
-        if (pre_op_ready) {
-          ready.push_back(pre_op);
+      std::map<std::string, std::vector<VarBase*>> input_grads =
+          ready_op->ApplyGrad();
+
+      for (auto it : input_grads) {
+        const std::vector<VarBase*>& ingrads = it.second;
+        for (size_t i = 0; i < ingrads.size(); ++i) {
+          if (!ingrads[i]) continue;
+          if (ready_op->input_vars_[it.first][i]->stop_gradient_) {
+            continue;
+          }
+          OpBase* pre_op = ready_op->pre_ops_[it.first][i];
+          if (!pre_op) continue;
+
+          dep_counts[pre_op] -= 1;
+          PADDLE_ENFORCE(dep_counts[pre_op] >= 0);
+          bool pre_op_ready = dep_counts[pre_op] == 0;
+          if (pre_op_ready) {
+            ready.push_back(pre_op);
+          }
         }
       }
     }
@@ -88,92 +92,41 @@ class Autograd {
     while (!queue.empty()) {
       OpBase* candidate = queue.front();
       queue.pop_front();
-      for (OpBase* pre_op : *(candidate->pre_ops_)) {
-        if (!pre_op) continue;
-        if (visited.find(pre_op) == visited.end()) {
-          visited.insert(pre_op);
-          queue.push_back(pre_op);
+      for (auto it : candidate->pre_ops_) {
+        for (OpBase* pre_op : it.second) {
+          if (!pre_op) continue;
+          if (visited.find(pre_op) == visited.end()) {
+            visited.insert(pre_op);
+            queue.push_back(pre_op);
+          }
+          ret[pre_op] += 1;
         }
-        ret[pre_op] += 1;
       }
     }
     return ret;
   }
-
-  framework::Scope* scope_;
 };

-framework::Variable* CreateVariable(const std::string& name,
-                                    const framework::DDim& dim, float val,
-                                    framework::Scope* scope,
-                                    bool random_name = false) {
-  std::string varname = name;
-  if (random_name) {
-    std::mt19937 rng;
-    rng.seed(std::random_device()());
-    std::uniform_int_distribution<std::mt19937::result_type> dist6(
-        1, std::numeric_limits<int>::max());
-    int id = dist6(rng);
-    varname = string::Sprintf("%s@%d", varname, id);
-  }
-  VLOG(3) << "creating var " << varname;
-  framework::Variable* var = scope->Var(varname);
-  framework::LoDTensor* tensor = var->GetMutable<framework::LoDTensor>();
-  float* data = tensor->mutable_data<float>(dim, platform::CPUPlace());
-  std::fill(data, data + tensor->numel(), val);
-  return var;
-}
-
 framework::LoDTensor& VarBase::Grad() {
   VLOG(3) << "get var grad " << var_desc_->Name();
   return *grads_->GetMutable<framework::LoDTensor>();
 }

-void VarBase::ApplyGrad(framework::Scope* scope, Variable* grad) {
-  PADDLE_ENFORCE(grad->IsInitialized(), "grad %s must be initialized",
-                 var_desc_->Name());
-  PADDLE_ENFORCE(grad->Get<framework::LoDTensor>().IsInitialized(),
-                 "variable %s has NO gradient, please set stop_gradient to it",
-                 var_desc_->Name());
-  VLOG(3) << "apply var grad " << var_desc_->Name() << " "
-          << grad->Get<framework::LoDTensor>().data<float>()[0];
-  if (!grads_) {
-    grads_ = CreateVariable(string::Sprintf("%s@IGrad", var_desc_->Name()),
-                            var_->Get<framework::LoDTensor>().dims(), 0.0, scope);
-  }
-  AddTo(grad, grads_);
-  VLOG(3) << "grad_ after apply var grad " << var_desc_->Name() << " "
-          << grads_->Get<framework::LoDTensor>().data<float>()[0];
-}
-
-std::vector<Variable*> OpBase::ApplyGrad(framework::Scope* scope) {
-  VLOG(3) << "op grad type: " << grad_op_desc_->Type();
-
-  for (const std::string& grad_invar : grad_op_desc_->InputArgumentNames()) {
-    if (grad_to_var_->find(grad_invar) == grad_to_var_->end()) {
-      // grad op inputs can be forward inputs, so not in grad_to_var.
-      continue;
-    }
-    VLOG(3) << "op grad input var " << grad_invar;
-    framework::VarDesc& grad_invar_desc =
-        block_->FindRecursiveOrCreateVar(grad_invar);
-    framework::Variable* var = scope->Var(grad_invar);
-    const std::string& invar = grad_to_var_->at(grad_invar);
-    for (VarBase* varbase : *output_vars_) {
-      // Use the accumulated grads_ by sharing the input with grads_.
-      if (varbase->var_desc_->Name() == invar) {
-        var->GetMutable<framework::LoDTensor>()->ShareDataWith(
-            varbase->grads_->Get<framework::LoDTensor>());
-        break;
-      }
-    }
-    grad_invar_desc.SetShape(
-        framework::vectorize(var->Get<framework::LoDTensor>().dims()));
+std::map<std::string, std::vector<VarBase*>> OpBase::ApplyGrad() {
+  if (!grad_op_desc_) {
+    VLOG(3) << "op with no grad: " << op_desc_->Type();
+    return {};
+  }
+  VLOG(3) << "op grad " << grad_op_desc_->Type();
+
+  std::vector<std::unique_ptr<framework::Variable>> tmp_vars;
+  std::map<std::string, std::vector<framework::Variable*>> grad_outputs;
+  for (auto it : grad_output_vars_) {
+    auto& outputs = grad_outputs[it.first];
+    for (size_t i = 0; i < it.second.size(); ++i) {
+      tmp_vars.emplace_back(new framework::Variable());
+      outputs.push_back(tmp_vars.back().get());
+      outputs.back()->GetMutable<framework::LoDTensor>();
+    }
+  }
@@ -182,62 +135,46 @@ std::vector<Variable*> OpBase::ApplyGrad(framework::Scope* scope) {
             << framework::vectorize(var->Get<framework::LoDTensor>().dims())
                    .size();
   }

-  for (const std::string& outvar : grad_op_desc_->OutputArgumentNames()) {
-    VLOG(3) << "op grad output var " << outvar;
-    block_->FindRecursiveOrCreateVar(outvar);
-    framework::Variable* var = scope->Var(outvar);
-    if (!var->IsInitialized()) {
-      VLOG(3) << "init op grad output var " << outvar;
-      framework::VarDesc* var_desc = block_->FindVar(outvar);
-      if (var_desc->GetType() == framework::proto::VarType::LOD_TENSOR) {
-        var->GetMutable<framework::LoDTensor>();
-      } else {
-        LOG(ERROR) << "tracer doesn't support yet";
-      }
-    }
-  }
+  framework::RuntimeContext ctx(grad_input_vars_, grad_outputs);

-  grad_op_desc_->InferShape(*block_);
+  // No need to do static infer shape here.
+  // grad_op_desc_->InferShape(*block_);
   grad_op_desc_->InferVarType(block_);
+
   std::unique_ptr<framework::OperatorBase> opbase =
       framework::OpRegistry::CreateOp(*grad_op_desc_);
-  opbase->Run(*scope, platform::CPUPlace());
-
-  // `ret` matches exactly with `input_vars_` of forward op.
-  std::vector<Variable*> ret;
-  for (size_t i = 0; i < input_vars_->size(); ++i) {
-    bool found = false;
-    VarBase* origin_var = (*input_vars_)[i];
-    for (const std::string& outvar : grad_op_desc_->OutputArgumentNames()) {
-      Variable* var = scope->FindVar(outvar);
-      std::string orig_var_name = grad_to_var_->at(outvar);
-      if (origin_var->var_desc_->Name() != orig_var_name ||
-          origin_var->stop_gradient_) {
-        continue;
-      }
-      VLOG(3) << "apply grad " << outvar << " with origin " << orig_var_name;
-      origin_var->ApplyGrad(scope, var);
-      found = true;
-      ret.push_back(var);
-      // TODO(panyx0718): There might be another outvar with the same name.
-      // In that case, it doesn't matter the first one or the second one is
-      // used.
-      break;
-    }
-    if (!found) {
-      ret.push_back(nullptr);
-    }
-  }
-  return ret;
+  framework::OperatorWithKernel* op_kernel =
+      dynamic_cast<framework::OperatorWithKernel*>(opbase.get());
+  PADDLE_ENFORCE_NOT_NULL(op_kernel, "only support op with kernel");
+
+  framework::Scope scope;
+  platform::CPUPlace place;
+  PreparedOp p = PreparedOp::Prepare(ctx, *op_kernel, place);
+  p.op.RuntimeInferShape(scope, place, ctx);
+  p.func(framework::ExecutionContext(p.op, scope, *p.dev_ctx, p.ctx));
+
+  for (auto it : grad_output_vars_) {
+    auto& outputs = grad_outputs[it.first];
+    auto& origin_outputs = it.second;
+    for (size_t i = 0; i < outputs.size(); ++i) {
+      framework::Variable* orig_grad = origin_outputs[i];
+      AddTo(outputs[i], orig_grad);
+    }
+  }
+  return input_vars_;
 }

-void VarBase::RunBackward(framework::Scope* scope) {
-  grads_ = CreateVariable(framework::GradVarName(var_desc_->Name()),
-                          var_->Get<framework::LoDTensor>().dims(), 1.0, scope,
-                          false);
-  if (!pre_op_) return;
-  Autograd(scope).RunBackward(this);
+void VarBase::RunBackward() {
+  if (!pre_op_) return;
+
+  auto grads_t = grads_->GetMutable<framework::LoDTensor>();
+  float* data = grads_t->mutable_data<float>(platform::CPUPlace());
+  std::fill(data, data + grads_t->numel(), 1.0);
+
+  PADDLE_ENFORCE(
+      grads_ ==
+      pre_op_->output_vars_[pre_op_out_name_][pre_op_out_idx_]->grads_);
+  Autograd().RunBackward(this);
 }

 }  // namespace imperative
...
...
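(Annotation.) The backward walk above relies on an element-wise accumulate, AddTo, whose definition sits above the first hunk and is not shown in this diff. A hedged, CPU-only sketch consistent with how it is called here (float LoDTensor payloads, matching element counts):

// Sketch only: the real AddTo in layer.cc may differ in details.
void AddToSketch(paddle::framework::Variable* src, paddle::framework::Variable* dst) {
  auto& src_t = src->Get<paddle::framework::LoDTensor>();
  auto* dst_t = dst->GetMutable<paddle::framework::LoDTensor>();
  PADDLE_ENFORCE(dst_t->numel() == src_t.numel());
  float* dst_data = dst_t->mutable_data<float>(paddle::platform::CPUPlace());
  const float* src_data = src_t.data<float>();
  for (int64_t i = 0; i < src_t.numel(); ++i) dst_data[i] += src_data[i];
}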
paddle/fluid/imperative/layer.h
...
...
@@ -14,17 +14,69 @@

 #pragma once

+#include <map>
 #include <string>
 #include <vector>

 #include "paddle/fluid/framework/op_desc.h"
+#include "paddle/fluid/framework/operator.h"
 #include "paddle/fluid/framework/scope.h"
 #include "paddle/fluid/framework/var_desc.h"
 #include "paddle/fluid/platform/enforce.h"

 namespace paddle {
 namespace imperative {

+class PreparedOp {
+ public:
+  PreparedOp(const framework::OperatorBase& op,
+             const framework::RuntimeContext& ctx,
+             framework::OperatorWithKernel::OpKernelFunc func,
+             platform::DeviceContext* dev_ctx)
+      : op(op), ctx(ctx), func(func), dev_ctx(dev_ctx) {}
+
+  static PreparedOp Prepare(const framework::RuntimeContext& ctx,
+                            const framework::OperatorWithKernel& op,
+                            const platform::Place& place) {
+    platform::DeviceContextPool& pool = platform::DeviceContextPool::Instance();
+    auto* dev_ctx = pool.Get(place);
+
+    // check if op[type] has kernel registered.
+    auto& all_op_kernels = op.AllOpKernels();
+    auto kernels_iter = all_op_kernels.find(op.Type());
+    if (kernels_iter == all_op_kernels.end()) {
+      PADDLE_THROW(
+          "There are no kernels which are registered in the %s operator.",
+          op.Type());
+    }
+
+    framework::OperatorWithKernel::OpKernelMap& kernels = kernels_iter->second;
+
+    auto expected_kernel_key = op.GetExpectedKernelType(
+        framework::ExecutionContext(op, framework::Scope(), *dev_ctx, ctx));
+    VLOG(3) << "expected_kernel_key:" << expected_kernel_key;
+
+    auto kernel_iter = kernels.find(expected_kernel_key);
+#ifdef PADDLE_WITH_MKLDNN
+    // workaround for missing MKLDNN kernel when FLAGS_use_mkldnn env var is set
+    if (kernel_iter == kernels.end() &&
+        expected_kernel_key.library_type_ == framework::LibraryType::kMKLDNN) {
+      VLOG(3) << "missing MKLDNN kernel: fallbacking to PLAIN one";
+      expected_kernel_key.library_type_ = framework::LibraryType::kPlain;
+      expected_kernel_key.data_layout_ = framework::DataLayout::kAnyLayout;
+      kernel_iter = kernels.find(expected_kernel_key);
+    }
+#endif
+    if (kernel_iter == kernels.end()) {
+      PADDLE_THROW("op %s does not have kernel for %s", op.Type(),
+                   KernelTypeToString(expected_kernel_key));
+    }
+    return PreparedOp(op, ctx, kernel_iter->second, dev_ctx);
+  }
+
+  const framework::OperatorBase& op;
+  const framework::RuntimeContext& ctx;
+  framework::OperatorWithKernel::OpKernelFunc func;
+  platform::DeviceContext* dev_ctx;
+};
+
 class OpBase;

 class VarBase {
@@ -33,15 +85,22 @@ class VarBase {
       : pre_op_(nullptr),
         pre_op_out_idx_(-1),
         var_desc_(nullptr),
-        var_(nullptr),
-        grads_(nullptr),
+        var_(new framework::Variable()),
+        grads_(new framework::Variable()),
         stop_gradient_(stop_gradient) {}

-  virtual ~VarBase() {}
-
-  void ApplyGrad(framework::Scope* scope, framework::Variable* grad);
+  virtual ~VarBase() {
+    if (var_) {
+      delete var_;
+      var_ = nullptr;
+    }
+    if (grads_) {
+      delete grads_;
+      grads_ = nullptr;
+    }
+  }

-  void RunBackward(framework::Scope* scope);
+  void RunBackward();

   framework::LoDTensor& Grad();
@@ -55,6 +114,7 @@ class VarBase {
   }

   OpBase* pre_op_;
+  std::string pre_op_out_name_;
   int pre_op_out_idx_;
   framework::VarDesc* var_desc_;
@@ -66,36 +126,24 @@ class VarBase {
 class OpBase {
  public:
-  OpBase()
-      : input_vars_(new std::vector<VarBase*>()),
-        output_vars_(new std::vector<VarBase*>()),
-        pre_ops_(new std::vector<OpBase*>()),
-        pre_ops_out_idx_(new std::vector<int>()),
-        op_desc_(nullptr),
-        grad_op_desc_(nullptr),
-        grad_to_var_(nullptr) {}
+  OpBase() : op_desc_(nullptr), grad_op_desc_(nullptr) {}

   virtual ~OpBase() {
-    delete input_vars_;
-    delete output_vars_;
-
-    delete pre_ops_;
-    delete pre_ops_out_idx_;
-
     if (grad_op_desc_) delete grad_op_desc_;
-    if (grad_to_var_) delete grad_to_var_;
   }

-  std::vector<framework::Variable*> ApplyGrad(framework::Scope* scope);
+  std::map<std::string, std::vector<VarBase*>> ApplyGrad();

-  std::vector<VarBase*>* input_vars_;
-  std::vector<VarBase*>* output_vars_;
-  std::vector<OpBase*>* pre_ops_;
-  std::vector<int>* pre_ops_out_idx_;
   framework::OpDesc* op_desc_;
   framework::OpDesc* grad_op_desc_;
-  std::unordered_map<std::string, std::string>* grad_to_var_;
+
+  std::map<std::string, std::vector<VarBase*>> input_vars_;
+  std::map<std::string, std::vector<VarBase*>> output_vars_;
+  std::map<std::string, std::vector<OpBase*>> pre_ops_;
+  std::map<std::string, std::vector<int>> pre_ops_out_idx_;
+
+  std::map<std::string, std::vector<framework::Variable*>> grad_input_vars_;
+  std::map<std::string, std::vector<framework::Variable*>> grad_output_vars_;
+
+  framework::BlockDesc* block_;
 };
...
...
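(Annotation.) The map-keyed members mirror the operator's named input and output slots, so gradient bookkeeping no longer depends on positional indices. A hedged, illustrative sketch of what the tracer ends up recording for a single binary op; the variable names are made up for the example and x, y, out stand for VarBase* objects created by the caller:

paddle::imperative::OpBase op;
op.input_vars_["X"] = {x};
op.input_vars_["Y"] = {y};
op.output_vars_["Out"] = {out};
// producer of x (or nullptr for a leaf), keyed by the same slot name:
op.pre_ops_["X"] = {x->pre_op_};
op.pre_ops_out_idx_["X"] = {x->pre_op_out_idx_};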
paddle/fluid/imperative/tracer.h
...
...
@@ -20,7 +20,6 @@
 #include "paddle/fluid/framework/op_desc.h"
 #include "paddle/fluid/framework/op_registry.h"
 #include "paddle/fluid/framework/scope.h"
-#include "paddle/fluid/imperative/engine.h"
 #include "paddle/fluid/imperative/layer.h"
@@ -41,16 +40,26 @@ void CreateGradOp(const framework::OpDesc& op_desc,
   *grad_op_desc = grad_op_descs[0].release();
 }

+void InitVar(framework::Variable* var, framework::Variable* grad_var) {
+  auto& var_t = var->Get<framework::LoDTensor>();
+  float* data =
+      grad_var->GetMutable<framework::LoDTensor>()->mutable_data<float>(
+          var_t.dims(), platform::CPUPlace());
+  std::fill(data, data + var_t.numel(), 0.0);
+}
+
 class Tracer {
  public:
-  explicit Tracer(framework::BlockDesc* root_block)
-      : root_scope_(new framework::Scope()) {}
+  explicit Tracer(framework::BlockDesc* root_block) : root_block_(root_block) {}

   virtual ~Tracer() {}

-  void Trace(OpBase* op, const std::vector<VarBase*>& inputs,
-             const std::vector<VarBase*>& outputs, framework::BlockDesc* block,
-             const bool stop_gradient) {
+  void Trace(OpBase* op,
+             const std::map<std::string, std::vector<VarBase*>>& inputs,
+             const std::map<std::string, std::vector<VarBase*>>& outputs,
+             framework::BlockDesc* block, const bool stop_gradient) {
+    std::map<std::string, VarBase*> vars;
+
     framework::OpDesc* op_desc = op->op_desc_;
     VLOG(3) << "tracer tracing " << op_desc->Type();
     op_desc->InferShape(*block);
@@ -58,65 +67,112 @@ class Tracer {
     std::unique_ptr<framework::OperatorBase> op_base =
         framework::OpRegistry::CreateOp(*op_desc);

-    *op->input_vars_ = inputs;
-    for (VarBase* input : inputs) {
-      const std::string vname = input->var_desc_->Name();
-      framework::Variable* var = root_scope_->Var(vname);
-      input->var_ = var;
-      if (!var->IsInitialized()) {
-        framework::VarDesc* var_desc = block->FindVar(vname);
-        if (var_desc->GetType() == framework::proto::VarType::LOD_TENSOR) {
-          var->GetMutable<framework::LoDTensor>();
-        } else {
-          LOG(ERROR) << "tracer doesn't support yet";
-        }
-      }
-      if (input->pre_op_) {
-        op->pre_ops_->push_back(input->pre_op_);
-        op->pre_ops_out_idx_->push_back(input->pre_op_out_idx_);
-      } else {
-        op->pre_ops_->push_back(nullptr);
-      }
-      VLOG(3) << "input vname " << vname << " "
-              << var->Get<framework::LoDTensor>().dims().size();
-    }
+    framework::VariableValueMap invars_map;
+    framework::VariableValueMap outvars_map;
+
+    op->input_vars_ = inputs;
+    for (auto it : op->input_vars_) {
+      auto& invars = invars_map[it.first];
+      for (VarBase* inp : it.second) {
+        PADDLE_ENFORCE_NOT_NULL(inp->var_, "op %s input %s nullptr",
+                                op->op_desc_->Type(), inp->var_desc_->Name());
+        invars.push_back(inp->var_);
+        vars[inp->var_desc_->Name()] = inp;
+        if (inp->pre_op_) {
+          op->pre_ops_[it.first].push_back(inp->pre_op_);
+          op->pre_ops_out_idx_[it.first].push_back(inp->pre_op_out_idx_);
+        } else {
+          op->pre_ops_[it.first].push_back(nullptr);
+        }
+        VLOG(3) << "input vname " << inp->var_desc_->Name() << " "
+                << inp->var_->IsInitialized();
+      }
+    }

-    *op->output_vars_ = outputs;
-    for (size_t i = 0; i < outputs.size(); ++i) {
-      const std::string vname = outputs[i]->var_desc_->Name();
-      framework::Variable* var = root_scope_->Var(vname);
-      if (!var->IsInitialized()) {
-        framework::VarDesc* var_desc = block->FindVar(vname);
-        if (var_desc->GetType() == framework::proto::VarType::LOD_TENSOR) {
-          var->GetMutable<framework::LoDTensor>();
-        } else {
-          LOG(ERROR) << "tracer doesn't support yet";
-        }
-      }
-      outputs[i]->stop_gradient_ = stop_gradient;
-      outputs[i]->var_ = var;
-      outputs[i]->pre_op_ = op;
-      outputs[i]->pre_op_out_idx_ = i;
-    }
+    op->output_vars_ = outputs;
+    for (auto it : op->output_vars_) {
+      auto& outvars = outvars_map[it.first];
+      const std::vector<VarBase*>& outputs = it.second;
+      for (size_t i = 0; i < outputs.size(); ++i) {
+        VarBase* out = outputs[i];
+        outvars.push_back(out->var_);
+        vars[out->var_desc_->Name()] = out;
+
+        framework::VarDesc* var_desc = block->FindVar(out->var_desc_->Name());
+        if (var_desc->GetType() == framework::proto::VarType::LOD_TENSOR) {
+          out->var_->GetMutable<framework::LoDTensor>();
+        } else {
+          LOG(ERROR) << "tracer doesn't support yet";
+        }
+        out->stop_gradient_ = stop_gradient;
+        out->pre_op_ = op;
+        out->pre_op_out_name_ = it.first;
+        out->pre_op_out_idx_ = i;
+
+        VLOG(3) << "output vname " << out->var_desc_->Name() << " "
+                << out->var_->IsInitialized();
+      }
+    }

     VLOG(3) << "tracer running " << op_desc->Type();
-    op_base->Run(*root_scope_, platform::CPUPlace());
+    framework::RuntimeContext ctx(invars_map, outvars_map);
+
+    // TODO(panyx0718): Cache p.
+    framework::OperatorWithKernel* op_kernel =
+        dynamic_cast<framework::OperatorWithKernel*>(op_base.get());
+    PADDLE_ENFORCE_NOT_NULL(op_kernel, "only support op with kernel");
+
+    framework::Scope scope;
+    platform::CPUPlace place;
+    PreparedOp p = PreparedOp::Prepare(ctx, *op_kernel, place);
+    p.op.RuntimeInferShape(scope, place, ctx);
+    p.func(framework::ExecutionContext(p.op, scope, *p.dev_ctx, p.ctx));
+
     if (!stop_gradient) {
       framework::OpDesc* grad_op_desc;
       auto grad_to_var = new std::unordered_map<std::string, std::string>();
       CreateGradOp(*op_desc, {}, {block}, &grad_op_desc, grad_to_var);
       op->grad_op_desc_ = grad_op_desc;
-      op->grad_to_var_ = grad_to_var;
+
+      VLOG(3) << "tracer create grad op " << grad_op_desc->Type();
+      for (auto it : grad_op_desc->Inputs()) {
+        auto& grad_in_vars = op->grad_input_vars_[it.first];
+        for (const std::string& grad_invar : it.second) {
+          block->FindRecursiveOrCreateVar(grad_invar);
+          auto var_it = grad_to_var->find(grad_invar);
+          if (var_it == grad_to_var->end()) {
+            auto fwd_var_it = vars.find(grad_invar);
+            PADDLE_ENFORCE(fwd_var_it != vars.end());
+            grad_in_vars.push_back(fwd_var_it->second->var_);
+          } else {
+            VarBase* var = vars[var_it->second];
+            if (!var->grads_->IsInitialized()) {
+              InitVar(var->var_, var->grads_);
+            }
+            grad_in_vars.push_back(var->grads_);
+          }
+        }
+      }
+
+      for (auto it : grad_op_desc->Outputs()) {
+        auto& grad_out_vars = op->grad_output_vars_[it.first];
+        for (const std::string& grad_outvar : it.second) {
+          block->FindRecursiveOrCreateVar(grad_outvar);
+          auto var_it = grad_to_var->find(grad_outvar);
+          PADDLE_ENFORCE(var_it != grad_to_var->end());
+          VarBase* var = vars[var_it->second];
+          if (!var->grads_->IsInitialized()) {
+            InitVar(var->var_, var->grads_);
+          }
+          grad_out_vars.push_back(var->grads_);
+        }
+      }
     }
+
     op->block_ = block;
   }

-  framework::Scope* GetScope() { return root_scope_.get(); }
-
  private:
-  std::unique_ptr<framework::Scope> root_scope_;
+  framework::BlockDesc* root_block_;
 };

 }  // namespace imperative
...
...
paddle/fluid/inference/analysis/argument.h
...
...
@@ -127,6 +127,7 @@ struct Argument {
                        std::function<bool(const framework::ir::Node*)>);
   DECL_ARGUMENT_FIELD(tensorrt_max_batch_size, TensorRtMaxBatchSize, int);
   DECL_ARGUMENT_FIELD(tensorrt_workspace_size, TensorRtWorkspaceSize, int);
+  DECL_ARGUMENT_FIELD(tensorrt_min_subgraph_size, TensorRtMinSubgraphSize, int);

   // The program transformed by IR analysis phase.
   DECL_ARGUMENT_UNIQUE_FIELD(ir_analyzed_program, IrAnalyzedProgram,
...
paddle/fluid/inference/analysis/ir_pass_manager.cc
...
...
@@ -75,6 +75,8 @@ void IRPassManager::CreatePasses(Argument *argument,
                 argument->tensorrt_node_teller_ptr());
       pass->Set("workspace_size", new int(argument->tensorrt_workspace_size()));
       pass->Set("max_batch_size", new int(argument->tensorrt_max_batch_size()));
+      pass->Set("min_subgraph_size",
+                new int(argument->tensorrt_min_subgraph_size()));
     }

     // graph_ = pass->Apply(std::move(graph_));
...
...
paddle/fluid/inference/analysis/ir_passes/tensorrt_subgraph_pass.cc
...
...
@@ -12,12 +12,14 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.

+#include "paddle/fluid/inference/analysis/ir_passes/tensorrt_subgraph_pass.h"
+#include <algorithm>
 #include <string>
 #include <vector>

 #include "paddle/fluid/framework/ir/graph_pattern_detector.h"
 #include "paddle/fluid/inference/analysis/helper.h"
 #include "paddle/fluid/inference/analysis/ir_passes/subgraph_detector.h"
-#include "paddle/fluid/inference/analysis/ir_passes/tensorrt_subgraph_pass.h"

 namespace paddle {
 namespace inference {
@@ -36,7 +38,8 @@ std::unique_ptr<framework::ir::Graph> analysis::TensorRtSubgraphPass::ApplyImpl(
   auto teller =
       Get<SubgraphDetector::NodeInsideSubgraphTeller>("tensorrt_node_teller");

-  SubGraphFuser fuser(graph.get(), teller, 2 /*min subgraph size*/);
+  SubGraphFuser fuser(graph.get(), teller,
+                      Get<int>("min_subgraph_size") /*min subgraph size*/);
   fuser();

   for (auto *node : graph->Nodes()) {
@@ -197,10 +200,26 @@ void TensorRtSubgraphPass::CreateTensorRTOp(framework::ir::Node *node,
 std::vector<std::string> ExtractParameters(
     const std::unordered_set<Node *> &nodes) {
+  // We can judge whether a variable is a parameter by
+  // its presistable property, but sometimes the presistable
+  // of the feed op output is true, so we have to identify it.
+  std::vector<std::string> feed_outputs;
+  for (const auto &node : nodes) {
+    if (!node->IsOp()) continue;
+    std::string op_type = node->Op()->Type();
+    if (op_type == "feed") {
+      std::vector<std::string> output_names = node->Op()->OutputArgumentNames();
+      std::copy(output_names.begin(), output_names.end(),
+                std::back_inserter(feed_outputs));
+    }
+  }
+
   std::vector<std::string> parameters;
   for (const auto &node : nodes) {
     if (!node->IsVar()) continue;
-    if (node->Var()->Persistable()) {
+    if (node->Var()->Persistable() &&
+        std::find(feed_outputs.begin(), feed_outputs.end(), node->Name()) ==
+            feed_outputs.end()) {
       parameters.push_back(node->Name());
     }
   }
@@ -215,4 +234,5 @@ REGISTER_PASS(tensorrt_subgraph_pass,
               paddle::inference::analysis::TensorRtSubgraphPass)
     .RequirePassAttr("tensorrt_node_teller")
     .RequirePassAttr("max_batch_size")
-    .RequirePassAttr("workspace_size");
+    .RequirePassAttr("workspace_size")
+    .RequirePassAttr("min_subgraph_size");
paddle/fluid/inference/api/analysis_config.cc
...
...
@@ -57,6 +57,7 @@ contrib::AnalysisConfig::AnalysisConfig(const contrib::AnalysisConfig &other) {
   use_tensorrt_ = other.use_tensorrt_;
   tensorrt_max_batchsize_ = other.tensorrt_max_batchsize_;
   tensorrt_workspace_size_ = other.tensorrt_workspace_size_;
+  tensorrt_min_subgraph_size_ = other.tensorrt_min_subgraph_size_;
   model_from_memory_ = other.model_from_memory_;

   if (use_gpu) {
@@ -89,6 +90,7 @@ contrib::AnalysisConfig::AnalysisConfig(contrib::AnalysisConfig &&other) {
   use_tensorrt_ = other.use_tensorrt_;
   tensorrt_max_batchsize_ = other.tensorrt_max_batchsize_;
   tensorrt_workspace_size_ = other.tensorrt_workspace_size_;
+  tensorrt_min_subgraph_size_ = other.tensorrt_min_subgraph_size_;
   model_from_memory_ = other.model_from_memory_;

   pass_builder_ = std::move(other.pass_builder_);
@@ -105,12 +107,14 @@ void contrib::AnalysisConfig::EnableMKLDNN() {
 }

 void contrib::AnalysisConfig::EnableTensorRtEngine(int workspace_size,
-                                                   int max_batch_size) {
+                                                   int max_batch_size,
+                                                   int min_subgraph_size) {
   use_tensorrt_ = true;
   tensorrt_workspace_size_ = workspace_size;
   tensorrt_max_batchsize_ = max_batch_size;
-  // Append after the infer_clean pass.
-  pass_builder()->InsertPass(1, "tensorrt_subgraph_pass");
+  tensorrt_min_subgraph_size_ = min_subgraph_size;
+  // Append after the conv+affine_channel fuse pass.
+  pass_builder()->InsertPass(3, "tensorrt_subgraph_pass");
 }

 void contrib::AnalysisConfig::SetModelBuffer(const char *prog_buffer,
...
...
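(Annotation.) With the extra parameter, callers can tune how small a fused TensorRT subgraph may be. A hedged usage sketch; construction and model-loading details are omitted, and the defaults come from paddle_analysis_config.h further down:

contrib::AnalysisConfig config;
config.EnableTensorRtEngine(1 << 20 /*workspace_size*/, 1 /*max_batch_size*/,
                            3 /*min_subgraph_size*/);
// Calling config.EnableTensorRtEngine() with no arguments is equivalent,
// since the declared defaults are 1 << 20, 1 and 3 respectively.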
paddle/fluid/inference/api/analysis_predictor.cc
...
...
@@ -328,6 +328,7 @@ void AnalysisPredictor::OptimizeInferenceProgram() {
     argument_.SetUseTensorRT(true);
     argument_.SetTensorRtWorkspaceSize(config_.tensorrt_workspace_size_);
     argument_.SetTensorRtMaxBatchSize(config_.tensorrt_max_batchsize_);
+    argument_.SetTensorRtMinSubgraphSize(config_.tensorrt_min_subgraph_size_);
   }

   if (config_.use_mkldnn_) {
...
...
paddle/fluid/inference/api/details/reset_tensor_array.cc
...
...
@@ -25,7 +25,7 @@ void TensorArrayBatchCleaner::CollectTensorArrays(framework::Scope *scope) {
       // TODO(Superjomn) should avoid the case when a TensorArray is a
       // parameter.
       if (var_name == "feed" || var_name == "fetch") continue;
-      if (var->Type() == typeid(framework::LoDTensorArray)) {
+      if (var->IsType<framework::LoDTensorArray>()) {
        VLOG(4) << "collect " << var_name;
        arrays_.push_back(var->GetMutable<framework::LoDTensorArray>());
      }
...
paddle/fluid/inference/api/details/reset_tensor_array.h
...
...
@@ -27,8 +27,11 @@ namespace details {
 // training phase.
 struct TensorArrayBatchCleaner {
   TensorArrayBatchCleaner() {
-    valid_types_.insert(typeid(framework::Tensor));
-    valid_types_.insert(typeid(framework::LoDTensor));
+    constexpr auto kTensorId = framework::VarTypeTrait<framework::Tensor>::kId;
+    constexpr auto kLoDTensorId =
+        framework::VarTypeTrait<framework::LoDTensor>::kId;
+    valid_types_.insert(kTensorId);
+    valid_types_.insert(kLoDTensorId);
   }
   // Collect the variables that are not Tensor or LoDTensor, and reset them to a
   // bool(trick), because some of them are containers, and some operators just
@@ -46,7 +49,7 @@ struct TensorArrayBatchCleaner {
   bool no_tensor_flag_{true};
   std::vector<framework::LoDTensorArray *> arrays_;

-  std::unordered_set<std::type_index> valid_types_;
+  std::unordered_set<int> valid_types_;
   std::unordered_set<framework::Variable *> no_tensor_vars_;
 };
...
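(Annotation.) Storing VarTypeTrait ids instead of std::type_index turns the membership test into a plain integer lookup against Variable::Type(). A hedged sketch of the kind of check the collector can now perform inside a TensorArrayBatchCleaner method; `var` stands for a variable fetched from the scope:

framework::Variable *var = /* obtained from the scope */ nullptr;
if (var && !valid_types_.count(var->Type())) {
  // Neither Tensor nor LoDTensor: remember it so it can be reset per batch.
  no_tensor_vars_.insert(var);
}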
paddle/fluid/inference/api/helper.h
...
...
@@ -113,6 +113,16 @@ static void TensorAssignData(PaddleTensor *tensor,
   }
 }

+template <typename T>
+static void TensorAssignData(PaddleTensor *tensor,
+                             const std::vector<std::vector<T>> &data,
+                             const std::vector<size_t> &lod) {
+  int size = lod[lod.size() - 1];
+  tensor->shape.assign({size, 1});
+  tensor->lod.assign({lod});
+  TensorAssignData(tensor, data);
+}
+
 template <typename T>
 static int ZeroCopyTensorAssignData(ZeroCopyTensor *tensor,
                                     const std::vector<std::vector<T>> &data) {
...
...
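(Annotation.) The new overload derives the tensor shape from the last LoD offset instead of making every test compute it by hand. A hedged usage sketch mirroring the analyzer tests changed below; the data values are placeholders:

PaddleTensor word_tensor;
word_tensor.name = "word";
word_tensor.dtype = PaddleDType::INT64;
std::vector<std::vector<int64_t>> batch = {{1, 2, 3}, {4, 5}};
std::vector<size_t> lod = {0, 3, 5};                  // lod.back() == total token count
TensorAssignData<int64_t>(&word_tensor, batch, lod);  // shape becomes {5, 1}, lod is set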
paddle/fluid/inference/api/paddle_analysis_config.h
...
...
@@ -49,7 +49,7 @@ struct AnalysisConfig : public NativeConfig {
   bool use_feed_fetch_ops{true};

   void EnableTensorRtEngine(int workspace_size = 1 << 20,
-                            int max_batch_size = 1);
+                            int max_batch_size = 1, int min_subgraph_size = 3);
   bool use_tensorrt() const { return use_tensorrt_; }

   void EnableMKLDNN();
@@ -69,8 +69,19 @@ struct AnalysisConfig : public NativeConfig {
   bool use_tensorrt_{false};
   bool use_mkldnn_{false};
   std::unordered_set<std::string> mkldnn_enabled_op_types_;
+  // For workspace_size, refer it from here:
+  // https://docs.nvidia.com/deeplearning/sdk/tensorrt-developer-guide/index.html#troubleshooting
   int tensorrt_workspace_size_;
+  // While TensorRT allows an engine optimized for a given max batch size
+  // to run at any smaller size, the performance for those smaller
+  // sizes may not be as well-optimized. Therefore, Max batch is best
+  // equivalent to the runtime batch size.
   int tensorrt_max_batchsize_;
+  // We transform the Ops that can be converted into TRT layer in the model,
+  // and aggregate these Ops into subgraphs for TRT execution.
+  // We set this variable to control the minimum number of nodes in the
+  // subgraph, 3 as default value.
+  int tensorrt_min_subgraph_size_{3};
   std::unique_ptr<PassStrategy> pass_builder_;
   bool model_from_memory_{false};
 };
...
...
paddle/fluid/inference/api/paddle_pass_builder.h
...
...
@@ -118,11 +118,13 @@ class GpuPassStrategy : public PassStrategy {
  public:
   GpuPassStrategy() : PassStrategy({}) {
     passes_.assign({
-        "infer_clean_graph_pass",                    //
-        "conv_bn_fuse_pass",                         //
-        "conv_elementwise_add_act_fuse_pass",        //
-        "conv_elementwise_add2_act_fuse_pass",       //
-        "conv_elementwise_add_fuse_pass",            //
+        "infer_clean_graph_pass",                    //
+        "conv_affine_channel_fuse_pass",             //
+        "conv_eltwiseadd_affine_channel_fuse_pass",  //
+        "conv_bn_fuse_pass",                         //
+        "conv_elementwise_add_act_fuse_pass",        //
+        "conv_elementwise_add2_act_fuse_pass",       //
+        "conv_elementwise_add_fuse_pass",            //
     });
   }
...
...
paddle/fluid/inference/tests/api/analyzer_lac_tester.cc
...
...
@@ -98,10 +98,8 @@ void GetOneBatch(std::vector<PaddleTensor> *input_slots, DataRecord *data,
   auto one_batch = data->NextBatch();
   PaddleTensor input_tensor;
   input_tensor.name = "word";
-  input_tensor.shape.assign({static_cast<int>(one_batch.data.size()), 1});
-  input_tensor.lod.assign({one_batch.lod});
   input_tensor.dtype = PaddleDType::INT64;
-  TensorAssignData<int64_t>(&input_tensor, {one_batch.data});
+  TensorAssignData<int64_t>(&input_tensor, {one_batch.data}, one_batch.lod);
   PADDLE_ENFORCE_EQ(batch_size, static_cast<int>(one_batch.lod.size() - 1));
   input_slots->assign({input_tensor});
 }
...
...
paddle/fluid/inference/tests/api/analyzer_mm_dnn_tester.cc
...
...
@@ -19,11 +19,9 @@ namespace inference {
 using contrib::AnalysisConfig;

 struct DataRecord {
-  std::vector<std::vector<int64_t>> query_data_all, title_data_all;
+  std::vector<std::vector<int64_t>> query, title;
   std::vector<size_t> lod1, lod2;
-  size_t batch_iter{0};
-  size_t batch_size{1};
-  size_t num_samples;  // total number of samples
+  size_t batch_iter{0}, batch_size{1}, num_samples;  // total number of samples
   DataRecord() = default;
   explicit DataRecord(const std::string &path, int batch_size = 1)
       : batch_size(batch_size) {
@@ -33,22 +31,9 @@ struct DataRecord {
     DataRecord data;
     size_t batch_end = batch_iter + batch_size;
     // NOTE skip the final batch, if no enough data is provided.
-    if (batch_end <= query_data_all.size()) {
-      data.query_data_all.assign(query_data_all.begin() + batch_iter,
-                                 query_data_all.begin() + batch_end);
-      data.title_data_all.assign(title_data_all.begin() + batch_iter,
-                                 title_data_all.begin() + batch_end);
-      // Prepare LoDs
-      data.lod1.push_back(0);
-      data.lod2.push_back(0);
-      CHECK(!data.query_data_all.empty());
-      CHECK(!data.title_data_all.empty());
-      CHECK_EQ(data.query_data_all.size(), data.title_data_all.size());
-      for (size_t j = 0; j < data.query_data_all.size(); j++) {
-        // calculate lod
-        data.lod1.push_back(data.lod1.back() + data.query_data_all[j].size());
-        data.lod2.push_back(data.lod2.back() + data.title_data_all[j].size());
-      }
+    if (batch_end <= query.size()) {
+      GetInputPerBatch(query, &data.query, &data.lod1, batch_iter, batch_end);
+      GetInputPerBatch(title, &data.title, &data.lod2, batch_iter, batch_end);
     }
     batch_iter += batch_size;
     return data;
@@ -67,8 +52,8 @@ struct DataRecord {
       // load title data
       std::vector<int64_t> title_data;
       split_to_int64(data[1], ' ', &title_data);
-      query_data_all.push_back(std::move(query_data));
-      title_data_all.push_back(std::move(title_data));
+      query.push_back(std::move(query_data));
+      title.push_back(std::move(title_data));
     }
     num_samples = num_lines;
   }
@@ -80,15 +65,9 @@ void PrepareInputs(std::vector<PaddleTensor> *input_slots, DataRecord *data,
   lod_query_tensor.name = "left";
   lod_title_tensor.name = "right";
   auto one_batch = data->NextBatch();
-  int size1 = one_batch.lod1[one_batch.lod1.size() - 1];  // token batch size
-  int size2 = one_batch.lod2[one_batch.lod2.size() - 1];  // token batch size
-  lod_query_tensor.shape.assign({size1, 1});
-  lod_query_tensor.lod.assign({one_batch.lod1});
-  lod_title_tensor.shape.assign({size2, 1});
-  lod_title_tensor.lod.assign({one_batch.lod2});
   // assign data
-  TensorAssignData<int64_t>(&lod_query_tensor, one_batch.query_data_all);
-  TensorAssignData<int64_t>(&lod_title_tensor, one_batch.title_data_all);
+  TensorAssignData<int64_t>(&lod_query_tensor, one_batch.query, one_batch.lod1);
+  TensorAssignData<int64_t>(&lod_title_tensor, one_batch.title, one_batch.lod2);
   // Set inputs.
   input_slots->assign({lod_query_tensor, lod_title_tensor});
   for (auto &tensor : *input_slots) {
...
...
paddle/fluid/inference/tests/api/analyzer_ner_tester.cc
...
...
@@ -19,11 +19,9 @@ namespace inference {
 using contrib::AnalysisConfig;

 struct DataRecord {
-  std::vector<std::vector<int64_t>> word_data_all, mention_data_all;
+  std::vector<std::vector<int64_t>> word, mention;
   std::vector<size_t> lod;  // two inputs have the same lod info.
-  size_t batch_iter{0};
-  size_t batch_size{1};
-  size_t num_samples;  // total number of samples
+  size_t batch_iter{0}, batch_size{1}, num_samples;  // total number of samples
   DataRecord() = default;
   explicit DataRecord(const std::string &path, int batch_size = 1)
       : batch_size(batch_size) {
@@ -33,20 +31,10 @@ struct DataRecord {
     DataRecord data;
     size_t batch_end = batch_iter + batch_size;
     // NOTE skip the final batch, if no enough data is provided.
-    if (batch_end <= word_data_all.size()) {
-      data.word_data_all.assign(word_data_all.begin() + batch_iter,
-                                word_data_all.begin() + batch_end);
-      data.mention_data_all.assign(mention_data_all.begin() + batch_iter,
-                                   mention_data_all.begin() + batch_end);
-      // Prepare LoDs
-      data.lod.push_back(0);
-      CHECK(!data.word_data_all.empty());
-      CHECK(!data.mention_data_all.empty());
-      CHECK_EQ(data.word_data_all.size(), data.mention_data_all.size());
-      for (size_t j = 0; j < data.word_data_all.size(); j++) {
-        // calculate lod
-        data.lod.push_back(data.lod.back() + data.word_data_all[j].size());
-      }
+    if (batch_end <= word.size()) {
+      GetInputPerBatch(word, &data.word, &data.lod, batch_iter, batch_end);
+      GetInputPerBatch(mention, &data.mention, &data.lod, batch_iter,
+                       batch_end);
     }
     batch_iter += batch_size;
     return data;
@@ -65,8 +53,8 @@ struct DataRecord {
       // load mention data
       std::vector<int64_t> mention_data;
       split_to_int64(data[3], ' ', &mention_data);
-      word_data_all.push_back(std::move(word_data));
-      mention_data_all.push_back(std::move(mention_data));
+      word.push_back(std::move(word_data));
+      mention.push_back(std::move(mention_data));
     }
     num_samples = num_lines;
   }
@@ -78,14 +66,10 @@ void PrepareInputs(std::vector<PaddleTensor> *input_slots, DataRecord *data,
   lod_word_tensor.name = "word";
   lod_mention_tensor.name = "mention";
   auto one_batch = data->NextBatch();
-  int size = one_batch.lod[one_batch.lod.size() - 1];  // token batch size
-  lod_word_tensor.shape.assign({size, 1});
-  lod_word_tensor.lod.assign({one_batch.lod});
-  lod_mention_tensor.shape.assign({size, 1});
-  lod_mention_tensor.lod.assign({one_batch.lod});
   // assign data
-  TensorAssignData<int64_t>(&lod_word_tensor, one_batch.word_data_all);
-  TensorAssignData<int64_t>(&lod_mention_tensor, one_batch.mention_data_all);
+  TensorAssignData<int64_t>(&lod_word_tensor, one_batch.word, one_batch.lod);
+  TensorAssignData<int64_t>(&lod_mention_tensor, one_batch.mention,
+                            one_batch.lod);
   // Set inputs.
   input_slots->assign({lod_word_tensor, lod_mention_tensor});
   for (auto &tensor : *input_slots) {
...
...
paddle/fluid/inference/tests/api/analyzer_seq_conv1_tester.cc
...
...
@@ -18,12 +18,9 @@ namespace paddle {
 namespace inference {

 struct DataRecord {
-  std::vector<std::vector<int64_t>> title1_all, title2_all, title3_all, l1_all;
   std::vector<std::vector<int64_t>> title1, title2, title3, l1;
-  std::vector<size_t> title1_lod, title2_lod, title3_lod, l1_lod;
-  size_t batch_iter{0};
-  size_t batch_size{1};
-  size_t num_samples;  // total number of samples
+  std::vector<size_t> lod1, lod2, lod3, l1_lod;
+  size_t batch_iter{0}, batch_size{1}, num_samples;  // total number of samples
   DataRecord() = default;
   explicit DataRecord(const std::string &path, int batch_size = 1)
       : batch_size(batch_size) {
@@ -33,41 +30,11 @@ struct DataRecord {
     DataRecord data;
     size_t batch_end = batch_iter + batch_size;
     // NOTE skip the final batch, if no enough data is provided.
-    if (batch_end <= title1_all.size()) {
-      data.title1_all.assign(title1_all.begin() + batch_iter,
-                             title1_all.begin() + batch_end);
-      data.title2_all.assign(title2_all.begin() + batch_iter,
-                             title2_all.begin() + batch_end);
-      data.title3_all.assign(title3_all.begin() + batch_iter,
-                             title3_all.begin() + batch_end);
-      data.l1_all.assign(l1_all.begin() + batch_iter, l1_all.begin() + batch_end);
-      // Prepare LoDs
-      data.title1_lod.push_back(0);
-      data.title2_lod.push_back(0);
-      data.title3_lod.push_back(0);
-      data.l1_lod.push_back(0);
-      CHECK(!data.title1_all.empty());
-      CHECK(!data.title2_all.empty());
-      CHECK(!data.title3_all.empty());
-      CHECK(!data.l1_all.empty());
-      CHECK_EQ(data.title1_all.size(), data.title2_all.size());
-      CHECK_EQ(data.title1_all.size(), data.title3_all.size());
-      CHECK_EQ(data.title1_all.size(), data.l1_all.size());
-      for (size_t j = 0; j < data.title1_all.size(); j++) {
-        data.title1.push_back(data.title1_all[j]);
-        data.title2.push_back(data.title2_all[j]);
-        data.title3.push_back(data.title3_all[j]);
-        data.l1.push_back(data.l1_all[j]);
-        // calculate lod
-        data.title1_lod.push_back(data.title1_lod.back() +
-                                  data.title1_all[j].size());
-        data.title2_lod.push_back(data.title2_lod.back() +
-                                  data.title2_all[j].size());
-        data.title3_lod.push_back(data.title3_lod.back() +
-                                  data.title3_all[j].size());
-        data.l1_lod.push_back(data.l1_lod.back() + data.l1_all[j].size());
-      }
+    if (batch_end <= title1.size()) {
+      GetInputPerBatch(title1, &data.title1, &data.lod1, batch_iter, batch_end);
+      GetInputPerBatch(title2, &data.title2, &data.lod2, batch_iter, batch_end);
+      GetInputPerBatch(title3, &data.title3, &data.lod3, batch_iter, batch_end);
+      GetInputPerBatch(l1, &data.l1, &data.l1_lod, batch_iter, batch_end);
     }
     batch_iter += batch_size;
     return data;
@@ -92,10 +59,10 @@ struct DataRecord {
       // load l1 data
       std::vector<int64_t> l1_data;
       split_to_int64(data[3], ' ', &l1_data);
-      title1_all.push_back(std::move(title1_data));
-      title2_all.push_back(std::move(title2_data));
-      title3_all.push_back(std::move(title3_data));
-      l1_all.push_back(std::move(l1_data));
+      title1.push_back(std::move(title1_data));
+      title2.push_back(std::move(title2_data));
+      title3.push_back(std::move(title3_data));
+      l1.push_back(std::move(l1_data));
     }
     num_samples = num_lines;
   }
@@ -109,24 +76,11 @@ void PrepareInputs(std::vector<PaddleTensor> *input_slots, DataRecord *data,
   title3_tensor.name = "title3";
   l1_tensor.name = "l1";
   auto one_batch = data->NextBatch();
-  int title1_size = one_batch.title1_lod[one_batch.title1_lod.size() - 1];
-  title1_tensor.shape.assign({title1_size, 1});
-  title1_tensor.lod.assign({one_batch.title1_lod});
-  int title2_size = one_batch.title2_lod[one_batch.title2_lod.size() - 1];
-  title2_tensor.shape.assign({title2_size, 1});
-  title2_tensor.lod.assign({one_batch.title2_lod});
-  int title3_size = one_batch.title3_lod[one_batch.title3_lod.size() - 1];
-  title3_tensor.shape.assign({title3_size, 1});
-  title3_tensor.lod.assign({one_batch.title3_lod});
-  int l1_size = one_batch.l1_lod[one_batch.l1_lod.size() - 1];
-  l1_tensor.shape.assign({l1_size, 1});
-  l1_tensor.lod.assign({one_batch.l1_lod});
   // assign data
-  TensorAssignData<int64_t>(&title1_tensor, one_batch.title1);
-  TensorAssignData<int64_t>(&title2_tensor, one_batch.title2);
-  TensorAssignData<int64_t>(&title3_tensor, one_batch.title3);
-  TensorAssignData<int64_t>(&l1_tensor, one_batch.l1);
+  TensorAssignData<int64_t>(&title1_tensor, one_batch.title1, one_batch.lod1);
+  TensorAssignData<int64_t>(&title2_tensor, one_batch.title2, one_batch.lod2);
+  TensorAssignData<int64_t>(&title3_tensor, one_batch.title3, one_batch.lod3);
+  TensorAssignData<int64_t>(&l1_tensor, one_batch.l1, one_batch.l1_lod);
   // Set inputs.
   input_slots->assign({title1_tensor, title2_tensor, title3_tensor, l1_tensor});
   for (auto &tensor : *input_slots) {
...
...
paddle/fluid/inference/tests/api/tester_helper.h
...
...
@@ -176,6 +176,18 @@ void SetFakeImageInput(std::vector<std::vector<PaddleTensor>> *inputs,
   (*inputs).emplace_back(input_slots);
 }

+void GetInputPerBatch(const std::vector<std::vector<int64_t>> &in,
+                      std::vector<std::vector<int64_t>> *out,
+                      std::vector<size_t> *lod, size_t batch_iter,
+                      size_t batch_end) {
+  lod->clear();
+  lod->push_back(0);
+  for (auto it = in.begin() + batch_iter; it < in.begin() + batch_end; it++) {
+    out->push_back(*it);
+    lod->push_back(lod->back() + (*it).size());  // calculate lod
+  }
+}
+
 void TestOneThreadPrediction(
     const PaddlePredictor::Config *config,
     const std::vector<std::vector<PaddleTensor>> &inputs,
...
...
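As a side note (not part of the patch): the GetInputPerBatch helper added above copies the sequences in [batch_iter, batch_end) and accumulates their lengths into LoD offsets. A minimal, self-contained sketch of that behaviour, with illustrative names only, is:

#include <cstddef>
#include <cstdint>
#include <iostream>
#include <vector>

// Illustrative stand-in for the helper added to tester_helper.h above.
void GetInputPerBatchDemo(const std::vector<std::vector<int64_t>> &in,
                          std::vector<std::vector<int64_t>> *out,
                          std::vector<size_t> *lod, size_t batch_iter,
                          size_t batch_end) {
  lod->clear();
  lod->push_back(0);
  for (auto it = in.begin() + batch_iter; it < in.begin() + batch_end; it++) {
    out->push_back(*it);
    lod->push_back(lod->back() + it->size());  // running LoD offsets
  }
}

int main() {
  std::vector<std::vector<int64_t>> all = {{1, 2, 3}, {4}, {5, 6}};
  std::vector<std::vector<int64_t>> batch;
  std::vector<size_t> lod;
  GetInputPerBatchDemo(all, &batch, &lod, 0, 2);
  for (size_t v : lod) std::cout << v << " ";  // prints: 0 3 4
  return 0;
}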
paddle/fluid/operators/clip_by_norm_op.h
...
...
@@ -64,7 +64,7 @@ class ClipByNormKernel : public framework::OpKernel<T> {
       output->mutable_data<T>(context.GetPlace());
     } else {
       PADDLE_THROW("Unexpected branch, input variable type is %s",
-                   in_var->Type().name());
+                   framework::ToTypeName(in_var->Type()));
     }
     PADDLE_ENFORCE_NOT_NULL(input);
...
...
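The clip_by_norm change above, and the many operator changes further down, all swap the old typeid-based error text (in_var->Type().name()) for framework::ToTypeName() over an enum-style variable type. A dependency-free sketch of that idea, with made-up names (DemoVarType/DemoToTypeName are not Paddle APIs), is:

#include <iostream>
#include <string>

// Hypothetical enum standing in for the new integer variable-type codes.
enum class DemoVarType { kLoDTensor, kSelectedRows };

// Stand-in for framework::ToTypeName(): enum -> readable name, replacing
// the old typeid(...).name() string in error messages.
std::string DemoToTypeName(DemoVarType t) {
  switch (t) {
    case DemoVarType::kLoDTensor:
      return "LoDTensor";
    case DemoVarType::kSelectedRows:
      return "SelectedRows";
  }
  return "Unknown";
}

int main() {
  std::cout << DemoToTypeName(DemoVarType::kSelectedRows) << "\n";
  return 0;
}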
paddle/fluid/operators/controlflow/while_op.cc
...
...
@@ -175,14 +175,13 @@ class WhileGradOp : public framework::OperatorBase {
       auto &og_inside =
           detail::Ref(cur_scope.Var(inside_og_name),
                       "Cannot find inside gradient %s", inside_og_name);
-      if (framework::IsType<framework::LoDTensor>(og_outside.Type())) {
+      if (og_outside.IsType<framework::LoDTensor>()) {
         auto &outside_tensor = og_outside.Get<framework::LoDTensor>();
         auto &inside_tensor =
             detail::Ref(og_inside.GetMutable<framework::LoDTensor>());
         inside_tensor.set_lod(outside_tensor.lod());
         inside_tensor.ShareDataWith(outside_tensor);
-      } else if (framework::IsType<framework::LoDTensorArray>(
-                     og_outside.Type())) {
+      } else if (og_outside.IsType<framework::LoDTensorArray>()) {
         auto &outside_array = og_outside.Get<framework::LoDTensorArray>();
         auto &inside_array =
             detail::Ref(og_inside.GetMutable<framework::LoDTensorArray>());
...
...
@@ -256,7 +255,7 @@ class WhileGradOp : public framework::OperatorBase {
               var->IsType<LoDTensor>(),
           "Currently the type of var only can be LoDTensorArray, "
           "or LoDTensor, but the received var[%s] is %s.",
-          inside_grad_name, var->Type().name());
+          inside_grad_name, framework::ToTypeName(var->Type()));

       if (var->IsType<LoDTensor>()) {
         auto &inside_tensor = var->Get<framework::LoDTensor>();
...
...
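The while_op hunk above moves from the free helper framework::IsType<T>(var.Type()) to the member check var.IsType<T>(). A rough, self-contained sketch of member-based type dispatch, using std::variant rather than Paddle's actual Variable (all names here are illustrative), is:

#include <iostream>
#include <utility>
#include <variant>
#include <vector>

struct DemoTensor { std::vector<float> data; };   // toy LoDTensor
using DemoTensorArray = std::vector<DemoTensor>;  // toy LoDTensorArray

// A tiny variable holder exposing a member IsType<T>(), mirroring the
// member-style check the patch switches to.
class DemoVariable {
 public:
  template <typename T>
  bool IsType() const { return std::holds_alternative<T>(holder_); }
  template <typename T>
  void Set(T v) { holder_ = std::move(v); }

 private:
  std::variant<std::monostate, DemoTensor, DemoTensorArray> holder_;
};

int main() {
  DemoVariable var;
  var.Set(DemoTensor{{1.f, 2.f}});
  if (var.IsType<DemoTensor>()) {
    std::cout << "tensor branch\n";
  } else if (var.IsType<DemoTensorArray>()) {
    std::cout << "tensor-array branch\n";
  }
  return 0;
}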
paddle/fluid/operators/conv_fusion_op.cu.cc
...
...
@@ -161,9 +161,7 @@ class CUDNNConvFusionOpKernel : public framework::OpKernel<T> {
     PADDLE_ENFORCE_LE(workspace_size_in_bytes, workspace_size_limit,
                       "workspace_size to be allocated exceeds the limit");

-    if ((activation == "identity") &&
-        (algo != CUDNN_CONVOLUTION_FWD_ALGO_IMPLICIT_PRECOMP_GEMM) &&
-        (!residual)) {
+    if ((activation == "identity") && (!residual)) {
// Only the CUDNN_CONVOLUTION_FWD_ALGO_IMPLICIT_PRECOMP_GEMM algo is
// enabled with CUDNN_ACTIVATION_IDENTITY in cuDNN lib.
// But test in some case, the speed is slower, change to use
...
...
paddle/fluid/operators/cudnn_lstm_op.cu.cc
...
...
@@ -13,8 +13,8 @@ See the License for the specific language governing permissions and
 limitations under the License. */

 #include "paddle/fluid/framework/op_registry.h"
+#include "paddle/fluid/operators/cudnn_rnn_cache.h"
 #include "paddle/fluid/operators/math/math_function.h"
-#include "paddle/fluid/platform/cudnn_helper.h"

 namespace paddle {
 namespace operators {
...
...
@@ -22,239 +22,6 @@ namespace operators {
 using LoDTensor = framework::LoDTensor;
 using Tensor = framework::Tensor;

-struct CudnnRNNCache {
-  CudnnRNNCache() {
-    x_desc_ = NULL;
-    y_desc_ = NULL;
-    dx_desc_ = NULL;
-    dy_desc_ = NULL;
-  }
-  ~CudnnRNNCache() { release(); }
-  ...
-  (the remaining removed lines are the cuDNN descriptor members and the
-  init()/release() methods of CudnnRNNCache; the same definition, with init()
-  taking a platform::Place instead of a framework::ExecutionContext, appears
-  in the new file paddle/fluid/operators/cudnn_rnn_cache.h below)
-};
-
 template <typename T>
 class CudnnLSTMGPUKernel : public framework::OpKernel<T> {
  public:
...
@@ -315,9 +82,9 @@ class CudnnLSTMGPUKernel : public framework::OpKernel<T> {
       auto input_w_numel = w->numel();
       auto batch_size = x->dims()[1];
-      cudnn_rnn_cache->init(handle, ctx, max_len, batch_size, input_size,
-                            hidden_size, num_layers, dropout_prob, is_bidirec,
-                            seed, input_w_numel);
+      cudnn_rnn_cache->init(handle, ctx.GetPlace(), max_len, batch_size,
+                            input_size, hidden_size, num_layers, dropout_prob,
+                            is_bidirec, seed, input_w_numel);
     }

     auto run_seq_len = x->dims()[0];
...
...
paddle/fluid/operators/cudnn_rnn_cache.h
0 → 100644
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include "paddle/fluid/framework/tensor.h"
#include "paddle/fluid/platform/cudnn_helper.h"
namespace paddle {
namespace operators {

struct CudnnRNNCache {
  CudnnRNNCache() {
    x_desc_ = NULL;
    y_desc_ = NULL;
    dx_desc_ = NULL;
    dy_desc_ = NULL;
  }
  ~CudnnRNNCache() { release(); }

  cudnnRNNDescriptor_t rnn_desc_;
  cudnnTensorDescriptor_t *x_desc_;
  cudnnTensorDescriptor_t *y_desc_;
  cudnnTensorDescriptor_t *dx_desc_;
  cudnnTensorDescriptor_t *dy_desc_;

  cudnnTensorDescriptor_t hx_desc_;
  cudnnTensorDescriptor_t cx_desc_;
  cudnnTensorDescriptor_t hy_desc_;
  cudnnTensorDescriptor_t cy_desc_;

  cudnnTensorDescriptor_t dhx_desc_;
  cudnnTensorDescriptor_t dcx_desc_;
  cudnnTensorDescriptor_t dhy_desc_;
  cudnnTensorDescriptor_t dcy_desc_;

  cudnnTensorDescriptor_t output_x_desc_;
  cudnnTensorDescriptor_t output_y_desc_;

  cudnnDropoutDescriptor_t dropout_desc_;

  size_t weights_size_;
  cudnnFilterDescriptor_t w_desc_;
  cudnnFilterDescriptor_t dw_desc_;

  size_t workspace_size_;
  size_t reserve_size_;
  framework::Tensor reserve_data_;
  framework::Tensor workspace_data_;

  framework::Tensor dropout_state_;

  size_t max_length_;

  float dropout_prob_;
  bool is_bidirec_;

  int batch_size_;
  int input_size_;
  int hidden_size_;
  int num_layers_;
  int seed_;

  void init(cudnnHandle_t handle, const platform::Place &place, size_t max_len,
            int batch_size, int input_size, int hidden_size, int num_layers,
            float dropout_prob, bool is_bidirec, int seed, int weight_numel) {
    max_length_ = max_len;
    batch_size_ = batch_size;
    input_size_ = input_size;
    hidden_size_ = hidden_size;
    num_layers_ = num_layers;
    dropout_prob_ = dropout_prob;
    is_bidirec_ = is_bidirec;
    seed_ = seed;

    x_desc_ = new cudnnTensorDescriptor_t[max_length_];
    y_desc_ = new cudnnTensorDescriptor_t[max_length_];
    dx_desc_ = new cudnnTensorDescriptor_t[max_length_];
    dy_desc_ = new cudnnTensorDescriptor_t[max_length_];
    int dim_a[3];
    int stride_a[3];

    for (size_t i = 0; i < max_length_; ++i) {
      CUDNN_ENFORCE(
          platform::dynload::cudnnCreateTensorDescriptor(&x_desc_[i]));
      CUDNN_ENFORCE(
          platform::dynload::cudnnCreateTensorDescriptor(&y_desc_[i]));
      CUDNN_ENFORCE(
          platform::dynload::cudnnCreateTensorDescriptor(&dx_desc_[i]));
      CUDNN_ENFORCE(
          platform::dynload::cudnnCreateTensorDescriptor(&dy_desc_[i]));
      dim_a[0] = batch_size_;
      dim_a[1] = input_size_;
      dim_a[2] = 1;

      stride_a[0] = dim_a[2] * dim_a[1];
      stride_a[1] = dim_a[2];
      stride_a[2] = 1;
      CUDNN_ENFORCE(platform::dynload::cudnnSetTensorNdDescriptor(
          x_desc_[i], CUDNN_DATA_FLOAT, 3, dim_a, stride_a));
      CUDNN_ENFORCE(platform::dynload::cudnnSetTensorNdDescriptor(
          dx_desc_[i], CUDNN_DATA_FLOAT, 3, dim_a, stride_a));

      dim_a[0] = batch_size_;
      dim_a[1] = is_bidirec_ ? hidden_size_ * 2 : hidden_size_;
      dim_a[2] = 1;

      stride_a[0] = dim_a[2] * dim_a[1];
      stride_a[1] = dim_a[2];
      stride_a[2] = 1;

      CUDNN_ENFORCE(platform::dynload::cudnnSetTensorNdDescriptor(
          y_desc_[i], CUDNN_DATA_FLOAT, 3, dim_a, stride_a));
      CUDNN_ENFORCE(platform::dynload::cudnnSetTensorNdDescriptor(
          dy_desc_[i], CUDNN_DATA_FLOAT, 3, dim_a, stride_a));
    }

    dim_a[0] = num_layers_ * (is_bidirec_ ? 2 : 1);
    dim_a[1] = batch_size_;
    dim_a[2] = hidden_size_;

    stride_a[0] = dim_a[2] * dim_a[1];
    stride_a[1] = dim_a[2];
    stride_a[2] = 1;

    CUDNN_ENFORCE(platform::dynload::cudnnCreateTensorDescriptor(&hx_desc_));
    CUDNN_ENFORCE(platform::dynload::cudnnCreateTensorDescriptor(&cx_desc_));
    CUDNN_ENFORCE(platform::dynload::cudnnCreateTensorDescriptor(&hy_desc_));
    CUDNN_ENFORCE(platform::dynload::cudnnCreateTensorDescriptor(&cy_desc_));
    CUDNN_ENFORCE(platform::dynload::cudnnCreateTensorDescriptor(&dhx_desc_));
    CUDNN_ENFORCE(platform::dynload::cudnnCreateTensorDescriptor(&dcx_desc_));
    CUDNN_ENFORCE(platform::dynload::cudnnCreateTensorDescriptor(&dhy_desc_));
    CUDNN_ENFORCE(platform::dynload::cudnnCreateTensorDescriptor(&dcy_desc_));

    CUDNN_ENFORCE(platform::dynload::cudnnSetTensorNdDescriptor(
        hx_desc_, CUDNN_DATA_FLOAT, 3, dim_a, stride_a));
    CUDNN_ENFORCE(platform::dynload::cudnnSetTensorNdDescriptor(
        cx_desc_, CUDNN_DATA_FLOAT, 3, dim_a, stride_a));
    CUDNN_ENFORCE(platform::dynload::cudnnSetTensorNdDescriptor(
        hy_desc_, CUDNN_DATA_FLOAT, 3, dim_a, stride_a));
    CUDNN_ENFORCE(platform::dynload::cudnnSetTensorNdDescriptor(
        cy_desc_, CUDNN_DATA_FLOAT, 3, dim_a, stride_a));
    CUDNN_ENFORCE(platform::dynload::cudnnSetTensorNdDescriptor(
        dhx_desc_, CUDNN_DATA_FLOAT, 3, dim_a, stride_a));
    CUDNN_ENFORCE(platform::dynload::cudnnSetTensorNdDescriptor(
        dcx_desc_, CUDNN_DATA_FLOAT, 3, dim_a, stride_a));
    CUDNN_ENFORCE(platform::dynload::cudnnSetTensorNdDescriptor(
        dhy_desc_, CUDNN_DATA_FLOAT, 3, dim_a, stride_a));
    CUDNN_ENFORCE(platform::dynload::cudnnSetTensorNdDescriptor(
        dcy_desc_, CUDNN_DATA_FLOAT, 3, dim_a, stride_a));

    CUDNN_ENFORCE(
        platform::dynload::cudnnCreateDropoutDescriptor(&dropout_desc_));

    size_t state_size;
    CUDNN_ENFORCE(platform::dynload::cudnnDropoutGetStatesSize(handle, &state_size);
                  dropout_state_.Resize({static_cast<int64_t>(state_size)}));
    auto *dropout_state_data = dropout_state_.mutable_data<uint8_t>(place);
    CUDNN_ENFORCE(platform::dynload::cudnnSetDropoutDescriptor(
        dropout_desc_, handle, dropout_prob_, dropout_state_data, state_size,
        seed_));

    CUDNN_ENFORCE(platform::dynload::cudnnCreateRNNDescriptor(&rnn_desc_));

#if CUDNN_VERSION >= 6000
    CUDNN_ENFORCE(platform::dynload::cudnnSetRNNDescriptor_v6(
        handle, rnn_desc_, hidden_size_, num_layers_, dropout_desc_,
        CUDNN_LINEAR_INPUT,
        is_bidirec_ ? CUDNN_BIDIRECTIONAL : CUDNN_UNIDIRECTIONAL, CUDNN_LSTM,
        CUDNN_RNN_ALGO_STANDARD, CUDNN_DATA_FLOAT));
#else
    CUDNN_ENFORCE(platform::dynload::cudnnSetRNNDescriptor(
        rnn_desc_, hidden_size_, num_layers_, dropout_desc_,
        CUDNN_LINEAR_INPUT,
        is_bidirec_ ? CUDNN_BIDIRECTIONAL : CUDNN_UNIDIRECTIONAL, CUDNN_LSTM,
        CUDNN_DATA_FLOAT));
#endif

    CUDNN_ENFORCE(platform::dynload::cudnnCreateFilterDescriptor(&w_desc_));
    CUDNN_ENFORCE(platform::dynload::cudnnCreateFilterDescriptor(&dw_desc_));

    CUDNN_ENFORCE(platform::dynload::cudnnGetRNNParamsSize(
        handle, rnn_desc_, x_desc_[0], &weights_size_, CUDNN_DATA_FLOAT));

    PADDLE_ENFORCE_EQ(weights_size_, sizeof(float) * weight_numel,
                      "cudnn lstm weight size should be SAME");
    int dim_w[3];
    dim_w[0] = weights_size_ / sizeof(float);
    dim_w[1] = 1;
    dim_w[2] = 1;
    CUDNN_ENFORCE(platform::dynload::cudnnSetFilterNdDescriptor(
        w_desc_, CUDNN_DATA_FLOAT, CUDNN_TENSOR_NCHW, 3, dim_w));
    CUDNN_ENFORCE(platform::dynload::cudnnSetFilterNdDescriptor(
        dw_desc_, CUDNN_DATA_FLOAT, CUDNN_TENSOR_NCHW, 3, dim_w));

    CUDNN_ENFORCE(platform::dynload::cudnnGetRNNWorkspaceSize(
        handle, rnn_desc_, max_length_, x_desc_, &workspace_size_));
    CUDNN_ENFORCE(platform::dynload::cudnnGetRNNTrainingReserveSize(
        handle, rnn_desc_, max_length_, x_desc_, &reserve_size_));

    reserve_data_.Resize({static_cast<int64_t>(reserve_size_)});
    reserve_data_.mutable_data<uint8_t>(place);

    workspace_data_.Resize({static_cast<int64_t>(workspace_size_)});
    workspace_data_.mutable_data<uint8_t>(place);
  }

  void release() {
    for (size_t i = 0; i < max_length_; ++i) {
      CUDNN_ENFORCE(
          platform::dynload::cudnnDestroyTensorDescriptor(x_desc_[i]));
      CUDNN_ENFORCE(
          platform::dynload::cudnnDestroyTensorDescriptor(y_desc_[i]));
      CUDNN_ENFORCE(
          platform::dynload::cudnnDestroyTensorDescriptor(dx_desc_[i]));
      CUDNN_ENFORCE(
          platform::dynload::cudnnDestroyTensorDescriptor(dy_desc_[i]));
    }

    delete[] x_desc_;
    delete[] y_desc_;
    delete[] dx_desc_;
    delete[] dy_desc_;

    CUDNN_ENFORCE(platform::dynload::cudnnDestroyTensorDescriptor(hx_desc_));
    CUDNN_ENFORCE(platform::dynload::cudnnDestroyTensorDescriptor(cx_desc_));
    CUDNN_ENFORCE(platform::dynload::cudnnDestroyTensorDescriptor(hy_desc_));
    CUDNN_ENFORCE(platform::dynload::cudnnDestroyTensorDescriptor(cy_desc_));
    CUDNN_ENFORCE(platform::dynload::cudnnDestroyTensorDescriptor(dhx_desc_));
    CUDNN_ENFORCE(platform::dynload::cudnnDestroyTensorDescriptor(dcx_desc_));
    CUDNN_ENFORCE(platform::dynload::cudnnDestroyTensorDescriptor(dhy_desc_));
    CUDNN_ENFORCE(platform::dynload::cudnnDestroyTensorDescriptor(dcy_desc_));

    CUDNN_ENFORCE(
        platform::dynload::cudnnDestroyDropoutDescriptor(dropout_desc_));
    CUDNN_ENFORCE(platform::dynload::cudnnDestroyRNNDescriptor(rnn_desc_));

    CUDNN_ENFORCE(platform::dynload::cudnnDestroyFilterDescriptor(w_desc_));
    CUDNN_ENFORCE(platform::dynload::cudnnDestroyFilterDescriptor(dw_desc_));
  }
};

}  // namespace operators
}  // namespace paddle
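The header above follows a create-in-init(), destroy-in-release() pattern for the per-timestep descriptor arrays. A self-contained sketch of that lifetime pattern, with plain ints standing in for cuDNN descriptors (no cuDNN dependency; DemoRNNCache is purely illustrative), is:

#include <cstddef>
#include <iostream>

struct DemoRNNCache {
  DemoRNNCache() : x_desc_(nullptr), max_length_(0) {}
  ~DemoRNNCache() { release(); }

  // Allocate one "descriptor" per time step, as CudnnRNNCache::init() does
  // for x_desc_/y_desc_/dx_desc_/dy_desc_.
  void init(size_t max_len) {
    max_length_ = max_len;
    x_desc_ = new int[max_length_];
    for (size_t i = 0; i < max_length_; ++i) x_desc_[i] = static_cast<int>(i);
  }

  // Mirror of release(): drop per-step resources, then the array itself.
  void release() {
    delete[] x_desc_;
    x_desc_ = nullptr;
  }

  int *x_desc_;
  size_t max_length_;
};

int main() {
  DemoRNNCache cache;
  cache.init(4);
  std::cout << "steps: " << cache.max_length_ << "\n";
  return 0;  // destructor calls release()
}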
paddle/fluid/operators/distributed_ops/split_ids_op.h
...
...
@@ -116,7 +116,7 @@ class SplitIdsOpKernel : public framework::OpKernel<T> {
     } else {
       PADDLE_THROW(
           "% should be LoDTensor or SelectedRows, but the received type is %s",
-          ctx.Inputs("Ids")[0], ids_var->Type().name());
+          ctx.Inputs("Ids")[0], framework::ToTypeName(ids_var->Type()));
     }
   }
 };
...
...
paddle/fluid/operators/elementwise/elementwise_mul_op.h
...
...
@@ -83,7 +83,7 @@ class ElementwiseMulKernel : public framework::OpKernel<T> {
       z = ctx.Output<framework::LoDTensor>("Out");
     } else {
       PADDLE_THROW("X's type[%s] is not supported by elementwise_op.",
-                   x_var->Type().name());
+                   framework::ToTypeName(x_var->Type()));
     }

     z->mutable_data<T>(ctx.GetPlace());
...
...
paddle/fluid/operators/fill_constant_op.cc
...
...
@@ -12,68 +12,40 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License. */

-#include "paddle/fluid/framework/data_type.h"
-#include "paddle/fluid/framework/op_registry.h"
-#include "paddle/fluid/operators/math/math_function.h"
+#include "paddle/fluid/operators/fill_constant_op.h"

 namespace paddle {
 namespace operators {

-class FillConstantInferShape : public framework::InferShapeBase {
+class FillConstantOp : public framework::OperatorWithKernel {
  public:
-  void operator()(framework::InferShapeContext *ctx) const override {
+  using framework::OperatorWithKernel::OperatorWithKernel;
+
+  void InferShape(framework::InferShapeContext *ctx) const override {
     PADDLE_ENFORCE(ctx->HasOutput("Out"),
                    "Output(Out) of FillConstantOp should not be null.");
     auto &shape = ctx->Attrs().Get<std::vector<int64_t>>("shape");
     ctx->SetOutputDim("Out", framework::make_ddim(shape));
   }
-};
-
-class FillConstantOp : public framework::OperatorBase {
- public:
-  using framework::OperatorBase::OperatorBase;
-
- private:
-  void RunImpl(const framework::Scope &scope,
-               const platform::Place &dev_place) const override {
-    auto data_type =
-        static_cast<framework::proto::VarType::Type>(Attr<int>("dtype"));
-    auto value = Attr<float>("value");
-    auto force_cpu = Attr<bool>("force_cpu");
-
-    framework::Tensor *tensor = nullptr;
-
-    auto &out_var = *scope.FindVar(Output("Out"));
-
-    if (out_var.IsType<framework::LoDTensor>()) {
-      tensor = out_var.GetMutable<framework::LoDTensor>();
-      tensor->Resize(framework::make_ddim(Attr<std::vector<int64_t>>("shape")));
-    } else if (out_var.IsType<framework::SelectedRows>()) {
-      tensor = out_var.GetMutable<framework::SelectedRows>()->mutable_value();
-      tensor->Resize(framework::make_ddim(Attr<std::vector<int64_t>>("shape")));
-    } else {
-      PADDLE_THROW(
-          "fill constant op's output only"
-          "supports SelectedRows and LoDTensor");
-    }
-
-    if (force_cpu) {
-      auto cpu = platform::CPUPlace();
-      tensor->mutable_data(cpu, data_type);
-    } else {
-      tensor->mutable_data(dev_place, data_type);
-    }
-
-    platform::DeviceContextPool &pool = platform::DeviceContextPool::Instance();
-    auto &dev_ctx = *pool.Get(dev_place);
-    math::set_constant(dev_ctx, tensor, value);
+
+ protected:
+  framework::OpKernelType GetExpectedKernelType(
+      const framework::ExecutionContext &ctx) const override {
+    return framework::OpKernelType(
+        framework::proto::VarType::Type(ctx.Attr<int>("dtype")),
+        ctx.GetPlace());
   }
 };

 class FillConstantOpVarTypeInference : public framework::VarTypeInference {
  public:
-  void operator()(const framework::OpDesc &op_desc,
-                  framework::BlockDesc *block) const override {}
+  void operator()(const framework::OpDesc &op_desc,
+                  framework::BlockDesc *block) const override {
+    auto data_type = static_cast<framework::proto::VarType::Type>(
+        boost::get<int>(op_desc.GetAttr("dtype")));
+    auto &out_var_name = op_desc.Output("Out").front();
+    block->Var(out_var_name)->SetDataType(data_type);
+  }
 };

 class FillConstantOpMaker : public framework::OpProtoAndCheckerMaker {
...
...
@@ -107,7 +79,13 @@ Fill up a variable with specified constant value.
 }  // namespace paddle

 namespace ops = paddle::operators;
-REGISTER_OPERATOR(fill_constant, ops::FillConstantOp,
-                  ops::FillConstantInferShape, ops::FillConstantOpMaker,
-                  paddle::framework::EmptyGradOpMaker,
-                  ops::FillConstantOpVarTypeInference);
+REGISTER_OPERATOR(fill_constant, ops::FillConstantOp, ops::FillConstantOpMaker,
+                  ops::FillConstantOpVarTypeInference,
+                  paddle::framework::EmptyGradOpMaker);
+
+REGISTER_OP_CPU_KERNEL(fill_constant, ops::FillConstantKernel<float>,
+                       ops::FillConstantKernel<double>,
+                       ops::FillConstantKernel<int64_t>,
+                       ops::FillConstantKernel<int>,
+                       ops::FillConstantKernel<paddle::platform::float16>);
paddle/fluid/operators/fill_constant_op.cu.cc
0 → 100644
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/fluid/operators/fill_constant_op.h"
namespace ops = paddle::operators;
REGISTER_OP_CUDA_KERNEL(fill_constant, ops::FillConstantKernel<float>,
                        ops::FillConstantKernel<double>,
                        ops::FillConstantKernel<int64_t>,
                        ops::FillConstantKernel<int>,
                        ops::FillConstantKernel<paddle::platform::float16>);
paddle/fluid/operators/fill_constant_op.h
0 → 100644
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include <vector>
#include "paddle/fluid/framework/data_type.h"
#include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/operators/math/math_function.h"
namespace paddle {
namespace operators {

template <typename T>
class FillConstantKernel : public framework::OpKernel<T> {
 public:
  void Compute(const paddle::framework::ExecutionContext &ctx) const override {
    auto data_type =
        static_cast<framework::proto::VarType::Type>(ctx.Attr<int>("dtype"));
    auto value = ctx.Attr<float>("value");
    auto force_cpu = ctx.Attr<bool>("force_cpu");

    framework::Tensor *tensor = nullptr;

    framework::Variable *out_var = ctx.OutputVar("Out");

    if (out_var->IsType<framework::LoDTensor>()) {
      tensor = out_var->GetMutable<framework::LoDTensor>();
      tensor->Resize(
          framework::make_ddim(ctx.Attr<std::vector<int64_t>>("shape")));
    } else if (out_var->IsType<framework::SelectedRows>()) {
      tensor = out_var->GetMutable<framework::SelectedRows>()->mutable_value();
      tensor->Resize(
          framework::make_ddim(ctx.Attr<std::vector<int64_t>>("shape")));
    } else {
      PADDLE_THROW(
          "fill constant op's output only"
          "supports SelectedRows and LoDTensor");
    }

    if (force_cpu) {
      tensor->mutable_data(platform::CPUPlace(), data_type);
    } else {
      tensor->mutable_data(ctx.GetPlace(), data_type);
    }

    platform::DeviceContextPool &pool = platform::DeviceContextPool::Instance();
    auto &dev_ctx = *pool.Get(ctx.GetPlace());
    math::set_constant(dev_ctx, tensor, value);
  }
};
}  // namespace operators
}  // namespace paddle
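The kernel above resolves Out to either a LoDTensor or the value tensor inside a SelectedRows before filling it. A hedged, dependency-free sketch of that dispatch-then-fill flow (toy types, not Paddle's), is:

#include <cstddef>
#include <iostream>
#include <variant>
#include <vector>

struct DemoTensor { std::vector<float> data; };  // toy LoDTensor
struct DemoSelectedRows { DemoTensor value; };   // toy SelectedRows

using DemoVar = std::variant<DemoTensor, DemoSelectedRows>;

// Pick the tensor to fill, then set every element to `value`, mirroring how
// FillConstantKernel resolves its output before math::set_constant().
void FillConstantDemo(DemoVar *out, size_t numel, float value) {
  DemoTensor *tensor = nullptr;
  if (auto *t = std::get_if<DemoTensor>(out)) {
    tensor = t;
  } else if (auto *sr = std::get_if<DemoSelectedRows>(out)) {
    tensor = &sr->value;
  }
  tensor->data.assign(numel, value);
}

int main() {
  DemoVar out = DemoSelectedRows{};
  FillConstantDemo(&out, 3, 2.5f);
  std::cout << std::get<DemoSelectedRows>(out).value.data[0] << "\n";  // 2.5
  return 0;
}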
paddle/fluid/operators/optimizers/adadelta_op.h
...
...
@@ -27,12 +27,14 @@ class AdadeltaOpKernel : public framework::OpKernel<T> {
     PADDLE_ENFORCE(param_var->IsType<framework::LoDTensor>(),
                    "The Var(%s)'s type should be LoDTensor, "
                    "but the received is %s",
-                   ctx.Inputs("Param").front(), param_var->Type().name());
+                   ctx.Inputs("Param").front(),
+                   framework::ToTypeName(param_var->Type()));
     const auto *grad_var = ctx.InputVar("Grad");
     PADDLE_ENFORCE(grad_var->IsType<framework::LoDTensor>(),
                    "The Var(%s)'s type should be LoDTensor, "
                    "but the received is %s",
-                   ctx.Inputs("Grad").front(), grad_var->Type().name());
+                   ctx.Inputs("Grad").front(),
+                   framework::ToTypeName(grad_var->Type()));

     auto param_out_tensor = ctx.Output<framework::Tensor>("ParamOut");
     auto avg_squared_grad_out_tensor =
...
...
paddle/fluid/operators/optimizers/adagrad_op.h
...
...
@@ -50,7 +50,8 @@ class AdagradOpKernel : public framework::OpKernel<T> {
     PADDLE_ENFORCE(param_var->IsType<framework::LoDTensor>(),
                    "The Var(%s)'s type should be LoDTensor, "
                    "but the received is %s",
-                   ctx.Inputs("Param").front(), param_var->Type().name());
+                   ctx.Inputs("Param").front(),
+                   framework::ToTypeName(param_var->Type()));

     auto *param_out_tensor = ctx.Output<framework::Tensor>("ParamOut");
     auto *moment_out_tensor = ctx.Output<framework::Tensor>("MomentOut");
...
...
paddle/fluid/operators/optimizers/adam_op.h
...
...
@@ -347,7 +347,8 @@ class AdamOpKernel : public framework::OpKernel<T> {
     PADDLE_ENFORCE(param_var->IsType<framework::LoDTensor>(),
                    "The Var(%s)'s type should be LoDTensor, "
                    "but the received is %s",
-                   ctx.Inputs("Param").front(), param_var->Type().name());
+                   ctx.Inputs("Param").front(),
+                   framework::ToTypeName(param_var->Type()));

     using paddle::framework::LoDTensor;
     using paddle::operators::detail::Ref;
...
...
paddle/fluid/operators/optimizers/adamax_op.h
...
...
@@ -27,12 +27,14 @@ class AdamaxOpKernel : public framework::OpKernel<T> {
     PADDLE_ENFORCE(param_var->IsType<framework::LoDTensor>(),
                    "The Var(%s)'s type should be LoDTensor, "
                    "but the received is %s",
-                   ctx.Inputs("Param").front(), param_var->Type().name());
+                   ctx.Inputs("Param").front(),
+                   framework::ToTypeName(param_var->Type()));
     const auto *grad_var = ctx.InputVar("Grad");
     PADDLE_ENFORCE(grad_var->IsType<framework::LoDTensor>(),
                    "The Var(%s)'s type should be LoDTensor, "
                    "but the received is %s",
-                   ctx.Inputs("Grad").front(), grad_var->Type().name());
+                   ctx.Inputs("Grad").front(),
+                   framework::ToTypeName(grad_var->Type()));

     auto param_out_tensor = ctx.Output<framework::Tensor>("ParamOut");
     auto moment_out_tensor = ctx.Output<framework::Tensor>("MomentOut");
...
...
paddle/fluid/operators/optimizers/decayed_adagrad_op.h
...
...
@@ -27,12 +27,14 @@ class DecayedAdagradOpKernel : public framework::OpKernel<T> {
     PADDLE_ENFORCE(param_var->IsType<framework::LoDTensor>(),
                    "The Var(%s)'s type should be LoDTensor, "
                    "but the received is %s",
-                   ctx.Inputs("Param").front(), param_var->Type().name());
+                   ctx.Inputs("Param").front(),
+                   framework::ToTypeName(param_var->Type()));
     const auto *grad_var = ctx.InputVar("Grad");
     PADDLE_ENFORCE(grad_var->IsType<framework::LoDTensor>(),
                    "The Var(%s)'s type should be LoDTensor, "
                    "but the received is %s",
-                   ctx.Inputs("Grad").front(), grad_var->Type().name());
+                   ctx.Inputs("Grad").front(),
+                   framework::ToTypeName(grad_var->Type()));

     auto param_out_tensor = ctx.Output<framework::Tensor>("ParamOut");
     auto moment_out_tensor = ctx.Output<framework::Tensor>("MomentOut");
...
...
paddle/fluid/operators/optimizers/ftrl_op.h
...
...
@@ -32,12 +32,14 @@ class FTRLOpKernel : public framework::OpKernel<T> {
     PADDLE_ENFORCE(param_var->IsType<framework::LoDTensor>(),
                    "The Var(%s)'s type should be LoDTensor, "
                    "but the received is %s",
-                   ctx.Inputs("Param").front(), param_var->Type().name());
+                   ctx.Inputs("Param").front(),
+                   framework::ToTypeName(param_var->Type()));
     const auto *grad_var = ctx.InputVar("Grad");
     PADDLE_ENFORCE(grad_var->IsType<framework::LoDTensor>(),
                    "The Var(%s)'s type should be LoDTensor, "
                    "but the received is %s",
-                   ctx.Inputs("Grad").front(), grad_var->Type().name());
+                   ctx.Inputs("Grad").front(),
+                   framework::ToTypeName(grad_var->Type()));

     auto *param_out = ctx.Output<Tensor>("ParamOut");
     auto *sq_accum_out = ctx.Output<Tensor>("SquaredAccumOut");
...
...
paddle/fluid/operators/optimizers/momentum_op.h
...
...
@@ -395,7 +395,7 @@ class MomentumOpKernel : public framework::OpKernel<T> {
       PADDLE_THROW(
           string::Sprintf("MomentumOp only supports LoDTensor or SelectedRows "
                           "gradient, but the received Variable Type is %s",
-                          grad_var->Type().name()));
+                          framework::ToTypeName(grad_var->Type())));
     }
   }
 };
...
...
paddle/fluid/operators/optimizers/sgd_op.cu
...
...
@@ -60,7 +60,8 @@ class SGDOpCUDAKernel : public framework::OpKernel<T> {
     PADDLE_ENFORCE(param_var->IsType<framework::LoDTensor>(),
                    "The Var(%s)'s type should be LoDTensor, "
                    "but the received is %s",
-                   ctx.Inputs("Param").front(), param_var->Type().name());
+                   ctx.Inputs("Param").front(),
+                   framework::ToTypeName(param_var->Type()));
     auto *param = ctx.Input<framework::Tensor>("Param");
     auto *param_out = ctx.Output<framework::Tensor>("ParamOut");
...
...
paddle/fluid/operators/sum_mkldnn_op.cc
...
...
@@ -245,7 +245,7 @@ class SumMKLDNNOpKernel : public paddle::framework::OpKernel<T> {
     }
   } else {
     PADDLE_THROW("Unexpected branch, output variable type is %s",
-                 out_var->Type().name());
+                 framework::ToTypeName(out_var->Type()));
   }
 }
};
...
...
paddle/fluid/operators/sum_op.cc
...
...
@@ -126,7 +126,7 @@ class SumOp : public framework::OperatorWithKernel {
       PADDLE_THROW("Cannot find the input data type by all input data");
     }
     PADDLE_THROW("Unexpected branch. Input type is %s",
-                 x_vars[0]->Type().name());
+                 framework::ToTypeName(x_vars[0]->Type()));
   }
 };
...
...
paddle/fluid/operators/sum_op.h
...
...
@@ -163,7 +163,7 @@ class SumKernel : public framework::OpKernel<T> {
     }
   } else {
     PADDLE_THROW("Unexpected branch, output variable type is %s",
-                 out_var->Type().name());
+                 framework::ToTypeName(out_var->Type()));
   }
 }
};
...
...
paddle/fluid/pybind/imperative.cc
...
...
@@ -14,7 +14,6 @@ limitations under the License. */
#include "paddle/fluid/pybind/imperative.h"
#include "paddle/fluid/framework/block_desc.h"
#include "paddle/fluid/framework/scope.h"
#include "paddle/fluid/imperative/tracer.h"
 namespace paddle {
...
...
@@ -27,9 +26,7 @@ void BindTracer(pybind11::module *m) {
       [](imperative::Tracer &self, framework::BlockDesc *root_block) {
         new (&self) imperative::Tracer(root_block);
       })
-      .def("trace", &imperative::Tracer::Trace)
-      .def("get_scope", &imperative::Tracer::GetScope,
-           pybind11::return_value_policy::reference);
+      .def("trace", &imperative::Tracer::Trace);
 }

 }  // namespace pybind
...
...
paddle/fluid/pybind/pybind.cc
...
...
@@ -125,9 +125,7 @@ PYBIND11_MODULE(core, m) {
// .def(py::init<>())
       .def(py::init<bool>(), py::arg("stop_gradient") = false)
       .def("_run_backward",
-           [](imperative::VarBase &self, framework::Scope *scope) {
-             self.RunBackward(scope);
-           })
+           [](imperative::VarBase &self) { self.RunBackward(); })
       .def("_grad_name", &imperative::VarBase::GradName)
       .def("_grad", &imperative::VarBase::Grad)
       .def_property("grad_value",
...
...
@@ -154,9 +152,9 @@ PYBIND11_MODULE(core, m) {
           [](const imperative::VarBase &self) { return self.stop_gradient_; },
           [](imperative::VarBase &self, bool stop_gradient) {
             self.stop_gradient_ = stop_gradient;
           });

   py::class_<imperative::OpBase, PyOpBase>(m, "OpBase", R"DOC()DOC")
       .def(py::init<>())
       .def_property(
           "desc", [](const imperative::OpBase &self) { return self.op_desc_; },
...
...
paddle/scripts/paddle_build.sh
...
...
@@ -14,7 +14,6 @@
# See the License for the specific language governing permissions and
# limitations under the License.
#=================================================
# Utils
#=================================================
...
...
@@ -418,13 +417,6 @@ EOF
     else
         ctest --output-on-failure
     fi
-
-    # make install should also be test when unittest
-    make install -j `nproc`
-    pip install ${INSTALL_PREFIX:-/paddle/build}/opt/paddle/share/wheels/*.whl
-    if [[ ${WITH_FLUID_ONLY:-OFF} == "OFF" ]] ; then
-        paddle version
-    fi
     fi
 }
...
...
@@ -922,6 +914,7 @@ function main() {
       ;;
     assert_api)
       assert_api_not_changed ${PYTHON_ABI:-""}
+      assert_api_spec_approvals
       ;;
     test_inference)
       gen_capi_package
...
...
@@ -946,6 +939,15 @@ function main() {
       run_test
       assert_api_not_changed ${PYTHON_ABI:-""}
       ;;
+    cmake_gen)
+      cmake_gen ${PYTHON_ABI:-""}
+      ;;
+    gen_fluid_lib)
+      gen_fluid_lib
+      ;;
+    test_fluid_lib)
+      test_fluid_lib
+      ;;
     *)
       print_usage
       exit 0
...
...
python/paddle/fluid/framework.py
...
...
@@ -15,6 +15,7 @@
 from __future__ import print_function

 import collections
+from collections import defaultdict
 import contextlib
 import os
 import re
...
...
@@ -369,13 +370,11 @@ class Variable(object):
         self._ivar.stop_gradient = stop_gradient

     def _numpy(self):
-        scope = _imperative_tracer().get_scope()
-        tensor = core.get_variable_tensor(scope, self.desc.name())
+        tensor = self._ivar.value.get_tensor()
         return np.array(tensor)

     def _backward(self):
-        scope = _imperative_tracer().get_scope()
-        self._ivar._run_backward(scope)
+        self._ivar._run_backward()

     def _gradient(self):
         return np.array(self._ivar._grad())
...
...
@@ -710,20 +709,20 @@ class Operator(object):
         if _in_imperative_mode():
             self.iop = core.OpBase()
             self.iop.desc = self.desc
-            self.inputs = []
+            self.inputs = defaultdict(list)
             if inputs is not None:
-                for inp in inputs.values():
-                    if isinstance(inp, Variable):
-                        self.inputs.append(inp)
-                    elif isinstance(inp, list) or isinstance(inp, tuple):
-                        self.inputs.extend(inp[:])
-            self.outputs = []
+                for k, v in six.iteritems(inputs):
+                    if isinstance(v, Variable):
+                        self.inputs[k].append(v._ivar)
+                    elif isinstance(v, list) or isinstance(v, tuple):
+                        self.inputs[k].extend([var._ivar for var in v])
+            self.outputs = defaultdict(list)
             if outputs is not None:
-                for out in outputs.values():
-                    if isinstance(out, Variable):
-                        self.outputs.append(out)
-                    elif isinstance(out, list) or isinstance(out, tuple):
-                        self.outputs.extend(out[:])
+                for k, v in six.iteritems(outputs):
+                    if isinstance(v, Variable):
+                        self.outputs[k].append(v._ivar)
+                    elif isinstance(v, list) or isinstance(v, tuple):
+                        self.outputs[k].extend([var._ivar for var in v])

     def _has_kernel(self, op_type):
         return op_type not in self.OP_WITHOUT_KERNEL_SET
...
...
@@ -1302,8 +1301,7 @@ class Block(object):
     def _trace_op(self, op, stop_gradient=False):
         if _in_imperative_mode():
-            _imperative_tracer().trace(op.iop, [v._ivar for v in op.inputs],
-                                       [v._ivar for v in op.outputs], self.desc,
-                                       stop_gradient)
+            _imperative_tracer().trace(op.iop, op.inputs, op.outputs, self.desc,
+                                       stop_gradient)

     def _insert_op(self, index, *args, **kwargs):
...
...
python/paddle/fluid/imperative/base.py
...
...
@@ -45,8 +45,7 @@ def to_variable(value, block=None):
             name=None,
             shape=value.shape,
             dtype=value.dtype)
-        scope = framework._imperative_tracer().get_scope()
-        var = scope.var(py_var.name)
+        var = py_var._ivar.value
         tensor = var.get_tensor()
         tensor.set(value, core.CPUPlace())
         return py_var
...
...
python/paddle/fluid/layer_helper.py
...
...
@@ -20,7 +20,7 @@ import six
 import sys
 import numpy as np

-from .framework import Variable, Parameter, default_main_program, default_startup_program, dtype_is_floating
+from .framework import Variable, Parameter, default_main_program, default_startup_program, dtype_is_floating, _in_imperative_mode
 from . import unique_name
 from paddle.fluid.imperative import base as imperative_base
 from paddle.fluid.initializer import Constant, Xavier
...
...
@@ -313,11 +313,22 @@ class LayerHelper(object):
             param = self._create_weight_normalize(attr, shape, dtype)
             WeightNormParamAttr.params_with_weight_norm.append(param)
             return param
-        self.startup_program.global_block().create_parameter(
-            dtype=dtype, shape=shape, **attr._to_kwargs(with_initializer=True))
-        return self.main_program.global_block().create_parameter(
-            dtype=dtype, shape=shape, **attr._to_kwargs())
+        if _in_imperative_mode():
+            self.main_program.global_block().create_parameter(
+                dtype=dtype, shape=shape, **attr._to_kwargs())
+            # In imperative mode, we want the returned parameter to be
+            # initialized so that it can be used imperatively.
+            return self.startup_program.global_block().create_parameter(
+                dtype=dtype,
+                shape=shape,
+                **attr._to_kwargs(with_initializer=True))
+        else:
+            self.startup_program.global_block().create_parameter(
+                dtype=dtype,
+                shape=shape,
+                **attr._to_kwargs(with_initializer=True))
+            return self.main_program.global_block().create_parameter(
+                dtype=dtype, shape=shape, **attr._to_kwargs())

     def get_parameter(self, name):
         param = self.main_program.global_block().var(name)
...
...
python/paddle/fluid/parallel_executor.py
...
...
@@ -148,7 +148,7 @@ class ParallelExecutor(object):
                 trainers_endpoints), "num_trainers == len(end_points)"
             build_strategy.trainers_endpoints = trainers_endpoints

-        # step 5: get persistable_vars, parameter_vars, places. persistable_vars
+        # step 6: get persistable_vars, places. persistable_vars
         # need be broadcast to other local_scope.
         persistable_vars = set([
             cpt.to_text(v.name) for v in [
...
...
@@ -164,7 +164,7 @@ class ParallelExecutor(object):
         places = list(map(place_obj, self._places))

-        # step 6: init ParallelExecutor
+        # step 7: init ParallelExecutor
         self.executor = core.ParallelExecutor(
             places, persistable_vars, main.desc,
             cpt.to_text(loss_name)
...
...
python/paddle/fluid/tests/book/high-level-api/image_classification/test_image_classification_resnet.py
...
...
@@ -185,8 +185,10 @@ def main(use_cuda, parallel):
 if __name__ == '__main__':
-    for use_cuda in (False, True):
-        for parallel in (False, True):
-            if use_cuda and not core.is_compiled_with_cuda():
-                continue
-            main(use_cuda=use_cuda, parallel=parallel)
+    on_ci = bool(int(os.environ.get("SKIP_UNSTABLE_CI", '0')))
+    if not on_ci:
+        for use_cuda in (False, True):
+            for parallel in (False, True):
+                if use_cuda and not core.is_compiled_with_cuda():
+                    continue
+                main(use_cuda=use_cuda, parallel=parallel)
python/paddle/fluid/tests/unittests/test_dist_se_resnext.py
...
...
@@ -15,6 +15,18 @@
 from __future__ import print_function
 import unittest
 from test_dist_base import TestDistBase
+import os
+
+
+def skip_ci(func):
+    on_ci = bool(int(os.environ.get("SKIP_UNSTABLE_CI", '0')))
+
+    def __func__(*args, **kwargs):
+        if on_ci:
+            return
+        return func(*args, **kwargs)
+
+    return __func__


 class TestDistSeResneXt2x2(TestDistBase):
...
...
@@ -22,6 +34,7 @@ class TestDistSeResneXt2x2(TestDistBase):
         self._sync_mode = True
         self._use_reader_alloc = False

+    @skip_ci
     def test_dist_train(self):
         self.check_with_place("dist_se_resnext.py", delta=1e-7)
...
...
@@ -32,6 +45,7 @@ class TestDistseResnXt2x2WithMemopt(TestDistBase):
         self._mem_opt = True
         self._use_reader_alloc = False

+    @skip_ci
     def test_dist_train(self):
         self.check_with_place("dist_se_resnext.py", delta=1e-7)
...
...
@@ -41,6 +55,7 @@ class TestDistSeResneXt2x2Async(TestDistBase):
         self._sync_mode = False
         self._use_reader_alloc = False

+    @skip_ci
     def test_dist_train(self):
         self.check_with_place("dist_se_resnext.py", delta=100)
...
...
python/paddle/fluid/tests/unittests/test_imperative.py
...
...
@@ -38,7 +38,9 @@ class MyLayer(fluid.imperative.PyLayer):
     def forward(self, inputs):
         x = fluid.layers.relu(inputs)
         self._x_for_debug = x
-        return [fluid.layers.elementwise_mul(x, x)]
+        x = fluid.layers.elementwise_mul(x, x)
+        x = fluid.layers.reduce_sum(x)
+        return [x]


 class MLP(fluid.imperative.PyLayer):
...
python/paddle/fluid/tests/unittests/test_weight_decay.py
0 → 100644
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import print_function

import contextlib

import unittest
from functools import partial
import numpy as np
import paddle
import paddle.fluid.core as core
import paddle.fluid as fluid


def get_places():
    places = []
    if core.is_compiled_with_cuda():
        places.append(core.CUDAPlace(0))
    return places


@contextlib.contextmanager
def prog_scope_guard(main_prog, startup_prog):
    scope = fluid.core.Scope()
    with fluid.unique_name.guard():
        with fluid.scope_guard(scope):
            with fluid.program_guard(main_prog, startup_prog):
                yield


def bow_net(data,
            label,
            dict_dim,
            is_sparse=False,
            emb_dim=128,
            hid_dim=128,
            hid_dim2=96,
            class_dim=2):
    """
    BOW net
    This model is from https://github.com/PaddlePaddle/models:
    fluid/PaddleNLP/text_classification/nets.py
    """
    emb = fluid.layers.embedding(
        input=data, is_sparse=is_sparse, size=[dict_dim, emb_dim])
    bow = fluid.layers.sequence_pool(input=emb, pool_type='sum')
    bow_tanh = fluid.layers.tanh(bow)
    fc_1 = fluid.layers.fc(input=bow_tanh, size=hid_dim, act="tanh")
    fc_2 = fluid.layers.fc(input=fc_1, size=hid_dim2, act="tanh")
    prediction = fluid.layers.fc(input=[fc_2], size=class_dim, act="softmax")
    cost = fluid.layers.cross_entropy(input=prediction, label=label)
    avg_cost = fluid.layers.mean(x=cost)

    return avg_cost


class TestWeightDecay(unittest.TestCase):
    def setUp(self):
        self.word_dict = paddle.dataset.imdb.word_dict()
        reader = paddle.batch(
            paddle.dataset.imdb.train(self.word_dict), batch_size=4)()
        self.train_data = [next(reader) for _ in range(5)]
        self.learning_rate = .5

    def run_executor(self, place, feed_list, loss):
        exe = fluid.Executor(place)
        feeder = fluid.DataFeeder(feed_list=feed_list, place=place)
        exe.run(fluid.default_startup_program())

        main_prog = fluid.default_main_program()
        loss_set = []
        for data in self.train_data:
            out = exe.run(main_prog,
                          feed=feeder.feed(data),
                          fetch_list=[loss.name])
            print("loss %s" % (np.average(out)))
            loss_set.append(np.average(out))

        return loss_set

    def run_parallel_exe(self,
                         place,
                         feed_list,
                         loss,
                         use_cuda=True,
                         use_reduce=False,
                         use_fast_executor=False,
                         use_ir_memory_optimize=False):
        exe = fluid.Executor(place)
        feeder = fluid.DataFeeder(feed_list=feed_list, place=place)
        exe.run(fluid.default_startup_program())

        exec_strategy = fluid.ExecutionStrategy()
        if use_fast_executor:
            exec_strategy.use_experimental_executor = True

        build_strategy = fluid.BuildStrategy()
        build_strategy.reduce_strategy = fluid.BuildStrategy.ReduceStrategy.Reduce \
            if use_reduce else fluid.BuildStrategy.ReduceStrategy.AllReduce
        build_strategy.memory_optimize = use_ir_memory_optimize

        parallel_exe = fluid.ParallelExecutor(
            use_cuda,
            loss_name=loss.name,
            exec_strategy=exec_strategy,
            build_strategy=build_strategy)

        loss_set = []
        for data in self.train_data:
            out = parallel_exe.run(feed=feeder.feed(data),
                                   fetch_list=[loss.name])
            print("loss %s" % (np.average(out)))
            loss_set.append(np.average(out))

        return loss_set

    def check_weight_decay(self,
                           place,
                           model,
                           use_parallel_exe=False,
                           use_reduce=False):
        main_prog = fluid.framework.Program()
        startup_prog = fluid.framework.Program()
        startup_prog.random_seed = 1
        with prog_scope_guard(main_prog=main_prog, startup_prog=startup_prog):
            data = fluid.layers.data(
                name="words", shape=[1], dtype="int64", lod_level=1)
            label = fluid.layers.data(name="label", shape=[1], dtype="int64")

            avg_cost = model(data, label, len(self.word_dict))

            param_list = [(var, var * self.learning_rate)
                          for var in main_prog.block(0).all_parameters()]

            optimizer = fluid.optimizer.Adagrad(
                learning_rate=self.learning_rate)
            optimizer.minimize(avg_cost)

            for params in param_list:
                updated_p = fluid.layers.elementwise_sub(
                    x=params[0], y=params[1])
                fluid.layers.assign(input=updated_p, output=params[0])

            if use_parallel_exe:
                loss = self.run_parallel_exe(
                    place, [data, label],
                    loss=avg_cost,
                    use_cuda=True,
                    use_reduce=use_reduce)
            else:
                loss = self.run_executor(place, [data, label], loss=avg_cost)

        return loss

    def test_weight_decay(self):
        model = partial(bow_net, is_sparse=False)
        for place in get_places():
            loss = self.check_weight_decay(place, model, use_parallel_exe=False)

            loss2 = self.check_weight_decay(
                place, model, use_parallel_exe=True, use_reduce=False)

            for i in range(len(loss)):
                assert np.isclose(a=loss[i], b=loss2[i], rtol=5e-5)

            loss3 = self.check_weight_decay(
                place, model, use_parallel_exe=True, use_reduce=True)

            for i in range(len(loss)):
                assert np.isclose(a=loss[i], b=loss3[i], rtol=5e-5)


if __name__ == '__main__':
    unittest.main()