Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
PaddlePaddle
Paddle
提交
1c116462
P
Paddle
项目概览
PaddlePaddle
/
Paddle
1 年多 前同步成功
通知
2301
Star
20931
Fork
5422
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
1423
列表
看板
标记
里程碑
合并请求
543
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
1,423
Issue
1,423
列表
看板
标记
里程碑
合并请求
543
合并请求
543
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
1c116462
编写于
12月 20, 2018
作者:
M
minqiyang
浏览文件
操作
浏览文件
下载
差异文件
Merge branch 'develop' of
https://github.com/PaddlePaddle/Paddle
into imperative_mnist
test=develop
上级
29697c2e
55af1168
变更
79
显示空白变更内容
内联
并排
Showing
79 changed file
with
2033 addition
and
1850 deletion
+2033
-1850
CMakeLists.txt
CMakeLists.txt
+2
-2
cmake/external/warpctc.cmake
cmake/external/warpctc.cmake
+25
-5
cmake/operators.cmake
cmake/operators.cmake
+1
-1
paddle/fluid/API.spec
paddle/fluid/API.spec
+17
-1
paddle/fluid/framework/details/build_strategy.cc
paddle/fluid/framework/details/build_strategy.cc
+1
-6
paddle/fluid/framework/details/build_strategy.h
paddle/fluid/framework/details/build_strategy.h
+7
-8
paddle/fluid/framework/details/multi_devices_graph_pass.cc
paddle/fluid/framework/details/multi_devices_graph_pass.cc
+0
-5
paddle/fluid/framework/details/multi_devices_graph_pass.h
paddle/fluid/framework/details/multi_devices_graph_pass.h
+0
-1
paddle/fluid/framework/ir/conv_elementwise_add_mkldnn_fuse_pass.cc
...uid/framework/ir/conv_elementwise_add_mkldnn_fuse_pass.cc
+22
-113
paddle/fluid/framework/ngraph_bridge.cc
paddle/fluid/framework/ngraph_bridge.cc
+8
-83
paddle/fluid/framework/ngraph_operator.cc
paddle/fluid/framework/ngraph_operator.cc
+2
-1
paddle/fluid/framework/op_desc.cc
paddle/fluid/framework/op_desc.cc
+108
-24
paddle/fluid/framework/operator.cc
paddle/fluid/framework/operator.cc
+230
-70
paddle/fluid/framework/operator.h
paddle/fluid/framework/operator.h
+46
-16
paddle/fluid/framework/parallel_executor.cc
paddle/fluid/framework/parallel_executor.cc
+4
-5
paddle/fluid/framework/parallel_executor.h
paddle/fluid/framework/parallel_executor.h
+0
-1
paddle/fluid/framework/shape_inference.cc
paddle/fluid/framework/shape_inference.cc
+0
-98
paddle/fluid/framework/shape_inference.h
paddle/fluid/framework/shape_inference.h
+16
-27
paddle/fluid/framework/type_defs.h
paddle/fluid/framework/type_defs.h
+3
-0
paddle/fluid/imperative/layer.cc
paddle/fluid/imperative/layer.cc
+0
-3
paddle/fluid/operators/CMakeLists.txt
paddle/fluid/operators/CMakeLists.txt
+1
-3
paddle/fluid/operators/beam_search_decode_op.cc
paddle/fluid/operators/beam_search_decode_op.cc
+2
-1
paddle/fluid/operators/controlflow/parallel_do_op.cc
paddle/fluid/operators/controlflow/parallel_do_op.cc
+0
-426
paddle/fluid/operators/controlflow/while_op.cc
paddle/fluid/operators/controlflow/while_op.cc
+29
-14
paddle/fluid/operators/conv_mkldnn_op.cc
paddle/fluid/operators/conv_mkldnn_op.cc
+14
-8
paddle/fluid/operators/distributed/brpc_sendrecvop_utils.cc
paddle/fluid/operators/distributed/brpc_sendrecvop_utils.cc
+17
-6
paddle/fluid/operators/distributed/grpc_client.cc
paddle/fluid/operators/distributed/grpc_client.cc
+10
-1
paddle/fluid/operators/distributed/grpc_serde.cc
paddle/fluid/operators/distributed/grpc_serde.cc
+8
-0
paddle/fluid/operators/distributed/sendrecvop_utils.h
paddle/fluid/operators/distributed/sendrecvop_utils.h
+7
-2
paddle/fluid/operators/distributed/variable_response.cc
paddle/fluid/operators/distributed/variable_response.cc
+1
-1
paddle/fluid/operators/merge_selected_rows_op.cc
paddle/fluid/operators/merge_selected_rows_op.cc
+29
-1
paddle/fluid/operators/ngraph/ngraph_ops.h
paddle/fluid/operators/ngraph/ngraph_ops.h
+25
-0
paddle/fluid/operators/ngraph/ops/binary_unnary_op.h
paddle/fluid/operators/ngraph/ops/binary_unnary_op.h
+52
-0
paddle/fluid/operators/ngraph/ops/mul_op.h
paddle/fluid/operators/ngraph/ops/mul_op.h
+134
-0
paddle/fluid/operators/optimizers/adam_op.cc
paddle/fluid/operators/optimizers/adam_op.cc
+5
-0
paddle/fluid/operators/optimizers/adam_op.h
paddle/fluid/operators/optimizers/adam_op.h
+34
-12
paddle/fluid/operators/transpose_mkldnn_op.cc
paddle/fluid/operators/transpose_mkldnn_op.cc
+79
-0
paddle/fluid/operators/transpose_op.cc
paddle/fluid/operators/transpose_op.cc
+47
-2
paddle/fluid/platform/dynload/CMakeLists.txt
paddle/fluid/platform/dynload/CMakeLists.txt
+0
-2
paddle/fluid/platform/dynload/cudnn.h
paddle/fluid/platform/dynload/cudnn.h
+1
-1
paddle/fluid/platform/dynload/dynamic_loader.cc
paddle/fluid/platform/dynload/dynamic_loader.cc
+2
-0
paddle/fluid/platform/dynload/dynamic_loader.h
paddle/fluid/platform/dynload/dynamic_loader.h
+6
-0
paddle/fluid/platform/dynload/mklml.h
paddle/fluid/platform/dynload/mklml.h
+1
-1
paddle/fluid/platform/dynload/tensorrt.h
paddle/fluid/platform/dynload/tensorrt.h
+1
-1
paddle/fluid/platform/dynload/warpctc.h
paddle/fluid/platform/dynload/warpctc.h
+1
-1
paddle/fluid/platform/mkldnn_reuse.h
paddle/fluid/platform/mkldnn_reuse.h
+124
-0
paddle/fluid/platform/ngraph_helper.h
paddle/fluid/platform/ngraph_helper.h
+105
-0
paddle/fluid/platform/port.h
paddle/fluid/platform/port.h
+0
-1
paddle/fluid/pybind/imperative.cc
paddle/fluid/pybind/imperative.cc
+3
-2
paddle/fluid/pybind/pybind.cc
paddle/fluid/pybind/pybind.cc
+0
-1
paddle/scripts/paddle_build.sh
paddle/scripts/paddle_build.sh
+6
-6
python/paddle/fluid/__init__.py
python/paddle/fluid/__init__.py
+8
-2
python/paddle/fluid/backward.py
python/paddle/fluid/backward.py
+2
-77
python/paddle/fluid/contrib/__init__.py
python/paddle/fluid/contrib/__init__.py
+3
-0
python/paddle/fluid/contrib/utils/__init__.py
python/paddle/fluid/contrib/utils/__init__.py
+5
-4
python/paddle/fluid/contrib/utils/hdfs_utils.py
python/paddle/fluid/contrib/utils/hdfs_utils.py
+163
-138
python/paddle/fluid/contrib/utils/lookup_table_utils.py
python/paddle/fluid/contrib/utils/lookup_table_utils.py
+125
-58
python/paddle/fluid/framework.py
python/paddle/fluid/framework.py
+15
-7
python/paddle/fluid/imperative/base.py
python/paddle/fluid/imperative/base.py
+2
-1
python/paddle/fluid/layers/control_flow.py
python/paddle/fluid/layers/control_flow.py
+2
-153
python/paddle/fluid/layers/nn.py
python/paddle/fluid/layers/nn.py
+45
-0
python/paddle/fluid/optimizer.py
python/paddle/fluid/optimizer.py
+12
-4
python/paddle/fluid/parallel_executor.py
python/paddle/fluid/parallel_executor.py
+38
-41
python/paddle/fluid/tests/book/notest_understand_sentiment.py
...on/paddle/fluid/tests/book/notest_understand_sentiment.py
+1
-17
python/paddle/fluid/tests/book/test_recognize_digits.py
python/paddle/fluid/tests/book/test_recognize_digits.py
+2
-15
python/paddle/fluid/tests/book/test_word2vec.py
python/paddle/fluid/tests/book/test_word2vec.py
+2
-14
python/paddle/fluid/tests/book_memory_optimization/test_memopt_fit_a_line.py
.../tests/book_memory_optimization/test_memopt_fit_a_line.py
+0
-87
python/paddle/fluid/tests/unittests/ngraph/test_activation_ngraph_op.py
...fluid/tests/unittests/ngraph/test_activation_ngraph_op.py
+58
-0
python/paddle/fluid/tests/unittests/ngraph/test_mul_ngraph_op.py
...paddle/fluid/tests/unittests/ngraph/test_mul_ngraph_op.py
+42
-0
python/paddle/fluid/tests/unittests/test_adam_op.py
python/paddle/fluid/tests/unittests/test_adam_op.py
+32
-20
python/paddle/fluid/tests/unittests/test_conv2d_mkldnn_op.py
python/paddle/fluid/tests/unittests/test_conv2d_mkldnn_op.py
+19
-1
python/paddle/fluid/tests/unittests/test_get_tensor_from_selected_rows_op.py
.../tests/unittests/test_get_tensor_from_selected_rows_op.py
+1
-1
python/paddle/fluid/tests/unittests/test_imperative.py
python/paddle/fluid/tests/unittests/test_imperative.py
+123
-0
python/paddle/fluid/tests/unittests/test_merge_selectedrows_op.py
...addle/fluid/tests/unittests/test_merge_selectedrows_op.py
+2
-2
python/paddle/fluid/tests/unittests/test_parallel_op.py
python/paddle/fluid/tests/unittests/test_parallel_op.py
+0
-235
python/paddle/fluid/tests/unittests/test_transpose_mkldnn_op.py
.../paddle/fluid/tests/unittests/test_transpose_mkldnn_op.py
+76
-0
python/paddle/fluid/tests/unittests/test_transpose_op.py
python/paddle/fluid/tests/unittests/test_transpose_op.py
+11
-2
python/paddle/fluid/transpiler/memory_optimization_transpiler.py
...paddle/fluid/transpiler/memory_optimization_transpiler.py
+2
-3
python/setup.py.in
python/setup.py.in
+6
-5
未找到文件。
CMakeLists.txt
浏览文件 @
1c116462
...
@@ -208,10 +208,10 @@ include(external/xxhash) # download xxhash
...
@@ -208,10 +208,10 @@ include(external/xxhash) # download xxhash
include
(
external/dlpack
)
include
(
external/dlpack
)
include
(
external/snappy
)
# download snappy
include
(
external/snappy
)
# download snappy
include
(
external/snappystream
)
# download snappystream
include
(
external/snappystream
)
# download snappystream
include
(
external/warpctc
)
# download, build, install warpctc
if
(
NOT WIN32
)
if
(
NOT WIN32
)
# there is no official support of warpctc, nccl, cupti in windows
# there is no official support of nccl, cupti in windows
include
(
external/warpctc
)
# download, build, install warpctc
include
(
cupti
)
include
(
cupti
)
include
(
external/gzstream
)
include
(
external/gzstream
)
endif
(
NOT WIN32
)
endif
(
NOT WIN32
)
...
...
cmake/external/warpctc.cmake
浏览文件 @
1c116462
...
@@ -26,25 +26,33 @@ SET(WARPCTC_INCLUDE_DIR "${WARPCTC_INSTALL_DIR}/include"
...
@@ -26,25 +26,33 @@ SET(WARPCTC_INCLUDE_DIR "${WARPCTC_INSTALL_DIR}/include"
# Used in unit test test_WarpCTCLayer
# Used in unit test test_WarpCTCLayer
SET
(
WARPCTC_LIB_DIR
"
${
WARPCTC_INSTALL_DIR
}
/lib"
SET
(
WARPCTC_LIB_DIR
"
${
WARPCTC_INSTALL_DIR
}
/lib"
CACHE PATH
"Warp-ctc Library Directory"
FORCE
)
CACHE PATH
"Warp-ctc Library Directory"
FORCE
)
SET
(
WARPCTC_LIBRARIES
"
${
WARPCTC_INSTALL_DIR
}
/lib/libwarpctc
${
CMAKE_SHARED_LIBRARY_SUFFIX
}
"
CACHE FILEPATH
"Warp-ctc Library"
FORCE
)
IF
(
CMAKE_CXX_COMPILER_ID STREQUAL
"Clang"
OR CMAKE_CXX_COMPILER_ID STREQUAL
"AppleClang"
)
IF
(
CMAKE_CXX_COMPILER_ID STREQUAL
"Clang"
OR CMAKE_CXX_COMPILER_ID STREQUAL
"AppleClang"
OR WIN32
)
SET
(
USE_OMP OFF
)
SET
(
USE_OMP OFF
)
ELSE
()
ELSE
()
SET
(
USE_OMP ON
)
SET
(
USE_OMP ON
)
ENDIF
()
ENDIF
()
IF
(
WIN32
)
SET
(
WARPCTC_REPOSITORY
"https://github.com/wopeizl/warp-ctc.git"
)
ELSE
()
SET
(
WARPCTC_REPOSITORY
"https://github.com/dzhwinter/warp-ctc.git"
)
ENDIF
()
ExternalProject_Add
(
ExternalProject_Add
(
extern_warpctc
extern_warpctc
${
EXTERNAL_PROJECT_LOG_ARGS
}
${
EXTERNAL_PROJECT_LOG_ARGS
}
GIT_REPOSITORY
"https://github.com/dzhwinter/warp-ctc.git"
GIT_REPOSITORY
${
WARPCTC_REPOSITORY
}
PREFIX
${
WARPCTC_SOURCES_DIR
}
PREFIX
${
WARPCTC_SOURCES_DIR
}
UPDATE_COMMAND
""
UPDATE_COMMAND
""
CMAKE_ARGS -DCMAKE_CXX_COMPILER=
${
CMAKE_CXX_COMPILER
}
CMAKE_ARGS -DCMAKE_CXX_COMPILER=
${
CMAKE_CXX_COMPILER
}
-DCMAKE_C_COMPILER=
${
CMAKE_C_COMPILER
}
-DCMAKE_C_COMPILER=
${
CMAKE_C_COMPILER
}
-DCMAKE_CXX_FLAGS=
${
CMAKE_CXX_FLAGS
}
-DCMAKE_C_FLAGS=
${
CMAKE_C_FLAGS
}
-DCMAKE_C_FLAGS=
${
CMAKE_C_FLAGS
}
-DCMAKE_C_FLAGS_DEBUG=
${
CMAKE_C_FLAGS_DEBUG
}
-DCMAKE_C_FLAGS_RELEASE=
${
CMAKE_C_FLAGS_RELEASE
}
-DCMAKE_CXX_FLAGS=
${
CMAKE_CXX_FLAGS
}
-DCMAKE_CXX_FLAGS_RELEASE=
${
CMAKE_CXX_FLAGS_RELEASE
}
-DCMAKE_CXX_FLAGS_DEBUG=
${
CMAKE_CXX_FLAGS_DEBUG
}
-DCMAKE_INSTALL_PREFIX=
${
WARPCTC_INSTALL_DIR
}
-DCMAKE_INSTALL_PREFIX=
${
WARPCTC_INSTALL_DIR
}
-DWITH_GPU=
${
WITH_GPU
}
-DWITH_GPU=
${
WITH_GPU
}
-DWITH_OMP=
${
USE_OMP
}
-DWITH_OMP=
${
USE_OMP
}
...
@@ -59,6 +67,18 @@ ExternalProject_Add(
...
@@ -59,6 +67,18 @@ ExternalProject_Add(
-DCMAKE_POSITION_INDEPENDENT_CODE:BOOL=ON
-DCMAKE_POSITION_INDEPENDENT_CODE:BOOL=ON
-DCMAKE_INSTALL_PREFIX:PATH=
${
WARPCTC_INSTALL_DIR
}
-DCMAKE_INSTALL_PREFIX:PATH=
${
WARPCTC_INSTALL_DIR
}
)
)
IF
(
WIN32
)
IF
(
NOT EXISTS
"
${
WARPCTC_INSTALL_DIR
}
/lib/warpctc
${
CMAKE_SHARED_LIBRARY_SUFFIX
}
"
)
add_custom_command
(
TARGET extern_warpctc POST_BUILD
COMMAND cmake -E copy
${
WARPCTC_INSTALL_DIR
}
/bin/warpctc
${
CMAKE_SHARED_LIBRARY_SUFFIX
}
${
WARPCTC_INSTALL_DIR
}
/lib/warpctc
${
CMAKE_SHARED_LIBRARY_SUFFIX
}
)
ENDIF
()
SET
(
WARPCTC_LIBRARIES
"
${
WARPCTC_INSTALL_DIR
}
/lib/warpctc
${
CMAKE_SHARED_LIBRARY_SUFFIX
}
"
CACHE FILEPATH
"Warp-ctc Library"
FORCE
)
else
(
WIN32
)
SET
(
WARPCTC_LIBRARIES
"
${
WARPCTC_INSTALL_DIR
}
/lib/libwarpctc
${
CMAKE_SHARED_LIBRARY_SUFFIX
}
"
CACHE FILEPATH
"Warp-ctc Library"
FORCE
)
ENDIF
(
WIN32
)
MESSAGE
(
STATUS
"warp-ctc library:
${
WARPCTC_LIBRARIES
}
"
)
MESSAGE
(
STATUS
"warp-ctc library:
${
WARPCTC_LIBRARIES
}
"
)
INCLUDE_DIRECTORIES
(
${
WARPCTC_INCLUDE_DIR
}
)
# For warpctc code to include its headers.
INCLUDE_DIRECTORIES
(
${
WARPCTC_INCLUDE_DIR
}
)
# For warpctc code to include its headers.
...
...
cmake/operators.cmake
浏览文件 @
1c116462
...
@@ -84,7 +84,7 @@ function(op_library TARGET)
...
@@ -84,7 +84,7 @@ function(op_library TARGET)
endif
()
endif
()
if
(
WIN32
)
if
(
WIN32
)
# remove windows unsupported op, because windows has no nccl, no warpctc such ops.
# remove windows unsupported op, because windows has no nccl, no warpctc such ops.
foreach
(
windows_unsupport_op
"nccl_op"
"gen_nccl_id_op"
"warpctc_op"
)
foreach
(
windows_unsupport_op
"nccl_op"
"gen_nccl_id_op"
)
if
(
"
${
TARGET
}
"
STREQUAL
"
${
windows_unsupport_op
}
"
)
if
(
"
${
TARGET
}
"
STREQUAL
"
${
windows_unsupport_op
}
"
)
return
()
return
()
endif
()
endif
()
...
...
paddle/fluid/API.spec
浏览文件 @
1c116462
...
@@ -350,6 +350,22 @@ paddle.fluid.contrib.QuantizeTranspiler.__init__ ArgSpec(args=['self', 'weight_b
...
@@ -350,6 +350,22 @@ paddle.fluid.contrib.QuantizeTranspiler.__init__ ArgSpec(args=['self', 'weight_b
paddle.fluid.contrib.QuantizeTranspiler.convert_to_int8 ArgSpec(args=['self', 'program', 'place', 'scope'], varargs=None, keywords=None, defaults=(None,))
paddle.fluid.contrib.QuantizeTranspiler.convert_to_int8 ArgSpec(args=['self', 'program', 'place', 'scope'], varargs=None, keywords=None, defaults=(None,))
paddle.fluid.contrib.QuantizeTranspiler.freeze_program ArgSpec(args=['self', 'program', 'place', 'fuse_bn', 'scope'], varargs=None, keywords=None, defaults=(False, None))
paddle.fluid.contrib.QuantizeTranspiler.freeze_program ArgSpec(args=['self', 'program', 'place', 'fuse_bn', 'scope'], varargs=None, keywords=None, defaults=(False, None))
paddle.fluid.contrib.QuantizeTranspiler.training_transpile ArgSpec(args=['self', 'program', 'startup_program'], varargs=None, keywords=None, defaults=(None, None))
paddle.fluid.contrib.QuantizeTranspiler.training_transpile ArgSpec(args=['self', 'program', 'startup_program'], varargs=None, keywords=None, defaults=(None, None))
paddle.fluid.contrib.load_persistables_for_increment ArgSpec(args=['dirname', 'executor', 'program', 'lookup_table_var', 'lookup_table_var_path'], varargs=None, keywords=None, defaults=None)
paddle.fluid.contrib.load_persistables_for_inference ArgSpec(args=['dirname', 'executor', 'program', 'lookup_table_var_name'], varargs=None, keywords=None, defaults=None)
paddle.fluid.contrib.convert_dist_to_sparse_program ArgSpec(args=['program'], varargs=None, keywords=None, defaults=None)
paddle.fluid.contrib.HDFSClient.__init__ ArgSpec(args=['self', 'hadoop_home', 'configs'], varargs=None, keywords=None, defaults=None)
paddle.fluid.contrib.HDFSClient.delete ArgSpec(args=['self', 'hdfs_path'], varargs=None, keywords=None, defaults=None)
paddle.fluid.contrib.HDFSClient.download ArgSpec(args=['self', 'hdfs_path', 'local_path', 'overwrite', 'unzip'], varargs=None, keywords=None, defaults=(False, False))
paddle.fluid.contrib.HDFSClient.is_dir ArgSpec(args=['self', 'hdfs_path'], varargs=None, keywords=None, defaults=(None,))
paddle.fluid.contrib.HDFSClient.is_exist ArgSpec(args=['self', 'hdfs_path'], varargs=None, keywords=None, defaults=(None,))
paddle.fluid.contrib.HDFSClient.ls ArgSpec(args=['self', 'hdfs_path'], varargs=None, keywords=None, defaults=None)
paddle.fluid.contrib.HDFSClient.lsr ArgSpec(args=['self', 'hdfs_path', 'only_file', 'sort'], varargs=None, keywords=None, defaults=(True, True))
paddle.fluid.contrib.HDFSClient.make_local_dirs ArgSpec(args=['local_path'], varargs=None, keywords=None, defaults=None)
paddle.fluid.contrib.HDFSClient.makedirs ArgSpec(args=['self', 'hdfs_path'], varargs=None, keywords=None, defaults=None)
paddle.fluid.contrib.HDFSClient.rename ArgSpec(args=['self', 'hdfs_src_path', 'hdfs_dst_path', 'overwrite'], varargs=None, keywords=None, defaults=(False,))
paddle.fluid.contrib.HDFSClient.upload ArgSpec(args=['self', 'hdfs_path', 'local_path', 'overwrite', 'retry_times'], varargs=None, keywords=None, defaults=(False, 5))
paddle.fluid.contrib.multi_download ArgSpec(args=['client', 'hdfs_path', 'local_path', 'trainer_id', 'trainers', 'multi_processes'], varargs=None, keywords=None, defaults=(5,))
paddle.fluid.contrib.multi_upload ArgSpec(args=['client', 'hdfs_path', 'local_path', 'multi_processes', 'overwrite', 'sync'], varargs=None, keywords=None, defaults=(5, False, True))
paddle.fluid.transpiler.DistributeTranspiler.__init__ ArgSpec(args=['self', 'config'], varargs=None, keywords=None, defaults=(None,))
paddle.fluid.transpiler.DistributeTranspiler.__init__ ArgSpec(args=['self', 'config'], varargs=None, keywords=None, defaults=(None,))
paddle.fluid.transpiler.DistributeTranspiler.get_pserver_program ArgSpec(args=['self', 'endpoint'], varargs=None, keywords=None, defaults=None)
paddle.fluid.transpiler.DistributeTranspiler.get_pserver_program ArgSpec(args=['self', 'endpoint'], varargs=None, keywords=None, defaults=None)
paddle.fluid.transpiler.DistributeTranspiler.get_pserver_programs ArgSpec(args=['self', 'endpoint'], varargs=None, keywords=None, defaults=None)
paddle.fluid.transpiler.DistributeTranspiler.get_pserver_programs ArgSpec(args=['self', 'endpoint'], varargs=None, keywords=None, defaults=None)
...
@@ -376,7 +392,7 @@ paddle.fluid.optimizer.MomentumOptimizer.__init__ ArgSpec(args=['self', 'learnin
...
@@ -376,7 +392,7 @@ paddle.fluid.optimizer.MomentumOptimizer.__init__ ArgSpec(args=['self', 'learnin
paddle.fluid.optimizer.MomentumOptimizer.minimize ArgSpec(args=['self', 'loss', 'startup_program', 'parameter_list', 'no_grad_set'], varargs=None, keywords=None, defaults=(None, None, None))
paddle.fluid.optimizer.MomentumOptimizer.minimize ArgSpec(args=['self', 'loss', 'startup_program', 'parameter_list', 'no_grad_set'], varargs=None, keywords=None, defaults=(None, None, None))
paddle.fluid.optimizer.AdagradOptimizer.__init__ ArgSpec(args=['self', 'learning_rate', 'epsilon', 'regularization', 'name'], varargs=None, keywords=None, defaults=(1e-06, None, None))
paddle.fluid.optimizer.AdagradOptimizer.__init__ ArgSpec(args=['self', 'learning_rate', 'epsilon', 'regularization', 'name'], varargs=None, keywords=None, defaults=(1e-06, None, None))
paddle.fluid.optimizer.AdagradOptimizer.minimize ArgSpec(args=['self', 'loss', 'startup_program', 'parameter_list', 'no_grad_set'], varargs=None, keywords=None, defaults=(None, None, None))
paddle.fluid.optimizer.AdagradOptimizer.minimize ArgSpec(args=['self', 'loss', 'startup_program', 'parameter_list', 'no_grad_set'], varargs=None, keywords=None, defaults=(None, None, None))
paddle.fluid.optimizer.AdamOptimizer.__init__ ArgSpec(args=['self', 'learning_rate', 'beta1', 'beta2', 'epsilon', 'regularization', 'name'
], varargs=None, keywords=None, defaults=(0.001, 0.9, 0.999, 1e-08, None, Non
e))
paddle.fluid.optimizer.AdamOptimizer.__init__ ArgSpec(args=['self', 'learning_rate', 'beta1', 'beta2', 'epsilon', 'regularization', 'name'
, 'lazy_mode'], varargs=None, keywords=None, defaults=(0.001, 0.9, 0.999, 1e-08, None, None, Fals
e))
paddle.fluid.optimizer.AdamOptimizer.minimize ArgSpec(args=['self', 'loss', 'startup_program', 'parameter_list', 'no_grad_set'], varargs=None, keywords=None, defaults=(None, None, None))
paddle.fluid.optimizer.AdamOptimizer.minimize ArgSpec(args=['self', 'loss', 'startup_program', 'parameter_list', 'no_grad_set'], varargs=None, keywords=None, defaults=(None, None, None))
paddle.fluid.optimizer.AdamaxOptimizer.__init__ ArgSpec(args=['self', 'learning_rate', 'beta1', 'beta2', 'epsilon', 'regularization', 'name'], varargs=None, keywords=None, defaults=(0.001, 0.9, 0.999, 1e-08, None, None))
paddle.fluid.optimizer.AdamaxOptimizer.__init__ ArgSpec(args=['self', 'learning_rate', 'beta1', 'beta2', 'epsilon', 'regularization', 'name'], varargs=None, keywords=None, defaults=(0.001, 0.9, 0.999, 1e-08, None, None))
paddle.fluid.optimizer.AdamaxOptimizer.minimize ArgSpec(args=['self', 'loss', 'startup_program', 'parameter_list', 'no_grad_set'], varargs=None, keywords=None, defaults=(None, None, None))
paddle.fluid.optimizer.AdamaxOptimizer.minimize ArgSpec(args=['self', 'loss', 'startup_program', 'parameter_list', 'no_grad_set'], varargs=None, keywords=None, defaults=(None, None, None))
...
...
paddle/fluid/framework/details/build_strategy.cc
浏览文件 @
1c116462
...
@@ -131,9 +131,7 @@ std::shared_ptr<ir::PassBuilder> BuildStrategy::CreatePassesFromStrategy(
...
@@ -131,9 +131,7 @@ std::shared_ptr<ir::PassBuilder> BuildStrategy::CreatePassesFromStrategy(
std
::
unique_ptr
<
ir
::
Graph
>
BuildStrategy
::
Apply
(
std
::
unique_ptr
<
ir
::
Graph
>
BuildStrategy
::
Apply
(
const
ProgramDesc
&
main_program
,
const
std
::
vector
<
platform
::
Place
>
&
places
,
const
ProgramDesc
&
main_program
,
const
std
::
vector
<
platform
::
Place
>
&
places
,
const
std
::
string
&
loss_var_name
,
const
std
::
string
&
loss_var_name
,
const
std
::
vector
<
Scope
*>
&
local_scopes
,
const
std
::
unordered_set
<
std
::
string
>
&
param_names
,
const
std
::
vector
<
Scope
*>
&
local_scopes
,
#if defined(PADDLE_WITH_CUDA) && !defined(_WIN32)
#if defined(PADDLE_WITH_CUDA) && !defined(_WIN32)
const
bool
use_cuda
,
platform
::
NCCLContextMap
*
nccl_ctxs
)
const
{
const
bool
use_cuda
,
platform
::
NCCLContextMap
*
nccl_ctxs
)
const
{
#else
#else
...
@@ -149,9 +147,6 @@ std::unique_ptr<ir::Graph> BuildStrategy::Apply(
...
@@ -149,9 +147,6 @@ std::unique_ptr<ir::Graph> BuildStrategy::Apply(
pass
->
SetNotOwned
<
const
std
::
vector
<
platform
::
Place
>>
(
"places"
,
&
places
);
pass
->
SetNotOwned
<
const
std
::
vector
<
platform
::
Place
>>
(
"places"
,
&
places
);
pass
->
Erase
(
"loss_var_name"
);
pass
->
Erase
(
"loss_var_name"
);
pass
->
SetNotOwned
<
const
std
::
string
>
(
"loss_var_name"
,
&
loss_var_name
);
pass
->
SetNotOwned
<
const
std
::
string
>
(
"loss_var_name"
,
&
loss_var_name
);
pass
->
Erase
(
"params"
);
pass
->
SetNotOwned
<
const
std
::
unordered_set
<
std
::
string
>>
(
"params"
,
&
param_names
);
pass
->
Erase
(
"local_scopes"
);
pass
->
Erase
(
"local_scopes"
);
pass
->
SetNotOwned
<
const
std
::
vector
<
Scope
*>>
(
"local_scopes"
,
pass
->
SetNotOwned
<
const
std
::
vector
<
Scope
*>>
(
"local_scopes"
,
&
local_scopes
);
&
local_scopes
);
...
...
paddle/fluid/framework/details/build_strategy.h
浏览文件 @
1c116462
...
@@ -106,14 +106,13 @@ struct BuildStrategy {
...
@@ -106,14 +106,13 @@ struct BuildStrategy {
// Apply the passes built by the pass_builder_. The passes will be
// Apply the passes built by the pass_builder_. The passes will be
// applied to the Program and output an ir::Graph.
// applied to the Program and output an ir::Graph.
std
::
unique_ptr
<
ir
::
Graph
>
Apply
(
std
::
unique_ptr
<
ir
::
Graph
>
Apply
(
const
ProgramDesc
&
main_program
,
const
ProgramDesc
&
main_program
,
const
std
::
vector
<
platform
::
Place
>
&
places
,
const
std
::
vector
<
platform
::
Place
>
&
places
,
const
std
::
string
&
loss_var_name
,
const
std
::
string
&
loss_var_name
,
const
std
::
unordered_set
<
std
::
string
>
&
param_names
,
const
std
::
vector
<
Scope
*>
&
local_scopes
,
const
std
::
vector
<
Scope
*>
&
local_scopes
,
#if defined(PADDLE_WITH_CUDA) && !defined(_WIN32)
#if defined(PADDLE_WITH_CUDA) && !defined(_WIN32)
const
bool
use_cuda
,
platform
::
NCCLContextMap
*
nccl_ctxs
)
const
;
const
bool
use_cuda
,
platform
::
NCCLContextMap
*
nccl_ctxs
)
const
;
#else
#else
const
bool
use_cuda
)
const
;
const
bool
use_cuda
)
const
;
#endif
#endif
...
...
paddle/fluid/framework/details/multi_devices_graph_pass.cc
浏览文件 @
1c116462
...
@@ -130,7 +130,6 @@ void AddOutputToLeafOps(ir::Graph *graph) {
...
@@ -130,7 +130,6 @@ void AddOutputToLeafOps(ir::Graph *graph) {
static
const
char
kLossVarName
[]
=
"loss_var_name"
;
static
const
char
kLossVarName
[]
=
"loss_var_name"
;
static
const
char
kPlaces
[]
=
"places"
;
static
const
char
kPlaces
[]
=
"places"
;
static
const
char
kParams
[]
=
"params"
;
static
const
char
kLocalScopes
[]
=
"local_scopes"
;
static
const
char
kLocalScopes
[]
=
"local_scopes"
;
static
const
char
kStrategy
[]
=
"strategy"
;
static
const
char
kStrategy
[]
=
"strategy"
;
static
const
char
kNumTrainers
[]
=
"num_trainers"
;
static
const
char
kNumTrainers
[]
=
"num_trainers"
;
...
@@ -147,9 +146,6 @@ void MultiDevSSAGraphBuilder::Init() const {
...
@@ -147,9 +146,6 @@ void MultiDevSSAGraphBuilder::Init() const {
nccl_ctxs_
=
&
Get
<
platform
::
NCCLContextMap
>
(
"nccl_ctxs"
);
nccl_ctxs_
=
&
Get
<
platform
::
NCCLContextMap
>
(
"nccl_ctxs"
);
#endif
#endif
for
(
auto
&
p
:
Get
<
const
std
::
unordered_set
<
std
::
string
>>
(
kParams
))
{
grad_names_
.
insert
(
GradVarName
(
p
));
}
balance_vars_
.
resize
(
places_
.
size
(),
0
);
balance_vars_
.
resize
(
places_
.
size
(),
0
);
if
(
strategy_
.
enable_data_balance_
&&
places_
.
size
()
==
1
)
{
if
(
strategy_
.
enable_data_balance_
&&
places_
.
size
()
==
1
)
{
LOG
(
WARNING
)
<<
"It is no need to enable data balance when there is only "
LOG
(
WARNING
)
<<
"It is no need to enable data balance when there is only "
...
@@ -896,7 +892,6 @@ REGISTER_PASS(multi_devices_pass,
...
@@ -896,7 +892,6 @@ REGISTER_PASS(multi_devices_pass,
paddle
::
framework
::
details
::
MultiDevSSAGraphBuilder
)
paddle
::
framework
::
details
::
MultiDevSSAGraphBuilder
)
.
RequirePassAttr
(
paddle
::
framework
::
details
::
kLossVarName
)
.
RequirePassAttr
(
paddle
::
framework
::
details
::
kLossVarName
)
.
RequirePassAttr
(
paddle
::
framework
::
details
::
kPlaces
)
.
RequirePassAttr
(
paddle
::
framework
::
details
::
kPlaces
)
.
RequirePassAttr
(
paddle
::
framework
::
details
::
kParams
)
.
RequirePassAttr
(
paddle
::
framework
::
details
::
kLocalScopes
)
.
RequirePassAttr
(
paddle
::
framework
::
details
::
kLocalScopes
)
.
RequirePassAttr
(
paddle
::
framework
::
details
::
kStrategy
)
.
RequirePassAttr
(
paddle
::
framework
::
details
::
kStrategy
)
.
RequirePassAttr
(
paddle
::
framework
::
details
::
kNumTrainers
);
.
RequirePassAttr
(
paddle
::
framework
::
details
::
kNumTrainers
);
paddle/fluid/framework/details/multi_devices_graph_pass.h
浏览文件 @
1c116462
...
@@ -102,7 +102,6 @@ class MultiDevSSAGraphBuilder : public ir::Pass {
...
@@ -102,7 +102,6 @@ class MultiDevSSAGraphBuilder : public ir::Pass {
mutable
std
::
string
loss_var_name_
;
mutable
std
::
string
loss_var_name_
;
mutable
std
::
vector
<
platform
::
Place
>
places_
;
mutable
std
::
vector
<
platform
::
Place
>
places_
;
mutable
std
::
vector
<
Scope
*>
local_scopes_
;
mutable
std
::
vector
<
Scope
*>
local_scopes_
;
mutable
std
::
unordered_set
<
std
::
string
>
grad_names_
;
mutable
BuildStrategy
strategy_
;
mutable
BuildStrategy
strategy_
;
mutable
std
::
unordered_map
<
std
::
string
,
VarDesc
*>
all_vars_
;
mutable
std
::
unordered_map
<
std
::
string
,
VarDesc
*>
all_vars_
;
...
...
paddle/fluid/framework/ir/conv_elementwise_add_mkldnn_fuse_pass.cc
浏览文件 @
1c116462
...
@@ -24,35 +24,6 @@ namespace paddle {
...
@@ -24,35 +24,6 @@ namespace paddle {
namespace
framework
{
namespace
framework
{
namespace
ir
{
namespace
ir
{
// The function keeps the graph consistent by replacing
// a node 'from' in the set of inputs nodes
// of the visited node by a node 'to'.
void
CorrectGraphEdges
(
Graph
*
graph
,
Node
*
from
,
Node
*
to
)
{
for
(
auto
&
node
:
GraphTraits
::
DFS
(
*
graph
))
{
auto
from_in_inputs
=
std
::
find
(
std
::
begin
(
node
.
inputs
),
std
::
end
(
node
.
inputs
),
from
);
if
(
from_in_inputs
!=
std
::
end
(
node
.
inputs
))
{
IR_NODE_LINK_TO
(
to
,
(
&
node
));
auto
inputs
=
node
.
Op
()
->
Inputs
();
using
input_type
=
VariableNameMap
::
value_type
;
std
::
for_each
(
std
::
begin
(
inputs
),
std
::
end
(
inputs
),
[
from
,
to
,
&
node
](
const
input_type
&
i
)
->
void
{
auto
param_names
=
i
.
second
;
auto
pi
=
std
::
find
(
std
::
begin
(
param_names
),
std
::
end
(
param_names
),
from
->
Name
());
if
(
pi
!=
std
::
end
(
param_names
))
{
node
.
Op
()
->
SetInput
(
i
.
first
,
{
to
->
Name
()});
}
});
}
}
}
bool
IsReachable
(
ir
::
Graph
*
graph
,
Node
*
from
,
Node
*
to
)
{
bool
IsReachable
(
ir
::
Graph
*
graph
,
Node
*
from
,
Node
*
to
)
{
auto
find_node
=
[](
ir
::
Graph
*
graph
,
const
Node
*
node
)
->
Node
*
{
auto
find_node
=
[](
ir
::
Graph
*
graph
,
const
Node
*
node
)
->
Node
*
{
for
(
auto
n
:
graph
->
Nodes
())
{
for
(
auto
n
:
graph
->
Nodes
())
{
...
@@ -99,24 +70,11 @@ bool IsReachable(ir::Graph* graph, Node* from, Node* to) {
...
@@ -99,24 +70,11 @@ bool IsReachable(ir::Graph* graph, Node* from, Node* to) {
return
false
;
return
false
;
}
}
boost
::
optional
<
Node
*>
HasBias
(
const
Node
&
op
,
const
std
::
string
&
bias_name
)
{
template
<
typename
T
>
auto
bias_input_names
=
op
.
Op
()
->
Inputs
();
boost
::
optional
<
T
>
HasAttribute
(
const
Node
&
op
,
const
std
::
string
&
attr
)
{
auto
bias_it
=
bias_input_names
.
find
(
bias_name
);
if
(
op
.
Op
()
->
HasAttr
(
attr
))
return
boost
::
get
<
T
>
(
op
.
Op
()
->
GetAttr
(
attr
));
if
(
bias_it
!=
std
::
end
(
bias_input_names
))
{
else
bool
has_bias
=
!
bias_it
->
second
.
empty
();
if
(
has_bias
)
{
auto
bias_names
=
bias_it
->
second
;
auto
bias_names_it
=
std
::
find_if
(
std
::
begin
(
op
.
inputs
),
std
::
end
(
op
.
inputs
),
[
&
bias_names
](
Node
*
n
)
->
bool
{
return
n
->
Name
()
==
bias_names
[
0
];
});
return
*
bias_names_it
;
}
}
return
boost
::
none
;
return
boost
::
none
;
}
}
...
@@ -151,40 +109,18 @@ void ResidualConnectionMKLDNNFusePass::IdentityFuseHandle::operator()(
...
@@ -151,40 +109,18 @@ void ResidualConnectionMKLDNNFusePass::IdentityFuseHandle::operator()(
if
(
!
IsReachable
(
graph
,
elementwise_add_identity
,
conv_output
))
return
;
if
(
!
IsReachable
(
graph
,
elementwise_add_identity
,
conv_output
))
return
;
OpDesc
op_desc
;
auto
fuse_relu
=
HasAttribute
<
bool
>
(
*
conv_op
,
"fuse_relu"
)
;
op_desc
.
SetType
(
"conv2d"
)
;
if
(
fuse_relu
&&
*
fuse_relu
)
return
;
op_desc
.
SetInput
(
"Input"
,
{
conv_input
->
Name
()});
conv_op
->
Op
()
->
SetInput
(
"ResidualData"
,
{
elementwise_add_identity
->
Name
()});
op_desc
.
SetInput
(
"Filter"
,
{
conv_filter
->
Name
()});
conv_op
->
Op
()
->
SetOutput
(
"Output"
,
{
elementwise_add_out
->
Name
()});
op_desc
.
SetInput
(
"ResidualData"
,
{
elementwise_add_identity
->
Name
()});
conv_op
->
Op
()
->
SetAttr
(
"fuse_residual_connection"
,
true
);
op_desc
.
SetOutput
(
"Output"
,
{
conv_output
->
Name
()});
auto
conv_bias
=
HasBias
(
*
conv_op
,
"Bias"
);
GraphSafeRemoveNodes
(
graph
,
{
conv_output
,
elementwise_add_op
}
);
if
(
conv_bias
)
{
IR_NODE_LINK_TO
(
elementwise_add_identity
,
conv_op
);
op_desc
.
SetInput
(
"Bias"
,
{(
*
conv_bias
)
->
Name
()});
IR_NODE_LINK_TO
(
conv_op
,
elementwise_add_out
);
}
for
(
const
auto
&
attr
:
conv_op
->
Op
()
->
GetAttrMap
())
{
op_desc
.
SetAttr
(
attr
.
first
,
attr
.
second
);
}
op_desc
.
SetAttr
(
"fuse_residual_connection"
,
true
);
auto
fused_conv_op
=
graph
->
CreateOpNode
(
&
op_desc
);
IR_NODE_LINK_TO
(
conv_input
,
fused_conv_op
);
IR_NODE_LINK_TO
(
conv_filter
,
fused_conv_op
);
IR_NODE_LINK_TO
(
elementwise_add_identity
,
fused_conv_op
);
IR_NODE_LINK_TO
(
fused_conv_op
,
conv_output
);
if
(
conv_bias
)
{
IR_NODE_LINK_TO
((
*
conv_bias
),
fused_conv_op
);
}
CorrectGraphEdges
(
graph
,
elementwise_add_out
,
conv_output
);
GraphSafeRemoveNodes
(
graph
,
{
elementwise_add_out
,
conv_op
,
elementwise_add_op
});
(
*
fusion_stats
)
++
;
(
*
fusion_stats
)
++
;
}
}
...
@@ -229,60 +165,33 @@ void ResidualConnectionMKLDNNFusePass::ProjectionFuseHandle::operator()(
...
@@ -229,60 +165,33 @@ void ResidualConnectionMKLDNNFusePass::ProjectionFuseHandle::operator()(
Node
*
projection_node
;
Node
*
projection_node
;
Node
*
residual_conv_op
;
Node
*
residual_conv_op
;
Node
*
residual_conv_input
;
Node
*
residual_conv_filter
;
Node
*
residual_conv_output
;
Node
*
residual_conv_output
;
if
(
IsReachable
(
graph
,
conv_x_input
,
conv_y_output
))
{
if
(
IsReachable
(
graph
,
conv_x_input
,
conv_y_output
))
{
projection_node
=
conv_x_output
;
projection_node
=
conv_x_output
;
residual_conv_op
=
conv_y_op
;
residual_conv_op
=
conv_y_op
;
residual_conv_input
=
conv_y_input
;
residual_conv_filter
=
conv_y_filter
;
residual_conv_output
=
conv_y_output
;
residual_conv_output
=
conv_y_output
;
}
else
if
(
IsReachable
(
graph
,
conv_y_input
,
conv_x_output
))
{
}
else
if
(
IsReachable
(
graph
,
conv_y_input
,
conv_x_output
))
{
projection_node
=
conv_y_output
;
projection_node
=
conv_y_output
;
residual_conv_op
=
conv_x_op
;
residual_conv_op
=
conv_x_op
;
residual_conv_input
=
conv_x_input
;
residual_conv_filter
=
conv_x_filter
;
residual_conv_output
=
conv_x_output
;
residual_conv_output
=
conv_x_output
;
}
else
{
}
else
{
return
;
return
;
}
}
OpDesc
op_desc
;
auto
fuse_relu
=
HasAttribute
<
bool
>
(
*
residual_conv_op
,
"fuse_relu"
)
;
op_desc
.
SetType
(
"conv2d"
)
;
if
(
fuse_relu
&&
*
fuse_relu
)
return
;
op_desc
.
SetInput
(
"Input"
,
{
residual_conv_input
->
Name
()});
residual_conv_op
->
Op
()
->
SetInput
(
"ResidualData"
,
{
projection_node
->
Name
()});
op_desc
.
SetInput
(
"Filter"
,
{
residual_conv_filter
->
Name
()});
residual_conv_op
->
Op
()
->
SetOutput
(
"Output"
,
{
elementwise_add_out
->
Name
()});
op_desc
.
SetInput
(
"ResidualData"
,
{
projection_node
->
Name
()});
op_desc
.
SetOutput
(
"Output"
,
{
residual_conv_output
->
Name
()});
auto
residual_conv_bias
=
HasBias
(
*
residual_conv_op
,
"Bias"
);
residual_conv_op
->
Op
()
->
SetAttr
(
"fuse_residual_connection"
,
true
);
if
(
residual_conv_bias
)
{
GraphSafeRemoveNodes
(
graph
,
{
residual_conv_output
,
elementwise_add_op
});
op_desc
.
SetInput
(
"Bias"
,
{(
*
residual_conv_bias
)
->
Name
()});
}
for
(
const
auto
&
attr
:
residual_conv_op
->
Op
()
->
GetAttrMap
())
{
op_desc
.
SetAttr
(
attr
.
first
,
attr
.
second
);
}
op_desc
.
SetAttr
(
"fuse_residual_connection"
,
true
);
auto
fused_conv_op
=
graph
->
CreateOpNode
(
&
op_desc
);
IR_NODE_LINK_TO
(
projection_node
,
residual_conv_op
);
IR_NODE_LINK_TO
(
residual_conv_op
,
elementwise_add_out
);
IR_NODE_LINK_TO
(
residual_conv_input
,
fused_conv_op
);
IR_NODE_LINK_TO
(
residual_conv_filter
,
fused_conv_op
);
IR_NODE_LINK_TO
(
projection_node
,
fused_conv_op
);
IR_NODE_LINK_TO
(
fused_conv_op
,
residual_conv_output
);
if
(
residual_conv_bias
)
{
IR_NODE_LINK_TO
((
*
residual_conv_bias
),
fused_conv_op
);
}
CorrectGraphEdges
(
graph
,
elementwise_add_out
,
residual_conv_output
);
GraphSafeRemoveNodes
(
graph
,
{
elementwise_add_out
,
residual_conv_op
,
elementwise_add_op
});
(
*
fusion_stats
)
++
;
(
*
fusion_stats
)
++
;
}
}
...
...
paddle/fluid/framework/ngraph_bridge.cc
浏览文件 @
1c116462
...
@@ -16,100 +16,25 @@ limitations under the License. */
...
@@ -16,100 +16,25 @@ limitations under the License. */
#include <functional>
#include <functional>
#include <vector>
#include <vector>
#include "ngraph/ngraph.hpp"
#include "paddle/fluid/framework/ngraph_bridge.h"
#include "paddle/fluid/framework/ngraph_bridge.h"
#include "paddle/fluid/framework/operator.h"
#include "paddle/fluid/framework/operator.h"
#include "paddle/fluid/operators/ngraph/ngraph_ops.h"
#include "paddle/fluid/platform/enforce.h"
#include "paddle/fluid/platform/enforce.h"
#include "paddle/fluid/platform/ngraph_helper.h"
#include "ngraph/ngraph.hpp"
namespace
paddle
{
namespace
paddle
{
namespace
framework
{
namespace
framework
{
static
std
::
shared_ptr
<
ngraph
::
Node
>
GetNode
(
const
std
::
shared_ptr
<
OperatorBase
>&
op
,
const
std
::
string
name
,
const
VariableNameMap
&
var_map
,
std
::
shared_ptr
<
std
::
unordered_map
<
std
::
string
,
std
::
shared_ptr
<
ngraph
::
Node
>>>
ngb_node_map
)
{
auto
&
var_names
=
var_map
.
at
(
name
);
PADDLE_ENFORCE_EQ
(
var_names
.
size
(),
1
,
"op %s name %s expects one associated var"
,
op
->
Type
(),
name
);
if
(
ngb_node_map
->
find
(
var_names
[
0
])
!=
ngb_node_map
->
end
())
{
return
(
*
ngb_node_map
)[
var_names
[
0
]];
}
else
{
return
nullptr
;
}
}
static
std
::
shared_ptr
<
ngraph
::
Node
>
GetInputNode
(
const
std
::
shared_ptr
<
OperatorBase
>&
op
,
const
std
::
string
name
,
std
::
shared_ptr
<
std
::
unordered_map
<
std
::
string
,
std
::
shared_ptr
<
ngraph
::
Node
>>>
ngb_node_map
)
{
return
GetNode
(
op
,
name
,
op
->
Inputs
(),
ngb_node_map
);
}
static
std
::
shared_ptr
<
ngraph
::
Node
>
GetOutputNode
(
const
std
::
shared_ptr
<
OperatorBase
>&
op
,
const
std
::
string
name
,
std
::
shared_ptr
<
std
::
unordered_map
<
std
::
string
,
std
::
shared_ptr
<
ngraph
::
Node
>>>
ngb_node_map
)
{
return
GetNode
(
op
,
name
,
op
->
Outputs
(),
ngb_node_map
);
}
static
void
SetOutputNode
(
const
std
::
shared_ptr
<
OperatorBase
>&
op
,
const
std
::
string
name
,
std
::
shared_ptr
<
ngraph
::
Node
>
node
,
std
::
shared_ptr
<
std
::
unordered_map
<
std
::
string
,
std
::
shared_ptr
<
ngraph
::
Node
>>>
ngb_node_map
)
{
auto
&
var_names
=
op
->
Outputs
().
at
(
name
);
if
(
var_names
.
size
()
==
1
)
{
(
*
ngb_node_map
)[
var_names
[
0
]]
=
node
;
}
else
if
(
var_names
.
size
()
==
0
)
{
(
*
ngb_node_map
)[
""
]
=
node
;
}
else
{
PADDLE_THROW
(
"name %s has more than 1 var_names."
,
name
);
}
}
static
bool
HasOutput
(
const
std
::
shared_ptr
<
OperatorBase
>&
op
,
const
std
::
string
name
)
{
auto
&
outputs
=
op
->
Outputs
();
if
(
outputs
.
find
(
name
)
==
outputs
.
end
())
return
false
;
return
outputs
.
at
(
name
).
size
()
>
0
;
}
template
<
typename
T
>
static
void
BuildBinaryNode
(
const
std
::
shared_ptr
<
OperatorBase
>&
op
,
std
::
shared_ptr
<
std
::
unordered_map
<
std
::
string
,
std
::
shared_ptr
<
ngraph
::
Node
>>>
ngb_node_map
)
{
auto
x
=
GetInputNode
(
op
,
"X"
,
ngb_node_map
);
auto
y
=
GetInputNode
(
op
,
"Y"
,
ngb_node_map
);
auto
out
=
std
::
make_shared
<
T
>
(
x
,
y
);
SetOutputNode
(
op
,
"Out"
,
out
,
ngb_node_map
);
}
template
<
typename
T
>
static
void
BuildUnaryNode
(
const
std
::
shared_ptr
<
OperatorBase
>&
op
,
std
::
shared_ptr
<
std
::
unordered_map
<
std
::
string
,
std
::
shared_ptr
<
ngraph
::
Node
>>>
ngb_node_map
)
{
auto
input
=
GetInputNode
(
op
,
"X"
,
ngb_node_map
);
auto
out
=
std
::
make_shared
<
T
>
(
input
);
SetOutputNode
(
op
,
"Out"
,
out
,
ngb_node_map
);
}
std
::
map
<
std
::
string
,
std
::
map
<
std
::
string
,
std
::
function
<
void
(
const
std
::
shared_ptr
<
OperatorBase
>&
,
std
::
function
<
void
(
const
std
::
shared_ptr
<
OperatorBase
>&
,
std
::
shared_ptr
<
std
::
unordered_map
<
std
::
shared_ptr
<
std
::
unordered_map
<
std
::
string
,
std
::
shared_ptr
<
ngraph
::
Node
>>>
)
>>
std
::
string
,
std
::
shared_ptr
<
ngraph
::
Node
>>>
)
>>
NgraphBridge
::
NG_NODE_MAP
=
{{
"relu"
,
BuildUnaryNode
<
ngraph
::
op
::
Relu
>
},
NgraphBridge
::
NG_NODE_MAP
=
{
{
"tanh"
,
BuildUnaryNode
<
ngraph
::
op
::
Tanh
>
}};
{
"mul"
,
paddle
::
operators
::
ngraphs
::
BuildMulNode
},
{
"mul_grad"
,
paddle
::
operators
::
ngraphs
::
BuildMulGradNode
},
{
"relu"
,
paddle
::
operators
::
ngraphs
::
BuildUnaryNode
<
ngraph
::
op
::
Relu
>
},
{
"tanh"
,
paddle
::
operators
::
ngraphs
::
BuildUnaryNode
<
ngraph
::
op
::
Tanh
>
}};
void
NgraphBridge
::
BuildNgNode
(
const
std
::
shared_ptr
<
OperatorBase
>&
op
)
{
void
NgraphBridge
::
BuildNgNode
(
const
std
::
shared_ptr
<
OperatorBase
>&
op
)
{
auto
&
op_type
=
op
->
Type
();
auto
&
op_type
=
op
->
Type
();
...
...
paddle/fluid/framework/ngraph_operator.cc
浏览文件 @
1c116462
...
@@ -278,7 +278,8 @@ std::shared_ptr<ngraph::runtime::Backend> NgraphEngine::backend_ =
...
@@ -278,7 +278,8 @@ std::shared_ptr<ngraph::runtime::Backend> NgraphEngine::backend_ =
ngraph
::
runtime
::
Backend
::
create
(
"CPU"
);
ngraph
::
runtime
::
Backend
::
create
(
"CPU"
);
void
NgraphEngine
::
GetNgInputShape
(
std
::
shared_ptr
<
OperatorBase
>
op
)
{
void
NgraphEngine
::
GetNgInputShape
(
std
::
shared_ptr
<
OperatorBase
>
op
)
{
op
->
RuntimeInferShape
(
scope_
,
place_
);
RuntimeContext
ctx
(
op
->
Inputs
(),
op
->
Outputs
(),
scope_
);
op
->
RuntimeInferShape
(
scope_
,
place_
,
ctx
);
for
(
auto
&
var_name_item
:
op
->
Inputs
())
{
for
(
auto
&
var_name_item
:
op
->
Inputs
())
{
for
(
auto
&
var_name
:
var_name_item
.
second
)
{
for
(
auto
&
var_name
:
var_name_item
.
second
)
{
auto
*
var
=
scope_
.
FindVar
(
var_name
);
auto
*
var
=
scope_
.
FindVar
(
var_name
);
...
...
paddle/fluid/framework/op_desc.cc
浏览文件 @
1c116462
...
@@ -110,22 +110,125 @@ class CompileTimeInferShapeContext : public InferShapeContext {
...
@@ -110,22 +110,125 @@ class CompileTimeInferShapeContext : public InferShapeContext {
}
}
}
}
std
::
vector
<
InferShapeVarPtr
>
GetInputVarPtrs
(
const
std
::
string
&
name
)
override
{
const
std
::
vector
<
std
::
string
>
arg_names
=
Inputs
(
name
);
std
::
vector
<
InferShapeVarPtr
>
res
;
res
.
reserve
(
arg_names
.
size
());
std
::
transform
(
arg_names
.
begin
(),
arg_names
.
end
(),
std
::
back_inserter
(
res
),
[
this
](
const
std
::
string
&
name
)
{
return
block_
.
FindVarRecursive
(
name
);
});
return
res
;
}
std
::
vector
<
InferShapeVarPtr
>
GetOutputVarPtrs
(
const
std
::
string
&
name
)
override
{
const
std
::
vector
<
std
::
string
>
arg_names
=
Outputs
(
name
);
std
::
vector
<
InferShapeVarPtr
>
res
;
res
.
reserve
(
arg_names
.
size
());
std
::
transform
(
arg_names
.
begin
(),
arg_names
.
end
(),
std
::
back_inserter
(
res
),
[
this
](
const
std
::
string
&
name
)
{
return
block_
.
FindVarRecursive
(
name
);
});
return
res
;
}
DDim
GetInputDim
(
const
std
::
string
&
name
)
const
override
{
const
std
::
vector
<
std
::
string
>
&
arg_names
=
Inputs
(
name
);
PADDLE_ENFORCE_EQ
(
arg_names
.
size
(),
1UL
,
"Input(%s) should hold one element, but now it holds %d"
,
name
,
arg_names
.
size
());
return
this
->
GetDim
(
arg_names
[
0
]);
}
std
::
vector
<
DDim
>
GetInputsDim
(
const
std
::
string
&
name
)
const
override
{
const
std
::
vector
<
std
::
string
>
&
arg_names
=
Inputs
(
name
);
return
GetDims
(
arg_names
);
}
bool
IsRuntime
()
const
override
;
bool
IsRuntime
()
const
override
;
std
::
vector
<
proto
::
VarType
::
Type
>
GetInputsVarType
(
const
std
::
string
&
name
)
const
override
{
return
GetVarTypes
(
Inputs
(
name
));
}
std
::
vector
<
proto
::
VarType
::
Type
>
GetOutputsVarType
(
const
std
::
string
&
name
)
const
override
{
return
GetVarTypes
(
Outputs
(
name
));
}
void
SetOutputDim
(
const
std
::
string
&
name
,
const
DDim
&
dim
)
override
{
auto
&
arg_names
=
Outputs
(
name
);
PADDLE_ENFORCE_EQ
(
arg_names
.
size
(),
1UL
,
"Output(%s) should hold one element, but now it holds %d"
,
name
,
arg_names
.
size
());
SetDim
(
arg_names
[
0
],
dim
);
}
void
SetOutputsDim
(
const
std
::
string
&
name
,
const
std
::
vector
<
DDim
>
&
dims
)
override
{
auto
&
names
=
Outputs
(
name
);
SetDims
(
names
,
dims
);
}
protected:
protected:
proto
::
VarType
::
Type
GetVarType
(
const
std
::
string
&
name
)
const
override
;
std
::
vector
<
proto
::
VarType
::
Type
>
GetVarTypes
(
const
std
::
vector
<
std
::
string
>
&
names
)
const
{
std
::
vector
<
proto
::
VarType
::
Type
>
retv
;
retv
.
resize
(
names
.
size
());
std
::
transform
(
names
.
begin
(),
names
.
end
(),
retv
.
begin
(),
std
::
bind
(
std
::
mem_fn
(
&
CompileTimeInferShapeContext
::
GetVarType
),
this
,
std
::
placeholders
::
_1
));
return
retv
;
}
DDim
GetDim
(
const
std
::
string
&
name
)
const
override
;
proto
::
VarType
::
Type
GetVarType
(
const
std
::
string
&
name
)
const
;
void
SetDim
(
const
std
::
string
&
name
,
const
DDim
&
dim
)
override
;
DDim
GetDim
(
const
std
::
string
&
name
)
const
{
auto
var
=
block_
.
FindVarRecursive
(
name
);
PADDLE_ENFORCE
(
var
!=
nullptr
,
"Cannot find variable %s"
,
name
);
DDim
res
;
try
{
auto
shape
=
var
->
GetShape
();
res
=
shape
.
empty
()
?
make_ddim
({
0UL
})
:
make_ddim
(
shape
);
}
catch
(...)
{
VLOG
(
5
)
<<
"GetDim of variable "
<<
name
<<
" error"
;
std
::
rethrow_exception
(
std
::
current_exception
());
}
return
res
;
}
std
::
vector
<
DDim
>
GetDims
(
const
std
::
vector
<
std
::
string
>
&
names
)
const
{
std
::
vector
<
DDim
>
ret
;
ret
.
reserve
(
names
.
size
());
std
::
transform
(
names
.
begin
(),
names
.
end
(),
std
::
back_inserter
(
ret
),
[
this
](
const
std
::
string
&
name
)
{
return
this
->
GetDim
(
name
);
});
return
ret
;
}
void
SetDim
(
const
std
::
string
&
name
,
const
DDim
&
dim
);
void
SetDims
(
const
std
::
vector
<
std
::
string
>
&
names
,
const
std
::
vector
<
DDim
>
&
dims
)
{
size_t
length
=
names
.
size
();
PADDLE_ENFORCE_EQ
(
length
,
dims
.
size
());
for
(
size_t
i
=
0
;
i
<
length
;
++
i
)
{
if
(
names
[
i
]
==
framework
::
kEmptyVarName
)
{
continue
;
}
SetDim
(
names
[
i
],
dims
[
i
]);
}
}
std
::
vector
<
DDim
>
GetRepeatedDims
(
const
std
::
string
&
name
)
const
override
;
std
::
vector
<
DDim
>
GetRepeatedDims
(
const
std
::
string
&
name
)
const
override
;
void
SetRepeatedDims
(
const
std
::
string
&
name
,
void
SetRepeatedDims
(
const
std
::
string
&
name
,
const
std
::
vector
<
DDim
>
&
dims
)
override
;
const
std
::
vector
<
DDim
>
&
dims
)
override
;
InferShapeVarPtr
GetVarPtr
(
const
std
::
string
&
name
)
override
;
const
OpDesc
&
op_
;
const
OpDesc
&
op_
;
const
BlockDesc
&
block_
;
const
BlockDesc
&
block_
;
};
};
...
@@ -644,20 +747,6 @@ const std::vector<std::string> &CompileTimeInferShapeContext::Outputs(
...
@@ -644,20 +747,6 @@ const std::vector<std::string> &CompileTimeInferShapeContext::Outputs(
return
op_
.
Output
(
name
);
return
op_
.
Output
(
name
);
}
}
DDim
CompileTimeInferShapeContext
::
GetDim
(
const
std
::
string
&
name
)
const
{
auto
var
=
block_
.
FindVarRecursive
(
name
);
PADDLE_ENFORCE
(
var
!=
nullptr
,
"Cannot find variable %s"
,
name
);
DDim
res
;
try
{
auto
shape
=
var
->
GetShape
();
res
=
shape
.
empty
()
?
make_ddim
({
0UL
})
:
make_ddim
(
shape
);
}
catch
(...)
{
VLOG
(
5
)
<<
"GetDim of variable "
<<
name
<<
" error"
;
std
::
rethrow_exception
(
std
::
current_exception
());
}
return
res
;
}
std
::
vector
<
DDim
>
CompileTimeInferShapeContext
::
GetRepeatedDims
(
std
::
vector
<
DDim
>
CompileTimeInferShapeContext
::
GetRepeatedDims
(
const
std
::
string
&
name
)
const
{
const
std
::
string
&
name
)
const
{
auto
var
=
block_
.
FindVarRecursive
(
name
);
auto
var
=
block_
.
FindVarRecursive
(
name
);
...
@@ -696,10 +785,5 @@ proto::VarType::Type CompileTimeInferShapeContext::GetVarType(
...
@@ -696,10 +785,5 @@ proto::VarType::Type CompileTimeInferShapeContext::GetVarType(
return
block_
.
FindVarRecursive
(
name
)
->
GetType
();
return
block_
.
FindVarRecursive
(
name
)
->
GetType
();
}
}
InferShapeVarPtr
CompileTimeInferShapeContext
::
GetVarPtr
(
const
std
::
string
&
name
)
{
return
block_
.
FindVarRecursive
(
name
);
}
}
// namespace framework
}
// namespace framework
}
// namespace paddle
}
// namespace paddle
paddle/fluid/framework/operator.cc
浏览文件 @
1c116462
...
@@ -137,6 +137,25 @@ static LoD GetLoD(const Scope& scope, const std::string& name) {
...
@@ -137,6 +137,25 @@ static LoD GetLoD(const Scope& scope, const std::string& name) {
}
}
}
}
RuntimeContext
::
RuntimeContext
(
const
VariableNameMap
&
innames
,
const
VariableNameMap
&
outnames
,
const
Scope
&
scope
)
{
for
(
auto
&
var_name_item
:
innames
)
{
std
::
vector
<
Variable
*>&
input_vars
=
inputs
[
var_name_item
.
first
];
input_vars
.
reserve
(
var_name_item
.
second
.
size
());
for
(
auto
&
var_name
:
var_name_item
.
second
)
{
input_vars
.
push_back
(
scope
.
FindVar
(
var_name
));
}
}
for
(
auto
&
var_name_item
:
outnames
)
{
std
::
vector
<
Variable
*>&
output_vars
=
outputs
[
var_name_item
.
first
];
output_vars
.
reserve
(
var_name_item
.
second
.
size
());
for
(
auto
&
var_name
:
var_name_item
.
second
)
{
output_vars
.
push_back
(
scope
.
FindVar
(
var_name
));
}
}
}
void
OperatorBase
::
Run
(
const
Scope
&
scope
,
const
platform
::
Place
&
place
)
{
void
OperatorBase
::
Run
(
const
Scope
&
scope
,
const
platform
::
Place
&
place
)
{
VLOG
(
4
)
<<
place
<<
" "
<<
DebugStringEx
(
&
scope
);
VLOG
(
4
)
<<
place
<<
" "
<<
DebugStringEx
(
&
scope
);
if
(
platform
::
is_gpu_place
(
place
))
{
if
(
platform
::
is_gpu_place
(
place
))
{
...
@@ -412,11 +431,48 @@ bool ExecutionContext::HasOutput(const std::string& name) const {
...
@@ -412,11 +431,48 @@ bool ExecutionContext::HasOutput(const std::string& name) const {
return
var
!=
nullptr
;
return
var
!=
nullptr
;
}
}
const
Variable
*
ExecutionContext
::
InputVar
(
const
std
::
string
&
name
)
const
{
auto
it
=
ctx_
.
inputs
.
find
(
name
);
if
(
it
==
ctx_
.
inputs
.
end
())
return
nullptr
;
PADDLE_ENFORCE_LE
(
it
->
second
.
size
(),
1UL
,
"Operator %s's input %s should contain only one variable."
,
op_
.
Type
(),
name
);
return
it
->
second
.
empty
()
?
nullptr
:
it
->
second
[
0
];
}
const
Variable
*
ExecutionContext
::
LegacyInputVar
(
const
std
::
string
&
name
)
const
{
auto
ipt
=
op_
.
Input
(
name
);
return
ipt
==
kEmptyVarName
?
nullptr
:
scope_
.
FindVar
(
ipt
);
}
Variable
*
ExecutionContext
::
OutputVar
(
const
std
::
string
&
name
)
const
{
auto
it
=
ctx_
.
outputs
.
find
(
name
);
if
(
it
==
ctx_
.
outputs
.
end
())
return
nullptr
;
PADDLE_ENFORCE_LE
(
it
->
second
.
size
(),
1UL
,
"Operator %s's output %s should contain only one variable."
,
op_
.
Type
(),
name
);
return
it
->
second
.
empty
()
?
nullptr
:
it
->
second
[
0
];
}
Variable
*
ExecutionContext
::
LegacyOutputVar
(
const
std
::
string
&
name
)
const
{
auto
opt
=
op_
.
Output
(
name
);
return
opt
==
kEmptyVarName
?
nullptr
:
scope_
.
FindVar
(
opt
);
}
template
<
>
template
<
>
const
Tensor
*
ExecutionContext
::
Input
<
Tensor
>
(
const
std
::
string
&
name
)
const
{
const
Tensor
*
ExecutionContext
::
Input
<
Tensor
>
(
const
std
::
string
&
name
)
const
{
return
Input
<
LoDTensor
>
(
name
);
return
Input
<
LoDTensor
>
(
name
);
}
}
template
<
>
const
Tensor
*
ExecutionContext
::
LegacyInput
<
Tensor
>
(
const
std
::
string
&
name
)
const
{
return
LegacyInput
<
LoDTensor
>
(
name
);
}
template
<
>
template
<
>
const
std
::
vector
<
const
Tensor
*>
ExecutionContext
::
MultiInput
<
Tensor
>
(
const
std
::
vector
<
const
Tensor
*>
ExecutionContext
::
MultiInput
<
Tensor
>
(
const
std
::
string
&
name
)
const
{
const
std
::
string
&
name
)
const
{
...
@@ -441,6 +497,11 @@ Tensor* ExecutionContext::Output<Tensor>(const std::string& name) const {
...
@@ -441,6 +497,11 @@ Tensor* ExecutionContext::Output<Tensor>(const std::string& name) const {
return
Output
<
LoDTensor
>
(
name
);
return
Output
<
LoDTensor
>
(
name
);
}
}
template
<
>
Tensor
*
ExecutionContext
::
LegacyOutput
<
Tensor
>
(
const
std
::
string
&
name
)
const
{
return
LegacyOutput
<
LoDTensor
>
(
name
);
}
template
<
>
template
<
>
std
::
vector
<
Tensor
*>
ExecutionContext
::
MultiOutput
<
Tensor
>
(
std
::
vector
<
Tensor
*>
ExecutionContext
::
MultiOutput
<
Tensor
>
(
const
std
::
string
&
name
)
const
{
const
std
::
string
&
name
)
const
{
...
@@ -477,51 +538,48 @@ bool OpSupportGPU(const std::string& op_type) {
...
@@ -477,51 +538,48 @@ bool OpSupportGPU(const std::string& op_type) {
class
RuntimeInferShapeContext
:
public
InferShapeContext
{
class
RuntimeInferShapeContext
:
public
InferShapeContext
{
public:
public:
RuntimeInferShapeContext
(
const
OperatorBase
&
op
,
const
Scope
&
scope
)
RuntimeInferShapeContext
(
const
OperatorBase
&
op
,
const
Scope
&
scope
,
:
op_
(
op
),
scope_
(
scope
)
{}
const
RuntimeContext
&
ctx
)
:
op_
(
op
),
scope_
(
scope
),
ctx_
(
ctx
)
{}
bool
HasInput
(
const
std
::
string
&
name
)
const
override
{
bool
HasInput
(
const
std
::
string
&
name
)
const
override
{
// has only one input
// has only one input
const
auto
&
ins
=
op_
.
Inputs
()
;
const
auto
&
ins
=
ctx_
.
inputs
;
auto
it
=
ins
.
find
(
name
);
auto
it
=
ins
.
find
(
name
);
if
(
it
==
ins
.
end
())
{
if
(
it
==
ins
.
end
())
{
return
false
;
return
false
;
}
}
const
auto
&
in
=
it
->
second
;
const
auto
&
in
=
it
->
second
;
if
(
in
.
size
()
==
0
||
in
[
0
]
==
kEmptyVarName
)
{
if
(
in
.
size
()
==
0
)
return
false
;
return
false
;
}
PADDLE_ENFORCE_EQ
(
in
.
size
(),
1UL
,
PADDLE_ENFORCE_EQ
(
in
.
size
(),
1UL
,
"Input %s should not have more than one inputs"
,
name
);
"Input %s should not have more than one inputs"
,
name
);
return
scope_
.
FindVar
(
in
[
0
])
!=
nullptr
;
return
in
[
0
]
!=
nullptr
;
}
}
bool
HasOutput
(
const
std
::
string
&
name
)
const
override
{
bool
HasOutput
(
const
std
::
string
&
name
)
const
override
{
// has only one output
// has only one output
const
auto
&
outs
=
op_
.
Outputs
()
;
const
auto
&
outs
=
ctx_
.
outputs
;
auto
it
=
outs
.
find
(
name
);
auto
it
=
outs
.
find
(
name
);
if
(
it
==
outs
.
end
())
{
if
(
it
==
outs
.
end
())
{
return
false
;
return
false
;
}
}
const
auto
&
out
=
it
->
second
;
const
auto
&
out
=
it
->
second
;
if
(
out
.
size
()
==
0
||
out
[
0
]
==
kEmptyVarName
)
{
if
(
out
.
size
()
==
0
)
{
return
false
;
return
false
;
}
}
PADDLE_ENFORCE_EQ
(
out
.
size
(),
1UL
,
PADDLE_ENFORCE_EQ
(
out
.
size
(),
1UL
,
"Output %s should not have more than one outputs"
,
name
);
"Output %s should not have more than one outputs"
,
name
);
return
scope_
.
FindVar
(
out
[
0
])
!=
nullptr
;
return
out
[
0
]
!=
nullptr
;
}
}
bool
HasInputs
(
const
std
::
string
&
name
)
const
override
{
bool
HasInputs
(
const
std
::
string
&
name
)
const
override
{
if
(
!
op_
.
HasInputs
(
name
))
{
const
auto
&
ins
=
ctx_
.
inputs
;
return
false
;
auto
it
=
ins
.
find
(
name
);
}
if
(
it
==
ins
.
end
()
||
it
->
second
.
empty
())
{
auto
inputs
=
op_
.
Inputs
(
name
);
if
(
inputs
.
empty
())
{
return
false
;
return
false
;
}
}
for
(
auto
&
input
:
i
nputs
)
{
for
(
auto
&
input
:
i
t
->
second
)
{
if
(
scope_
.
FindVar
(
input
)
==
nullptr
)
{
if
(
input
==
nullptr
)
{
return
false
;
return
false
;
}
}
}
}
...
@@ -529,15 +587,13 @@ class RuntimeInferShapeContext : public InferShapeContext {
...
@@ -529,15 +587,13 @@ class RuntimeInferShapeContext : public InferShapeContext {
}
}
bool
HasOutputs
(
const
std
::
string
&
name
)
const
override
{
bool
HasOutputs
(
const
std
::
string
&
name
)
const
override
{
if
(
!
op_
.
HasOutputs
(
name
))
{
const
auto
&
outs
=
ctx_
.
outputs
;
return
false
;
auto
it
=
outs
.
find
(
name
);
}
if
(
it
==
outs
.
end
()
||
it
->
second
.
empty
())
{
auto
outputs
=
op_
.
Outputs
(
name
);
if
(
outputs
.
empty
())
{
return
false
;
return
false
;
}
}
for
(
auto
&
output
:
outputs
)
{
for
(
auto
&
output
:
it
->
second
)
{
if
(
scope_
.
FindVar
(
output
)
==
nullptr
)
{
if
(
output
==
nullptr
)
{
return
false
;
return
false
;
}
}
}
}
...
@@ -558,16 +614,18 @@ class RuntimeInferShapeContext : public InferShapeContext {
...
@@ -558,16 +614,18 @@ class RuntimeInferShapeContext : public InferShapeContext {
void
ShareDim
(
const
std
::
string
&
in
,
const
std
::
string
&
out
,
size_t
i
=
0
,
void
ShareDim
(
const
std
::
string
&
in
,
const
std
::
string
&
out
,
size_t
i
=
0
,
size_t
j
=
0
)
override
{
size_t
j
=
0
)
override
{
PADDLE_ENFORCE_LT
(
i
,
Inputs
(
in
).
size
());
auto
in_it
=
ctx_
.
inputs
.
find
(
in
);
PADDLE_ENFORCE_LT
(
j
,
Outputs
(
out
).
size
());
auto
out_it
=
ctx_
.
outputs
.
find
(
out
);
const
std
::
string
&
input_n
=
Inputs
(
in
)[
i
];
PADDLE_ENFORCE
(
in_it
!=
ctx_
.
inputs
.
end
()
&&
in_it
->
second
.
size
()
>
i
,
const
std
::
string
&
output_n
=
Outputs
(
out
)[
j
];
"Inputs %s should have %llu argument"
,
in
,
i
);
PADDLE_ENFORCE
(
out_it
!=
ctx_
.
outputs
.
end
()
&&
out_it
->
second
.
size
()
>
j
,
"Outputs %s should have %llu argument"
,
out
,
j
);
Variable
*
in_var
=
in_it
->
second
[
i
];
Variable
*
out_var
=
out_it
->
second
[
j
];
Variable
*
in_var
=
scope_
.
FindVar
(
input_n
);
Variable
*
out_var
=
scope_
.
FindVar
(
output_n
);
PADDLE_ENFORCE
(
in_var
->
Type
()
==
out_var
->
Type
(),
PADDLE_ENFORCE
(
in_var
->
Type
()
==
out_var
->
Type
(),
"The type of %s and %s is not the same."
,
output_n
,
"The type of %s and %s is not the same."
,
in
,
out
);
GetDim
(
input_n
));
if
(
in_var
->
IsType
<
framework
::
SelectedRows
>
())
{
if
(
in_var
->
IsType
<
framework
::
SelectedRows
>
())
{
auto
&
in_sele_rows
=
in_var
->
Get
<
framework
::
SelectedRows
>
();
auto
&
in_sele_rows
=
in_var
->
Get
<
framework
::
SelectedRows
>
();
...
@@ -588,13 +646,16 @@ class RuntimeInferShapeContext : public InferShapeContext {
...
@@ -588,13 +646,16 @@ class RuntimeInferShapeContext : public InferShapeContext {
void
ShareLoD
(
const
std
::
string
&
in
,
const
std
::
string
&
out
,
size_t
i
=
0
,
void
ShareLoD
(
const
std
::
string
&
in
,
const
std
::
string
&
out
,
size_t
i
=
0
,
size_t
j
=
0
)
const
override
{
size_t
j
=
0
)
const
override
{
const
std
::
vector
<
std
::
string
>&
inputs
=
Inputs
(
in
);
auto
in_it
=
ctx_
.
inputs
.
find
(
in
);
const
std
::
vector
<
std
::
string
>&
outputs
=
Outputs
(
out
);
auto
out_it
=
ctx_
.
outputs
.
find
(
out
);
PADDLE_ENFORCE_LT
(
i
,
inputs
.
size
());
PADDLE_ENFORCE
(
in_it
!=
ctx_
.
inputs
.
end
()
&&
in_it
->
second
.
size
()
>
i
,
PADDLE_ENFORCE_LT
(
j
,
outputs
.
size
());
"Inputs %s should have %llu argument"
,
in
,
i
);
Variable
*
in_var
=
scope_
.
FindVar
(
inputs
.
at
(
i
));
PADDLE_ENFORCE
(
out_it
!=
ctx_
.
outputs
.
end
()
&&
out_it
->
second
.
size
()
>
j
,
"Outputs %s should have %llu argument"
,
out
,
j
);
Variable
*
in_var
=
in_it
->
second
.
at
(
i
);
if
(
!
in_var
->
IsType
<
LoDTensor
>
())
return
;
if
(
!
in_var
->
IsType
<
LoDTensor
>
())
return
;
Variable
*
out_var
=
scope_
.
FindVar
(
outputs
.
at
(
j
)
);
Variable
*
out_var
=
out_it
->
second
.
at
(
j
);
PADDLE_ENFORCE
(
out_var
->
IsType
<
LoDTensor
>
(),
PADDLE_ENFORCE
(
out_var
->
IsType
<
LoDTensor
>
(),
"The %d-th output of Output(%s) must be LoDTensor."
,
j
,
out
);
"The %d-th output of Output(%s) must be LoDTensor."
,
j
,
out
);
auto
in_tensor
=
in_var
->
Get
<
LoDTensor
>
();
auto
in_tensor
=
in_var
->
Get
<
LoDTensor
>
();
...
@@ -629,9 +690,64 @@ class RuntimeInferShapeContext : public InferShapeContext {
...
@@ -629,9 +690,64 @@ class RuntimeInferShapeContext : public InferShapeContext {
bool
IsRuntime
()
const
override
{
return
true
;
}
bool
IsRuntime
()
const
override
{
return
true
;
}
// TODO(paddle-dev): Can this be template?
std
::
vector
<
InferShapeVarPtr
>
GetInputVarPtrs
(
const
std
::
string
&
name
)
override
{
const
std
::
vector
<
Variable
*>&
vars
=
InputVars
(
name
);
std
::
vector
<
InferShapeVarPtr
>
res
;
res
.
reserve
(
vars
.
size
());
res
.
insert
(
res
.
begin
(),
vars
.
begin
(),
vars
.
end
());
return
res
;
}
std
::
vector
<
InferShapeVarPtr
>
GetOutputVarPtrs
(
const
std
::
string
&
name
)
override
{
const
std
::
vector
<
Variable
*>&
vars
=
OutputVars
(
name
);
std
::
vector
<
InferShapeVarPtr
>
res
;
res
.
reserve
(
vars
.
size
());
res
.
insert
(
res
.
begin
(),
vars
.
begin
(),
vars
.
end
());
return
res
;
}
DDim
GetInputDim
(
const
std
::
string
&
name
)
const
override
{
const
std
::
vector
<
Variable
*>&
vars
=
InputVars
(
name
);
PADDLE_ENFORCE_EQ
(
vars
.
size
(),
1UL
,
"Input(%s) should hold one element, but now it holds %d"
,
name
,
vars
.
size
());
return
this
->
GetDim
(
vars
[
0
]);
}
std
::
vector
<
DDim
>
GetInputsDim
(
const
std
::
string
&
name
)
const
override
{
const
std
::
vector
<
Variable
*>&
vars
=
InputVars
(
name
);
return
GetDims
(
vars
);
}
std
::
vector
<
proto
::
VarType
::
Type
>
GetInputsVarType
(
const
std
::
string
&
name
)
const
override
{
return
GetVarTypes
(
InputVars
(
name
));
}
std
::
vector
<
proto
::
VarType
::
Type
>
GetOutputsVarType
(
const
std
::
string
&
name
)
const
override
{
return
GetVarTypes
(
OutputVars
(
name
));
}
void
SetOutputDim
(
const
std
::
string
&
name
,
const
DDim
&
dim
)
override
{
auto
&
vars
=
OutputVars
(
name
);
PADDLE_ENFORCE_EQ
(
vars
.
size
(),
1UL
,
"Output(%s) should hold one element, but now it holds %d"
,
name
,
vars
.
size
());
SetDim
(
vars
[
0
],
dim
);
}
void
SetOutputsDim
(
const
std
::
string
&
name
,
const
std
::
vector
<
DDim
>&
dims
)
override
{
auto
&
vars
=
OutputVars
(
name
);
SetDims
(
vars
,
dims
);
}
protected:
protected:
DDim
GetDim
(
const
std
::
string
&
name
)
const
override
{
DDim
GetDim
(
Variable
*
var
)
const
{
Variable
*
var
=
scope_
.
FindVar
(
name
);
PADDLE_ENFORCE_NOT_NULL
(
var
);
PADDLE_ENFORCE_NOT_NULL
(
var
);
if
(
var
->
IsType
<
LoDTensor
>
())
{
if
(
var
->
IsType
<
LoDTensor
>
())
{
return
var
->
Get
<
LoDTensor
>
().
dims
();
return
var
->
Get
<
LoDTensor
>
().
dims
();
...
@@ -639,25 +755,44 @@ class RuntimeInferShapeContext : public InferShapeContext {
...
@@ -639,25 +755,44 @@ class RuntimeInferShapeContext : public InferShapeContext {
return
var
->
Get
<
SelectedRows
>
().
GetCompleteDims
();
return
var
->
Get
<
SelectedRows
>
().
GetCompleteDims
();
}
else
{
}
else
{
PADDLE_THROW
(
PADDLE_THROW
(
"Only LoDTensor/SelectedRows support 'GetDim', but Variable
%s'
s "
"Only LoDTensor/SelectedRows support 'GetDim', but Variables "
"type_id is %s."
,
"type_id is %s."
,
name
,
var
->
Type
().
name
());
var
->
Type
().
name
());
}
}
}
}
std
::
vector
<
DDim
>
GetDims
(
const
std
::
vector
<
Variable
*>&
vars
)
const
{
std
::
vector
<
DDim
>
ret
;
ret
.
reserve
(
vars
.
size
());
std
::
transform
(
vars
.
begin
(),
vars
.
end
(),
std
::
back_inserter
(
ret
),
[
this
](
Variable
*
var
)
{
return
this
->
GetDim
(
var
);
});
return
ret
;
}
std
::
vector
<
DDim
>
GetRepeatedDims
(
const
std
::
string
&
name
)
const
override
{
std
::
vector
<
DDim
>
GetRepeatedDims
(
const
std
::
string
&
name
)
const
override
{
PADDLE_THROW
(
"Only compile time support this method"
);
PADDLE_THROW
(
"Only compile time support this method"
);
}
}
void
SetDim
(
const
std
::
string
&
name
,
const
DDim
&
dim
)
override
{
void
SetDim
(
Variable
*
var
,
const
DDim
&
dim
)
{
Variable
*
var
=
scope_
.
FindVar
(
name
);
if
(
var
->
IsType
<
LoDTensor
>
())
{
if
(
var
->
IsType
<
LoDTensor
>
())
{
var
->
GetMutable
<
LoDTensor
>
()
->
Resize
(
dim
);
var
->
GetMutable
<
LoDTensor
>
()
->
Resize
(
dim
);
}
else
if
(
var
->
IsType
<
SelectedRows
>
())
{
}
else
if
(
var
->
IsType
<
SelectedRows
>
())
{
var
->
GetMutable
<
SelectedRows
>
()
->
set_height
(
dim
[
0
]);
var
->
GetMutable
<
SelectedRows
>
()
->
set_height
(
dim
[
0
]);
}
else
{
}
else
{
PADDLE_THROW
(
"Variable %s type_id %s, expect LoDTensor/SelectedRows."
,
PADDLE_THROW
(
"Variable type_id %s, expect LoDTensor/SelectedRows."
,
name
,
var
->
Type
().
name
());
var
->
Type
().
name
());
}
}
void
SetDims
(
const
std
::
vector
<
Variable
*>&
vars
,
const
std
::
vector
<
DDim
>&
dims
)
{
size_t
length
=
vars
.
size
();
PADDLE_ENFORCE_EQ
(
length
,
dims
.
size
());
for
(
size_t
i
=
0
;
i
<
length
;
++
i
)
{
if
(
vars
[
i
]
==
nullptr
)
{
continue
;
}
SetDim
(
vars
[
i
],
dims
[
i
]);
}
}
}
}
...
@@ -666,18 +801,39 @@ class RuntimeInferShapeContext : public InferShapeContext {
...
@@ -666,18 +801,39 @@ class RuntimeInferShapeContext : public InferShapeContext {
PADDLE_THROW
(
"Only compile time support this method"
);
PADDLE_THROW
(
"Only compile time support this method"
);
}
}
proto
::
VarType
::
Type
GetVarType
(
const
std
::
string
&
name
)
const
override
{
std
::
vector
<
proto
::
VarType
::
Type
>
GetVarTypes
(
auto
*
var
=
scope_
.
FindVar
(
name
);
const
std
::
vector
<
Variable
*>&
vars
)
const
{
return
ToVarType
(
var
->
Type
());
std
::
vector
<
proto
::
VarType
::
Type
>
retv
;
retv
.
resize
(
vars
.
size
());
std
::
transform
(
vars
.
begin
(),
vars
.
end
(),
retv
.
begin
(),
std
::
bind
(
std
::
mem_fn
(
&
RuntimeInferShapeContext
::
GetVarType
),
this
,
std
::
placeholders
::
_1
));
return
retv
;
}
}
InferShapeVarPtr
GetVarPtr
(
const
std
::
string
&
name
)
override
{
proto
::
VarType
::
Type
GetVarType
(
Variable
*
var
)
const
{
return
scope_
.
FindVar
(
name
);
return
ToVarType
(
var
->
Type
()
);
}
}
private:
private:
const
std
::
vector
<
Variable
*>&
InputVars
(
const
std
::
string
&
name
)
const
{
auto
it
=
ctx_
.
inputs
.
find
(
name
);
PADDLE_ENFORCE
(
it
!=
ctx_
.
inputs
.
end
(),
"Operator %s does not have the input %s."
,
op_
.
Type
(),
name
);
return
it
->
second
;
}
const
std
::
vector
<
Variable
*>&
OutputVars
(
const
std
::
string
&
name
)
const
{
auto
it
=
ctx_
.
outputs
.
find
(
name
);
PADDLE_ENFORCE
(
it
!=
ctx_
.
outputs
.
end
(),
"Operator %s does not have the outputs %s."
,
op_
.
Type
(),
name
);
return
it
->
second
;
}
const
OperatorBase
&
op_
;
const
OperatorBase
&
op_
;
const
Scope
&
scope_
;
const
Scope
&
scope_
;
const
RuntimeContext
&
ctx_
;
};
};
static
void
CheckTensorNANOrInf
(
const
std
::
string
&
name
,
static
void
CheckTensorNANOrInf
(
const
std
::
string
&
name
,
...
@@ -696,15 +852,15 @@ static void CheckTensorNANOrInf(const std::string& name,
...
@@ -696,15 +852,15 @@ static void CheckTensorNANOrInf(const std::string& name,
}
}
void
OperatorWithKernel
::
RuntimeInferShape
(
const
Scope
&
scope
,
void
OperatorWithKernel
::
RuntimeInferShape
(
const
Scope
&
scope
,
const
platform
::
Place
&
place
)
const
{
const
platform
::
Place
&
place
,
RuntimeInferShapeContext
infer_shape_ctx
(
*
this
,
scope
);
const
RuntimeContext
&
ctx
)
const
{
RuntimeInferShapeContext
infer_shape_ctx
(
*
this
,
scope
,
ctx
);
this
->
InferShape
(
&
infer_shape_ctx
);
this
->
InferShape
(
&
infer_shape_ctx
);
}
}
void
OperatorWithKernel
::
RunImpl
(
const
Scope
&
scope
,
void
OperatorWithKernel
::
RunImpl
(
const
Scope
&
scope
,
const
platform
::
Place
&
place
)
const
{
const
platform
::
Place
&
place
)
const
{
RuntimeInferShapeContext
infer_shape_ctx
(
*
this
,
scope
);
RuntimeContext
ctx
(
Inputs
(),
Outputs
(),
scope
);
this
->
InferShape
(
&
infer_shape_ctx
);
platform
::
DeviceContextPool
&
pool
=
platform
::
DeviceContextPool
::
Instance
();
platform
::
DeviceContextPool
&
pool
=
platform
::
DeviceContextPool
::
Instance
();
auto
*
dev_ctx
=
pool
.
Get
(
place
);
auto
*
dev_ctx
=
pool
.
Get
(
place
);
...
@@ -718,15 +874,8 @@ void OperatorWithKernel::RunImpl(const Scope& scope,
...
@@ -718,15 +874,8 @@ void OperatorWithKernel::RunImpl(const Scope& scope,
OpKernelMap
&
kernels
=
kernels_iter
->
second
;
OpKernelMap
&
kernels
=
kernels_iter
->
second
;
// TODO(dzhwinter) : kernel fallback mechanism will be added when all the
auto
expected_kernel_key
=
this
->
GetExpectedKernelType
(
// transform functions are ready.
ExecutionContext
(
*
this
,
scope
,
*
dev_ctx
,
ctx
));
// for (auto& candidate : kKernelPriority) {
// Do selection
// }
auto
expected_kernel_key
=
this
->
GetExpectedKernelType
(
ExecutionContext
(
*
this
,
scope
,
*
dev_ctx
));
VLOG
(
3
)
<<
"expected_kernel_key:"
<<
expected_kernel_key
;
VLOG
(
3
)
<<
"expected_kernel_key:"
<<
expected_kernel_key
;
auto
kernel_iter
=
kernels
.
find
(
expected_kernel_key
);
auto
kernel_iter
=
kernels
.
find
(
expected_kernel_key
);
...
@@ -748,7 +897,7 @@ void OperatorWithKernel::RunImpl(const Scope& scope,
...
@@ -748,7 +897,7 @@ void OperatorWithKernel::RunImpl(const Scope& scope,
// do data transformScope &transfer_scope;
// do data transformScope &transfer_scope;
std
::
vector
<
std
::
string
>
transfered_inplace_vars
;
std
::
vector
<
std
::
string
>
transfered_inplace_vars
;
auto
*
transfer_scope
=
auto
*
transfer_scope
=
TryTransferData
(
scope
,
expected_kernel_key
,
&
transfered_inplace_vars
);
PrepareData
(
scope
,
expected_kernel_key
,
&
transfered_inplace_vars
,
&
ctx
);
// exec scope is the scope that kernel actually executed on.
// exec scope is the scope that kernel actually executed on.
const
Scope
&
exec_scope
=
const
Scope
&
exec_scope
=
...
@@ -758,7 +907,11 @@ void OperatorWithKernel::RunImpl(const Scope& scope,
...
@@ -758,7 +907,11 @@ void OperatorWithKernel::RunImpl(const Scope& scope,
dev_ctx
=
pool
.
Get
(
expected_kernel_key
.
place_
);
dev_ctx
=
pool
.
Get
(
expected_kernel_key
.
place_
);
}
}
kernel_iter
->
second
(
ExecutionContext
(
*
this
,
exec_scope
,
*
dev_ctx
));
RuntimeInferShapeContext
infer_shape_ctx
(
*
this
,
exec_scope
,
ctx
);
this
->
InferShape
(
&
infer_shape_ctx
);
// TODO(panyx0718): ExecutionContext should only depend on RuntimeContext
// not Scope. Imperative mode only pass inputs and get outputs.
kernel_iter
->
second
(
ExecutionContext
(
*
this
,
exec_scope
,
*
dev_ctx
,
ctx
));
if
(
!
transfered_inplace_vars
.
empty
())
{
if
(
!
transfered_inplace_vars
.
empty
())
{
// there is inplace variable has been transfered.
// there is inplace variable has been transfered.
...
@@ -782,6 +935,7 @@ void OperatorWithKernel::RunImpl(const Scope& scope,
...
@@ -782,6 +935,7 @@ void OperatorWithKernel::RunImpl(const Scope& scope,
}
}
}
}
}
}
void
OperatorWithKernel
::
TransferInplaceVarsBack
(
void
OperatorWithKernel
::
TransferInplaceVarsBack
(
const
Scope
&
scope
,
const
std
::
vector
<
std
::
string
>&
inplace_vars
,
const
Scope
&
scope
,
const
std
::
vector
<
std
::
string
>&
inplace_vars
,
const
Scope
&
transfer_scope
)
const
{
const
Scope
&
transfer_scope
)
const
{
...
@@ -797,13 +951,18 @@ void OperatorWithKernel::TransferInplaceVarsBack(
...
@@ -797,13 +951,18 @@ void OperatorWithKernel::TransferInplaceVarsBack(
}
}
}
}
Scope
*
OperatorWithKernel
::
TryTransfer
Data
(
Scope
*
OperatorWithKernel
::
Prepare
Data
(
const
Scope
&
scope
,
const
OpKernelType
&
expected_kernel_key
,
const
Scope
&
scope
,
const
OpKernelType
&
expected_kernel_key
,
std
::
vector
<
std
::
string
>*
transfered_inplace_vars
)
const
{
std
::
vector
<
std
::
string
>*
transfered_inplace_vars
,
RuntimeContext
*
ctx
)
const
{
Scope
*
new_scope
=
nullptr
;
Scope
*
new_scope
=
nullptr
;
for
(
auto
&
var_name_item
:
Inputs
())
{
for
(
auto
&
var_name_item
:
Inputs
())
{
for
(
auto
&
var_name
:
var_name_item
.
second
)
{
std
::
vector
<
Variable
*>&
input_vars
=
ctx
->
inputs
[
var_name_item
.
first
];
auto
*
var
=
scope
.
FindVar
(
var_name
);
for
(
size_t
i
=
0
;
i
<
var_name_item
.
second
.
size
();
++
i
)
{
auto
&
var_name
=
var_name_item
.
second
[
i
];
auto
*
var
=
input_vars
[
i
];
// Only tensor can be tranfer to another device.
// Only tensor can be tranfer to another device.
if
(
var
==
nullptr
||
!
VarIsTensor
(
*
var
))
{
if
(
var
==
nullptr
||
!
VarIsTensor
(
*
var
))
{
continue
;
continue
;
...
@@ -851,6 +1010,7 @@ Scope* OperatorWithKernel::TryTransferData(
...
@@ -851,6 +1010,7 @@ Scope* OperatorWithKernel::TryTransferData(
}
}
auto
*
trans_var
=
new_scope
->
Var
(
var_name
);
auto
*
trans_var
=
new_scope
->
Var
(
var_name
);
input_vars
[
i
]
=
trans_var
;
Tensor
out
;
Tensor
out
;
TransformData
(
expected_kernel_key
,
kernel_type_for_var
,
*
tensor_in
,
&
out
);
TransformData
(
expected_kernel_key
,
kernel_type_for_var
,
*
tensor_in
,
&
out
);
...
...
paddle/fluid/framework/operator.h
浏览文件 @
1c116462
...
@@ -70,6 +70,15 @@ Tensor* GetMutableLoDTensorOrSelectedRowsValueFromVar(Variable* var);
...
@@ -70,6 +70,15 @@ Tensor* GetMutableLoDTensorOrSelectedRowsValueFromVar(Variable* var);
class
OperatorBase
;
class
OperatorBase
;
class
ExecutionContext
;
class
ExecutionContext
;
class
RuntimeContext
{
public:
RuntimeContext
(
const
VariableNameMap
&
innames
,
const
VariableNameMap
&
outnames
,
const
Scope
&
scope
);
VariableValueMap
inputs
;
VariableValueMap
outputs
;
};
/**
/**
* OperatorBase has the basic elements that Net will call to do computation.
* OperatorBase has the basic elements that Net will call to do computation.
* Only CreateOperator from OpRegistry will new Operator directly. User
* Only CreateOperator from OpRegistry will new Operator directly. User
...
@@ -129,7 +138,8 @@ class OperatorBase {
...
@@ -129,7 +138,8 @@ class OperatorBase {
void
SetIsCalledByExecutor
(
bool
x
)
{
run_by_executor_
=
x
;
}
void
SetIsCalledByExecutor
(
bool
x
)
{
run_by_executor_
=
x
;
}
virtual
void
RuntimeInferShape
(
const
Scope
&
scope
,
virtual
void
RuntimeInferShape
(
const
Scope
&
scope
,
const
platform
::
Place
&
place
)
const
{}
const
platform
::
Place
&
place
,
const
RuntimeContext
&
ctx
)
const
{}
protected:
protected:
std
::
string
type_
;
std
::
string
type_
;
...
@@ -156,8 +166,9 @@ class OperatorBase {
...
@@ -156,8 +166,9 @@ class OperatorBase {
class
ExecutionContext
{
class
ExecutionContext
{
public:
public:
ExecutionContext
(
const
OperatorBase
&
op
,
const
Scope
&
scope
,
ExecutionContext
(
const
OperatorBase
&
op
,
const
Scope
&
scope
,
const
platform
::
DeviceContext
&
device_context
)
const
platform
::
DeviceContext
&
device_context
,
:
op_
(
op
),
scope_
(
scope
),
device_context_
(
device_context
)
{}
const
RuntimeContext
&
ctx
)
:
op_
(
op
),
scope_
(
scope
),
device_context_
(
device_context
),
ctx_
(
ctx
)
{}
const
OperatorBase
&
op
()
const
{
return
op_
;
}
const
OperatorBase
&
op
()
const
{
return
op_
;
}
...
@@ -180,15 +191,9 @@ class ExecutionContext {
...
@@ -180,15 +191,9 @@ class ExecutionContext {
return
op_
.
Outputs
(
name
).
size
();
return
op_
.
Outputs
(
name
).
size
();
}
}
const
Variable
*
InputVar
(
const
std
::
string
&
name
)
const
{
const
Variable
*
InputVar
(
const
std
::
string
&
name
)
const
;
auto
ipt
=
op_
.
Input
(
name
);
return
ipt
==
kEmptyVarName
?
nullptr
:
scope_
.
FindVar
(
ipt
);
}
Variable
*
OutputVar
(
const
std
::
string
&
name
)
const
{
Variable
*
OutputVar
(
const
std
::
string
&
name
)
const
;
auto
opt
=
op_
.
Output
(
name
);
return
opt
==
kEmptyVarName
?
nullptr
:
scope_
.
FindVar
(
opt
);
}
const
std
::
vector
<
const
Variable
*>
MultiInputVar
(
const
std
::
vector
<
const
Variable
*>
MultiInputVar
(
const
std
::
string
&
name
)
const
{
const
std
::
string
&
name
)
const
{
...
@@ -227,6 +232,22 @@ class ExecutionContext {
...
@@ -227,6 +232,22 @@ class ExecutionContext {
return
var
==
nullptr
?
nullptr
:
var
->
GetMutable
<
T
>
();
return
var
==
nullptr
?
nullptr
:
var
->
GetMutable
<
T
>
();
}
}
template
<
typename
T
>
const
T
*
LegacyInput
(
const
std
::
string
&
name
)
const
{
auto
*
var
=
LegacyInputVar
(
name
);
return
var
==
nullptr
?
nullptr
:
&
var
->
Get
<
T
>
();
}
template
<
typename
T
>
T
*
LegacyOutput
(
const
std
::
string
&
name
)
const
{
auto
var
=
LegacyOutputVar
(
name
);
return
var
==
nullptr
?
nullptr
:
var
->
GetMutable
<
T
>
();
}
const
Variable
*
LegacyInputVar
(
const
std
::
string
&
name
)
const
;
Variable
*
LegacyOutputVar
(
const
std
::
string
&
name
)
const
;
template
<
typename
T
>
template
<
typename
T
>
const
std
::
vector
<
const
T
*>
MultiInput
(
const
std
::
string
&
name
)
const
{
const
std
::
vector
<
const
T
*>
MultiInput
(
const
std
::
string
&
name
)
const
{
auto
names
=
op_
.
Inputs
(
name
);
auto
names
=
op_
.
Inputs
(
name
);
...
@@ -286,11 +307,16 @@ class ExecutionContext {
...
@@ -286,11 +307,16 @@ class ExecutionContext {
const
OperatorBase
&
op_
;
const
OperatorBase
&
op_
;
const
Scope
&
scope_
;
const
Scope
&
scope_
;
const
platform
::
DeviceContext
&
device_context_
;
const
platform
::
DeviceContext
&
device_context_
;
const
RuntimeContext
&
ctx_
;
};
};
template
<>
template
<>
const
Tensor
*
ExecutionContext
::
Input
<
Tensor
>
(
const
std
::
string
&
name
)
const
;
const
Tensor
*
ExecutionContext
::
Input
<
Tensor
>
(
const
std
::
string
&
name
)
const
;
template
<>
const
Tensor
*
ExecutionContext
::
LegacyInput
<
Tensor
>
(
const
std
::
string
&
name
)
const
;
template
<>
template
<>
const
std
::
vector
<
const
Tensor
*>
ExecutionContext
::
MultiInput
<
Tensor
>
(
const
std
::
vector
<
const
Tensor
*>
ExecutionContext
::
MultiInput
<
Tensor
>
(
const
std
::
string
&
name
)
const
;
const
std
::
string
&
name
)
const
;
...
@@ -298,6 +324,9 @@ const std::vector<const Tensor*> ExecutionContext::MultiInput<Tensor>(
...
@@ -298,6 +324,9 @@ const std::vector<const Tensor*> ExecutionContext::MultiInput<Tensor>(
template
<>
template
<>
Tensor
*
ExecutionContext
::
Output
<
Tensor
>
(
const
std
::
string
&
name
)
const
;
Tensor
*
ExecutionContext
::
Output
<
Tensor
>
(
const
std
::
string
&
name
)
const
;
template
<>
Tensor
*
ExecutionContext
::
LegacyOutput
<
Tensor
>
(
const
std
::
string
&
name
)
const
;
template
<>
template
<>
std
::
vector
<
Tensor
*>
ExecutionContext
::
MultiOutput
<
Tensor
>
(
std
::
vector
<
Tensor
*>
ExecutionContext
::
MultiOutput
<
Tensor
>
(
const
std
::
string
&
name
)
const
;
const
std
::
string
&
name
)
const
;
...
@@ -350,8 +379,8 @@ class OperatorWithKernel : public OperatorBase {
...
@@ -350,8 +379,8 @@ class OperatorWithKernel : public OperatorBase {
OpInfoMap
::
Instance
().
Get
(
Type
()).
infer_shape_
(
ctx
);
OpInfoMap
::
Instance
().
Get
(
Type
()).
infer_shape_
(
ctx
);
}
}
void
RuntimeInferShape
(
const
Scope
&
scope
,
void
RuntimeInferShape
(
const
Scope
&
scope
,
const
platform
::
Place
&
place
,
const
platform
::
Place
&
place
)
const
override
;
const
RuntimeContext
&
ctx
)
const
override
;
protected:
protected:
virtual
OpKernelType
GetExpectedKernelType
(
const
ExecutionContext
&
ctx
)
const
;
virtual
OpKernelType
GetExpectedKernelType
(
const
ExecutionContext
&
ctx
)
const
;
...
@@ -371,9 +400,10 @@ class OperatorWithKernel : public OperatorBase {
...
@@ -371,9 +400,10 @@ class OperatorWithKernel : public OperatorBase {
*
*
* * transfered_inplace_vars is a output vector.
* * transfered_inplace_vars is a output vector.
*/
*/
Scope
*
TryTransferData
(
Scope
*
PrepareData
(
const
Scope
&
scope
,
const
Scope
&
scope
,
const
OpKernelType
&
expected_kernel_key
,
const
OpKernelType
&
expected_kernel_key
,
std
::
vector
<
std
::
string
>*
transfered_inplace_vars
)
const
;
std
::
vector
<
std
::
string
>*
transfered_inplace_vars
,
RuntimeContext
*
ctx
)
const
;
void
TransferInplaceVarsBack
(
const
Scope
&
scope
,
void
TransferInplaceVarsBack
(
const
Scope
&
scope
,
const
std
::
vector
<
std
::
string
>&
inplace_vars
,
const
std
::
vector
<
std
::
string
>&
inplace_vars
,
...
...
paddle/fluid/framework/parallel_executor.cc
浏览文件 @
1c116462
...
@@ -190,7 +190,6 @@ std::vector<Scope *> &ParallelExecutor::GetLocalScopes() {
...
@@ -190,7 +190,6 @@ std::vector<Scope *> &ParallelExecutor::GetLocalScopes() {
ParallelExecutor
::
ParallelExecutor
(
ParallelExecutor
::
ParallelExecutor
(
const
std
::
vector
<
platform
::
Place
>
&
places
,
const
std
::
vector
<
platform
::
Place
>
&
places
,
const
std
::
unordered_set
<
std
::
string
>
&
params
,
const
std
::
unordered_set
<
std
::
string
>
&
bcast_vars
,
const
std
::
unordered_set
<
std
::
string
>
&
bcast_vars
,
const
ProgramDesc
&
main_program
,
const
std
::
string
&
loss_var_name
,
const
ProgramDesc
&
main_program
,
const
std
::
string
&
loss_var_name
,
Scope
*
scope
,
const
std
::
vector
<
Scope
*>
&
local_scopes
,
Scope
*
scope
,
const
std
::
vector
<
Scope
*>
&
local_scopes
,
...
@@ -209,7 +208,7 @@ ParallelExecutor::ParallelExecutor(
...
@@ -209,7 +208,7 @@ ParallelExecutor::ParallelExecutor(
"the number of places must be greater than 1."
);
"the number of places must be greater than 1."
);
}
}
// Step 1. Bcast the
param
s to devs.
// Step 1. Bcast the
bcast_var
s to devs.
// Create local scopes
// Create local scopes
if
(
local_scopes
.
empty
())
{
if
(
local_scopes
.
empty
())
{
member_
->
own_local_scope_
=
true
;
member_
->
own_local_scope_
=
true
;
...
@@ -249,12 +248,12 @@ ParallelExecutor::ParallelExecutor(
...
@@ -249,12 +248,12 @@ ParallelExecutor::ParallelExecutor(
// ncclOp
// ncclOp
#if defined(PADDLE_WITH_CUDA) && !defined(_WIN32)
#if defined(PADDLE_WITH_CUDA) && !defined(_WIN32)
std
::
unique_ptr
<
ir
::
Graph
>
graph
=
build_strategy
.
Apply
(
std
::
unique_ptr
<
ir
::
Graph
>
graph
=
build_strategy
.
Apply
(
main_program
,
member_
->
places_
,
loss_var_name
,
params
,
main_program
,
member_
->
places_
,
loss_var_name
,
member_
->
local_scopes_
,
member_
->
local_scopes_
,
member_
->
use_cuda_
,
member_
->
nccl_ctxs_
.
get
());
member_
->
use_cuda_
,
member_
->
nccl_ctxs_
.
get
());
#else
#else
std
::
unique_ptr
<
ir
::
Graph
>
graph
=
std
::
unique_ptr
<
ir
::
Graph
>
graph
=
build_strategy
.
Apply
(
main_program
,
member_
->
places_
,
loss_var_name
,
build_strategy
.
Apply
(
main_program
,
member_
->
places_
,
loss_var_name
,
params
,
member_
->
local_scopes_
,
member_
->
use_cuda_
);
member_
->
local_scopes_
,
member_
->
use_cuda_
);
#endif
#endif
auto
max_memory_size
=
GetEagerDeletionThreshold
();
auto
max_memory_size
=
GetEagerDeletionThreshold
();
if
(
max_memory_size
>=
0
)
{
if
(
max_memory_size
>=
0
)
{
...
...
paddle/fluid/framework/parallel_executor.h
浏览文件 @
1c116462
...
@@ -41,7 +41,6 @@ class ParallelExecutor {
...
@@ -41,7 +41,6 @@ class ParallelExecutor {
public:
public:
explicit
ParallelExecutor
(
const
std
::
vector
<
platform
::
Place
>
&
places
,
explicit
ParallelExecutor
(
const
std
::
vector
<
platform
::
Place
>
&
places
,
const
std
::
unordered_set
<
std
::
string
>
&
params
,
const
std
::
unordered_set
<
std
::
string
>
&
bcast_vars
,
const
std
::
unordered_set
<
std
::
string
>
&
bcast_vars
,
const
ProgramDesc
&
main_program
,
const
ProgramDesc
&
main_program
,
const
std
::
string
&
loss_var_name
,
Scope
*
scope
,
const
std
::
string
&
loss_var_name
,
Scope
*
scope
,
...
...
paddle/fluid/framework/shape_inference.cc
浏览文件 @
1c116462
...
@@ -22,20 +22,6 @@ limitations under the License. */
...
@@ -22,20 +22,6 @@ limitations under the License. */
namespace
paddle
{
namespace
paddle
{
namespace
framework
{
namespace
framework
{
DDim
InferShapeContext
::
GetInputDim
(
const
std
::
string
&
name
)
const
{
const
std
::
vector
<
std
::
string
>
&
arg_names
=
Inputs
(
name
);
PADDLE_ENFORCE_EQ
(
arg_names
.
size
(),
1UL
,
"Input(%s) should hold one element, but now it holds %d"
,
name
,
arg_names
.
size
());
return
this
->
GetDim
(
arg_names
[
0
]);
}
std
::
vector
<
DDim
>
InferShapeContext
::
GetInputsDim
(
const
std
::
string
&
name
)
const
{
const
std
::
vector
<
std
::
string
>
&
arg_names
=
Inputs
(
name
);
return
GetDims
(
arg_names
);
}
std
::
vector
<
DDim
>
InferShapeContext
::
GetReaderDims
(
std
::
vector
<
DDim
>
InferShapeContext
::
GetReaderDims
(
const
std
::
string
&
name
)
const
{
const
std
::
string
&
name
)
const
{
const
std
::
vector
<
std
::
string
>
&
arg_names
=
Inputs
(
name
);
const
std
::
vector
<
std
::
string
>
&
arg_names
=
Inputs
(
name
);
...
@@ -46,26 +32,6 @@ std::vector<DDim> InferShapeContext::GetReaderDims(
...
@@ -46,26 +32,6 @@ std::vector<DDim> InferShapeContext::GetReaderDims(
return
this
->
GetRepeatedDims
(
arg_names
[
0
]);
return
this
->
GetRepeatedDims
(
arg_names
[
0
]);
}
}
DDim
InferShapeContext
::
GetInputsElementDim
(
const
std
::
string
&
name
,
int
idx
)
const
{
const
std
::
vector
<
std
::
string
>
&
names
=
Inputs
(
name
);
return
this
->
GetDim
(
names
[
idx
]);
}
void
InferShapeContext
::
SetOutputDim
(
const
std
::
string
&
name
,
const
DDim
&
dim
)
{
auto
&
arg_names
=
Outputs
(
name
);
PADDLE_ENFORCE_EQ
(
arg_names
.
size
(),
1UL
,
"Output(%s) should hold one element, but now it holds %d"
,
name
,
arg_names
.
size
());
SetDim
(
arg_names
[
0
],
dim
);
}
void
InferShapeContext
::
SetOutputsDim
(
const
std
::
string
&
name
,
const
std
::
vector
<
DDim
>
&
dims
)
{
auto
&
names
=
Outputs
(
name
);
SetDims
(
names
,
dims
);
}
void
InferShapeContext
::
SetReaderDims
(
const
std
::
string
&
name
,
void
InferShapeContext
::
SetReaderDims
(
const
std
::
string
&
name
,
const
std
::
vector
<
DDim
>
&
dims
)
{
const
std
::
vector
<
DDim
>
&
dims
)
{
const
std
::
vector
<
std
::
string
>
&
arg_names
=
Outputs
(
name
);
const
std
::
vector
<
std
::
string
>
&
arg_names
=
Outputs
(
name
);
...
@@ -76,69 +42,5 @@ void InferShapeContext::SetReaderDims(const std::string &name,
...
@@ -76,69 +42,5 @@ void InferShapeContext::SetReaderDims(const std::string &name,
return
this
->
SetRepeatedDims
(
arg_names
[
0
],
dims
);
return
this
->
SetRepeatedDims
(
arg_names
[
0
],
dims
);
}
}
std
::
vector
<
InferShapeVarPtr
>
InferShapeContext
::
GetInputVarPtrs
(
const
std
::
string
&
name
)
{
const
std
::
vector
<
std
::
string
>
arg_names
=
Inputs
(
name
);
std
::
vector
<
InferShapeVarPtr
>
res
;
res
.
reserve
(
arg_names
.
size
());
std
::
transform
(
arg_names
.
begin
(),
arg_names
.
end
(),
std
::
back_inserter
(
res
),
[
this
](
const
std
::
string
&
name
)
{
return
this
->
GetVarPtr
(
name
);
});
return
res
;
}
std
::
vector
<
InferShapeVarPtr
>
InferShapeContext
::
GetOutputVarPtrs
(
const
std
::
string
&
name
)
{
const
std
::
vector
<
std
::
string
>
arg_names
=
Outputs
(
name
);
std
::
vector
<
InferShapeVarPtr
>
res
;
res
.
reserve
(
arg_names
.
size
());
std
::
transform
(
arg_names
.
begin
(),
arg_names
.
end
(),
std
::
back_inserter
(
res
),
[
this
](
const
std
::
string
&
name
)
{
return
this
->
GetVarPtr
(
name
);
});
return
res
;
}
std
::
vector
<
DDim
>
InferShapeContext
::
GetDims
(
const
std
::
vector
<
std
::
string
>
&
names
)
const
{
std
::
vector
<
DDim
>
ret
;
ret
.
reserve
(
names
.
size
());
std
::
transform
(
names
.
begin
(),
names
.
end
(),
std
::
back_inserter
(
ret
),
[
this
](
const
std
::
string
&
name
)
{
return
this
->
GetDim
(
name
);
});
return
ret
;
}
void
InferShapeContext
::
SetDims
(
const
std
::
vector
<
std
::
string
>
&
names
,
const
std
::
vector
<
DDim
>
&
dims
)
{
size_t
length
=
names
.
size
();
PADDLE_ENFORCE_EQ
(
length
,
dims
.
size
());
for
(
size_t
i
=
0
;
i
<
length
;
++
i
)
{
if
(
names
[
i
]
==
framework
::
kEmptyVarName
)
{
continue
;
}
SetDim
(
names
[
i
],
dims
[
i
]);
}
}
std
::
vector
<
proto
::
VarType
::
Type
>
InferShapeContext
::
GetInputsVarType
(
const
std
::
string
&
name
)
const
{
return
GetVarTypes
(
Inputs
(
name
));
}
std
::
vector
<
proto
::
VarType
::
Type
>
InferShapeContext
::
GetOutputsVarType
(
const
std
::
string
&
name
)
const
{
return
GetVarTypes
(
Outputs
(
name
));
}
std
::
vector
<
proto
::
VarType
::
Type
>
InferShapeContext
::
GetVarTypes
(
const
std
::
vector
<
std
::
string
>
&
names
)
const
{
std
::
vector
<
proto
::
VarType
::
Type
>
retv
;
retv
.
resize
(
names
.
size
());
std
::
transform
(
names
.
begin
(),
names
.
end
(),
retv
.
begin
(),
std
::
bind
(
std
::
mem_fn
(
&
InferShapeContext
::
GetVarType
),
this
,
std
::
placeholders
::
_1
));
return
retv
;
}
}
// namespace framework
}
// namespace framework
}
// namespace paddle
}
// namespace paddle
paddle/fluid/framework/shape_inference.h
浏览文件 @
1c116462
...
@@ -33,22 +33,23 @@ class InferShapeContext {
...
@@ -33,22 +33,23 @@ class InferShapeContext {
virtual
bool
HasInput
(
const
std
::
string
&
name
)
const
=
0
;
virtual
bool
HasInput
(
const
std
::
string
&
name
)
const
=
0
;
virtual
bool
HasOutput
(
const
std
::
string
&
name
)
const
=
0
;
virtual
bool
HasOutput
(
const
std
::
string
&
name
)
const
=
0
;
std
::
vector
<
proto
::
VarType
::
Type
>
GetInputsVarType
(
virtual
std
::
vector
<
proto
::
VarType
::
Type
>
GetInputsVarType
(
const
std
::
string
&
name
)
const
;
const
std
::
string
&
name
)
const
=
0
;
std
::
vector
<
proto
::
VarType
::
Type
>
GetOutputsVarType
(
virtual
std
::
vector
<
proto
::
VarType
::
Type
>
GetOutputsVarType
(
const
std
::
string
&
name
)
const
;
const
std
::
string
&
name
)
const
=
0
;
virtual
bool
HasInputs
(
const
std
::
string
&
name
)
const
=
0
;
virtual
bool
HasInputs
(
const
std
::
string
&
name
)
const
=
0
;
virtual
bool
HasOutputs
(
const
std
::
string
&
name
)
const
=
0
;
virtual
bool
HasOutputs
(
const
std
::
string
&
name
)
const
=
0
;
DDim
GetInputDim
(
const
std
::
string
&
name
)
const
;
virtual
DDim
GetInputDim
(
const
std
::
string
&
name
)
const
=
0
;
std
::
vector
<
DDim
>
GetInputsDim
(
const
std
::
string
&
name
)
const
;
virtual
std
::
vector
<
DDim
>
GetInputsDim
(
const
std
::
string
&
name
)
const
=
0
;
std
::
vector
<
DDim
>
GetReaderDims
(
const
std
::
string
&
name
)
const
;
virtual
std
::
vector
<
DDim
>
GetReaderDims
(
const
std
::
string
&
name
)
const
;
DDim
GetInputsElementDim
(
const
std
::
string
&
name
,
int
idx
)
const
;
void
SetOutputDim
(
const
std
::
string
&
name
,
const
DDim
&
dim
);
virtual
void
SetOutputDim
(
const
std
::
string
&
name
,
const
DDim
&
dim
)
=
0
;
void
SetOutputsDim
(
const
std
::
string
&
name
,
const
std
::
vector
<
DDim
>
&
dims
);
virtual
void
SetOutputsDim
(
const
std
::
string
&
name
,
void
SetReaderDims
(
const
std
::
string
&
name
,
const
std
::
vector
<
DDim
>
&
dims
);
const
std
::
vector
<
DDim
>
&
dims
)
=
0
;
virtual
void
SetReaderDims
(
const
std
::
string
&
name
,
const
std
::
vector
<
DDim
>
&
dims
);
virtual
AttrReader
Attrs
()
const
=
0
;
virtual
AttrReader
Attrs
()
const
=
0
;
virtual
const
std
::
vector
<
std
::
string
>
&
Inputs
(
virtual
const
std
::
vector
<
std
::
string
>
&
Inputs
(
...
@@ -67,27 +68,15 @@ class InferShapeContext {
...
@@ -67,27 +68,15 @@ class InferShapeContext {
virtual
bool
IsRuntime
()
const
=
0
;
virtual
bool
IsRuntime
()
const
=
0
;
std
::
vector
<
InferShapeVarPtr
>
GetInputVarPtrs
(
const
std
::
string
&
name
);
virtual
std
::
vector
<
InferShapeVarPtr
>
GetInputVarPtrs
(
std
::
vector
<
InferShapeVarPtr
>
GetOutputVarPtrs
(
const
std
::
string
&
name
);
const
std
::
string
&
name
)
=
0
;
virtual
InferShapeVarPtr
GetVarPtr
(
const
std
::
string
&
name
)
=
0
;
virtual
std
::
vector
<
InferShapeVarPtr
>
GetOutputVarPtrs
(
const
std
::
string
&
name
)
=
0
;
// Note: In while op, we need this to be public
void
SetDims
(
const
std
::
vector
<
std
::
string
>
&
names
,
const
std
::
vector
<
DDim
>
&
dims
);
protected:
protected:
virtual
DDim
GetDim
(
const
std
::
string
&
name
)
const
=
0
;
virtual
void
SetDim
(
const
std
::
string
&
name
,
const
DDim
&
dim
)
=
0
;
virtual
std
::
vector
<
DDim
>
GetRepeatedDims
(
const
std
::
string
&
name
)
const
=
0
;
virtual
std
::
vector
<
DDim
>
GetRepeatedDims
(
const
std
::
string
&
name
)
const
=
0
;
virtual
void
SetRepeatedDims
(
const
std
::
string
&
name
,
virtual
void
SetRepeatedDims
(
const
std
::
string
&
name
,
const
std
::
vector
<
DDim
>
&
dims
)
=
0
;
const
std
::
vector
<
DDim
>
&
dims
)
=
0
;
std
::
vector
<
DDim
>
GetDims
(
const
std
::
vector
<
std
::
string
>
&
names
)
const
;
std
::
vector
<
proto
::
VarType
::
Type
>
GetVarTypes
(
const
std
::
vector
<
std
::
string
>
&
names
)
const
;
virtual
proto
::
VarType
::
Type
GetVarType
(
const
std
::
string
&
name
)
const
=
0
;
};
};
}
// namespace framework
}
// namespace framework
...
...
paddle/fluid/framework/type_defs.h
浏览文件 @
1c116462
...
@@ -28,8 +28,11 @@ class OperatorBase;
...
@@ -28,8 +28,11 @@ class OperatorBase;
class
OpDesc
;
class
OpDesc
;
class
InferShapeContext
;
class
InferShapeContext
;
class
BlockDesc
;
class
BlockDesc
;
class
Variable
;
using
VariableNameMap
=
std
::
map
<
std
::
string
,
std
::
vector
<
std
::
string
>>
;
using
VariableNameMap
=
std
::
map
<
std
::
string
,
std
::
vector
<
std
::
string
>>
;
// TODO(panyx0718): Replace vector with something like gtl::Vector.
using
VariableValueMap
=
std
::
map
<
std
::
string
,
std
::
vector
<
Variable
*>>
;
// The order should be as same as framework.proto
// The order should be as same as framework.proto
using
Attribute
=
using
Attribute
=
...
...
paddle/fluid/imperative/layer.cc
浏览文件 @
1c116462
...
@@ -217,9 +217,6 @@ std::vector<Variable*> OpBase::ApplyGrad(framework::Scope* scope) {
...
@@ -217,9 +217,6 @@ std::vector<Variable*> OpBase::ApplyGrad(framework::Scope* scope) {
VarBase
*
origin_var
=
(
*
input_vars_
)[
i
];
VarBase
*
origin_var
=
(
*
input_vars_
)[
i
];
for
(
const
std
::
string
&
outvar
:
grad_op_desc_
->
OutputArgumentNames
())
{
for
(
const
std
::
string
&
outvar
:
grad_op_desc_
->
OutputArgumentNames
())
{
Variable
*
var
=
scope
->
FindVar
(
outvar
);
Variable
*
var
=
scope
->
FindVar
(
outvar
);
if
(
var
->
IsInitialized
())
{
VLOG
(
3
)
<<
"get grad op output var "
<<
outvar
;
}
std
::
string
orig_var_name
=
grad_to_var_
->
at
(
outvar
);
std
::
string
orig_var_name
=
grad_to_var_
->
at
(
outvar
);
if
(
origin_var
->
var_desc_
->
Name
()
!=
orig_var_name
||
if
(
origin_var
->
var_desc_
->
Name
()
!=
orig_var_name
||
origin_var
->
stop_gradient_
)
{
origin_var
->
stop_gradient_
)
{
...
...
paddle/fluid/operators/CMakeLists.txt
浏览文件 @
1c116462
...
@@ -64,9 +64,7 @@ endif()
...
@@ -64,9 +64,7 @@ endif()
set
(
COMMON_OP_DEPS
${
OP_HEADER_DEPS
}
)
set
(
COMMON_OP_DEPS
${
OP_HEADER_DEPS
}
)
set
(
COMMON_OP_DEPS
${
COMMON_OP_DEPS
}
selected_rows_functor selected_rows lod_tensor maxouting unpooling pooling lod_rank_table context_project sequence_pooling executor
)
set
(
COMMON_OP_DEPS
${
COMMON_OP_DEPS
}
selected_rows_functor selected_rows lod_tensor maxouting unpooling pooling lod_rank_table context_project sequence_pooling executor
)
if
(
NOT WIN32
)
set
(
COMMON_OP_DEPS
${
COMMON_OP_DEPS
}
dynload_warpctc
)
set
(
COMMON_OP_DEPS
${
COMMON_OP_DEPS
}
dynload_warpctc
)
endif
()
set
(
COMMON_OP_DEPS
${
COMMON_OP_DEPS
}
sequence_padding sequence_scale cos_sim_functor memory jit_kernel concat_and_split cross_entropy softmax vol2col im2col sampler
)
set
(
COMMON_OP_DEPS
${
COMMON_OP_DEPS
}
sequence_padding sequence_scale cos_sim_functor memory jit_kernel concat_and_split cross_entropy softmax vol2col im2col sampler
)
set
(
COMMON_OP_DEPS
${
COMMON_OP_DEPS
}
sequence2batch lstm_compute matrix_bit_code gru_compute activation_functions
)
set
(
COMMON_OP_DEPS
${
COMMON_OP_DEPS
}
sequence2batch lstm_compute matrix_bit_code gru_compute activation_functions
)
if
(
WITH_GPU
)
if
(
WITH_GPU
)
...
...
paddle/fluid/operators/beam_search_decode_op.cc
浏览文件 @
1c116462
...
@@ -122,7 +122,8 @@ class BeamSearchDecodeOp : public framework::OperatorBase {
...
@@ -122,7 +122,8 @@ class BeamSearchDecodeOp : public framework::OperatorBase {
platform
::
DeviceContextPool
&
pool
=
platform
::
DeviceContextPool
::
Instance
();
platform
::
DeviceContextPool
&
pool
=
platform
::
DeviceContextPool
::
Instance
();
auto
&
dev_ctx
=
*
pool
.
Get
(
dev_place
);
auto
&
dev_ctx
=
*
pool
.
Get
(
dev_place
);
framework
::
ExecutionContext
ctx
(
*
this
,
scope
,
dev_ctx
);
framework
::
RuntimeContext
run_ctx
(
Inputs
(),
Outputs
(),
scope
);
framework
::
ExecutionContext
ctx
(
*
this
,
scope
,
dev_ctx
,
run_ctx
);
const
LoDTensorArray
*
ids
=
ctx
.
Input
<
LoDTensorArray
>
(
"Ids"
);
const
LoDTensorArray
*
ids
=
ctx
.
Input
<
LoDTensorArray
>
(
"Ids"
);
const
LoDTensorArray
*
scores
=
ctx
.
Input
<
LoDTensorArray
>
(
"Scores"
);
const
LoDTensorArray
*
scores
=
ctx
.
Input
<
LoDTensorArray
>
(
"Scores"
);
...
...
paddle/fluid/operators/controlflow/parallel_do_op.cc
已删除
100644 → 0
浏览文件 @
29697c2e
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include <vector>
#include "paddle/fluid/framework/executor.h"
#include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/framework/threadpool.h"
#include "paddle/fluid/operators/detail/safe_ref.h"
namespace
paddle
{
namespace
operators
{
static
constexpr
char
kInputs
[]
=
"inputs"
;
static
constexpr
char
kParameters
[]
=
"parameters"
;
static
constexpr
char
kPlaces
[]
=
"places"
;
static
constexpr
char
kOutputs
[]
=
"outputs"
;
static
constexpr
char
kParallelScopes
[]
=
"parallel_scopes"
;
static
constexpr
char
kParallelBlock
[]
=
"sub_block"
;
static
constexpr
char
kUseNCCL
[]
=
"use_nccl"
;
using
LoDTensor
=
framework
::
LoDTensor
;
using
SelectedRows
=
framework
::
SelectedRows
;
static
void
SplitTensorAndMoveTensorToScopes
(
const
framework
::
Scope
&
scope
,
std
::
vector
<
framework
::
Scope
*>
*
sub_scopes
,
const
std
::
vector
<
platform
::
Place
>
&
places
,
const
std
::
vector
<
std
::
string
>
&
names
)
{
size_t
num_sub_scopes
=
0
;
for
(
auto
&
argu
:
names
)
{
const
auto
&
tensor
=
detail
::
Ref
(
scope
.
FindVar
(
argu
),
"Cannot find variable %s in the parent scope"
,
argu
)
.
Get
<
LoDTensor
>
();
auto
lod_tensors
=
tensor
.
SplitLoDTensor
(
places
);
for
(
auto
&
lod
:
lod_tensors
)
{
VLOG
(
3
)
<<
lod
.
dims
();
}
if
(
num_sub_scopes
==
0
)
{
num_sub_scopes
=
lod_tensors
.
size
();
}
else
{
PADDLE_ENFORCE_EQ
(
num_sub_scopes
,
lod_tensors
.
size
());
}
PADDLE_ENFORCE_NE
(
num_sub_scopes
,
0
);
if
(
sub_scopes
->
size
()
==
0
)
{
sub_scopes
->
reserve
(
num_sub_scopes
);
for
(
size_t
i
=
0
;
i
<
num_sub_scopes
;
++
i
)
{
sub_scopes
->
emplace_back
(
&
scope
.
NewScope
());
}
}
for
(
size_t
i
=
0
;
i
<
lod_tensors
.
size
();
++
i
)
{
*
detail
::
Ref
(
sub_scopes
->
at
(
i
)
->
Var
(
argu
),
"Cannot find variable in the sub-scope"
,
argu
)
.
GetMutable
<
LoDTensor
>
()
=
lod_tensors
[
i
];
}
}
}
inline
void
CopyOrShare
(
const
framework
::
Variable
&
src
,
const
platform
::
Place
&
dst_place
,
framework
::
Variable
*
dst
)
{
if
(
src
.
IsType
<
LoDTensor
>
())
{
if
(
src
.
Get
<
LoDTensor
>
().
place
()
==
dst_place
)
{
dst
->
GetMutable
<
LoDTensor
>
()
->
ShareDataWith
(
src
.
Get
<
LoDTensor
>
());
dst
->
GetMutable
<
LoDTensor
>
()
->
set_lod
(
src
.
Get
<
LoDTensor
>
().
lod
());
}
else
{
TensorCopy
(
src
.
Get
<
LoDTensor
>
(),
dst_place
,
dst
->
GetMutable
<
LoDTensor
>
());
}
}
else
if
(
src
.
IsType
<
SelectedRows
>
())
{
auto
&
src_sr
=
src
.
Get
<
SelectedRows
>
();
auto
*
dst_sr
=
dst
->
GetMutable
<
SelectedRows
>
();
dst_sr
->
set_height
(
src_sr
.
height
());
if
(
src_sr
.
value
().
place
()
==
dst_place
)
{
dst_sr
->
mutable_value
()
->
ShareDataWith
(
src_sr
.
value
());
dst_sr
->
set_rows
(
src_sr
.
rows
());
}
else
{
TensorCopy
(
src_sr
.
value
(),
dst_place
,
dst_sr
->
mutable_value
());
}
}
else
{
PADDLE_THROW
(
"Expect LoDTensor/SelectedRows, get %s"
,
src
.
Type
().
name
());
}
}
void
WaitOnPlace
(
const
platform
::
Place
place
)
{
platform
::
DeviceContextPool
&
pool
=
platform
::
DeviceContextPool
::
Instance
();
auto
&
dev_ctx
=
*
pool
.
Get
(
place
);
dev_ctx
.
Wait
();
}
void
WaitOnPlaces
(
const
std
::
vector
<
platform
::
Place
>
places
)
{
platform
::
DeviceContextPool
&
pool
=
platform
::
DeviceContextPool
::
Instance
();
for
(
auto
&
place
:
places
)
{
auto
&
dev_ctx
=
*
pool
.
Get
(
place
);
dev_ctx
.
Wait
();
}
}
class
ParallelDoOp
:
public
framework
::
OperatorBase
{
public:
ParallelDoOp
(
const
std
::
string
&
type
,
const
framework
::
VariableNameMap
&
inputs
,
const
framework
::
VariableNameMap
&
outputs
,
const
framework
::
AttributeMap
&
attrs
)
:
framework
::
OperatorBase
(
type
,
inputs
,
outputs
,
attrs
)
{}
private:
void
RunImpl
(
const
framework
::
Scope
&
scope
,
const
platform
::
Place
&
place
)
const
override
{
// get device context from pool
platform
::
DeviceContextPool
&
pool
=
platform
::
DeviceContextPool
::
Instance
();
auto
&
dev_ctx
=
*
pool
.
Get
(
place
);
auto
*
block
=
Attr
<
framework
::
BlockDesc
*>
(
kParallelBlock
);
auto
*
program
=
block
->
Program
();
auto
&
places
=
scope
.
FindVar
(
Input
(
kPlaces
))
->
Get
<
platform
::
PlaceList
>
();
auto
&
sub_scopes
=
*
scope
.
FindVar
(
Output
(
kParallelScopes
))
->
GetMutable
<
std
::
vector
<
framework
::
Scope
*>>
();
// split input
SplitTensorAndMoveTensorToScopes
(
scope
,
&
sub_scopes
,
places
,
Inputs
(
kInputs
));
// copy parameter
for
(
auto
&
param
:
Inputs
(
kParameters
))
{
PADDLE_ENFORCE
(
scope
.
FindVar
(
param
)
->
IsType
<
LoDTensor
>
(),
"Only support parameter type as LoDTensor"
);
auto
&
src
=
scope
.
FindVar
(
param
)
->
Get
<
LoDTensor
>
();
auto
*
sub_scope0
=
sub_scopes
[
0
];
auto
*
dst0
=
sub_scope0
->
Var
(
param
)
->
GetMutable
<
LoDTensor
>
();
dst0
->
ShareDataWith
(
src
);
for
(
size_t
i
=
1
;
i
<
sub_scopes
.
size
();
++
i
)
{
auto
&
place
=
places
[
i
];
auto
*
sub_scope
=
sub_scopes
[
i
];
auto
*
dst
=
sub_scope
->
Var
(
param
)
->
GetMutable
<
LoDTensor
>
();
framework
::
TensorCopy
(
src
,
place
,
dst
);
}
}
WaitOnPlaces
(
places
);
std
::
vector
<
std
::
future
<
void
>>
workers
;
workers
.
reserve
(
places
.
size
());
for
(
size_t
place_idx
=
0
;
place_idx
<
sub_scopes
.
size
();
++
place_idx
)
{
auto
&
place
=
places
[
place_idx
];
auto
*
cur_scope
=
sub_scopes
[
place_idx
];
workers
.
emplace_back
(
framework
::
Async
([
program
,
cur_scope
,
place
,
block
]
{
framework
::
Executor
executor
(
place
);
executor
.
Run
(
*
program
,
cur_scope
,
block
->
ID
(),
false
/*create_local_scope*/
);
}));
}
for
(
auto
&
worker
:
workers
)
{
worker
.
wait
();
}
WaitOnPlaces
(
places
);
// merge output
for
(
auto
&
o_name
:
Outputs
(
kOutputs
))
{
std
::
vector
<
const
framework
::
LoDTensor
*>
lod_tensors
;
lod_tensors
.
reserve
(
sub_scopes
.
size
());
for
(
auto
*
sub_scope
:
sub_scopes
)
{
lod_tensors
.
emplace_back
(
&
sub_scope
->
FindVar
(
o_name
)
->
Get
<
LoDTensor
>
());
}
auto
*
lod_tensor_to_be_merged
=
scope
.
FindVar
(
o_name
)
->
GetMutable
<
LoDTensor
>
();
lod_tensor_to_be_merged
->
MergeLoDTensor
(
lod_tensors
,
dev_ctx
.
GetPlace
());
}
WaitOnPlaces
(
places
);
}
};
class
ParallelDoOpProtoMaker
:
public
framework
::
OpProtoAndCheckerMaker
{
public:
void
Make
()
override
{
AddInput
(
kInputs
,
""
).
AsDuplicable
();
AddInput
(
kParameters
,
""
).
AsDuplicable
();
AddInput
(
kPlaces
,
""
);
AddOutput
(
kOutputs
,
""
).
AsDuplicable
();
AddOutput
(
kParallelScopes
,
""
);
AddAttr
<
framework
::
BlockDesc
*>
(
kParallelBlock
,
""
);
AddAttr
<
bool
>
(
kUseNCCL
,
"true if we use nccl on backward"
)
.
SetDefault
(
false
);
AddComment
(
R"DOC(
ParallelDo Operator.
)DOC"
);
}
};
class
ParallelDoGradOp
:
public
framework
::
OperatorBase
{
public:
ParallelDoGradOp
(
const
std
::
string
&
type
,
const
framework
::
VariableNameMap
&
inputs
,
const
framework
::
VariableNameMap
&
outputs
,
const
framework
::
AttributeMap
&
attrs
)
:
framework
::
OperatorBase
(
type
,
inputs
,
outputs
,
attrs
)
{}
private:
void
RunImpl
(
const
framework
::
Scope
&
scope
,
const
platform
::
Place
&
place
)
const
override
{
auto
*
block
=
Attr
<
framework
::
BlockDesc
*>
(
kParallelBlock
);
auto
*
program
=
block
->
Program
();
auto
&
sub_scopes
=
scope
.
FindVar
(
Input
(
kParallelScopes
))
->
Get
<
std
::
vector
<
framework
::
Scope
*>>
();
auto
&
places
=
scope
.
FindVar
(
Input
(
kPlaces
))
->
Get
<
platform
::
PlaceList
>
();
// feed output@grad
SplitTensorAndMoveTensorToScopes
(
scope
,
const_cast
<
std
::
vector
<
framework
::
Scope
*>
*>
(
&
sub_scopes
),
places
,
Inputs
(
framework
::
GradVarName
(
kOutputs
)));
WaitOnPlaces
(
places
);
// exe run
std
::
vector
<
std
::
future
<
void
>>
workers
;
for
(
size_t
i
=
0
;
i
<
sub_scopes
.
size
();
++
i
)
{
auto
&
place
=
places
[
i
];
auto
*
cur_scope
=
sub_scopes
[
i
];
// execute
workers
.
emplace_back
(
framework
::
Async
([
program
,
cur_scope
,
place
,
block
]
{
framework
::
Executor
executor
(
place
);
executor
.
Run
(
*
program
,
cur_scope
,
block
->
ID
(),
false
/*create_local_scope*/
);
}));
}
for
(
auto
&
worker
:
workers
)
{
worker
.
wait
();
}
WaitOnPlaces
(
places
);
// NCCL allreduce op will be added by backward,
// so no need to explicitly accumulate grad
if
(
!
(
Attr
<
bool
>
(
kUseNCCL
)))
{
AccumulateGrad
(
scope
,
place
,
sub_scopes
,
places
);
}
else
{
for
(
auto
&
place
:
places
)
{
PADDLE_ENFORCE
(
platform
::
is_gpu_place
(
place
),
"NCCL only supports cuda place"
);
}
}
for
(
auto
&
s
:
Outputs
(
framework
::
GradVarName
(
kParameters
)))
{
if
(
s
==
framework
::
kEmptyVarName
)
{
continue
;
}
VLOG
(
3
)
<<
"Moving "
<<
s
;
CopyOrShare
(
*
sub_scopes
[
0
]
->
FindVar
(
s
),
place
,
scope
.
FindVar
(
s
));
}
WaitOnPlaces
(
places
);
}
void
AccumulateGrad
(
const
framework
::
Scope
&
scope
,
const
platform
::
Place
&
place
,
const
std
::
vector
<
framework
::
Scope
*>
&
sub_scopes
,
const
platform
::
PlaceList
&
places
)
const
{
for
(
auto
&
s
:
Outputs
(
framework
::
GradVarName
(
kParameters
)))
{
if
(
s
==
framework
::
kEmptyVarName
)
{
continue
;
}
VLOG
(
3
)
<<
"Accumulating "
<<
s
;
if
(
s
==
framework
::
kEmptyVarName
)
continue
;
std
::
string
tmp_name
;
auto
*
tmp
=
sub_scopes
[
0
]
->
Var
(
&
tmp_name
);
for
(
size_t
i
=
1
;
i
<
sub_scopes
.
size
();
++
i
)
{
CopyOrShare
(
*
sub_scopes
[
i
]
->
FindVar
(
s
),
places
[
0
],
tmp
);
WaitOnPlaces
(
places
);
auto
sum_op
=
framework
::
OpRegistry
::
CreateOp
(
"sum"
,
{{
"X"
,
{
s
,
tmp_name
}}},
{{
"Out"
,
{
s
}}},
framework
::
AttributeMap
{{
"use_mkldnn"
,
{
false
}}});
VLOG
(
10
)
<<
sum_op
->
DebugStringEx
(
sub_scopes
[
0
]);
sum_op
->
Run
(
*
sub_scopes
[
0
],
places
[
0
]);
WaitOnPlace
(
places
[
0
]);
}
CopyOrShare
(
*
sub_scopes
[
0
]
->
FindVar
(
s
),
place
,
scope
.
FindVar
(
s
));
}
WaitOnPlaces
(
places
);
}
};
std
::
ostream
&
operator
<<
(
std
::
ostream
&
sout
,
const
std
::
vector
<
std
::
string
>
&
strs
)
{
std
::
copy
(
strs
.
begin
(),
strs
.
end
(),
std
::
ostream_iterator
<
std
::
string
>
(
sout
,
","
));
return
sout
;
}
class
ParallelDoGradOpDescMaker
:
public
framework
::
SingleGradOpDescMaker
{
public:
using
framework
::
SingleGradOpDescMaker
::
SingleGradOpDescMaker
;
protected:
virtual
std
::
unique_ptr
<
framework
::
OpDesc
>
Apply
()
const
{
auto
*
grad
=
new
framework
::
OpDesc
();
grad
->
SetType
(
"parallel_do_grad"
);
for
(
auto
&
input_param
:
this
->
InputNames
())
{
VLOG
(
3
)
<<
input_param
;
grad
->
SetInput
(
input_param
,
this
->
Input
(
input_param
));
if
(
input_param
!=
kPlaces
)
{
grad
->
SetOutput
(
framework
::
GradVarName
(
input_param
),
this
->
InputGrad
(
input_param
,
false
));
}
}
auto
*
g_block
=
this
->
grad_block_
[
0
];
// All variable name that needed by gradient operators
std
::
unordered_set
<
std
::
string
>
all_inputs_in_grad_blocks
;
for
(
size_t
i
=
0
;
i
<
g_block
->
OpSize
();
++
i
)
{
auto
*
op
=
g_block
->
Op
(
i
);
for
(
auto
&
var_name
:
op
->
InputArgumentNames
())
{
all_inputs_in_grad_blocks
.
insert
(
var_name
);
}
}
for
(
auto
&
output_param
:
this
->
OutputNames
())
{
if
(
output_param
==
kParallelScopes
)
{
grad
->
SetInput
(
output_param
,
this
->
Output
(
output_param
));
grad
->
SetInput
(
framework
::
GradVarName
(
output_param
),
this
->
Output
(
output_param
));
}
else
{
grad
->
SetInput
(
output_param
,
this
->
Output
(
output_param
));
std
::
vector
<
std
::
string
>
og_names
;
for
(
auto
&
og_name
:
this
->
OutputGrad
(
output_param
))
{
if
(
all_inputs_in_grad_blocks
.
count
(
og_name
)
!=
0
)
{
// there are some gradient operators who need the OG. So make this
// OG as an input of parallel.do
og_names
.
push_back
(
og_name
);
}
// else, there is no operator who need the OG. Do not use this OG as
// an input
}
grad
->
SetInput
(
framework
::
GradVarName
(
output_param
),
og_names
);
}
}
grad
->
SetInput
(
"Communicator"
,
{
"nccl_com__do_not_change_"
});
grad
->
SetAttrMap
(
this
->
Attrs
());
grad
->
SetBlockAttr
(
kParallelBlock
,
grad_block_
[
0
]);
return
std
::
unique_ptr
<
framework
::
OpDesc
>
(
grad
);
}
};
class
ParallelDoGradOpShapeInference
:
public
framework
::
InferShapeBase
{
public:
void
operator
()(
framework
::
InferShapeContext
*
ctx
)
const
override
{
PADDLE_ENFORCE
(
ctx
->
HasInputs
(
kParameters
));
PADDLE_ENFORCE
(
ctx
->
HasInputs
(
kInputs
));
PADDLE_ENFORCE
(
ctx
->
HasInputs
(
kOutputs
));
ctx
->
SetOutputsDim
(
framework
::
GradVarName
(
kParameters
),
ctx
->
GetInputsDim
(
kParameters
));
auto
i_dims
=
ctx
->
GetInputsDim
(
kInputs
);
auto
ig_names
=
ctx
->
Outputs
(
framework
::
GradVarName
(
kInputs
));
for
(
size_t
i
=
0
;
i
<
ig_names
.
size
();
++
i
)
{
auto
&
ig_name
=
ig_names
[
i
];
if
(
ig_name
==
framework
::
kEmptyVarName
)
{
continue
;
}
ctx
->
SetDims
({
ig_name
},
{
i_dims
[
i
]});
}
auto
p_dims
=
ctx
->
GetInputsDim
(
kParameters
);
auto
pg_names
=
ctx
->
Outputs
(
framework
::
GradVarName
(
kParameters
));
for
(
size_t
i
=
0
;
i
<
pg_names
.
size
();
++
i
)
{
auto
&
pg_name
=
pg_names
[
i
];
if
(
pg_name
==
framework
::
kEmptyVarName
)
{
continue
;
}
ctx
->
SetDims
({
pg_name
},
{
p_dims
[
i
]});
}
}
};
class
ParallelDoGradOpVarTypeInference
:
public
framework
::
VarTypeInference
{
public:
void
operator
()(
const
framework
::
OpDesc
&
op_desc
,
framework
::
BlockDesc
*
block
)
const
override
{
framework
::
BlockDesc
*
sub_block
=
boost
::
get
<
framework
::
BlockDesc
*>
(
op_desc
.
GetAttr
(
kParallelBlock
));
for
(
auto
&
out_vars
:
op_desc
.
Outputs
())
{
for
(
auto
&
out_var
:
out_vars
.
second
)
{
auto
&
var
=
block
->
FindRecursiveOrCreateVar
(
out_var
);
auto
sub_var
=
sub_block
->
FindRecursiveOrCreateVar
(
out_var
);
if
(
sub_var
.
GetType
()
!=
var
.
GetType
())
{
var
.
SetType
(
sub_var
.
GetType
());
}
}
}
}
};
}
// namespace operators
}
// namespace paddle
REGISTER_OPERATOR
(
parallel_do
,
paddle
::
operators
::
ParallelDoOp
,
paddle
::
operators
::
ParallelDoOpProtoMaker
,
paddle
::
operators
::
ParallelDoGradOpDescMaker
);
REGISTER_OPERATOR
(
parallel_do_grad
,
paddle
::
operators
::
ParallelDoGradOp
,
paddle
::
operators
::
ParallelDoGradOpShapeInference
,
paddle
::
operators
::
ParallelDoGradOpVarTypeInference
);
paddle/fluid/operators/controlflow/while_op.cc
浏览文件 @
1c116462
...
@@ -399,26 +399,41 @@ class WhileGradOpShapeInference : public framework::InferShapeBase {
...
@@ -399,26 +399,41 @@ class WhileGradOpShapeInference : public framework::InferShapeBase {
ctx
->
HasInputs
(
kOutputs
);
ctx
->
HasInputs
(
kOutputs
);
ctx
->
HasInputs
(
framework
::
GradVarName
(
kOutputs
));
ctx
->
HasInputs
(
framework
::
GradVarName
(
kOutputs
));
auto
p_names
=
ctx
->
Inputs
(
kX
);
auto
pg_ig_names
=
ctx
->
Outputs
(
kXGRAD
);
auto
pg_ig_names
=
ctx
->
Outputs
(
kXGRAD
);
auto
var_types
=
ctx
->
GetInputsVarType
(
kX
);
std
::
vector
<
framework
::
InferShapeVarPtr
>
in_var_ptrs
=
std
::
vector
<
std
::
string
>
names_to_set
;
ctx
->
GetInputVarPtrs
(
kX
);
std
::
vector
<
framework
::
DDim
>
dims_to_set
;
std
::
vector
<
framework
::
InferShapeVarPtr
>
out_var_ptrs
=
for
(
size_t
i
=
0
;
i
<
p_names
.
size
();
++
i
)
{
ctx
->
GetOutputVarPtrs
(
kXGRAD
);
PADDLE_ENFORCE
(
in_var_ptrs
.
size
()
==
out_var_ptrs
.
size
());
for
(
size_t
i
=
0
;
i
<
in_var_ptrs
.
size
();
++
i
)
{
if
(
pg_ig_names
[
i
]
==
framework
::
kEmptyVarName
)
{
if
(
pg_ig_names
[
i
]
==
framework
::
kEmptyVarName
)
{
continue
;
continue
;
}
}
auto
dims
=
ctx
->
GetInputsElementDim
(
kX
,
i
);
if
(
ctx
->
IsRuntime
())
{
if
(
var_types
[
i
]
==
framework
::
proto
::
VarType
::
LOD_TENSOR
)
{
framework
::
Variable
*
in_var
=
names_to_set
.
push_back
(
pg_ig_names
[
i
]);
boost
::
get
<
framework
::
Variable
*>
(
in_var_ptrs
[
i
]);
dims_to_set
.
push_back
(
dims
);
framework
::
Variable
*
out_var
=
}
else
if
(
var_types
[
i
]
==
framework
::
proto
::
VarType
::
LOD_TENSOR_ARRAY
)
{
boost
::
get
<
framework
::
Variable
*>
(
out_var_ptrs
[
i
]);
// not sure how to set the dim of LOD_TENSOR_ARRAY
names_to_set
.
push_back
(
pg_ig_names
[
i
]);
auto
type
=
framework
::
ToVarType
(
in_var
->
Type
());
dims_to_set
.
push_back
(
dims
);
if
(
type
==
framework
::
proto
::
VarType
::
LOD_TENSOR
)
{
out_var
->
GetMutable
<
LoDTensor
>
()
->
Resize
(
in_var
->
Get
<
framework
::
LoDTensor
>
().
dims
());
}
else
if
(
type
==
framework
::
proto
::
VarType
::
SELECTED_ROWS
)
{
out_var
->
GetMutable
<
framework
::
SelectedRows
>
()
->
set_height
(
in_var
->
Get
<
framework
::
SelectedRows
>
().
GetCompleteDims
()[
0
]);
}
else
if
(
type
==
framework
::
proto
::
VarType
::
LOD_TENSOR_ARRAY
)
{
PADDLE_THROW
(
"WhileGradOp doesn't support type %d"
,
static_cast
<
int
>
(
type
));
}
}
else
{
framework
::
VarDesc
*
in_var
=
boost
::
get
<
framework
::
VarDesc
*>
(
in_var_ptrs
[
i
]);
boost
::
get
<
framework
::
VarDesc
*>
(
out_var_ptrs
[
i
])
->
SetShape
(
in_var
->
GetShape
());
}
}
}
}
ctx
->
SetDims
(
names_to_set
,
dims_to_set
);
}
}
};
};
...
...
paddle/fluid/operators/conv_mkldnn_op.cc
浏览文件 @
1c116462
...
@@ -155,11 +155,14 @@ class ConvMKLDNNOpKernel : public paddle::framework::OpKernel<T> {
...
@@ -155,11 +155,14 @@ class ConvMKLDNNOpKernel : public paddle::framework::OpKernel<T> {
auto
chosen_memory_format
=
auto
chosen_memory_format
=
platform
::
data_format_to_memory_format
(
data_format
);
platform
::
data_format_to_memory_format
(
data_format
);
weights_format
=
mkldnn
::
memory
::
format
::
any
;
// Check the format for user's special output
if
(
chosen_memory_format
!=
mkldnn
::
memory
::
format
::
any
)
{
if
(
is_conv3d
)
{
if
(
is_conv3d
)
{
chosen_memory_format
=
chosen_memory_format
=
platform
::
MKLDNNFormatForSize
(
src_tz
.
size
(),
chosen_memory_format
);
platform
::
MKLDNNFormatForSize
(
src_tz
.
size
(),
chosen_memory_format
);
}
}
weights_format
=
GetWeightsFormat
(
chosen_memory_format
,
g
,
is_conv3d
);
}
auto
src_md
=
platform
::
MKLDNNMemDesc
(
auto
src_md
=
platform
::
MKLDNNMemDesc
(
src_tz
,
platform
::
MKLDNNGetDataType
<
T
>
(),
chosen_memory_format
);
src_tz
,
platform
::
MKLDNNGetDataType
<
T
>
(),
chosen_memory_format
);
...
@@ -435,11 +438,14 @@ class ConvMKLDNNGradOpKernel : public paddle::framework::OpKernel<T> {
...
@@ -435,11 +438,14 @@ class ConvMKLDNNGradOpKernel : public paddle::framework::OpKernel<T> {
auto
chosen_memory_format
=
auto
chosen_memory_format
=
platform
::
data_format_to_memory_format
(
data_format
);
platform
::
data_format_to_memory_format
(
data_format
);
weights_format
=
mkldnn
::
memory
::
format
::
any
;
// Check the format for user's special output
if
(
chosen_memory_format
!=
mkldnn
::
memory
::
format
::
any
)
{
if
(
is_conv3d
)
{
if
(
is_conv3d
)
{
chosen_memory_format
=
chosen_memory_format
=
platform
::
MKLDNNFormatForSize
(
src_tz
.
size
(),
chosen_memory_format
);
platform
::
MKLDNNFormatForSize
(
src_tz
.
size
(),
chosen_memory_format
);
}
}
weights_format
=
GetWeightsFormat
(
chosen_memory_format
,
g
,
is_conv3d
);
}
auto
src_md
=
platform
::
MKLDNNMemDesc
(
auto
src_md
=
platform
::
MKLDNNMemDesc
(
src_tz
,
platform
::
MKLDNNGetDataType
<
T
>
(),
chosen_memory_format
);
src_tz
,
platform
::
MKLDNNGetDataType
<
T
>
(),
chosen_memory_format
);
...
...
paddle/fluid/operators/distributed/brpc_sendrecvop_utils.cc
浏览文件 @
1c116462
...
@@ -16,6 +16,7 @@ limitations under the License. */
...
@@ -16,6 +16,7 @@ limitations under the License. */
#include <nccl.h>
#include <nccl.h>
#endif
#endif
#include <sys/time.h>
#include <sys/time.h>
#include <limits>
#include <thread> // NOLINT
#include <thread> // NOLINT
#include "paddle/fluid/framework/data_type.h"
#include "paddle/fluid/framework/data_type.h"
...
@@ -31,7 +32,12 @@ namespace distributed {
...
@@ -31,7 +32,12 @@ namespace distributed {
class
IOBufWriter
{
class
IOBufWriter
{
public:
public:
static
void
Append
(
butil
::
IOBuf
*
iobuf
,
int
k
,
const
char
*
v
,
int64_t
vlen
)
{
static
void
Append
(
const
std
::
string
&
varname
,
butil
::
IOBuf
*
iobuf
,
int
k
,
const
char
*
v
,
int64_t
vlen
)
{
if
(
vlen
>=
std
::
numeric_limits
<
int
>::
max
()
||
vlen
<
0
)
{
LOG
(
FATAL
)
<<
"AppendZeroCopy varname:"
<<
varname
<<
", vlen:"
<<
vlen
;
}
iobuf
->
append
(
reinterpret_cast
<
char
*>
(
&
k
),
4
);
iobuf
->
append
(
reinterpret_cast
<
char
*>
(
&
k
),
4
);
iobuf
->
append
(
reinterpret_cast
<
char
*>
(
&
vlen
),
8
);
iobuf
->
append
(
reinterpret_cast
<
char
*>
(
&
vlen
),
8
);
iobuf
->
append
(
v
,
vlen
);
iobuf
->
append
(
v
,
vlen
);
...
@@ -87,6 +93,10 @@ class IOBufWriter {
...
@@ -87,6 +93,10 @@ class IOBufWriter {
int
k
,
const
char
*
v
,
int64_t
vlen
,
int
k
,
const
char
*
v
,
int64_t
vlen
,
bool
in_cuda_pinned
,
void
(
*
destroy
)(
void
*
),
bool
in_cuda_pinned
,
void
(
*
destroy
)(
void
*
),
void
*
user_data
)
{
void
*
user_data
)
{
if
(
vlen
>=
std
::
numeric_limits
<
int
>::
max
()
||
vlen
<
0
)
{
LOG
(
FATAL
)
<<
"AppendZeroCopy varname:"
<<
varname
<<
", vlen:"
<<
vlen
;
}
#ifdef PADDLE_WITH_BRPC_RDMA
#ifdef PADDLE_WITH_BRPC_RDMA
IOBufWriter
::
AppendRdmaZeroCopy
(
varname
,
iobuf
,
k
,
v
,
vlen
,
in_cuda_pinned
,
IOBufWriter
::
AppendRdmaZeroCopy
(
varname
,
iobuf
,
k
,
v
,
vlen
,
in_cuda_pinned
,
destroy
,
user_data
);
destroy
,
user_data
);
...
@@ -134,7 +144,7 @@ void SerializeToIOBuf(const std::string& name, framework::Variable* var,
...
@@ -134,7 +144,7 @@ void SerializeToIOBuf(const std::string& name, framework::Variable* var,
request
->
set_type
(
::
sendrecv
::
NCCL_ID
);
request
->
set_type
(
::
sendrecv
::
NCCL_ID
);
const
ncclUniqueId
&
uid
=
var
->
Get
<
ncclUniqueId
>
();
const
ncclUniqueId
&
uid
=
var
->
Get
<
ncclUniqueId
>
();
// TODO(gongwb): use append_zero to avoid data copy.
// TODO(gongwb): use append_zero to avoid data copy.
IOBufWriter
::
Append
(
iobuf
,
IOBufWriter
::
Append
(
name
,
iobuf
,
sendrecv
::
VariableMessage
::
kSerializedFieldNumber
,
sendrecv
::
VariableMessage
::
kSerializedFieldNumber
,
uid
.
internal
,
NCCL_UNIQUE_ID_BYTES
);
uid
.
internal
,
NCCL_UNIQUE_ID_BYTES
);
return
;
return
;
...
@@ -149,7 +159,7 @@ void SerializeToIOBuf(const std::string& name, framework::Variable* var,
...
@@ -149,7 +159,7 @@ void SerializeToIOBuf(const std::string& name, framework::Variable* var,
// FIXME(gongwb): it seems that can use zero copy.
// FIXME(gongwb): it seems that can use zero copy.
if
(
var_is_not_stable
)
{
if
(
var_is_not_stable
)
{
IOBufWriter
::
Append
(
IOBufWriter
::
Append
(
iobuf
,
::
sendrecv
::
VariableMessage
::
kSerializedFieldNumber
,
name
,
iobuf
,
::
sendrecv
::
VariableMessage
::
kSerializedFieldNumber
,
static_cast
<
const
char
*>
(
payload
->
ptr
()),
payload
->
memory_size
());
static_cast
<
const
char
*>
(
payload
->
ptr
()),
payload
->
memory_size
());
}
else
{
}
else
{
if
(
platform
::
is_gpu_place
(
ctx
.
GetPlace
()))
{
if
(
platform
::
is_gpu_place
(
ctx
.
GetPlace
()))
{
...
@@ -171,10 +181,11 @@ void SerializeToIOBuf(const std::string& name, framework::Variable* var,
...
@@ -171,10 +181,11 @@ void SerializeToIOBuf(const std::string& name, framework::Variable* var,
if
(
var
->
IsType
<
framework
::
SelectedRows
>
())
{
if
(
var
->
IsType
<
framework
::
SelectedRows
>
())
{
auto
*
slr
=
var
->
GetMutable
<
framework
::
SelectedRows
>
();
auto
*
slr
=
var
->
GetMutable
<
framework
::
SelectedRows
>
();
size_t
rows_memory_size
=
PADDLE_ENFORCE
(
VectorElemName
(
slr
->
rows
())
==
typeid
(
int64_t
).
name
());
slr
->
rows
().
size
()
*
framework
::
SizeOfType
(
typeid
(
int64_t
)
);
size_t
rows_memory_size
=
slr
->
rows
().
size
()
*
sizeof
(
int64_t
);
IOBufWriter
::
Append
(
iobuf
,
::
sendrecv
::
VariableMessage
::
kRowsFieldNumber
,
IOBufWriter
::
Append
(
name
,
iobuf
,
::
sendrecv
::
VariableMessage
::
kRowsFieldNumber
,
reinterpret_cast
<
const
char
*>
(
slr
->
rows
().
data
()),
reinterpret_cast
<
const
char
*>
(
slr
->
rows
().
data
()),
static_cast
<
int64_t
>
(
rows_memory_size
));
static_cast
<
int64_t
>
(
rows_memory_size
));
}
}
...
...
paddle/fluid/operators/distributed/grpc_client.cc
浏览文件 @
1c116462
...
@@ -12,6 +12,7 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
...
@@ -12,6 +12,7 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
See the License for the specific language governing permissions and
limitations under the License. */
limitations under the License. */
#include <stdlib.h>
#include <limits>
#include <limits>
#include "glog/logging.h" // For VLOG
#include "glog/logging.h" // For VLOG
...
@@ -420,7 +421,15 @@ void GRPCClient::Proceed() {
...
@@ -420,7 +421,15 @@ void GRPCClient::Proceed() {
sync_cond_
.
notify_all
();
sync_cond_
.
notify_all
();
}
}
}
}
VLOG
(
3
)
<<
"GRPCClient Proceed end"
;
// Last log message
// Avoid using VLOG() and LOG(): in the destructor of google::LogMessage() a
// static Mutex log_mutex is used for synchronization, which might have been
// destructed at this moment.
if
(
FLAGS_v
>=
3
)
{
std
::
string
msg
(
"GRPCClient Proceed end"
);
fwrite
(
msg
.
c_str
(),
msg
.
length
(),
1
,
stdout
);
}
}
}
std
::
shared_ptr
<
grpc
::
Channel
>
GRPCClient
::
GetChannel
(
const
std
::
string
&
ep
)
{
std
::
shared_ptr
<
grpc
::
Channel
>
GRPCClient
::
GetChannel
(
const
std
::
string
&
ep
)
{
...
...
paddle/fluid/operators/distributed/grpc_serde.cc
浏览文件 @
1c116462
...
@@ -15,6 +15,7 @@ limitations under the License. */
...
@@ -15,6 +15,7 @@ limitations under the License. */
#ifdef PADDLE_WITH_CUDA
#ifdef PADDLE_WITH_CUDA
#include <nccl.h>
#include <nccl.h>
#endif
#endif
#include <limits>
#include <thread> // NOLINT
#include <thread> // NOLINT
#include "google/protobuf/io/coded_stream.h"
#include "google/protobuf/io/coded_stream.h"
...
@@ -102,6 +103,10 @@ void SerializeToByteBuffer(const std::string& name, framework::Variable* var,
...
@@ -102,6 +103,10 @@ void SerializeToByteBuffer(const std::string& name, framework::Variable* var,
e
.
WriteVarlengthBeginning
(
VarMsg
::
kSerializedFieldNumber
,
e
.
WriteVarlengthBeginning
(
VarMsg
::
kSerializedFieldNumber
,
payload
->
memory_size
());
payload
->
memory_size
());
if
(
payload
->
memory_size
()
>=
std
::
numeric_limits
<
int
>::
max
())
{
LOG
(
FATAL
)
<<
"AppendZeroCopy varname:"
<<
name
<<
", vlen:"
<<
payload
->
memory_size
();
}
// steal reference of tensor data
// steal reference of tensor data
::
grpc
::
Slice
slices
[
4
];
// metadata, tensor, rows meta, rows
::
grpc
::
Slice
slices
[
4
];
// metadata, tensor, rows meta, rows
int
num_slices
=
2
;
// only SelectedRows have rows buffer
int
num_slices
=
2
;
// only SelectedRows have rows buffer
...
@@ -115,7 +120,10 @@ void SerializeToByteBuffer(const std::string& name, framework::Variable* var,
...
@@ -115,7 +120,10 @@ void SerializeToByteBuffer(const std::string& name, framework::Variable* var,
if
(
var
->
IsType
<
framework
::
SelectedRows
>
())
{
if
(
var
->
IsType
<
framework
::
SelectedRows
>
())
{
auto
*
slr
=
var
->
GetMutable
<
framework
::
SelectedRows
>
();
auto
*
slr
=
var
->
GetMutable
<
framework
::
SelectedRows
>
();
ProtoEncodeHelper
e2
(
static_cast
<
char
*>
(
buf
),
128
);
ProtoEncodeHelper
e2
(
static_cast
<
char
*>
(
buf
),
128
);
PADDLE_ENFORCE
(
VectorElemName
(
slr
->
rows
())
==
typeid
(
int64_t
).
name
());
size_t
rows_memory_size
=
slr
->
rows
().
size
()
*
sizeof
(
int64_t
);
size_t
rows_memory_size
=
slr
->
rows
().
size
()
*
sizeof
(
int64_t
);
e2
.
WriteVarlengthBeginning
(
VarMsg
::
kRowsFieldNumber
,
rows_memory_size
);
e2
.
WriteVarlengthBeginning
(
VarMsg
::
kRowsFieldNumber
,
rows_memory_size
);
slices
[
2
]
=
::
grpc
::
Slice
(
e2
.
size
());
slices
[
2
]
=
::
grpc
::
Slice
(
e2
.
size
());
memcpy
(
const_cast
<
uint8_t
*>
(
slices
[
2
].
begin
()),
e2
.
data
(),
e2
.
size
());
memcpy
(
const_cast
<
uint8_t
*>
(
slices
[
2
].
begin
()),
e2
.
data
(),
e2
.
size
());
...
...
paddle/fluid/operators/distributed/sendrecvop_utils.h
浏览文件 @
1c116462
...
@@ -15,6 +15,7 @@ limitations under the License. */
...
@@ -15,6 +15,7 @@ limitations under the License. */
#pragma once
#pragma once
#include <iostream>
#include <iostream>
#include <string>
#include <string>
#include <typeindex>
#include <vector>
#include <vector>
#include "paddle/fluid/framework/data_type.h"
#include "paddle/fluid/framework/data_type.h"
...
@@ -23,9 +24,8 @@ limitations under the License. */
...
@@ -23,9 +24,8 @@ limitations under the License. */
#include "paddle/fluid/framework/selected_rows.h"
#include "paddle/fluid/framework/selected_rows.h"
#include "paddle/fluid/framework/tensor_util.h"
#include "paddle/fluid/framework/tensor_util.h"
#include "paddle/fluid/framework/var_type.h"
#include "paddle/fluid/framework/var_type.h"
#include "paddle/fluid/platform/port.h"
#include "paddle/fluid/operators/distributed/send_recv.pb.h"
#include "paddle/fluid/operators/distributed/send_recv.pb.h"
#include "paddle/fluid/platform/port.h"
namespace
paddle
{
namespace
paddle
{
namespace
operators
{
namespace
operators
{
...
@@ -83,6 +83,11 @@ inline framework::proto::VarType::Type ToVarType(
...
@@ -83,6 +83,11 @@ inline framework::proto::VarType::Type ToVarType(
}
}
}
}
template
<
template
<
typename
>
class
T
,
typename
Elem
>
std
::
string
VectorElemName
(
const
T
<
Elem
>&
arg
)
{
return
typeid
(
Elem
).
name
();
}
}
// namespace distributed
}
// namespace distributed
}
// namespace operators
}
// namespace operators
}
// namespace paddle
}
// namespace paddle
paddle/fluid/operators/distributed/variable_response.cc
浏览文件 @
1c116462
...
@@ -118,7 +118,7 @@ bool VariableResponse::CopyLodTensorData(
...
@@ -118,7 +118,7 @@ bool VariableResponse::CopyLodTensorData(
VLOG
(
6
)
<<
"Tensor.memory_size = "
<<
tensor
->
memory_size
()
VLOG
(
6
)
<<
"Tensor.memory_size = "
<<
tensor
->
memory_size
()
<<
", Buffer Size = "
<<
length
;
<<
", Buffer Size = "
<<
length
;
PADDLE_ENFORCE_EQ
(
tensor
->
memory_size
(),
length
);
PADDLE_ENFORCE_EQ
(
tensor
->
memory_size
(),
static_cast
<
unsigned
int
>
(
length
)
);
return
ReadRaw
(
input
,
ctx
,
tensor
->
place
(),
tensor_data
,
length
);
return
ReadRaw
(
input
,
ctx
,
tensor
->
place
(),
tensor_data
,
length
);
}
}
...
...
paddle/fluid/operators/merge_selected_rows_op.cc
浏览文件 @
1c116462
...
@@ -26,6 +26,13 @@ class MergeSelectedRowsOp : public framework::OperatorWithKernel {
...
@@ -26,6 +26,13 @@ class MergeSelectedRowsOp : public framework::OperatorWithKernel {
"Input(X) of MergeSelectedRowsOp should not be null."
);
"Input(X) of MergeSelectedRowsOp should not be null."
);
PADDLE_ENFORCE
(
ctx
->
HasOutput
(
"Out"
),
PADDLE_ENFORCE
(
ctx
->
HasOutput
(
"Out"
),
"Output(Out) of MergeSelectedRowsOp should not be null."
);
"Output(Out) of MergeSelectedRowsOp should not be null."
);
PADDLE_ENFORCE_EQ
(
ctx
->
GetInputsVarType
(
"X"
).
front
(),
framework
::
proto
::
VarType
::
SELECTED_ROWS
,
"Input X only should be SelectedRows."
);
PADDLE_ENFORCE_EQ
(
ctx
->
GetOutputsVarType
(
"Out"
).
front
(),
framework
::
proto
::
VarType
::
SELECTED_ROWS
,
"Output Y only should be SelectedRows."
);
ctx
->
ShareDim
(
"X"
,
/*->*/
"Out"
);
ctx
->
ShareDim
(
"X"
,
/*->*/
"Out"
);
}
}
};
};
...
@@ -43,7 +50,28 @@ class MergeSelectedRowsOpMaker : public framework::OpProtoAndCheckerMaker {
...
@@ -43,7 +50,28 @@ class MergeSelectedRowsOpMaker : public framework::OpProtoAndCheckerMaker {
R"DOC(
R"DOC(
MergeSelectedRows Operator.
MergeSelectedRows Operator.
MergeSelectedRows is used to merge the duplicated rows of the input.
MergeSelectedRows is used to merge the duplicated rows of the input. The
output's row has no duplicated, and it's order is incremental.
Example:
Input:
X.rows is [0, 5, 5, 4, 19]
X.height is 20
X.value is:
[[1, 1]
[2, 2]
[3, 3]
[4, 4]
[6, 6]]
Output:
Out.row is [0, 4, 5, 19]
Out.height is 20
Out.value is:
[[1, 1]
[4, 4]
[5, 5]
[6, 6]]
)DOC"
);
)DOC"
);
}
}
};
};
...
...
paddle/fluid/operators/ngraph/ngraph_ops.h
0 → 100644
浏览文件 @
1c116462
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
/*
* This file contains the list of the ngraph operators for Paddle.
*
* ATTENTION: It requires some C++11 features, for lower version C++ or C, we
* might release another API.
*/
#pragma once
#include "ops/binary_unnary_op.h"
#include "ops/mul_op.h"
paddle/fluid/operators/ngraph/ops/binary_unnary_op.h
0 → 100644
浏览文件 @
1c116462
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#ifdef PADDLE_WITH_NGRAPH
#pragma once
#include <string>
#include "ngraph/ngraph.hpp"
#include "paddle/fluid/platform/ngraph_helper.h"
namespace
paddle
{
namespace
operators
{
namespace
ngraphs
{
template
<
typename
T
>
static
void
BuildBinaryNode
(
const
std
::
shared_ptr
<
paddle
::
framework
::
OperatorBase
>&
op
,
std
::
shared_ptr
<
std
::
unordered_map
<
std
::
string
,
std
::
shared_ptr
<
ngraph
::
Node
>>>
ngb_node_map
)
{
auto
x
=
paddle
::
platform
::
GetInputNode
(
op
,
"X"
,
ngb_node_map
);
auto
y
=
paddle
::
platform
::
GetInputNode
(
op
,
"Y"
,
ngb_node_map
);
auto
out
=
std
::
make_shared
<
T
>
(
x
,
y
);
paddle
::
platform
::
SetOutputNode
(
op
,
"Out"
,
out
,
ngb_node_map
);
}
template
<
typename
T
>
static
void
BuildUnaryNode
(
const
std
::
shared_ptr
<
paddle
::
framework
::
OperatorBase
>&
op
,
std
::
shared_ptr
<
std
::
unordered_map
<
std
::
string
,
std
::
shared_ptr
<
ngraph
::
Node
>>>
ngb_node_map
)
{
auto
input
=
paddle
::
platform
::
GetInputNode
(
op
,
"X"
,
ngb_node_map
);
auto
out
=
std
::
make_shared
<
T
>
(
input
);
paddle
::
platform
::
SetOutputNode
(
op
,
"Out"
,
out
,
ngb_node_map
);
}
}
// namespace ngraphs
}
// namespace operators
}
// namespace paddle
#endif
paddle/fluid/operators/ngraph/ops/mul_op.h
0 → 100644
浏览文件 @
1c116462
/*Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#ifdef PADDLE_WITH_NGRAPH
#pragma once
#include <string>
#include "ngraph/ngraph.hpp"
#include "paddle/fluid/platform/ngraph_helper.h"
namespace
paddle
{
namespace
operators
{
namespace
ngraphs
{
static
void
BuildMulNode
(
const
std
::
shared_ptr
<
paddle
::
framework
::
OperatorBase
>&
op
,
std
::
shared_ptr
<
std
::
unordered_map
<
std
::
string
,
std
::
shared_ptr
<
ngraph
::
Node
>>>
ngb_node_map
)
{
auto
op_attrs
=
paddle
::
framework
::
AttrReader
(
op
->
Attrs
());
int
x_num_col_dims
=
op_attrs
.
Get
<
int
>
(
"x_num_col_dims"
);
int
y_num_col_dims
=
op_attrs
.
Get
<
int
>
(
"y_num_col_dims"
);
auto
x
=
paddle
::
platform
::
GetInputNode
(
op
,
"X"
,
ngb_node_map
);
auto
y
=
paddle
::
platform
::
GetInputNode
(
op
,
"Y"
,
ngb_node_map
);
auto
x_reshape
=
x
;
auto
y_reshape
=
y
;
if
(
x
->
get_shape
().
size
()
>
2
)
{
auto
x_2d
=
paddle
::
platform
::
FlattenTo2d
(
x
->
get_shape
(),
x_num_col_dims
);
x_reshape
=
paddle
::
platform
::
NgReshaper
(
x
,
x_2d
);
}
if
(
y
->
get_shape
().
size
()
>
2
)
{
auto
y_2d
=
paddle
::
platform
::
FlattenTo2d
(
y
->
get_shape
(),
y_num_col_dims
);
y_reshape
=
paddle
::
platform
::
NgReshaper
(
y
,
y_2d
);
}
std
::
shared_ptr
<
ngraph
::
Node
>
out
=
std
::
make_shared
<
ngraph
::
op
::
Dot
>
(
x_reshape
,
y_reshape
);
auto
dummy_out
=
paddle
::
platform
::
GetOutputNode
(
op
,
"Out"
,
ngb_node_map
);
if
(
dummy_out
&&
dummy_out
->
get_shape
()
!=
out
->
get_shape
())
{
out
=
paddle
::
platform
::
NgReshaper
(
out
,
dummy_out
->
get_shape
());
}
paddle
::
platform
::
SetOutputNode
(
op
,
"Out"
,
out
,
ngb_node_map
);
}
static
void
BuildMulGradNode
(
const
std
::
shared_ptr
<
paddle
::
framework
::
OperatorBase
>&
op
,
std
::
shared_ptr
<
std
::
unordered_map
<
std
::
string
,
std
::
shared_ptr
<
ngraph
::
Node
>>>
ngb_node_map
)
{
auto
op_attrs
=
paddle
::
framework
::
AttrReader
(
op
->
Attrs
());
int
x_num_col_dims
=
op_attrs
.
Get
<
int
>
(
"x_num_col_dims"
);
int
y_num_col_dims
=
op_attrs
.
Get
<
int
>
(
"y_num_col_dims"
);
auto
x
=
paddle
::
platform
::
GetInputNode
(
op
,
"X"
,
ngb_node_map
);
auto
y
=
paddle
::
platform
::
GetInputNode
(
op
,
"Y"
,
ngb_node_map
);
auto
dout
=
paddle
::
platform
::
GetInputNode
(
op
,
"Out@GRAD"
,
ngb_node_map
);
bool
is_dx
=
paddle
::
platform
::
HasOutput
(
op
,
"X@GRAD"
)
?
true
:
false
;
bool
is_dy
=
paddle
::
platform
::
HasOutput
(
op
,
"Y@GRAD"
)
?
true
:
false
;
auto
x_shape
=
x
->
get_shape
();
auto
y_shape
=
y
->
get_shape
();
auto
x_reshape
=
x
;
auto
y_reshape
=
y
;
if
(
x_shape
.
size
()
>
2
)
{
auto
x_2d_shape
=
paddle
::
platform
::
FlattenTo2d
(
x_shape
,
x_num_col_dims
);
x_reshape
=
paddle
::
platform
::
NgReshaper
(
x
,
x_2d_shape
);
}
if
(
y_shape
.
size
()
>
2
)
{
auto
y_2d_shape
=
paddle
::
platform
::
FlattenTo2d
(
y_shape
,
y_num_col_dims
);
y_reshape
=
paddle
::
platform
::
NgReshaper
(
y
,
y_2d_shape
);
}
auto
x_reshape_shape
=
x_reshape
->
get_shape
();
std
::
reverse
(
x_reshape_shape
.
begin
(),
x_reshape_shape
.
end
());
auto
x_transpose
=
std
::
make_shared
<
ngraph
::
op
::
Reshape
>
(
x_reshape
,
ngraph
::
AxisVector
{
1
,
0
},
x_reshape_shape
);
auto
y_reshape_shape
=
y_reshape
->
get_shape
();
std
::
reverse
(
y_reshape_shape
.
begin
(),
y_reshape_shape
.
end
());
auto
y_transpose
=
std
::
make_shared
<
ngraph
::
op
::
Reshape
>
(
y_reshape
,
ngraph
::
AxisVector
{
1
,
0
},
y_reshape_shape
);
if
(
is_dx
)
{
if
(
dout
->
get_shape
().
size
()
>
2
)
{
auto
dout_2d_shape
=
paddle
::
platform
::
FlattenTo2d
(
dout
->
get_shape
(),
2
);
dout
=
paddle
::
platform
::
NgReshaper
(
dout
,
dout_2d_shape
);
}
auto
dx
=
std
::
make_shared
<
ngraph
::
op
::
Dot
>
(
dout
,
y_transpose
);
if
(
dx
->
get_shape
()
==
x_shape
)
{
paddle
::
platform
::
SetOutputNode
(
op
,
"X@GRAD"
,
dx
,
ngb_node_map
);
}
else
{
auto
dx_reshape
=
paddle
::
platform
::
NgReshaper
(
dx
,
x_shape
);
paddle
::
platform
::
SetOutputNode
(
op
,
"X@GRAD"
,
dx_reshape
,
ngb_node_map
);
}
}
if
(
is_dy
)
{
if
(
dout
->
get_shape
().
size
()
>
2
)
{
auto
dout_2d_shape
=
paddle
::
platform
::
FlattenTo2d
(
dout
->
get_shape
(),
2
);
dout
=
paddle
::
platform
::
NgReshaper
(
dout
,
dout_2d_shape
);
}
auto
dy
=
std
::
make_shared
<
ngraph
::
op
::
Dot
>
(
x_transpose
,
dout
);
if
(
dy
->
get_shape
()
==
y_shape
)
{
paddle
::
platform
::
SetOutputNode
(
op
,
"Y@GRAD"
,
dy
,
ngb_node_map
);
}
else
{
auto
dy_reshape
=
paddle
::
platform
::
NgReshaper
(
dy
,
y_shape
);
paddle
::
platform
::
SetOutputNode
(
op
,
"Y@GRAD"
,
dy_reshape
,
ngb_node_map
);
}
}
}
}
// namespace ngraphs
}
// namespace operators
}
// namespace paddle
#endif
paddle/fluid/operators/optimizers/adam_op.cc
浏览文件 @
1c116462
...
@@ -109,6 +109,11 @@ class AdamOpMaker : public framework::OpProtoAndCheckerMaker {
...
@@ -109,6 +109,11 @@ class AdamOpMaker : public framework::OpProtoAndCheckerMaker {
"(float, default 1.0e-8) "
"(float, default 1.0e-8) "
"Constant for numerical stability"
)
"Constant for numerical stability"
)
.
SetDefault
(
1.0e-8
f
);
.
SetDefault
(
1.0e-8
f
);
AddAttr
<
bool
>
(
"lazy_mode"
,
"(bool, default false) "
"only update the parameter that has gradient in sparse update"
)
.
SetDefault
(
false
);
AddComment
(
R"DOC(
AddComment
(
R"DOC(
Adam Optimizer.
Adam Optimizer.
...
...
paddle/fluid/operators/optimizers/adam_op.h
浏览文件 @
1c116462
...
@@ -177,12 +177,13 @@ struct SparseAdamFunctor {
...
@@ -177,12 +177,13 @@ struct SparseAdamFunctor {
const
int64_t
*
rows_
;
const
int64_t
*
rows_
;
int64_t
row_numel_
;
int64_t
row_numel_
;
int64_t
row_count_
;
int64_t
row_count_
;
bool
lazy_mode_
;
SparseAdamFunctor
(
T
beta1
,
T
beta2
,
T
epsilon
,
const
T
*
beta1_pow
,
SparseAdamFunctor
(
T
beta1
,
T
beta2
,
T
epsilon
,
const
T
*
beta1_pow
,
const
T
*
beta2_pow
,
const
T
*
mom1
,
T
*
mom1_out
,
const
T
*
beta2_pow
,
const
T
*
mom1
,
T
*
mom1_out
,
const
T
*
mom2
,
T
*
mom2_out
,
const
T
*
lr
,
const
T
*
grad
,
const
T
*
mom2
,
T
*
mom2_out
,
const
T
*
lr
,
const
T
*
grad
,
const
T
*
param
,
T
*
param_out
,
const
int64_t
*
rows
,
const
T
*
param
,
T
*
param_out
,
const
int64_t
*
rows
,
int64_t
row_numel
,
int64_t
row_count
)
int64_t
row_numel
,
int64_t
row_count
,
bool
lazy_mode
)
:
beta1_
(
beta1
),
:
beta1_
(
beta1
),
beta2_
(
beta2
),
beta2_
(
beta2
),
epsilon_
(
epsilon
),
epsilon_
(
epsilon
),
...
@@ -198,13 +199,10 @@ struct SparseAdamFunctor {
...
@@ -198,13 +199,10 @@ struct SparseAdamFunctor {
param_out_
(
param_out
),
param_out_
(
param_out
),
rows_
(
rows
),
rows_
(
rows
),
row_numel_
(
row_numel
),
row_numel_
(
row_numel
),
row_count_
(
row_count
)
{}
row_count_
(
row_count
),
lazy_mode_
(
lazy_mode
)
{}
inline
HOSTDEVICE
void
operator
()(
size_t
i
)
const
{
auto
row_idx
=
math
::
BinarySearch
<
int64_t
>
(
rows_
,
row_count_
,
i
/
row_numel_
);
T
g
=
row_idx
>=
0
?
grad_
[
row_idx
*
row_numel_
+
i
%
row_numel_
]
:
0
;
inline
HOSTDEVICE
void
adam_update
(
size_t
i
,
T
g
)
const
{
// The following code is the same as dense
// The following code is the same as dense
T
mom1
=
moment1_
[
i
];
T
mom1
=
moment1_
[
i
];
T
mom2
=
moment2_
[
i
];
T
mom2
=
moment2_
[
i
];
...
@@ -225,6 +223,17 @@ struct SparseAdamFunctor {
...
@@ -225,6 +223,17 @@ struct SparseAdamFunctor {
moment2_out_
[
i
]
=
mom2
;
moment2_out_
[
i
]
=
mom2
;
param_out_
[
i
]
=
p
;
param_out_
[
i
]
=
p
;
}
}
inline
HOSTDEVICE
void
operator
()(
size_t
i
)
const
{
auto
row_idx
=
math
::
BinarySearch
<
int64_t
>
(
rows_
,
row_count_
,
i
/
row_numel_
);
if
(
lazy_mode_
&&
row_idx
<
0
)
{
return
;
}
else
{
T
g
=
row_idx
>=
0
?
grad_
[
row_idx
*
row_numel_
+
i
%
row_numel_
]
:
0
;
adam_update
(
i
,
g
);
}
}
};
};
template
<
typename
DeviceContext
,
typename
T
>
template
<
typename
DeviceContext
,
typename
T
>
...
@@ -240,6 +249,7 @@ class AdamOpKernel : public framework::OpKernel<T> {
...
@@ -240,6 +249,7 @@ class AdamOpKernel : public framework::OpKernel<T> {
using
paddle
::
framework
::
LoDTensor
;
using
paddle
::
framework
::
LoDTensor
;
using
paddle
::
operators
::
detail
::
Ref
;
using
paddle
::
operators
::
detail
::
Ref
;
bool
lazy_mode
=
ctx
.
Attr
<
bool
>
(
"lazy_mode"
);
T
beta1
=
static_cast
<
T
>
(
ctx
.
Attr
<
float
>
(
"beta1"
));
T
beta1
=
static_cast
<
T
>
(
ctx
.
Attr
<
float
>
(
"beta1"
));
T
beta2
=
static_cast
<
T
>
(
ctx
.
Attr
<
float
>
(
"beta2"
));
T
beta2
=
static_cast
<
T
>
(
ctx
.
Attr
<
float
>
(
"beta2"
));
T
epsilon
=
static_cast
<
T
>
(
ctx
.
Attr
<
float
>
(
"epsilon"
));
T
epsilon
=
static_cast
<
T
>
(
ctx
.
Attr
<
float
>
(
"epsilon"
));
...
@@ -351,11 +361,23 @@ class AdamOpKernel : public framework::OpKernel<T> {
...
@@ -351,11 +361,23 @@ class AdamOpKernel : public framework::OpKernel<T> {
mom2_out
.
template
mutable_data
<
T
>(
ctx
.
GetPlace
()),
mom2_out
.
template
mutable_data
<
T
>(
ctx
.
GetPlace
()),
lr
.
template
data
<
T
>(),
grad_data
,
param
.
template
data
<
T
>(),
lr
.
template
data
<
T
>(),
grad_data
,
param
.
template
data
<
T
>(),
param_out
.
template
mutable_data
<
T
>(
ctx
.
GetPlace
()),
rows
,
row_numel
,
param_out
.
template
mutable_data
<
T
>(
ctx
.
GetPlace
()),
rows
,
row_numel
,
grad_merge
.
rows
().
size
());
grad_merge
.
rows
().
size
(),
lazy_mode
);
VLOG
(
3
)
<<
"lazy_mode :"
<<
lazy_mode
;
if
(
lazy_mode
&&
platform
::
is_cpu_place
(
ctx
.
GetPlace
()))
{
size_t
row_count
=
grad_merge
.
rows
().
size
();
std
::
vector
<
int64_t
>
cpu_rows
(
grad_merge
.
rows
());
for
(
size_t
row_index
=
0
;
row_index
<
row_count
;
++
row_index
)
{
for
(
size_t
offset
=
0
;
offset
<
row_numel
;
++
offset
)
{
size_t
i
=
cpu_rows
[
row_index
]
*
row_numel
+
offset
;
functor
.
adam_update
(
i
,
grad_data
[
row_index
*
row_numel
+
offset
]);
}
}
}
else
{
platform
::
ForRange
<
DeviceContext
>
for_range
(
platform
::
ForRange
<
DeviceContext
>
for_range
(
static_cast
<
const
DeviceContext
&>
(
ctx
.
device_context
()),
static_cast
<
const
DeviceContext
&>
(
ctx
.
device_context
()),
param
.
numel
());
param
.
numel
());
for_range
(
functor
);
for_range
(
functor
);
}
}
else
{
}
else
{
PADDLE_THROW
(
"Variable type not supported by adam_op"
);
PADDLE_THROW
(
"Variable type not supported by adam_op"
);
}
}
...
...
paddle/fluid/operators/transpose_mkldnn_op.cc
0 → 100644
浏览文件 @
1c116462
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/fluid/framework/data_layout_transform.h"
#include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/memory/malloc.h"
#include "paddle/fluid/platform/mkldnn_reuse.h"
namespace
paddle
{
namespace
operators
{
using
Tensor
=
framework
::
Tensor
;
using
framework
::
DataLayout
;
template
<
typename
T
>
class
TransposeMKLDNNOpKernel
:
public
paddle
::
framework
::
OpKernel
<
T
>
{
public:
void
Compute
(
const
paddle
::
framework
::
ExecutionContext
&
ctx
)
const
override
{
PADDLE_ENFORCE
(
paddle
::
platform
::
is_cpu_place
(
ctx
.
GetPlace
()),
"It must use CPUPlace."
);
const
bool
is_test
=
ctx
.
Attr
<
bool
>
(
"is_test"
);
PADDLE_ENFORCE
(
is_test
==
true
,
"TransposeMKLDNN works only for inference!. Set is_test = True"
);
auto
&
dev_ctx
=
ctx
.
template
device_context
<
paddle
::
platform
::
MKLDNNDeviceContext
>();
const
auto
&
mkldnn_engine
=
dev_ctx
.
GetEngine
();
std
::
vector
<
int
>
axis
=
ctx
.
Attr
<
std
::
vector
<
int
>>
(
"axis"
);
int
ndims
=
axis
.
size
();
auto
*
input
=
ctx
.
Input
<
Tensor
>
(
"X"
);
auto
*
output
=
ctx
.
Output
<
Tensor
>
(
"Out"
);
const
T
*
input_data
=
input
->
data
<
T
>
();
if
(
ndims
==
1
)
{
output
->
ShareDataWith
(
*
input
);
return
;
}
std
::
vector
<
int
>
nchw_tz
=
paddle
::
framework
::
vectorize2int
(
input
->
dims
());
const
std
::
string
key
=
platform
::
TransposeMKLDNNHandler
::
GetHash
(
nchw_tz
,
axis
,
ctx
.
op
().
Output
(
"Out"
));
platform
::
TransposeMKLDNNHandler
handler
(
nchw_tz
,
axis
,
dev_ctx
,
mkldnn_engine
,
key
);
auto
transpose_src_memory_p
=
handler
.
AcquireSrcMemory
(
input
->
format
(),
platform
::
to_void_cast
<
T
>
(
input_data
));
auto
transpose_dst_memory_p
=
handler
.
AcquireDstMemory
(
output
,
ctx
.
GetPlace
());
auto
transpose_p
=
handler
.
AcquireTranspose
(
transpose_dst_memory_p
,
transpose_src_memory_p
);
std
::
vector
<
mkldnn
::
primitive
>
pipeline
;
pipeline
.
push_back
(
*
transpose_p
);
mkldnn
::
stream
(
mkldnn
::
stream
::
kind
::
eager
).
submit
(
pipeline
).
wait
();
}
};
}
// namespace operators
}
// namespace paddle
namespace
ops
=
paddle
::
operators
;
REGISTER_OP_KERNEL
(
transpose2
,
MKLDNN
,
::
paddle
::
platform
::
CPUPlace
,
ops
::
TransposeMKLDNNOpKernel
<
float
>
);
REGISTER_OP_KERNEL
(
transpose
,
MKLDNN
,
::
paddle
::
platform
::
CPUPlace
,
ops
::
TransposeMKLDNNOpKernel
<
float
>
);
paddle/fluid/operators/transpose_op.cc
浏览文件 @
1c116462
...
@@ -16,6 +16,10 @@ limitations under the License. */
...
@@ -16,6 +16,10 @@ limitations under the License. */
#include <string>
#include <string>
#include <vector>
#include <vector>
#ifdef PADDLE_WITH_MKLDNN
#include "paddle/fluid/platform/mkldnn_helper.h"
#endif
namespace
paddle
{
namespace
paddle
{
namespace
operators
{
namespace
operators
{
...
@@ -53,11 +57,32 @@ class TransposeOp : public framework::OperatorWithKernel {
...
@@ -53,11 +57,32 @@ class TransposeOp : public framework::OperatorWithKernel {
}
}
ctx
->
SetOutputDim
(
"Out"
,
out_dims
);
ctx
->
SetOutputDim
(
"Out"
,
out_dims
);
}
}
protected:
framework
::
OpKernelType
GetExpectedKernelType
(
const
framework
::
ExecutionContext
&
ctx
)
const
override
{
framework
::
LibraryType
library_
{
framework
::
LibraryType
::
kPlain
};
std
::
string
data_format
=
ctx
.
Attr
<
std
::
string
>
(
"data_format"
);
framework
::
DataLayout
layout_
=
framework
::
StringToDataLayout
(
data_format
);
#ifdef PADDLE_WITH_MKLDNN
if
(
library_
==
framework
::
LibraryType
::
kPlain
&&
platform
::
CanMKLDNNBeUsed
(
ctx
))
{
library_
=
framework
::
LibraryType
::
kMKLDNN
;
layout_
=
framework
::
DataLayout
::
kMKLDNN
;
}
#endif
return
framework
::
OpKernelType
(
ctx
.
Input
<
Tensor
>
(
"X"
)
->
type
(),
ctx
.
GetPlace
(),
layout_
,
library_
);
}
};
};
class
TransposeOpMaker
:
public
framework
::
OpProtoAndCheckerMaker
{
class
TransposeOpMaker
:
public
framework
::
OpProtoAndCheckerMaker
{
public:
public:
void
Make
()
override
{
void
Make
()
override
{
AddAttr
<
bool
>
(
"is_test"
,
"(bool, default false) Set to true for inference only, false "
"for training. Some layers may run faster when this is true."
)
.
SetDefault
(
false
);
AddInput
(
AddInput
(
"X"
,
"X"
,
"(Tensor) The input tensor, tensors with rank up to 6 are supported."
);
"(Tensor) The input tensor, tensors with rank up to 6 are supported."
);
...
@@ -67,6 +92,16 @@ class TransposeOpMaker : public framework::OpProtoAndCheckerMaker {
...
@@ -67,6 +92,16 @@ class TransposeOpMaker : public framework::OpProtoAndCheckerMaker {
"(vector<int>) A list of values, and the size of the list should be "
"(vector<int>) A list of values, and the size of the list should be "
"the same with the input tensor rank. This operator permutes the input "
"the same with the input tensor rank. This operator permutes the input "
"tensor's axes according to the values given."
);
"tensor's axes according to the values given."
);
AddAttr
<
bool
>
(
"use_mkldnn"
,
"(bool, default false) Only used in mkldnn kernel"
)
.
SetDefault
(
false
);
AddAttr
<
std
::
string
>
(
"data_format"
,
"(string, default NCHW) Only used in "
"An optional string from:
\"
NHWC
\"
,
\"
NCHW
\"
. "
"Defaults to
\"
NHWC
\"
. Specify the data format of the output data, "
"the input will be transformed automatically. "
)
.
SetDefault
(
"AnyLayout"
);
AddComment
(
R"DOC(
AddComment
(
R"DOC(
Transpose Operator.
Transpose Operator.
...
@@ -144,8 +179,18 @@ class Transpose2Op : public TransposeOp {
...
@@ -144,8 +179,18 @@ class Transpose2Op : public TransposeOp {
protected:
protected:
framework
::
OpKernelType
GetExpectedKernelType
(
framework
::
OpKernelType
GetExpectedKernelType
(
const
framework
::
ExecutionContext
&
ctx
)
const
override
{
const
framework
::
ExecutionContext
&
ctx
)
const
override
{
return
framework
::
OpKernelType
(
ctx
.
Input
<
framework
::
LoDTensor
>
(
"X"
)
->
type
(),
framework
::
LibraryType
library_
{
framework
::
LibraryType
::
kPlain
};
ctx
.
device_context
());
std
::
string
data_format
=
ctx
.
Attr
<
std
::
string
>
(
"data_format"
);
framework
::
DataLayout
layout_
=
framework
::
StringToDataLayout
(
data_format
);
#ifdef PADDLE_WITH_MKLDNN
if
(
library_
==
framework
::
LibraryType
::
kPlain
&&
platform
::
CanMKLDNNBeUsed
(
ctx
))
{
library_
=
framework
::
LibraryType
::
kMKLDNN
;
layout_
=
framework
::
DataLayout
::
kMKLDNN
;
}
#endif
return
framework
::
OpKernelType
(
ctx
.
Input
<
Tensor
>
(
"X"
)
->
type
(),
ctx
.
GetPlace
(),
layout_
,
library_
);
}
}
};
};
...
...
paddle/fluid/platform/dynload/CMakeLists.txt
浏览文件 @
1c116462
...
@@ -16,9 +16,7 @@ if (CUPTI_FOUND)
...
@@ -16,9 +16,7 @@ if (CUPTI_FOUND)
list
(
APPEND CUDA_SRCS cupti.cc
)
list
(
APPEND CUDA_SRCS cupti.cc
)
endif
(
CUPTI_FOUND
)
endif
(
CUPTI_FOUND
)
nv_library
(
dynload_cuda SRCS
${
CUDA_SRCS
}
DEPS dynamic_loader
)
nv_library
(
dynload_cuda SRCS
${
CUDA_SRCS
}
DEPS dynamic_loader
)
if
(
NOT WIN32
)
cc_library
(
dynload_warpctc SRCS warpctc.cc DEPS dynamic_loader warpctc
)
cc_library
(
dynload_warpctc SRCS warpctc.cc DEPS dynamic_loader warpctc
)
endif
(
NOT WIN32
)
if
(
WITH_MKLML
)
if
(
WITH_MKLML
)
cc_library
(
dynload_mklml SRCS mklml.cc DEPS dynamic_loader mklml
)
cc_library
(
dynload_mklml SRCS mklml.cc DEPS dynamic_loader mklml
)
endif
()
endif
()
...
...
paddle/fluid/platform/dynload/cudnn.h
浏览文件 @
1c116462
...
@@ -34,7 +34,7 @@ extern void EnforceCUDNNLoaded(const char* fn_name);
...
@@ -34,7 +34,7 @@ extern void EnforceCUDNNLoaded(const char* fn_name);
#define DECLARE_DYNAMIC_LOAD_CUDNN_WRAP(__name) \
#define DECLARE_DYNAMIC_LOAD_CUDNN_WRAP(__name) \
struct DynLoad__##__name { \
struct DynLoad__##__name { \
template <typename... Args> \
template <typename... Args> \
auto operator()(Args... args) ->
decltype(__name(args...)) {
\
auto operator()(Args... args) ->
DECLARE_TYPE(__name, args...) {
\
using cudnn_func = decltype(&::__name); \
using cudnn_func = decltype(&::__name); \
std::call_once(cudnn_dso_flag, []() { \
std::call_once(cudnn_dso_flag, []() { \
cudnn_dso_handle = paddle::platform::dynload::GetCUDNNDsoHandle(); \
cudnn_dso_handle = paddle::platform::dynload::GetCUDNNDsoHandle(); \
...
...
paddle/fluid/platform/dynload/dynamic_loader.cc
浏览文件 @
1c116462
...
@@ -201,6 +201,8 @@ void* GetCurandDsoHandle() {
...
@@ -201,6 +201,8 @@ void* GetCurandDsoHandle() {
void
*
GetWarpCTCDsoHandle
()
{
void
*
GetWarpCTCDsoHandle
()
{
#if defined(__APPLE__) || defined(__OSX__)
#if defined(__APPLE__) || defined(__OSX__)
return
GetDsoHandleFromSearchPath
(
FLAGS_warpctc_dir
,
"libwarpctc.dylib"
);
return
GetDsoHandleFromSearchPath
(
FLAGS_warpctc_dir
,
"libwarpctc.dylib"
);
#elif defined(_WIN32)
return
GetDsoHandleFromSearchPath
(
FLAGS_warpctc_dir
,
"warpctc.dll"
);
#else
#else
return
GetDsoHandleFromSearchPath
(
FLAGS_warpctc_dir
,
"libwarpctc.so"
);
return
GetDsoHandleFromSearchPath
(
FLAGS_warpctc_dir
,
"libwarpctc.so"
);
#endif
#endif
...
...
paddle/fluid/platform/dynload/dynamic_loader.h
浏览文件 @
1c116462
...
@@ -18,6 +18,12 @@ namespace paddle {
...
@@ -18,6 +18,12 @@ namespace paddle {
namespace
platform
{
namespace
platform
{
namespace
dynload
{
namespace
dynload
{
#ifndef _WIN32
#define DECLARE_TYPE(__name, ...) decltype(__name(__VA_ARGS__))
#else
#define DECLARE_TYPE(__name, ...) decltype(auto)
#endif
void
*
GetCublasDsoHandle
();
void
*
GetCublasDsoHandle
();
void
*
GetCUDNNDsoHandle
();
void
*
GetCUDNNDsoHandle
();
void
*
GetCUPTIDsoHandle
();
void
*
GetCUPTIDsoHandle
();
...
...
paddle/fluid/platform/dynload/mklml.h
浏览文件 @
1c116462
...
@@ -34,7 +34,7 @@ extern void* mklml_dso_handle;
...
@@ -34,7 +34,7 @@ extern void* mklml_dso_handle;
#define DYNAMIC_LOAD_MKLML_WRAP(__name) \
#define DYNAMIC_LOAD_MKLML_WRAP(__name) \
struct DynLoad__##__name { \
struct DynLoad__##__name { \
template <typename... Args> \
template <typename... Args> \
auto operator()(Args... args) ->
decltype(__name(args...)) {
\
auto operator()(Args... args) ->
DECLARE_TYPE(__name, args...) {
\
using mklmlFunc = decltype(&::__name); \
using mklmlFunc = decltype(&::__name); \
std::call_once(mklml_dso_flag, []() { \
std::call_once(mklml_dso_flag, []() { \
mklml_dso_handle = paddle::platform::dynload::GetMKLMLDsoHandle(); \
mklml_dso_handle = paddle::platform::dynload::GetMKLMLDsoHandle(); \
...
...
paddle/fluid/platform/dynload/tensorrt.h
浏览文件 @
1c116462
...
@@ -33,7 +33,7 @@ extern void* tensorrt_dso_handle;
...
@@ -33,7 +33,7 @@ extern void* tensorrt_dso_handle;
#define DECLARE_DYNAMIC_LOAD_TENSORRT_WRAP(__name) \
#define DECLARE_DYNAMIC_LOAD_TENSORRT_WRAP(__name) \
struct DynLoad__##__name { \
struct DynLoad__##__name { \
template <typename... Args> \
template <typename... Args> \
auto operator()(Args... args) ->
decltype(__name(args...)) {
\
auto operator()(Args... args) ->
DECLARE_TYPE(__name, args...) {
\
using tensorrt_func = decltype(__name(args...)) (*)(Args...); \
using tensorrt_func = decltype(__name(args...)) (*)(Args...); \
std::call_once(tensorrt_dso_flag, []() { \
std::call_once(tensorrt_dso_flag, []() { \
tensorrt_dso_handle = \
tensorrt_dso_handle = \
...
...
paddle/fluid/platform/dynload/warpctc.h
浏览文件 @
1c116462
...
@@ -34,7 +34,7 @@ extern void* warpctc_dso_handle;
...
@@ -34,7 +34,7 @@ extern void* warpctc_dso_handle;
#define DYNAMIC_LOAD_WARPCTC_WRAP(__name) \
#define DYNAMIC_LOAD_WARPCTC_WRAP(__name) \
struct DynLoad__##__name { \
struct DynLoad__##__name { \
template <typename... Args> \
template <typename... Args> \
auto operator()(Args... args) ->
decltype(__name(args...)) {
\
auto operator()(Args... args) ->
DECLARE_TYPE(__name, args...) {
\
using warpctcFunc = decltype(&::__name); \
using warpctcFunc = decltype(&::__name); \
std::call_once(warpctc_dso_flag, []() { \
std::call_once(warpctc_dso_flag, []() { \
warpctc_dso_handle = paddle::platform::dynload::GetWarpCTCDsoHandle(); \
warpctc_dso_handle = paddle::platform::dynload::GetWarpCTCDsoHandle(); \
...
...
paddle/fluid/platform/mkldnn_reuse.h
浏览文件 @
1c116462
...
@@ -197,6 +197,130 @@ class MKLDNNHandler {
...
@@ -197,6 +197,130 @@ class MKLDNNHandler {
bool
is_reusing_
;
bool
is_reusing_
;
};
};
class
TransposeMKLDNNHandler
:
public
MKLDNNHandler
{
public:
TransposeMKLDNNHandler
(
std
::
vector
<
int
>&
dims
,
std
::
vector
<
int
>&
axis
,
const
platform
::
MKLDNNDeviceContext
&
dev_ctx
,
mkldnn
::
engine
engine
,
const
std
::
string
&
base_key
)
:
platform
::
MKLDNNHandler
(
dev_ctx
,
engine
,
base_key
),
dims_
(
dims
),
axis_
(
axis
),
logical_axis_
(
dims
.
size
(),
0
)
{}
std
::
shared_ptr
<
mkldnn
::
memory
>
AcquireSrcMemory
(
const
mkldnn
::
memory
::
format
&
fmt
,
void
*
ptr
)
{
auto
local_key
=
key_
+
"@user_src_mem_p"
;
auto
mem_p
=
std
::
static_pointer_cast
<
mkldnn
::
memory
>
(
dev_ctx_
.
GetBlob
(
local_key
));
PADDLE_ENFORCE
((
mem_p
!=
nullptr
)
||
(
is_reusing_
==
false
),
" find mem primitive in device context"
);
if
(
mem_p
==
nullptr
)
{
// Make memory descriptor using input format, unless it
// cannot be trusted (nchw) then make up memory fmt manually
for
(
size_t
i
=
0
;
i
<
logical_axis_
.
size
();
++
i
)
{
logical_axis_
[
i
]
=
i
;
}
auto
src_md
=
fmt
!=
mkldnn
::
memory
::
format
::
nchw
?
platform
::
MKLDNNMemDesc
(
dims_
,
platform
::
MKLDNNGetDataType
<
float
>
(),
fmt
)
:
Axis2MemoryDesc
(
dims_
,
logical_axis_
);
mem_p
=
std
::
make_shared
<
mkldnn
::
memory
>
(
mkldnn
::
memory
::
primitive_desc
{
src_md
,
engine_
},
ptr
);
dev_ctx_
.
SetBlob
(
local_key
,
mem_p
);
}
else
{
mem_p
->
set_data_handle
(
ptr
);
// Mark that reusing happenned. All primitives from operator instance
// should be reused or none of them. So we check consistency
is_reusing_
=
true
;
}
return
mem_p
;
}
std
::
shared_ptr
<
mkldnn
::
memory
>
AcquireDstMemory
(
framework
::
Tensor
*
output
,
platform
::
Place
place
)
{
auto
local_key
=
key_
+
"@user_dst_mem_p"
;
auto
mem_p
=
std
::
static_pointer_cast
<
mkldnn
::
memory
>
(
dev_ctx_
.
GetBlob
(
local_key
));
PADDLE_ENFORCE
((
mem_p
!=
nullptr
)
||
(
is_reusing_
==
false
),
" find mem primitive in device context"
);
if
(
mem_p
==
nullptr
)
{
auto
dst_mdp
=
mkldnn
::
memory
::
primitive_desc
{
Axis2MemoryDesc
(
dims_
,
axis_
),
engine_
};
auto
dst_data
=
output
->
mutable_data
<
float
>
(
place
,
paddle
::
memory
::
Allocator
::
kDefault
,
dst_mdp
.
get_size
());
mem_p
=
std
::
make_shared
<
mkldnn
::
memory
>
(
dst_mdp
,
dst_data
);
dev_ctx_
.
SetBlob
(
local_key
,
mem_p
);
}
else
{
auto
dst_data
=
output
->
mutable_data
<
float
>
(
place
);
mem_p
->
set_data_handle
(
dst_data
);
// Mark that reusing happenned. All primitives from operator instance
// should be reused or none of them. So we check consistency
is_reusing_
=
true
;
}
return
mem_p
;
}
std
::
shared_ptr
<
mkldnn
::
reorder
>
AcquireTranspose
(
std
::
shared_ptr
<
mkldnn
::
memory
>
dst_memory_p
,
std
::
shared_ptr
<
mkldnn
::
memory
>
src_memory_p
)
{
auto
prim_key
=
key_
+
"@transpose_p"
;
auto
transpose_p
=
std
::
static_pointer_cast
<
mkldnn
::
reorder
>
(
dev_ctx_
.
GetBlob
(
prim_key
));
PADDLE_ENFORCE
((
transpose_p
!=
nullptr
)
||
(
is_reusing_
==
false
),
"Fail to find convolution primitive in device context"
);
if
(
transpose_p
==
nullptr
)
{
transpose_p
=
std
::
make_shared
<
mkldnn
::
reorder
>
(
*
(
src_memory_p
),
*
(
dst_memory_p
));
dev_ctx_
.
SetBlob
(
prim_key
,
transpose_p
);
}
else
{
is_reusing_
=
true
;
}
return
transpose_p
;
}
static
std
::
string
GetHash
(
std
::
vector
<
int
>&
shape
,
// NOLINT
std
::
vector
<
int
>&
axis
,
// NOLINT
const
std
::
string
&
suffix
)
{
return
dims2str
(
shape
)
+
dims2str
(
axis
)
+
suffix
;
}
protected:
mkldnn_memory_desc_t
Axis2MemoryDesc
(
std
::
vector
<
int
>&
nchw_tz
,
std
::
vector
<
int
>&
axis
)
{
mkldnn_memory_desc_t
mem_fmt
;
mem_fmt
.
primitive_kind
=
mkldnn_memory
;
mem_fmt
.
ndims
=
axis
.
size
();
for
(
unsigned
int
i
=
0
;
i
<
nchw_tz
.
size
();
++
i
)
{
mem_fmt
.
dims
[
i
]
=
nchw_tz
[
i
];
// logical dimensions (nchw format,
// regardless physical layout)
}
mem_fmt
.
data_type
=
mkldnn_f32
;
mem_fmt
.
format
=
mkldnn_blocked
;
unsigned
int
total_stride
=
1
;
for
(
int
i
=
nchw_tz
.
size
()
-
1
;
i
>=
0
;
--
i
)
{
mem_fmt
.
layout_desc
.
blocking
.
padding_dims
[
i
]
=
nchw_tz
[
i
];
// logical dimensions (nchw format, regardless physical
// layout)
mem_fmt
.
layout_desc
.
blocking
.
block_dims
[
i
]
=
1
;
mem_fmt
.
layout_desc
.
blocking
.
offset_padding_to_data
[
i
]
=
0
;
// no offset
mem_fmt
.
layout_desc
.
blocking
.
strides
[
0
][
axis
[
i
]]
=
total_stride
;
mem_fmt
.
layout_desc
.
blocking
.
strides
[
1
][
axis
[
i
]]
=
1
;
total_stride
*=
nchw_tz
[
axis
[
i
]];
}
mem_fmt
.
layout_desc
.
blocking
.
offset_padding
=
0
;
// no initial offset
return
mem_fmt
;
}
private:
std
::
vector
<
int
>
dims_
;
std
::
vector
<
int
>
axis_
;
std
::
vector
<
int
>
logical_axis_
;
};
template
<
class
forward_t
,
class
backward_data_t
,
class
backward_weights_t
>
template
<
class
forward_t
,
class
backward_data_t
,
class
backward_weights_t
>
class
ConvMKLDNNTemplateHandler
:
public
MKLDNNHandler
{
class
ConvMKLDNNTemplateHandler
:
public
MKLDNNHandler
{
public:
public:
...
...
paddle/fluid/platform/ngraph_helper.h
0 → 100644
浏览文件 @
1c116462
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#ifdef PADDLE_WITH_NGRAPH
#pragma once
#include <functional>
#include <string>
#include <vector>
#include "ngraph/ngraph.hpp"
namespace
paddle
{
namespace
platform
{
static
ngraph
::
Shape
FlattenTo2d
(
ngraph
::
Shape
sh
,
int
num
)
{
auto
x1
=
std
::
accumulate
(
std
::
begin
(
sh
),
std
::
begin
(
sh
)
+
num
,
1
,
std
::
multiplies
<
size_t
>
());
auto
x2
=
std
::
accumulate
(
std
::
begin
(
sh
)
+
num
,
std
::
end
(
sh
),
1
,
std
::
multiplies
<
size_t
>
());
size_t
x1_l
=
static_cast
<
size_t
>
(
x1
);
size_t
x2_l
=
static_cast
<
size_t
>
(
x2
);
return
ngraph
::
Shape
{
x1_l
,
x2_l
};
}
static
std
::
shared_ptr
<
ngraph
::
Node
>
NgReshaper
(
std
::
shared_ptr
<
ngraph
::
Node
>
input
,
ngraph
::
Shape
shape
)
{
std
::
vector
<
size_t
>
input_order
(
input
->
get_shape
().
size
());
std
::
iota
(
std
::
begin
(
input_order
),
std
::
end
(
input_order
),
0
);
return
std
::
make_shared
<
ngraph
::
op
::
Reshape
>
(
input
,
ngraph
::
AxisVector
(
input_order
),
shape
);
}
static
std
::
shared_ptr
<
ngraph
::
Node
>
GetNode
(
const
std
::
shared_ptr
<
paddle
::
framework
::
OperatorBase
>&
op
,
const
std
::
string
prm
,
const
paddle
::
framework
::
VariableNameMap
&
var_map
,
std
::
shared_ptr
<
std
::
unordered_map
<
std
::
string
,
std
::
shared_ptr
<
ngraph
::
Node
>>>
ngb_node_map
)
{
auto
&
var_names
=
var_map
.
at
(
prm
);
PADDLE_ENFORCE_EQ
(
var_names
.
size
(),
1
,
"op %s prm %s expects one associated var"
,
op
->
Type
(),
prm
);
if
(
ngb_node_map
->
find
(
var_names
[
0
])
!=
ngb_node_map
->
end
())
{
return
(
*
ngb_node_map
)[
var_names
[
0
]];
}
else
{
return
nullptr
;
}
}
static
std
::
shared_ptr
<
ngraph
::
Node
>
GetInputNode
(
const
std
::
shared_ptr
<
paddle
::
framework
::
OperatorBase
>&
op
,
const
std
::
string
prm
,
std
::
shared_ptr
<
std
::
unordered_map
<
std
::
string
,
std
::
shared_ptr
<
ngraph
::
Node
>>>
ngb_node_map
)
{
return
GetNode
(
op
,
prm
,
op
->
Inputs
(),
ngb_node_map
);
}
static
std
::
shared_ptr
<
ngraph
::
Node
>
GetOutputNode
(
const
std
::
shared_ptr
<
paddle
::
framework
::
OperatorBase
>&
op
,
const
std
::
string
prm
,
std
::
shared_ptr
<
std
::
unordered_map
<
std
::
string
,
std
::
shared_ptr
<
ngraph
::
Node
>>>
ngb_node_map
)
{
return
GetNode
(
op
,
prm
,
op
->
Outputs
(),
ngb_node_map
);
}
static
void
SetOutputNode
(
const
std
::
shared_ptr
<
paddle
::
framework
::
OperatorBase
>&
op
,
const
std
::
string
prm
,
std
::
shared_ptr
<
ngraph
::
Node
>
node
,
std
::
shared_ptr
<
std
::
unordered_map
<
std
::
string
,
std
::
shared_ptr
<
ngraph
::
Node
>>>
ngb_node_map
)
{
auto
&
var_names
=
op
->
Outputs
().
at
(
prm
);
if
(
var_names
.
size
()
==
1
)
{
(
*
ngb_node_map
)[
var_names
[
0
]]
=
node
;
}
else
if
(
var_names
.
size
()
==
0
)
{
(
*
ngb_node_map
)[
""
]
=
node
;
}
else
{
PADDLE_THROW
(
"prm %s has more than 1 var_names."
,
prm
);
}
}
static
bool
HasOutput
(
const
std
::
shared_ptr
<
paddle
::
framework
::
OperatorBase
>&
op
,
const
std
::
string
prm
)
{
auto
&
outputs
=
op
->
Outputs
();
if
(
outputs
.
find
(
prm
)
==
outputs
.
end
())
return
false
;
return
outputs
.
at
(
prm
).
size
()
>
0
;
}
}
// namespace platform
}
// namespace paddle
#endif
paddle/fluid/platform/port.h
浏览文件 @
1c116462
...
@@ -55,7 +55,6 @@ static void *dlsym(void *handle, const char *symbol_name) {
...
@@ -55,7 +55,6 @@ static void *dlsym(void *handle, const char *symbol_name) {
static
void
*
dlopen
(
const
char
*
filename
,
int
flag
)
{
static
void
*
dlopen
(
const
char
*
filename
,
int
flag
)
{
std
::
string
file_name
(
filename
);
std
::
string
file_name
(
filename
);
file_name
.
replace
(
0
,
file_name
.
size
()
-
1
,
'/'
,
'\\'
);
HMODULE
hModule
=
LoadLibrary
(
file_name
.
c_str
());
HMODULE
hModule
=
LoadLibrary
(
file_name
.
c_str
());
if
(
!
hModule
)
{
if
(
!
hModule
)
{
throw
std
::
runtime_error
(
file_name
+
" not found."
);
throw
std
::
runtime_error
(
file_name
+
" not found."
);
...
...
paddle/fluid/pybind/imperative.cc
浏览文件 @
1c116462
...
@@ -24,8 +24,9 @@ namespace pybind {
...
@@ -24,8 +24,9 @@ namespace pybind {
void
BindTracer
(
pybind11
::
module
*
m
)
{
void
BindTracer
(
pybind11
::
module
*
m
)
{
pybind11
::
class_
<
imperative
::
Tracer
>
(
*
m
,
"Tracer"
,
""
)
pybind11
::
class_
<
imperative
::
Tracer
>
(
*
m
,
"Tracer"
,
""
)
.
def
(
"__init__"
,
.
def
(
"__init__"
,
[](
imperative
::
Tracer
&
self
,
framework
::
BlockDesc
*
root_block
)
{
[](
imperative
::
Tracer
&
self
,
framework
::
BlockDesc
*
root_block
,
new
(
&
self
)
imperative
::
Tracer
(
root_block
);
framework
::
BlockDesc
*
startup_block
)
{
new
(
&
self
)
imperative
::
Tracer
(
root_block
,
startup_block
);
})
})
.
def
(
"trace"
,
&
imperative
::
Tracer
::
Trace
)
.
def
(
"trace"
,
&
imperative
::
Tracer
::
Trace
)
.
def
(
"get_scope"
,
&
imperative
::
Tracer
::
GetScope
,
.
def
(
"get_scope"
,
&
imperative
::
Tracer
::
GetScope
,
...
...
paddle/fluid/pybind/pybind.cc
浏览文件 @
1c116462
...
@@ -990,7 +990,6 @@ All parameter, weight, gradient are variables in Paddle.
...
@@ -990,7 +990,6 @@ All parameter, weight, gradient are variables in Paddle.
cannot be updated after being finalized.)DOC"
);
cannot be updated after being finalized.)DOC"
);
pe
.
def
(
py
::
init
<
const
std
::
vector
<
platform
::
Place
>
&
,
pe
.
def
(
py
::
init
<
const
std
::
vector
<
platform
::
Place
>
&
,
const
std
::
unordered_set
<
std
::
string
>
&
,
const
std
::
unordered_set
<
std
::
string
>
&
,
const
ProgramDesc
&
,
const
std
::
unordered_set
<
std
::
string
>
&
,
const
ProgramDesc
&
,
const
std
::
string
&
,
Scope
*
,
std
::
vector
<
Scope
*>
&
,
const
std
::
string
&
,
Scope
*
,
std
::
vector
<
Scope
*>
&
,
const
ExecutionStrategy
&
,
const
BuildStrategy
&
,
size_t
,
const
ExecutionStrategy
&
,
const
BuildStrategy
&
,
size_t
,
...
...
paddle/scripts/paddle_build.sh
浏览文件 @
1c116462
...
@@ -509,10 +509,10 @@ function assert_api_spec_approvals() {
...
@@ -509,10 +509,10 @@ function assert_api_spec_approvals() {
if
[
${
API_CHANGE
}
]
&&
[
"
${
GIT_PR_ID
}
"
!=
""
]
;
then
if
[
${
API_CHANGE
}
]
&&
[
"
${
GIT_PR_ID
}
"
!=
""
]
;
then
# NOTE: per_page=10000 should be ok for all cases, a PR review > 10000 is not human readable.
# NOTE: per_page=10000 should be ok for all cases, a PR review > 10000 is not human readable.
APPROVALS
=
`
curl
-H
"Authorization: token
${
GITHUB_API_TOKEN
}
"
https://api.github.com/repos/PaddlePaddle/Paddle/pulls/
${
GIT_PR_ID
}
/reviews?per_page
=
10000 |
\
APPROVALS
=
`
curl
-H
"Authorization: token
${
GITHUB_API_TOKEN
}
"
https://api.github.com/repos/PaddlePaddle/Paddle/pulls/
${
GIT_PR_ID
}
/reviews?per_page
=
10000 |
\
python
${
PADDLE_ROOT
}
/tools/check_pr_approval.py
2 7845005 2887803 728699 1334843
3
`
python
${
PADDLE_ROOT
}
/tools/check_pr_approval.py
1 288780
3
`
echo
"current pr
${
GIT_PR_ID
}
got approvals:
${
APPROVALS
}
"
echo
"current pr
${
GIT_PR_ID
}
got approvals:
${
APPROVALS
}
"
if
[
"
${
APPROVALS
}
"
==
"FALSE"
]
;
then
if
[
"
${
APPROVALS
}
"
==
"FALSE"
]
;
then
echo
"You must have
at least 2 approvals
for the api change!
${
API_FILE
}
"
echo
"You must have
panyx0718 approval
for the api change!
${
API_FILE
}
"
exit
1
exit
1
fi
fi
fi
fi
...
@@ -521,10 +521,10 @@ function assert_api_spec_approvals() {
...
@@ -521,10 +521,10 @@ function assert_api_spec_approvals() {
HAS_CONST_CAST
=
`
git diff
-U0
upstream/
$BRANCH
|grep
-o
-m
1
"const_cast"
||
true
`
HAS_CONST_CAST
=
`
git diff
-U0
upstream/
$BRANCH
|grep
-o
-m
1
"const_cast"
||
true
`
if
[
${
HAS_CONST_CAST
}
]
&&
[
"
${
GIT_PR_ID
}
"
!=
""
]
;
then
if
[
${
HAS_CONST_CAST
}
]
&&
[
"
${
GIT_PR_ID
}
"
!=
""
]
;
then
APPROVALS
=
`
curl
-H
"Authorization: token
${
GITHUB_API_TOKEN
}
"
https://api.github.com/repos/PaddlePaddle/Paddle/pulls/
${
GIT_PR_ID
}
/reviews?per_page
=
10000 |
\
APPROVALS
=
`
curl
-H
"Authorization: token
${
GITHUB_API_TOKEN
}
"
https://api.github.com/repos/PaddlePaddle/Paddle/pulls/
${
GIT_PR_ID
}
/reviews?per_page
=
10000 |
\
python
${
PADDLE_ROOT
}
/tools/check_pr_approval.py
2 7845005 2887803 728699 1334843
3
`
python
${
PADDLE_ROOT
}
/tools/check_pr_approval.py
1 288780
3
`
echo
"current pr
${
GIT_PR_ID
}
got approvals:
${
APPROVALS
}
"
echo
"current pr
${
GIT_PR_ID
}
got approvals:
${
APPROVALS
}
"
if
[
"
${
APPROVALS
}
"
==
"FALSE"
]
;
then
if
[
"
${
APPROVALS
}
"
==
"FALSE"
]
;
then
echo
"You must have
at least 2 approvals
for the const_cast"
echo
"You must have
panyx0718 approval
for the const_cast"
exit
1
exit
1
fi
fi
fi
fi
...
...
python/paddle/fluid/__init__.py
浏览文件 @
1c116462
...
@@ -102,6 +102,13 @@ def __bootstrap__():
...
@@ -102,6 +102,13 @@ def __bootstrap__():
import
sys
import
sys
import
os
import
os
import
platform
import
platform
if
os
.
name
==
'nt'
:
third_lib_path
=
os
.
path
.
abspath
(
os
.
path
.
dirname
(
__file__
))
+
os
.
sep
+
'..'
+
os
.
sep
+
'libs'
os
.
environ
[
'path'
]
+=
';'
+
third_lib_path
sys
.
path
.
append
(
third_lib_path
)
from
.
import
core
from
.
import
core
in_test
=
'unittest'
in
sys
.
modules
in_test
=
'unittest'
in
sys
.
modules
...
@@ -128,13 +135,12 @@ def __bootstrap__():
...
@@ -128,13 +135,12 @@ def __bootstrap__():
'free_idle_memory'
,
'paddle_num_threads'
,
"dist_threadpool_size"
,
'free_idle_memory'
,
'paddle_num_threads'
,
"dist_threadpool_size"
,
'eager_delete_tensor_gb'
,
'fast_eager_deletion_mode'
,
'eager_delete_tensor_gb'
,
'fast_eager_deletion_mode'
,
'allocator_strategy'
,
'reader_queue_speed_test_mode'
,
'allocator_strategy'
,
'reader_queue_speed_test_mode'
,
'print_sub_graph_dir'
,
'pe_profile_fname'
'print_sub_graph_dir'
,
'pe_profile_fname'
,
'warpctc_dir'
]
]
if
'Darwin'
not
in
sysstr
:
if
'Darwin'
not
in
sysstr
:
read_env_flags
.
append
(
'use_pinned_memory'
)
read_env_flags
.
append
(
'use_pinned_memory'
)
if
os
.
name
!=
'nt'
:
if
os
.
name
!=
'nt'
:
read_env_flags
.
append
(
'warpctc_dir'
)
read_env_flags
.
append
(
'cpu_deterministic'
)
read_env_flags
.
append
(
'cpu_deterministic'
)
if
core
.
is_compiled_with_dist
():
if
core
.
is_compiled_with_dist
():
...
...
python/paddle/fluid/backward.py
浏览文件 @
1c116462
...
@@ -249,69 +249,6 @@ def serialize_op_decs(op_desc):
...
@@ -249,69 +249,6 @@ def serialize_op_decs(op_desc):
return
proto
.
__str__
()
return
proto
.
__str__
()
def
_callback_lookup_
(
op
):
"""
Only used in _append_backward_ops_
Build and returns a callback function for certain op. For example
parallel_do: AllReduce
:param op:
:return: callback function
"""
if
op
.
type
==
'parallel_do'
and
op
.
attr
(
'use_nccl'
):
all_vars
=
op
.
block
.
vars
param_names
=
set
(
op
.
input
(
'parameters'
))
param_names
=
[
name
for
name
in
param_names
if
all_vars
[
name
].
stop_gradient
is
False
]
param_grad_names
=
[
n
+
"@GRAD"
for
n
in
param_names
]
class
ParallelDoCallBack
(
object
):
def
__init__
(
self
,
param_grad_names
,
parallel_scopes_name
):
self
.
has_inserted_nccl_init
=
False
self
.
param_grad_names
=
param_grad_names
self
.
parallel_scopes_name
=
parallel_scopes_name
def
__call__
(
self
,
block
,
context
):
if
not
self
.
has_inserted_nccl_init
:
op_desc
=
_create_op_desc_
(
"ncclInit"
,
{
"parallel_scopes"
:
self
.
parallel_scopes_name
},
{
"Communicator"
:
[
'nccl_com__do_not_change_'
]},
{})
block
.
program
.
global_block
().
desc
.
append_op
().
copy_from
(
op_desc
)
self
.
has_inserted_nccl_init
=
True
current_op_desc
=
context
[
"__current_op_desc__"
]
for
o_param
in
current_op_desc
.
output_names
():
for
o_argu
in
current_op_desc
.
output
(
o_param
):
if
o_argu
in
self
.
param_grad_names
:
allreduce_out_name
=
o_argu
+
"__nccl_all_reduce__"
op_desc
=
_create_op_desc_
(
"ncclReduce"
,
{
"X"
:
[
o_argu
],
"Communicator"
:
[
'nccl_com__do_not_change_'
]
},
{
"Out"
:
[
allreduce_out_name
]},
{
"reduction"
:
"ncclSum"
,
"root"
:
0
},
)
block
.
desc
.
append_op
().
copy_from
(
op_desc
)
op_desc
=
_create_op_desc_
(
"assign"
,
{
"X"
:
[
allreduce_out_name
]},
{
"Out"
:
[
o_argu
]},
{})
block
.
desc
.
append_op
().
copy_from
(
op_desc
)
return
ParallelDoCallBack
(
param_grad_names
,
op
.
output
(
"parallel_scopes"
))
else
:
return
None
def
_append_backward_ops_
(
block
,
def
_append_backward_ops_
(
block
,
ops
,
ops
,
target_block
,
target_block
,
...
@@ -349,15 +286,6 @@ def _append_backward_ops_(block,
...
@@ -349,15 +286,6 @@ def _append_backward_ops_(block,
sub_block
=
program
.
block
(
op
.
_block_attr_id
(
"sub_block"
))
sub_block
=
program
.
block
(
op
.
_block_attr_id
(
"sub_block"
))
grad_sub_block
=
program
.
_create_block
()
grad_sub_block
=
program
.
_create_block
()
grad_sub_block
.
_set_forward_block_idx
(
sub_block
.
idx
)
grad_sub_block
.
_set_forward_block_idx
(
sub_block
.
idx
)
cb
=
_callback_lookup_
(
op
)
if
cb
is
not
None
:
if
callbacks
is
None
:
new_callbacks
=
[
cb
]
else
:
new_callbacks
=
callbacks
+
[
_callback_lookup_
(
op
)]
_append_backward_ops_
(
sub_block
,
sub_block
.
ops
,
grad_sub_block
,
no_grad_dict
,
grad_to_var
,
new_callbacks
)
else
:
_append_backward_ops_
(
sub_block
,
sub_block
.
ops
,
grad_sub_block
,
_append_backward_ops_
(
sub_block
,
sub_block
.
ops
,
grad_sub_block
,
no_grad_dict
,
grad_to_var
,
callbacks
)
no_grad_dict
,
grad_to_var
,
callbacks
)
...
@@ -424,9 +352,6 @@ def _append_backward_vars_(block, start_op_idx, grad_to_var, grad_info_map):
...
@@ -424,9 +352,6 @@ def _append_backward_vars_(block, start_op_idx, grad_to_var, grad_info_map):
# infer_shape and infer_type
# infer_shape and infer_type
op_desc
.
infer_var_type
(
block
.
desc
)
op_desc
.
infer_var_type
(
block
.
desc
)
op_desc
.
infer_shape
(
block
.
desc
)
op_desc
.
infer_shape
(
block
.
desc
)
# ncclInit dones't need to set data_type
if
op_desc
.
type
()
==
'ncclInit'
:
continue
for
arg
in
op_desc
.
output_arg_names
():
for
arg
in
op_desc
.
output_arg_names
():
if
arg
in
new_vars
:
if
arg
in
new_vars
:
_infer_var_data_type_
(
arg
,
block
)
_infer_var_data_type_
(
arg
,
block
)
...
...
python/paddle/fluid/contrib/__init__.py
浏览文件 @
1c116462
...
@@ -22,9 +22,12 @@ from . import op_frequence
...
@@ -22,9 +22,12 @@ from . import op_frequence
from
.op_frequence
import
*
from
.op_frequence
import
*
from
.
import
quantize
from
.
import
quantize
from
.quantize
import
*
from
.quantize
import
*
from
.
import
utils
from
.utils
import
*
__all__
=
[]
__all__
=
[]
__all__
+=
decoder
.
__all__
__all__
+=
decoder
.
__all__
__all__
+=
memory_usage_calc
.
__all__
__all__
+=
memory_usage_calc
.
__all__
__all__
+=
op_frequence
.
__all__
__all__
+=
op_frequence
.
__all__
__all__
+=
quantize
.
__all__
__all__
+=
quantize
.
__all__
__all__
+=
utils
.
__all__
python/paddle/fluid/contrib/utils/__init__.py
浏览文件 @
1c116462
...
@@ -13,10 +13,11 @@
...
@@ -13,10 +13,11 @@
# limitations under the License.
# limitations under the License.
from
__future__
import
print_function
from
__future__
import
print_function
#
from . import lookup_table_utils
from
.
import
lookup_table_utils
#
from .lookup_table_utils import *
from
.lookup_table_utils
import
*
from
.
import
hdfs_utils
from
.
import
hdfs_utils
from
.hdfs_utils
import
*
from
.hdfs_utils
import
*
#__all__ = lookup_table_utils.__all__
__all__
=
[]
__all__
=
hdfs_utils
.
__all__
__all__
+=
lookup_table_utils
.
__all__
__all__
+=
hdfs_utils
.
__all__
python/paddle/fluid/contrib/utils/hdfs_utils.py
浏览文件 @
1c116462
...
@@ -14,6 +14,7 @@
...
@@ -14,6 +14,7 @@
"""HDFS Utils"""
"""HDFS Utils"""
import
os
import
os
import
sys
import
subprocess
import
subprocess
import
multiprocessing
import
multiprocessing
from
datetime
import
datetime
from
datetime
import
datetime
...
@@ -24,7 +25,7 @@ import errno
...
@@ -24,7 +25,7 @@ import errno
import
logging
import
logging
__all__
=
[
"HDFSClient"
,
"multi_download"
]
__all__
=
[
"HDFSClient"
,
"multi_download"
,
"multi_upload"
]
logging
.
basicConfig
(
format
=
'%(asctime)s - %(levelname)s - %(message)s'
)
logging
.
basicConfig
(
format
=
'%(asctime)s - %(levelname)s - %(message)s'
)
_logger
=
logging
.
getLogger
(
"hdfs_utils"
)
_logger
=
logging
.
getLogger
(
"hdfs_utils"
)
...
@@ -94,11 +95,13 @@ class HDFSClient(object):
...
@@ -94,11 +95,13 @@ class HDFSClient(object):
def
upload
(
self
,
hdfs_path
,
local_path
,
overwrite
=
False
,
retry_times
=
5
):
def
upload
(
self
,
hdfs_path
,
local_path
,
overwrite
=
False
,
retry_times
=
5
):
"""
"""
upload the local file to hdfs
upload the local file to hdfs
Args:
Args:
hdfs_path: hdfs path, target path
hdfs_path(str): the hdfs file path
local_path: local file path, source path
local_path(str): the local file path
overwrite: will overwrite the original file
overwrite(bool|None): will overwrite the file on HDFS or not
retry_times: max times retry to upload
retry_times(int|5): retry times
Returns:
Returns:
True or False
True or False
"""
"""
...
@@ -109,7 +112,7 @@ class HDFSClient(object):
...
@@ -109,7 +112,7 @@ class HDFSClient(object):
_logger
.
warn
(
_logger
.
warn
(
"The Local path: {} is dir and I will support it later, return"
.
"The Local path: {} is dir and I will support it later, return"
.
format
(
local_path
))
format
(
local_path
))
return
return
False
base
=
os
.
path
.
basename
(
local_path
)
base
=
os
.
path
.
basename
(
local_path
)
if
not
self
.
is_exist
(
hdfs_path
):
if
not
self
.
is_exist
(
hdfs_path
):
...
@@ -141,13 +144,15 @@ class HDFSClient(object):
...
@@ -141,13 +144,15 @@ class HDFSClient(object):
def
download
(
self
,
hdfs_path
,
local_path
,
overwrite
=
False
,
unzip
=
False
):
def
download
(
self
,
hdfs_path
,
local_path
,
overwrite
=
False
,
unzip
=
False
):
"""
"""
download from hdfs
download file from HDFS
Args:
Args:
hdfs_path: hdfs path, target path
hdfs_path(str): the hdfs file path
local_path: local file path, source path
local_path(str): the local file path
overwrite: will remove original file and overwrite it.
overwrite(bool|None): will overwrite the file on HDFS or not
unzip: ignore this param
unzip(bool|False): if the download file is compressed by zip, unzip it or not.
Returns
Returns:
True or False
True or False
"""
"""
_logger
.
info
(
'Downloading %r to %r.'
,
hdfs_path
,
local_path
)
_logger
.
info
(
'Downloading %r to %r.'
,
hdfs_path
,
local_path
)
...
@@ -188,11 +193,11 @@ class HDFSClient(object):
...
@@ -188,11 +193,11 @@ class HDFSClient(object):
def
is_exist
(
self
,
hdfs_path
=
None
):
def
is_exist
(
self
,
hdfs_path
=
None
):
"""
"""
whether the remote hdfs path exists?
whether the remote HDFS path exists
Args:
Args:
hdfs_path: default value(${OUTPUT_PATH}/${SYS_USER_ID}/${SYS_JOB_ID}/tmp)
hdfs_path(str): the hdfs file path
fs_name: The default values are the same as in the job configuration
fs_ugi: The default values are the same as in the job configuration
Returns:
Returns:
True or False
True or False
"""
"""
...
@@ -211,11 +216,11 @@ class HDFSClient(object):
...
@@ -211,11 +216,11 @@ class HDFSClient(object):
def
is_dir
(
self
,
hdfs_path
=
None
):
def
is_dir
(
self
,
hdfs_path
=
None
):
"""
"""
whether the remote hdfs path exists?
whether the remote HDFS path is directory
Args:
Args:
remote_file_path: default value(${OUTPUT_PATH}/${SYS_USER_ID}/${SYS_JOB_ID}/tmp)
hdfs_path(str): the hdfs file path
fs_name: The default values are the same as in the job configuration
fs_ugi: The default values are the same as in the job configuration
Returns:
Returns:
True or False
True or False
"""
"""
...
@@ -239,15 +244,15 @@ class HDFSClient(object):
...
@@ -239,15 +244,15 @@ class HDFSClient(object):
"""
"""
Remove a file or directory from HDFS.
Remove a file or directory from HDFS.
whether the remote HDFS path exists
Args:
Args:
param hdfs_path: HDFS path.
hdfs_path: HDFS path.
param recursive: Recursively delete files and directories. By default,
this method will raise an :class:`HdfsError` if trying to delete a
non-empty directory.
Returns:
Returns:
True or False
This function returns `True` if the deletion was successful and `False` if
This function returns `True` if the deletion was successful and `False` if
no file or directory previously existed at `hdfs_path`.
no file or directory previously existed at `hdfs_path`.
"""
"""
_logger
.
info
(
'Deleting %r.'
,
hdfs_path
)
_logger
.
info
(
'Deleting %r.'
,
hdfs_path
)
...
@@ -273,16 +278,14 @@ class HDFSClient(object):
...
@@ -273,16 +278,14 @@ class HDFSClient(object):
def
rename
(
self
,
hdfs_src_path
,
hdfs_dst_path
,
overwrite
=
False
):
def
rename
(
self
,
hdfs_src_path
,
hdfs_dst_path
,
overwrite
=
False
):
"""
"""
Rename a file or folder.
Move a file or folder on HDFS.
Args:
Args:
:param hdfs_src_path: Source path.
hdfs_path(str): HDFS path.
:param hdfs_dst_path: Destination path. If the path already exists and is
overwrite(bool|False): If the path already exists and overwrite is False, will return False.
a directory, the source will be moved into it. If the path exists and is
a file, or if a parent destination directory is missing, this method will
raise an :class:`HdfsError`.
Returns:
Returns:
This function returns `True` if the rename was successful and `False` if
True or False
rename was faild.
"""
"""
assert
hdfs_src_path
is
not
None
assert
hdfs_src_path
is
not
None
assert
hdfs_dst_path
is
not
None
assert
hdfs_dst_path
is
not
None
...
@@ -320,17 +323,20 @@ class HDFSClient(object):
...
@@ -320,17 +323,20 @@ class HDFSClient(object):
raise
raise
def
makedirs
(
self
,
hdfs_path
):
def
makedirs
(
self
,
hdfs_path
):
"""Create a remote directory, recursively if necessary.
"""
Create a remote directory, recursively if necessary.
Args:
Args:
:param hdfs_path: Remote path. Intermediate directories will be created
hdfs_path(str): Remote path. Intermediate directories will be created appropriately.
appropriately.
Returns:
Returns:
True
if make a directories was successful, False when make a directiries was failed.
True
or False
"""
"""
_logger
.
info
(
'Creating directories to %r.'
,
hdfs_path
)
_logger
.
info
(
'Creating directories to %r.'
,
hdfs_path
)
assert
hdfs_path
is
not
None
assert
hdfs_path
is
not
None
if
self
.
is_exist
(
hdfs_path
):
if
self
.
is_exist
(
hdfs_path
):
_logger
.
error
(
"HDFS path is exist: {}"
.
format
(
hdfs_path
))
return
return
mkdirs_commands
=
[
'-mkdir'
,
hdfs_path
]
mkdirs_commands
=
[
'-mkdir'
,
hdfs_path
]
...
@@ -346,11 +352,13 @@ class HDFSClient(object):
...
@@ -346,11 +352,13 @@ class HDFSClient(object):
def
ls
(
self
,
hdfs_path
):
def
ls
(
self
,
hdfs_path
):
"""
"""
ls a hdfs_path.
ls directory contents about HDFS hdfs_path
Args:
Args:
:param hdfs_path: hdfs_path will be ls.
hdfs_path(str): Remote HDFS path will be ls.
Returns:
Returns:
This function returns a `list` that contaion all files in the hdfs_path.
List: a contents list about hdfs_path.
"""
"""
assert
hdfs_path
is
not
None
assert
hdfs_path
is
not
None
...
@@ -378,11 +386,15 @@ class HDFSClient(object):
...
@@ -378,11 +386,15 @@ class HDFSClient(object):
def
lsr
(
self
,
hdfs_path
,
only_file
=
True
,
sort
=
True
):
def
lsr
(
self
,
hdfs_path
,
only_file
=
True
,
sort
=
True
):
"""
"""
ls a hdfs_path sort by time.
list directory contents about HDFS hdfs_path recursively
Args:
Args:
:param hdfs_path: hdfs_path will be ls.
hdfs_path(str): Remote HDFS path.
only_file(bool|True): will discard folders.
sort(bool|True): will be sorted by create time.
Returns:
Returns:
This function returns a `list` that contaion all files sorted by time in the hdfs_path.
List: a contents list about hdfs_path.
"""
"""
def
sort_by_time
(
v1
,
v2
):
def
sort_by_time
(
v1
,
v2
):
...
@@ -422,61 +434,54 @@ class HDFSClient(object):
...
@@ -422,61 +434,54 @@ class HDFSClient(object):
return
ret_lines
return
ret_lines
def
multi_
up
load
(
client
,
def
multi_
down
load
(
client
,
hdfs_path
,
hdfs_path
,
local_path
,
local_path
,
multi_processes
=
5
,
trainer_id
,
overwrite
=
False
):
trainers
,
multi_processes
=
5
):
"""
"""
Upload file to hdfs.
Download files from HDFS using multi process.
Args:
Args:
:param overwrite: will overwrite hdfs file or no
t
client(HDFSClient): instance of HDFSClien
t
:param multi_processes: the upload data process at the same time, default=5
hdfs_path(str): path on hdfs
:param client: instance of HDFSClient
local_path(str): path on local
:param hdfs_path: path on hdfs
trainer_id(int): current trainer id
:param local_path: path on local
trainers(int): all trainers number
Returns:
multi_processes(int|5): the download data process at the same time, default=5
Returns:
List:
Download files in local folder.
"""
"""
def
__subprocess_
up
load
(
datas
):
def
__subprocess_
down
load
(
datas
):
for
data
in
datas
:
for
data
in
datas
:
re_path
=
os
.
path
.
relpath
(
os
.
path
.
dirname
(
data
),
local_path
)
re_path
=
os
.
path
.
relpath
(
os
.
path
.
dirname
(
data
),
hdfs_path
)
hdfs_re_path
=
os
.
path
.
join
(
hdfs_path
,
re_path
)
if
re_path
==
os
.
curdir
:
client
.
upload
(
hdfs_re_path
,
data
,
overwrite
,
retry_times
=
5
)
sub_local_re_path
=
local_path
else
:
def
get_local_files
(
path
):
sub_local_re_path
=
os
.
path
.
join
(
local_path
,
re_path
)
"""
client
.
download
(
data
,
sub_local_re_path
)
Get all local files
Args:
path: local file path
Returns:
A list that contation all files in the path.
"""
rlist
=
[]
if
not
os
.
path
.
isdir
(
path
):
assert
isinstance
(
client
,
HDFSClient
)
return
rlist
for
dirname
,
folder
,
files
in
os
.
walk
(
path
):
client
.
make_local_dirs
(
local_path
)
for
i
in
files
:
_logger
.
info
(
"Make local dir {} successfully"
.
format
(
local_path
))
t
=
os
.
path
.
join
(
dirname
,
i
)
rlist
.
append
(
t
)
return
rlist
assert
isinstance
(
client
,
HDFSClient
)
all_need_download
=
client
.
lsr
(
hdfs_path
,
sort
=
True
)
need_download
=
all_need_download
[
trainer_id
::
trainers
]
_logger
.
info
(
"Get {} files From all {} files need to be download from {}"
.
format
(
len
(
need_download
),
len
(
all_need_download
),
hdfs_path
))
all_files
=
get_local_files
(
local_path
)
_logger
.
info
(
"Start {} multi process to download datas"
.
format
(
if
not
all_files
:
_logger
.
info
(
"there are nothing need to upload, exit"
)
return
_logger
.
info
(
"Start {} multi process to upload datas"
.
format
(
multi_processes
))
multi_processes
))
procs
=
[]
procs
=
[]
for
i
in
range
(
multi_processes
):
for
i
in
range
(
multi_processes
):
process_datas
=
all_files
[
i
::
multi_processes
]
process_datas
=
need_download
[
i
::
multi_processes
]
p
=
multiprocessing
.
Process
(
p
=
multiprocessing
.
Process
(
target
=
__subprocess_
up
load
,
args
=
(
process_datas
,
))
target
=
__subprocess_
down
load
,
args
=
(
process_datas
,
))
procs
.
append
(
p
)
procs
.
append
(
p
)
p
.
start
()
p
.
start
()
...
@@ -484,55 +489,84 @@ def multi_upload(client,
...
@@ -484,55 +489,84 @@ def multi_upload(client,
for
proc
in
procs
:
for
proc
in
procs
:
proc
.
join
()
proc
.
join
()
_logger
.
info
(
"Finish {} multi process to
up
load datas"
.
format
(
_logger
.
info
(
"Finish {} multi process to
down
load datas"
.
format
(
multi_processes
))
multi_processes
))
local_downloads
=
[]
for
data
in
need_download
:
data_name
=
os
.
path
.
basename
(
data
)
re_path
=
os
.
path
.
relpath
(
os
.
path
.
dirname
(
data
),
hdfs_path
)
if
re_path
==
os
.
curdir
:
local_re_path
=
os
.
path
.
join
(
local_path
,
data_name
)
else
:
local_re_path
=
os
.
path
.
join
(
local_path
,
re_path
,
data_name
)
local_downloads
.
append
(
local_re_path
)
return
local_downloads
def
multi_download
(
client
,
def
getfilelist
(
path
):
rlist
=
[]
for
dir
,
folder
,
file
in
os
.
walk
(
path
):
for
i
in
file
:
t
=
os
.
path
.
join
(
dir
,
i
)
rlist
.
append
(
t
)
for
r
in
rlist
:
print
(
r
)
def
multi_upload
(
client
,
hdfs_path
,
hdfs_path
,
local_path
,
local_path
,
trainer_id
,
multi_processes
=
5
,
trainers
,
overwrite
=
False
,
file_cnt
,
sync
=
True
):
multi_processes
=
5
):
"""
"""
multi_download
Upload files to HDFS using multi process.
Args:
Args:
:param client: instance of HDFSClient
client(HDFSClient): instance of HDFSClient
:param hdfs_path: path on hdfs
hdfs_path(str): path on hdfs
:param local_path: path on local
local_path(str): path on local
:param trainer_id: current trainer id
multi_processes(int|5): the upload data process at the same time, default=5
:param trainers: all trainers number
overwrite(bool|False): will overwrite file on HDFS or not
:param file_cnt: all file number
sync(bool|True): upload files sync or not.
:param multi_processes: the download data process at the same time, default=5
:return: None
Returns:
Returns:
A list that be downloaded.
None
"""
"""
def
__subprocess_
down
load
(
datas
):
def
__subprocess_
up
load
(
datas
):
for
data
in
datas
:
for
data
in
datas
:
re_path
=
os
.
path
.
relpath
(
os
.
path
.
dirname
(
data
),
hdfs
_path
)
re_path
=
os
.
path
.
relpath
(
os
.
path
.
dirname
(
data
),
local
_path
)
local_re_path
=
os
.
path
.
join
(
local
_path
,
re_path
)
hdfs_re_path
=
os
.
path
.
join
(
hdfs
_path
,
re_path
)
client
.
download
(
data
,
local_re_path
)
client
.
upload
(
hdfs_re_path
,
data
,
overwrite
,
retry_times
=
5
)
assert
isinstance
(
client
,
HDFSClient
)
def
get_local_files
(
path
):
rlist
=
[]
client
.
make_local_dirs
(
local_path
)
if
not
os
.
path
.
isdir
(
path
):
_logger
.
info
(
"Make local dir {} successfully"
.
format
(
local_path
))
return
rlist
all_need_download
=
client
.
lsr
(
hdfs_path
,
sort
=
True
)[:
file_cnt
]
for
dirname
,
folder
,
files
in
os
.
walk
(
path
):
need_download
=
all_need_download
[
trainer_id
::
trainers
]
for
i
in
files
:
_logger
.
info
(
"Get {} files From all {} files need to be download from {}"
.
t
=
os
.
path
.
join
(
dirname
,
i
)
format
(
len
(
need_download
),
len
(
all_need_download
),
hdfs_path
))
rlist
.
append
(
t
)
return
rlist
_logger
.
info
(
"Start {} multi process to download datas"
.
format
(
assert
isinstance
(
client
,
HDFSClient
)
all_files
=
get_local_files
(
local_path
)
if
not
all_files
:
_logger
.
info
(
"there are nothing need to upload, exit"
)
return
_logger
.
info
(
"Start {} multi process to upload datas"
.
format
(
multi_processes
))
multi_processes
))
procs
=
[]
procs
=
[]
for
i
in
range
(
multi_processes
):
for
i
in
range
(
multi_processes
):
process_datas
=
need_download
[
i
::
multi_processes
]
process_datas
=
all_files
[
i
::
multi_processes
]
p
=
multiprocessing
.
Process
(
p
=
multiprocessing
.
Process
(
target
=
__subprocess_
down
load
,
args
=
(
process_datas
,
))
target
=
__subprocess_
up
load
,
args
=
(
process_datas
,
))
procs
.
append
(
p
)
procs
.
append
(
p
)
p
.
start
()
p
.
start
()
...
@@ -540,18 +574,9 @@ def multi_download(client,
...
@@ -540,18 +574,9 @@ def multi_download(client,
for
proc
in
procs
:
for
proc
in
procs
:
proc
.
join
()
proc
.
join
()
_logger
.
info
(
"Finish {} multi process to
down
load datas"
.
format
(
_logger
.
info
(
"Finish {} multi process to
up
load datas"
.
format
(
multi_processes
))
multi_processes
))
local_downloads
=
[]
for
data
in
need_download
:
data_name
=
os
.
path
.
basename
(
data
)
re_path
=
os
.
path
.
relpath
(
os
.
path
.
dirname
(
data
),
hdfs_path
)
local_re_path
=
os
.
path
.
join
(
local_path
,
re_path
,
data_name
)
local_downloads
.
append
(
local_re_path
)
return
local_downloads
if
__name__
==
"__main__"
:
if
__name__
==
"__main__"
:
hadoop_home
=
"/home/client/hadoop-client/hadoop/"
hadoop_home
=
"/home/client/hadoop-client/hadoop/"
...
...
python/paddle/fluid/contrib/utils/lookup_table_utils.py
浏览文件 @
1c116462
...
@@ -18,14 +18,12 @@ import os
...
@@ -18,14 +18,12 @@ import os
import
time
import
time
import
logging
import
logging
import
paddle
import
paddle.fluid
as
fluid
from
paddle.fluid
import
core
from
paddle.fluid
import
core
from
paddle.fluid
import
io
from
paddle.fluid
import
io
from
paddle.fluid
import
Program
from
paddle.fluid
import
Program
__all__
=
[
__all__
=
[
"load_
inference_model"
,
"load_persistable_vars
"
,
"load_
persistables_for_increment"
,
"load_persistables_for_inference
"
,
"convert_dist_to_sparse_program"
"convert_dist_to_sparse_program"
]
]
...
@@ -80,19 +78,28 @@ def __get_prefetch_op_tuples(main_program):
...
@@ -80,19 +78,28 @@ def __get_prefetch_op_tuples(main_program):
return
prefetch_op_tuples
return
prefetch_op_tuples
def
convert_dist_to_sparse_program
(
main_program
):
def
convert_dist_to_sparse_program
(
program
):
if
not
main_program
.
_distributed_lookup_table
:
"""
WARNING: this function will only be used for distributed training with distributed lookup table.
when we train model with distributed lookup table but want to do the local inference, we can use
this function to convert the train program with distributed lookup table to sparse lookup table.
:param program(Program): the program must be the trainer program, which will be get by the distribute transpiler.
:return:
program: The `program` is a Program, it's the program replace distributed lookup table to sparse lookup table.
"""
if
not
program
.
_distributed_lookup_table
:
_logger
.
warn
(
_logger
.
warn
(
"There are no distributed lookup tables need to be converted"
)
"There are no distributed lookup tables need to be converted"
)
return
return
# create table param and grad var in pserver program
# create table param and grad var in pserver program
origin_emb_var
=
"{}.origin"
.
format
(
main_
program
.
_distributed_lookup_table
)
origin_emb_var
=
"{}.origin"
.
format
(
program
.
_distributed_lookup_table
)
emb_var
=
main_
program
.
_distributed_lookup_table
emb_var
=
program
.
_distributed_lookup_table
main_
program
.
global_block
().
_rename_var
(
emb_var
,
origin_emb_var
)
program
.
global_block
().
_rename_var
(
emb_var
,
origin_emb_var
)
origin_param_var
=
main_
program
.
global_block
().
vars
[
origin_emb_var
]
origin_param_var
=
program
.
global_block
().
vars
[
origin_emb_var
]
param_var
=
main_
program
.
global_block
().
create_var
(
param_var
=
program
.
global_block
().
create_var
(
name
=
emb_var
,
name
=
emb_var
,
shape
=
origin_param_var
.
shape
,
shape
=
origin_param_var
.
shape
,
dtype
=
origin_param_var
.
dtype
,
dtype
=
origin_param_var
.
dtype
,
...
@@ -100,28 +107,28 @@ def convert_dist_to_sparse_program(main_program):
...
@@ -100,28 +107,28 @@ def convert_dist_to_sparse_program(main_program):
persistable
=
True
)
persistable
=
True
)
# parameter must be selected rows
# parameter must be selected rows
param_var
.
desc
.
set_type
(
core
.
VarDesc
.
VarType
.
SELECTED_ROWS
)
param_var
.
desc
.
set_type
(
core
.
VarDesc
.
VarType
.
SELECTED_ROWS
)
main_
program
.
_sync_with_cpp
()
program
.
_sync_with_cpp
()
prefetch_op_tuples
=
__get_prefetch_op_tuples
(
main_
program
)
prefetch_op_tuples
=
__get_prefetch_op_tuples
(
program
)
split_ids_id
=
prefetch_op_tuples
[
0
]
split_ids_id
=
prefetch_op_tuples
[
0
]
for
idx
in
range
(
split_ids_id
+
2
,
split_ids_id
-
1
,
-
1
):
for
idx
in
range
(
split_ids_id
+
2
,
split_ids_id
-
1
,
-
1
):
main_
program
.
global_block
().
_remove_op
(
idx
)
program
.
global_block
().
_remove_op
(
idx
)
main_
program
.
desc
.
flush
()
program
.
desc
.
flush
()
in_out_pairs
=
zip
(
prefetch_op_tuples
[
1
],
prefetch_op_tuples
[
2
])
in_out_pairs
=
zip
(
prefetch_op_tuples
[
1
],
prefetch_op_tuples
[
2
])
for
in_out_pair
in
in_out_pairs
:
for
in_out_pair
in
in_out_pairs
:
idx
=
split_ids_id
idx
=
split_ids_id
ids
=
main_
program
.
global_block
().
vars
[
in_out_pair
[
0
]]
ids
=
program
.
global_block
().
vars
[
in_out_pair
[
0
]]
out
=
main_
program
.
global_block
().
vars
[
in_out_pair
[
1
]]
out
=
program
.
global_block
().
vars
[
in_out_pair
[
1
]]
__insert_lookup_sparse_table_op
(
main_
program
,
idx
,
ids
,
param_var
,
out
)
__insert_lookup_sparse_table_op
(
program
,
idx
,
ids
,
param_var
,
out
)
main_
program
.
desc
.
flush
()
program
.
desc
.
flush
()
return
main_
program
return
program
def
load_persistable_vars
(
executor
,
dirname
,
program
,
lookup_table_var
):
def
_load_persistable_vars
(
executor
,
dirname
,
program
,
lookup_table_vars
):
def
_is_checkpoint_var
(
exclude_fluid_vars
=
None
):
def
_is_checkpoint_var
(
exclude_fluid_vars
=
None
):
"""
"""
the checkpoint will not save or load all the variables.
the checkpoint will not save or load all the variables.
...
@@ -159,7 +166,81 @@ def load_persistable_vars(executor, dirname, program, lookup_table_var):
...
@@ -159,7 +166,81 @@ def load_persistable_vars(executor, dirname, program, lookup_table_var):
return
is_valid
return
is_valid
def
_load_lookup_table_vars
(
executor
,
dirname
,
main_program
,
io
.
load_vars
(
executor
,
dirname
=
dirname
,
main_program
=
program
,
predicate
=
_is_checkpoint_var
(
lookup_table_vars
),
filename
=
None
)
def
load_persistables_for_increment
(
dirname
,
executor
,
program
,
lookup_table_var
,
lookup_table_var_path
):
"""
WARNING: this function will only be used for distributed training with distributed lookup table.
for increment trainning, the pserver will not only load dense variables,
but also load the suitable lookup table var. Because of slice lookup table
var with HASH, we must load the correct slice var.
:param dirname(str): The directory path
:param executor(Executor): The executor to run for loading inference model.
:param program(Program): The parameter server program, which will run on Pserver.
:param lookup_table_var: the distributed lookup tables var name.
:param lookup_table_var_path: the the distributed lookup tables var location.
:return: None
"""
def
__load_lookup_table_vars
(
executor
,
main_program
,
lookup_table_var
,
lookup_table_var_path
):
emb_var
=
main_program
.
global_block
().
var
(
lookup_table_var
)
load_program
=
Program
()
load_block
=
load_program
.
global_block
()
load_block
.
append_op
(
type
=
'load'
,
inputs
=
{},
outputs
=
{
'Out'
:
[
emb_var
]},
attrs
=
{
'file_path'
:
lookup_table_var_path
})
executor
.
run
(
load_program
)
if
not
os
.
path
.
isdir
(
dirname
):
raise
ValueError
(
"There is no directory named '%s'"
,
dirname
)
if
not
os
.
path
.
exists
(
lookup_table_var_path
):
raise
ValueError
(
"There is no file named '%s'"
,
lookup_table_var_path
)
if
not
isinstance
(
program
,
Program
):
raise
ValueError
(
"program must be an instance of fluid.Program"
)
_logger
.
info
(
"Start Load Sparse Program With "
"Distributed Lookup Table Vars from {}, time = {}"
.
format
(
dirname
,
time
.
ctime
()))
_load_persistable_vars
(
executor
,
dirname
,
program
,
[
lookup_table_var
])
__load_lookup_table_vars
(
executor
,
program
,
lookup_table_var
,
lookup_table_var_path
)
_logger
.
info
(
"Finish Load Sparse Program With "
"Distributed Lookup Table Vars from {}, time = {}"
.
format
(
dirname
,
time
.
ctime
()))
def
load_persistables_for_inference
(
dirname
,
executor
,
program
,
lookup_table_var_name
):
"""
WARNING: this function will only be used for inference with distributed lookup table.
Inference with distributed lookup table is a little funky, this function will load distributed
lookup table vars into sparse var, can be used in local inference mode.
:param dirname(str): The directory path
:param executor(Executor): The executor to run for loading inference model.
:param program(Program): The parameter server program, which will run on Pserver.
:param lookup_table_var_name: the distributed lookup tables var name.
:return: None
"""
def
__load_lookup_table_vars
(
executor
,
dirname
,
main_program
,
lookup_table_vars
):
lookup_table_vars
):
if
not
os
.
path
.
isdir
(
dirname
):
if
not
os
.
path
.
isdir
(
dirname
):
raise
ValueError
(
"There is no directory named '%s'"
,
dirname
)
raise
ValueError
(
"There is no directory named '%s'"
,
dirname
)
...
@@ -209,30 +290,13 @@ def load_persistable_vars(executor, dirname, program, lookup_table_var):
...
@@ -209,30 +290,13 @@ def load_persistable_vars(executor, dirname, program, lookup_table_var):
global_block
.
append_op
(
type
=
'delete_var'
,
inputs
=
{
'X'
:
sums
})
global_block
.
append_op
(
type
=
'delete_var'
,
inputs
=
{
'X'
:
sums
})
executor
.
run
(
convert_program
)
executor
.
run
(
convert_program
)
_logger
.
info
(
"Start Load Sparse Program With "
"Distributed Lookup Table Vars from {}, time = {}"
.
format
(
dirname
,
time
.
ctime
()))
lookup_table_vars
=
[
lookup_table_var
]
io
.
load_vars
(
executor
,
dirname
=
dirname
,
main_program
=
program
,
predicate
=
_is_checkpoint_var
(
lookup_table_vars
),
filename
=
None
)
_load_lookup_table_vars
(
executor
,
dirname
,
program
,
lookup_table_vars
)
_logger
.
info
(
"Finish Load Sparse Program With "
"Distributed Lookup Table Vars from {}, time = {}"
.
format
(
dirname
,
time
.
ctime
()))
def
load_inference_model
(
dirname
,
executor
,
lookup_table_var_name
):
if
not
os
.
path
.
isdir
(
dirname
):
if
not
os
.
path
.
isdir
(
dirname
):
raise
ValueError
(
"There is no directory named '%s'"
,
dirname
)
raise
ValueError
(
"There is no directory named '%s'"
,
dirname
)
if
program
:
if
not
isinstance
(
program
,
Program
):
raise
ValueError
(
"program must be an instance of fluid.Program"
)
else
:
local_model
=
os
.
path
.
join
(
dirname
,
model_filename
)
local_model
=
os
.
path
.
join
(
dirname
,
model_filename
)
with
open
(
local_model
,
"rb"
)
as
f
:
with
open
(
local_model
,
"rb"
)
as
f
:
...
@@ -244,13 +308,16 @@ def load_inference_model(dirname, executor, lookup_table_var_name):
...
@@ -244,13 +308,16 @@ def load_inference_model(dirname, executor, lookup_table_var_name):
raise
ValueError
(
"Unsupported program version: %d
\n
"
%
raise
ValueError
(
"Unsupported program version: %d
\n
"
%
program
.
_version
())
program
.
_version
())
# Binary data also need version.
_logger
.
info
(
"Start Load Sparse Program With "
load_persistable_vars
(
executor
,
dirname
,
program
,
lookup_table_var_name
)
"Distributed Lookup Table Vars from {}, time = {}"
.
format
(
dirname
,
time
.
ctime
()))
_load_persistable_vars
(
executor
,
dirname
,
program
,
[
lookup_table_var_name
])
__load_lookup_table_vars
(
executor
,
dirname
,
program
,
[
lookup_table_var_name
])
feed_target_names
=
program
.
desc
.
get_feed_target_names
()
_logger
.
info
(
"Finish Load Sparse Program With "
fetch_target_names
=
program
.
desc
.
get_fetch_target_names
()
"Distributed Lookup Table Vars from {}, time = {}"
.
format
(
fetch_targets
=
[
dirname
,
time
.
ctime
()))
program
.
global_block
().
var
(
name
)
for
name
in
fetch_target_names
]
return
[
program
,
feed_target_names
,
fetch_targets
]
return
program
python/paddle/fluid/framework.py
浏览文件 @
1c116462
...
@@ -16,6 +16,7 @@ from __future__ import print_function
...
@@ -16,6 +16,7 @@ from __future__ import print_function
import
collections
import
collections
import
contextlib
import
contextlib
import
os
import
re
import
re
import
six
import
six
import
sys
import
sys
...
@@ -27,6 +28,13 @@ from .proto import framework_pb2
...
@@ -27,6 +28,13 @@ from .proto import framework_pb2
try
:
try
:
from
.
import
core
from
.
import
core
except
ImportError
as
e
:
except
ImportError
as
e
:
if
os
.
name
==
'nt'
:
raise
ImportError
(
"""NOTE: You may need to run
\"
set PATH=c:\python27\lib:%PATH%
\"
if you encounters
\"
mkldnn.dll not found
\"
errors. If you have python
installed in other directory, replace
\"
c:\python27\lib" with your own
directory. The original error is:
\n
"""
+
cpt
.
get_exception_message
(
e
))
else
:
raise
ImportError
(
raise
ImportError
(
"""NOTE: You may need to run
\"
export LD_LIBRARY_PATH=/usr/local/lib:$LD_LIBRARY_PATH
\"
"""NOTE: You may need to run
\"
export LD_LIBRARY_PATH=/usr/local/lib:$LD_LIBRARY_PATH
\"
if you encounters
\"
libmkldnn.so not found
\"
errors. If you have python
if you encounters
\"
libmkldnn.so not found
\"
errors. If you have python
...
@@ -571,8 +579,8 @@ class Operator(object):
...
@@ -571,8 +579,8 @@ class Operator(object):
OP_WITHOUT_KERNEL_SET
=
{
OP_WITHOUT_KERNEL_SET
=
{
'feed'
,
'fetch'
,
'save'
,
'load'
,
'recurrent'
,
'go'
,
'feed'
,
'fetch'
,
'save'
,
'load'
,
'recurrent'
,
'go'
,
'rnn_memory_helper_grad'
,
'conditional_block'
,
'while'
,
'send'
,
'recv'
,
'rnn_memory_helper_grad'
,
'conditional_block'
,
'while'
,
'send'
,
'recv'
,
'listen_and_serv'
,
'
parallel_do'
,
'save_combine'
,
'load_combine
'
,
'listen_and_serv'
,
'
save_combine'
,
'load_combine'
,
'ncclInit'
,
'select
'
,
'
ncclInit'
,
'select'
,
'
checkpoint_notify'
,
'gen_nccl_id'
'checkpoint_notify'
,
'gen_nccl_id'
}
}
def
__init__
(
self
,
def
__init__
(
self
,
...
...
python/paddle/fluid/imperative/base.py
浏览文件 @
1c116462
...
@@ -28,7 +28,8 @@ def enabled():
...
@@ -28,7 +28,8 @@ def enabled():
def
guard
():
def
guard
():
train
=
framework
.
Program
()
train
=
framework
.
Program
()
startup
=
framework
.
Program
()
startup
=
framework
.
Program
()
tracer
=
core
.
Tracer
(
train
.
current_block
().
desc
)
tracer
=
core
.
Tracer
(
train
.
current_block
().
desc
,
startup
.
current_block
().
desc
)
with
framework
.
program_guard
(
train
,
startup
):
with
framework
.
program_guard
(
train
,
startup
):
with
framework
.
unique_name
.
guard
():
with
framework
.
unique_name
.
guard
():
with
framework
.
_imperative_guard
(
tracer
):
with
framework
.
_imperative_guard
(
tracer
):
...
...
python/paddle/fluid/layers/control_flow.py
浏览文件 @
1c116462
...
@@ -226,156 +226,6 @@ class BlockGuard(object):
...
@@ -226,156 +226,6 @@ class BlockGuard(object):
return
True
return
True
class
ParallelDo
(
object
):
"""
ParallelDo is used to represent multi-thread data parallel processing.
Its vanilla implementation can be shown as the following (:math:`|` means
single thread and :math:`||||` means multiple threads)
.. code-block:: text
In the forward pass
| Split input onto different devices
| Copy parameter onto different devices
|||| Compute forward pass in parallel
| Merge output from different devices
In the backward pass
| Split output@grad onto different devices
|||| Compute backward pass in parallel
| accumulate param@grad from different devices to the first device
| Merge input@grad from different devices
| Copy param@grad to the place of parallel_do_op
Examples:
.. code-block:: python
images = fluid.layers.data(name='pixel', shape=[1, 28, 28], dtype=DTYPE)
label = fluid.layers.data(name='label', shape=[1], dtype='int64')
# ParallelDo version & Single-thread version
if thread_num > 1:
places = fluid.layers.get_places(thread_num)
pd = fluid.layers.control_flow.ParallelDo(places)
with pd.do():
images = pd.read_input(images)
label = pd.read_input(label)
predict = cnn_model(images)
cost = fluid.layers.cross_entropy(input=predict, label=label)
avg_cost = fluid.layers.mean(x=cost)
pd.write_output(avg_cost)
avg_cost = pd()
avg_cost = fluid.layers.mean(avg_cost)
else:
predict = cnn_model(images)
cost = fluid.layers.cross_entropy(input=predict, label=label)
avg_cost = fluid.layers.mean(x=cost)
.. warning::
It will be soon deprecated, please use ParallelExecutor instead.
"""
def
__init__
(
self
,
places
,
use_nccl
=
False
,
name
=
None
):
warnings
.
warn
(
"API ParallelDo is deprecated since 0.15.0. Please use ParallelExecutor instead."
,
Warning
)
self
.
helper
=
LayerHelper
(
"parallel_do"
,
name
=
name
)
self
.
inputs
=
[]
self
.
places
=
places
self
.
outputs
=
[]
self
.
status
=
StaticRNN
.
BEFORE_RNN_BLOCK
self
.
use_nccl
=
use_nccl
def
do
(
self
):
return
BlockGuardWithCompletion
(
self
)
def
parent_block
(
self
):
prog
=
self
.
helper
.
main_program
parent_idx
=
prog
.
current_block
().
parent_idx
assert
parent_idx
>=
0
parent_block
=
prog
.
block
(
parent_idx
)
return
parent_block
def
__call__
(
self
,
*
args
,
**
kwargs
):
if
self
.
status
!=
StaticRNN
.
AFTER_RNN_BLOCK
:
raise
ValueError
(
"RNN output can only be retrieved after rnn block"
)
if
len
(
self
.
outputs
)
==
0
:
raise
ValueError
(
"RNN has no output"
)
elif
len
(
self
.
outputs
)
==
1
:
return
self
.
outputs
[
0
]
else
:
return
self
.
outputs
def
read_input
(
self
,
var
):
self
.
inputs
.
append
(
var
)
return
var
def
write_output
(
self
,
var
):
self
.
outputs
.
append
(
var
)
def
get_parameters
(
self
):
main_program
=
self
.
helper
.
main_program
current_block
=
main_program
.
current_block
()
parent_block
=
self
.
parent_block
()
local_inputs
=
set
()
params
=
list
()
for
var
in
self
.
inputs
:
local_inputs
.
add
(
var
.
name
)
for
op
in
current_block
.
ops
:
for
iname
in
op
.
input_names
:
for
in_var_name
in
op
.
input
(
iname
):
if
in_var_name
not
in
local_inputs
:
params
.
append
(
in_var_name
)
for
oname
in
op
.
output_names
:
for
out_var_name
in
op
.
output
(
oname
):
local_inputs
.
add
(
out_var_name
)
params
=
list
(
set
(
params
))
return
[
parent_block
.
var
(
name
)
for
name
in
params
]
def
_complete_op
(
self
):
main_program
=
self
.
helper
.
main_program
current_block
=
main_program
.
current_block
()
parent_block
=
self
.
parent_block
()
step_scope
=
parent_block
.
create_var
(
type
=
core
.
VarDesc
.
VarType
.
STEP_SCOPES
)
self
.
outputs
=
[
parent_block
.
create_var
(
name
=
o
.
name
,
shape
=
o
.
shape
,
dtype
=
o
.
dtype
,
lod_level
=
o
.
lod_level
,
persistable
=
o
.
persistable
,
stop_gradient
=
o
.
stop_gradient
)
for
o
in
self
.
outputs
]
inputs
=
[
parent_block
.
var
(
i
.
name
)
for
i
in
self
.
inputs
]
outputs
=
[
parent_block
.
var
(
o
.
name
)
for
o
in
self
.
outputs
]
parent_block
.
append_op
(
type
=
'parallel_do'
,
inputs
=
{
'inputs'
:
inputs
,
'parameters'
:
self
.
get_parameters
(),
'places'
:
self
.
places
},
outputs
=
{
'outputs'
:
outputs
,
'parallel_scopes'
:
[
step_scope
]},
attrs
=
{
'sub_block'
:
current_block
,
'use_nccl'
:
self
.
use_nccl
})
class
BlockGuardWithCompletion
(
BlockGuard
):
class
BlockGuardWithCompletion
(
BlockGuard
):
"""
"""
BlockGuardWithCompletion class.
BlockGuardWithCompletion class.
...
@@ -384,9 +234,8 @@ class BlockGuardWithCompletion(BlockGuard):
...
@@ -384,9 +234,8 @@ class BlockGuardWithCompletion(BlockGuard):
"""
"""
def
__init__
(
self
,
rnn
):
def
__init__
(
self
,
rnn
):
if
not
(
isinstance
(
rnn
,
StaticRNN
)
or
isinstance
(
rnn
,
ParallelDo
)):
if
not
isinstance
(
rnn
,
StaticRNN
):
raise
TypeError
(
raise
TypeError
(
"BlockGuardWithCompletion takes a StaticRNN"
)
"BlockGuardWithCompletion takes a StaticRNN or ParallelDo"
)
super
(
BlockGuardWithCompletion
,
self
).
__init__
(
rnn
.
helper
.
main_program
)
super
(
BlockGuardWithCompletion
,
self
).
__init__
(
rnn
.
helper
.
main_program
)
self
.
rnn
=
rnn
self
.
rnn
=
rnn
...
...
python/paddle/fluid/layers/nn.py
浏览文件 @
1c116462
...
@@ -29,6 +29,7 @@ from . import utils
...
@@ -29,6 +29,7 @@ from . import utils
from
..
import
unique_name
from
..
import
unique_name
from
functools
import
reduce
from
functools
import
reduce
from
..
import
core
from
..
import
core
from
..imperative
import
layers
__all__
=
[
__all__
=
[
'fc'
,
'fc'
,
...
@@ -9426,3 +9427,47 @@ def huber_loss(input, label, delta):
...
@@ -9426,3 +9427,47 @@ def huber_loss(input, label, delta):
'Residual'
:
residual
},
'Residual'
:
residual
},
attrs
=
{
'delta'
:
delta
})
attrs
=
{
'delta'
:
delta
})
return
out
return
out
class
FC
(
layers
.
PyLayer
):
def
__init__
(
self
,
size
,
param_attr
=
None
,
num_flatten_dims
=
1
,
dtype
=
core
.
VarDesc
.
VarType
.
FP32
):
super
(
FC
,
self
).
__init__
()
self
.
_size
=
size
self
.
_num_flatten_dims
=
num_flatten_dims
self
.
_dtype
=
dtype
self
.
_helper
=
LayerHelper
(
'FC'
,
param_attr
=
param_attr
)
def
_build_once
(
self
,
inputs
):
input_shape
=
inputs
[
0
].
shape
param_shape
=
[
reduce
(
lambda
a
,
b
:
a
*
b
,
input_shape
[
self
.
_num_flatten_dims
:],
1
)
]
+
[
self
.
_size
]
self
.
_w
=
self
.
_helper
.
create_parameter
(
attr
=
self
.
_helper
.
param_attr
,
shape
=
param_shape
,
dtype
=
self
.
_dtype
,
is_bias
=
False
)
def
forward
(
self
,
inputs
):
tmp
=
self
.
_helper
.
create_variable_for_type_inference
(
self
.
_dtype
)
self
.
_helper
.
append_op
(
type
=
"mul"
,
inputs
=
{
"X"
:
inputs
[
0
],
"Y"
:
self
.
_w
},
outputs
=
{
"Out"
:
tmp
},
attrs
=
{
"x_num_col_dims"
:
self
.
_num_flatten_dims
,
"y_num_col_dims"
:
1
})
out
=
self
.
_helper
.
create_variable_for_type_inference
(
self
.
_dtype
)
self
.
_helper
.
append_op
(
type
=
"sum"
,
inputs
=
{
"X"
:
[
tmp
]},
outputs
=
{
"Out"
:
out
},
attrs
=
{
"use_mkldnn"
:
False
})
return
out
python/paddle/fluid/optimizer.py
浏览文件 @
1c116462
...
@@ -641,9 +641,14 @@ class AdamOptimizer(Optimizer):
...
@@ -641,9 +641,14 @@ class AdamOptimizer(Optimizer):
beta1 (float): The exponential decay rate for the 1st moment estimates.
beta1 (float): The exponential decay rate for the 1st moment estimates.
beta2 (float): The exponential decay rate for the 2nd moment estimates.
beta2 (float): The exponential decay rate for the 2nd moment estimates.
epsilon (float): a small float value for numerical stability.
epsilon (float): a small float value for numerical stability.
regularization: A Regularizer, such as
regularization: A Regularizer, such as fluid.regularizer.L2DecayRegularizer.
fluid.regularizer.L2DecayRegularizer.
name: A optional name prefix.
name: A optional name prefix.
lazy_mode(bool: false): The official Adam algorithm has two moving-average accumulators
the accumulators are updated at every step. Every element of the two moving-average is updated
in both dense mode and sparse mode. If the size of parameter is very large, then the update
may be very slow. The lazy mode only update the element that has gradient is the current
mini-batch, so it will be much more faster. But this mode has different semantics with the
original Adam algorithm and may lead to different result.
Examples:
Examples:
.. code-block:: python
.. code-block:: python
...
@@ -663,7 +668,8 @@ class AdamOptimizer(Optimizer):
...
@@ -663,7 +668,8 @@ class AdamOptimizer(Optimizer):
beta2
=
0.999
,
beta2
=
0.999
,
epsilon
=
1e-8
,
epsilon
=
1e-8
,
regularization
=
None
,
regularization
=
None
,
name
=
None
):
name
=
None
,
lazy_mode
=
False
):
assert
learning_rate
is
not
None
assert
learning_rate
is
not
None
assert
beta1
is
not
None
assert
beta1
is
not
None
assert
beta2
is
not
None
assert
beta2
is
not
None
...
@@ -676,6 +682,7 @@ class AdamOptimizer(Optimizer):
...
@@ -676,6 +682,7 @@ class AdamOptimizer(Optimizer):
self
.
_beta1
=
beta1
self
.
_beta1
=
beta1
self
.
_beta2
=
beta2
self
.
_beta2
=
beta2
self
.
_epsilon
=
epsilon
self
.
_epsilon
=
epsilon
self
.
_lazy_mode
=
lazy_mode
def
_create_accumulators
(
self
,
block
,
parameters
):
def
_create_accumulators
(
self
,
block
,
parameters
):
assert
isinstance
(
block
,
framework
.
Block
)
assert
isinstance
(
block
,
framework
.
Block
)
...
@@ -729,7 +736,8 @@ class AdamOptimizer(Optimizer):
...
@@ -729,7 +736,8 @@ class AdamOptimizer(Optimizer):
attrs
=
{
attrs
=
{
"beta1"
:
self
.
_beta1
,
"beta1"
:
self
.
_beta1
,
"beta2"
:
self
.
_beta2
,
"beta2"
:
self
.
_beta2
,
"epsilon"
:
self
.
_epsilon
"epsilon"
:
self
.
_epsilon
,
"lazy_mode"
:
self
.
_lazy_mode
})
})
return
adam_op
return
adam_op
...
...
python/paddle/fluid/parallel_executor.py
浏览文件 @
1c116462
...
@@ -92,35 +92,27 @@ class ParallelExecutor(object):
...
@@ -92,35 +92,27 @@ class ParallelExecutor(object):
num_trainers
=
1
,
num_trainers
=
1
,
trainer_id
=
0
,
trainer_id
=
0
,
scope
=
None
):
scope
=
None
):
# step1: get places, the places are used in run too.
self
.
_places
=
[]
self
.
_places
=
[]
self
.
_act_places
=
[]
if
use_cuda
:
if
use_cuda
:
gpus
=
[]
gpus_env
=
os
.
getenv
(
"FLAGS_selected_gpus"
)
gpus_env
=
os
.
getenv
(
"FLAGS_selected_gpus"
)
if
gpus_env
:
if
gpus_env
:
gpus
=
[
int
(
s
)
for
s
in
gpus_env
.
split
(
","
)]
gpus
=
[
int
(
s
)
for
s
in
gpus_env
.
split
(
","
)]
else
:
else
:
for
i
in
six
.
moves
.
range
(
core
.
get_cuda_device_count
()):
gpus
=
[
gpus
.
append
(
i
)
i
for
i
in
six
.
moves
.
range
(
core
.
get_cuda_device_count
())
for
i
in
gpus
:
]
p
=
core
.
Place
()
self
.
_places
=
[
core
.
CUDAPlace
(
i
)
for
i
in
gpus
]
self
.
_act_places
.
append
(
core
.
CUDAPlace
(
i
))
p
.
set_place
(
self
.
_act_places
[
-
1
])
self
.
_places
.
append
(
p
)
else
:
else
:
cpu_num
=
int
(
cpu_num
=
int
(
os
.
environ
.
get
(
'CPU_NUM'
,
multiprocessing
.
cpu_count
()))
os
.
environ
.
get
(
'CPU_NUM'
,
multiprocessing
.
cpu_count
()))
for
i
in
six
.
moves
.
range
(
cpu_num
):
self
.
_places
=
[
core
.
CPUPlace
()
for
_
in
six
.
moves
.
range
(
cpu_num
)]
p
=
core
.
Place
()
self
.
_act_places
.
append
(
core
.
CPUPlace
())
p
.
set_place
(
self
.
_act_places
[
-
1
])
self
.
_places
.
append
(
p
)
assert
self
.
_places
,
"no place for execution"
assert
self
.
_places
,
"no place for execution"
# step2: init exec_strategy
if
exec_strategy
is
None
:
if
exec_strategy
is
None
:
exec_strategy
=
ExecutionStrategy
()
exec_strategy
=
ExecutionStrategy
()
exec_strategy
.
use_cuda
=
use_cuda
exec_strategy
.
use_cuda
=
use_cuda
if
exec_strategy
.
num_threads
==
0
:
if
exec_strategy
.
num_threads
==
0
:
if
use_cuda
:
if
use_cuda
:
# Experiments on se-resnext shows that too many threads hurt
# Experiments on se-resnext shows that too many threads hurt
...
@@ -131,49 +123,54 @@ class ParallelExecutor(object):
...
@@ -131,49 +123,54 @@ class ParallelExecutor(object):
os
.
environ
.
get
(
'CPU_NUM'
,
multiprocessing
.
cpu_count
()))
os
.
environ
.
get
(
'CPU_NUM'
,
multiprocessing
.
cpu_count
()))
exec_strategy
.
num_threads
=
cpu_num
*
2
exec_strategy
.
num_threads
=
cpu_num
*
2
# step3: init build_strategy
if
build_strategy
is
None
:
if
build_strategy
is
None
:
build_strategy
=
BuildStrategy
()
build_strategy
=
BuildStrategy
()
build_strategy
.
num_trainers
=
num_trainers
build_strategy
.
num_trainers
=
num_trainers
build_strategy
.
trainer_id
=
trainer_id
build_strategy
.
trainer_id
=
trainer_id
main
=
main_program
# step4: get main_program, scope, local_scopes
main
=
main
if
main
else
framework
.
default_main_program
()
main
=
main_program
if
main_program
\
else
framework
.
default_main_program
()
scope
=
scope
if
scope
is
not
None
else
executor
.
global_scope
()
if
share_vars_from
and
not
isinstance
(
share_vars_from
,
ParallelExecutor
):
raise
TypeError
(
"share_vars_from must be ParallelExecutor."
)
local_scopes
=
share_vars_from
.
executor
.
local_scopes
()
\
if
share_vars_from
else
[]
# step5: check trainers_endpoints, it is used for distribution.
trainers_endpoints
=
main
.
_trainers_endpoints
trainers_endpoints
=
main
.
_trainers_endpoints
if
num_trainers
>
1
and
trainers_endpoints
:
if
num_trainers
>
1
and
trainers_endpoints
:
assert
num_trainers
==
len
(
assert
num_trainers
==
len
(
trainers_endpoints
),
"num_trainers == len(end_points)"
trainers_endpoints
),
"num_trainers == len(end_points)"
build_strategy
.
trainers_endpoints
=
trainers_endpoints
build_strategy
.
trainers_endpoints
=
trainers_endpoints
if
scope
==
None
:
# step5: get persistable_vars, parameter_vars, places. persistable_vars
scope
=
executor
.
global_scope
()
# need be broadcast to other local_scope.
persistable_vars
=
set
([
if
share_vars_from
and
not
isinstance
(
share_vars_from
,
cpt
.
to_text
(
v
.
name
)
for
v
in
[
ParallelExecutor
):
raise
TypeError
(
"share_vars_from must be ParallelExecutor."
)
local_scopes
=
share_vars_from
.
executor
.
local_scopes
(
)
if
share_vars_from
else
[]
self
.
persistable_vars
=
[
v
.
name
for
v
in
[
var
for
var
in
main
.
list_vars
()
var
for
var
in
main
.
list_vars
()
if
var
.
persistable
and
var
.
type
!=
core
.
VarDesc
.
VarType
.
RAW
if
var
.
persistable
and
var
.
type
!=
core
.
VarDesc
.
VarType
.
RAW
]
]
]
])
def
place_obj
(
place
):
p
=
core
.
Place
()
p
.
set_place
(
place
)
return
p
places
=
list
(
map
(
place_obj
,
self
.
_places
))
# step6: init ParallelExecutor
self
.
executor
=
core
.
ParallelExecutor
(
self
.
executor
=
core
.
ParallelExecutor
(
self
.
_places
,
places
,
persistable_vars
,
main
.
desc
,
set
([
cpt
.
to_text
(
p
.
name
)
for
p
in
main
.
global_block
().
iter_parameters
()
if
not
p
.
stop_gradient
]),
set
(
cpt
.
to_text
(
var
)
for
var
in
self
.
persistable_vars
),
main
.
desc
,
cpt
.
to_text
(
loss_name
)
cpt
.
to_text
(
loss_name
)
if
loss_name
else
six
.
u
(
''
),
scope
,
local_scopes
,
exec_strategy
,
if
loss_name
else
six
.
u
(
''
),
scope
,
local_scopes
,
exec_strategy
,
build_strategy
,
num_trainers
,
trainer_id
)
build_strategy
,
num_trainers
,
trainer_id
)
self
.
scope
=
scope
self
.
scope
=
scope
def
run
(
self
,
fetch_list
,
feed
=
None
,
feed_dict
=
None
,
return_numpy
=
True
):
def
run
(
self
,
fetch_list
,
feed
=
None
,
feed_dict
=
None
,
return_numpy
=
True
):
...
@@ -261,7 +258,7 @@ class ParallelExecutor(object):
...
@@ -261,7 +258,7 @@ class ParallelExecutor(object):
self
.
executor
.
feed_and_split_tensor_into_local_scopes
(
self
.
executor
.
feed_and_split_tensor_into_local_scopes
(
feed_tensor_dict
)
feed_tensor_dict
)
elif
isinstance
(
feed
,
list
)
or
isinstance
(
feed
,
tuple
):
elif
isinstance
(
feed
,
list
)
or
isinstance
(
feed
,
tuple
):
if
len
(
feed
)
!=
len
(
self
.
_
act_
places
):
if
len
(
feed
)
!=
len
(
self
.
_places
):
raise
ValueError
(
raise
ValueError
(
"Feed a list of tensor, the list should be the same size as places"
"Feed a list of tensor, the list should be the same size as places"
)
)
...
@@ -277,7 +274,7 @@ class ParallelExecutor(object):
...
@@ -277,7 +274,7 @@ class ParallelExecutor(object):
tensor
=
each
[
feed_name
]
tensor
=
each
[
feed_name
]
if
not
isinstance
(
tensor
,
core
.
LoDTensor
):
if
not
isinstance
(
tensor
,
core
.
LoDTensor
):
tmp
=
core
.
LoDTensor
()
tmp
=
core
.
LoDTensor
()
tmp
.
set
(
tensor
,
self
.
_
act_
places
[
i
])
tmp
.
set
(
tensor
,
self
.
_places
[
i
])
tensor
=
tmp
tensor
=
tmp
res_dict
[
feed_name
]
=
tensor
res_dict
[
feed_name
]
=
tensor
res
.
append
(
res_dict
)
res
.
append
(
res_dict
)
...
@@ -294,4 +291,4 @@ class ParallelExecutor(object):
...
@@ -294,4 +291,4 @@ class ParallelExecutor(object):
@
property
@
property
def
device_count
(
self
):
def
device_count
(
self
):
return
len
(
self
.
_
act_
places
)
return
len
(
self
.
_places
)
python/paddle/fluid/tests/book/notest_understand_sentiment.py
浏览文件 @
1c116462
...
@@ -15,7 +15,6 @@
...
@@ -15,7 +15,6 @@
from
__future__
import
print_function
from
__future__
import
print_function
from
paddle.fluid.layers.device
import
get_places
from
paddle.fluid.layers.device
import
get_places
from
paddle.fluid.layers.control_flow
import
ParallelDo
import
unittest
import
unittest
import
paddle.fluid
as
fluid
import
paddle.fluid
as
fluid
import
paddle
import
paddle
...
@@ -147,22 +146,7 @@ def train(word_dict,
...
@@ -147,22 +146,7 @@ def train(word_dict,
cost
,
acc_out
,
prediction
=
net_method
(
cost
,
acc_out
,
prediction
=
net_method
(
data
,
label
,
input_dim
=
dict_dim
,
class_dim
=
class_dim
)
data
,
label
,
input_dim
=
dict_dim
,
class_dim
=
class_dim
)
else
:
else
:
places
=
get_places
()
raise
NotImplementedError
()
pd
=
ParallelDo
(
places
)
with
pd
.
do
():
cost
,
acc
,
_
=
net_method
(
pd
.
read_input
(
data
),
pd
.
read_input
(
label
),
input_dim
=
dict_dim
,
class_dim
=
class_dim
)
pd
.
write_output
(
cost
)
pd
.
write_output
(
acc
)
cost
,
acc
=
pd
()
cost
=
fluid
.
layers
.
mean
(
cost
)
acc_out
=
fluid
.
layers
.
mean
(
acc
)
prediction
=
None
assert
save_dirname
is
None
adagrad
=
fluid
.
optimizer
.
Adagrad
(
learning_rate
=
0.002
)
adagrad
=
fluid
.
optimizer
.
Adagrad
(
learning_rate
=
0.002
)
adagrad
.
minimize
(
cost
)
adagrad
.
minimize
(
cost
)
...
...
python/paddle/fluid/tests/book/test_recognize_digits.py
浏览文件 @
1c116462
...
@@ -25,7 +25,6 @@ import numpy
...
@@ -25,7 +25,6 @@ import numpy
import
paddle
import
paddle
import
paddle.fluid
as
fluid
import
paddle.fluid
as
fluid
from
paddle.fluid.layers.device
import
get_places
from
paddle.fluid.layers.device
import
get_places
from
paddle.fluid.layers.control_flow
import
ParallelDo
BATCH_SIZE
=
64
BATCH_SIZE
=
64
...
@@ -82,19 +81,7 @@ def train(nn_type,
...
@@ -82,19 +81,7 @@ def train(nn_type,
net_conf
=
conv_net
net_conf
=
conv_net
if
parallel
:
if
parallel
:
places
=
get_places
()
raise
NotImplementedError
()
pd
=
ParallelDo
(
places
)
with
pd
.
do
():
img_
=
pd
.
read_input
(
img
)
label_
=
pd
.
read_input
(
label
)
prediction
,
avg_loss
,
acc
=
net_conf
(
img_
,
label_
)
for
o
in
[
avg_loss
,
acc
]:
pd
.
write_output
(
o
)
avg_loss
,
acc
=
pd
()
# get mean loss and acc through every devices.
avg_loss
=
fluid
.
layers
.
mean
(
avg_loss
)
acc
=
fluid
.
layers
.
mean
(
acc
)
else
:
else
:
prediction
,
avg_loss
,
acc
=
net_conf
(
img
,
label
)
prediction
,
avg_loss
,
acc
=
net_conf
(
img
,
label
)
...
@@ -273,7 +260,7 @@ def inject_all_tests():
...
@@ -273,7 +260,7 @@ def inject_all_tests():
for
use_cuda
in
(
False
,
True
):
for
use_cuda
in
(
False
,
True
):
if
use_cuda
and
not
core
.
is_compiled_with_cuda
():
if
use_cuda
and
not
core
.
is_compiled_with_cuda
():
continue
continue
for
parallel
in
(
False
,
True
):
for
parallel
in
(
False
,
):
for
nn_type
in
(
'mlp'
,
'conv'
):
for
nn_type
in
(
'mlp'
,
'conv'
):
inject_test_method
(
use_cuda
,
parallel
,
nn_type
,
True
)
inject_test_method
(
use_cuda
,
parallel
,
nn_type
,
True
)
...
...
python/paddle/fluid/tests/book/test_word2vec.py
浏览文件 @
1c116462
...
@@ -17,7 +17,6 @@ from __future__ import print_function
...
@@ -17,7 +17,6 @@ from __future__ import print_function
import
paddle
import
paddle
import
paddle.fluid
as
fluid
import
paddle.fluid
as
fluid
from
paddle.fluid.layers.device
import
get_places
from
paddle.fluid.layers.device
import
get_places
from
paddle.fluid.layers.control_flow
import
ParallelDo
import
unittest
import
unittest
import
os
import
os
import
numpy
as
np
import
numpy
as
np
...
@@ -84,18 +83,7 @@ def train(use_cuda, is_sparse, is_parallel, save_dirname, is_local=True):
...
@@ -84,18 +83,7 @@ def train(use_cuda, is_sparse, is_parallel, save_dirname, is_local=True):
avg_cost
,
predict_word
=
__network__
(
avg_cost
,
predict_word
=
__network__
(
[
first_word
,
second_word
,
third_word
,
forth_word
,
next_word
])
[
first_word
,
second_word
,
third_word
,
forth_word
,
next_word
])
else
:
else
:
places
=
get_places
()
raise
NotImplementedError
()
pd
=
ParallelDo
(
places
)
with
pd
.
do
():
avg_cost
,
predict_word
=
__network__
(
list
(
map
(
pd
.
read_input
,
[
first_word
,
second_word
,
third_word
,
forth_word
,
next_word
])))
pd
.
write_output
(
avg_cost
)
avg_cost
=
fluid
.
layers
.
mean
(
pd
())
sgd_optimizer
=
fluid
.
optimizer
.
SGD
(
learning_rate
=
0.001
)
sgd_optimizer
=
fluid
.
optimizer
.
SGD
(
learning_rate
=
0.001
)
sgd_optimizer
.
minimize
(
avg_cost
)
sgd_optimizer
.
minimize
(
avg_cost
)
...
@@ -262,7 +250,7 @@ def inject_test_method(use_cuda, is_sparse, is_parallel):
...
@@ -262,7 +250,7 @@ def inject_test_method(use_cuda, is_sparse, is_parallel):
for
use_cuda
in
(
False
,
True
):
for
use_cuda
in
(
False
,
True
):
for
is_sparse
in
(
False
,
True
):
for
is_sparse
in
(
False
,
True
):
for
is_parallel
in
(
False
,
True
):
for
is_parallel
in
(
False
,
):
inject_test_method
(
use_cuda
,
is_sparse
,
is_parallel
)
inject_test_method
(
use_cuda
,
is_sparse
,
is_parallel
)
if
__name__
==
'__main__'
:
if
__name__
==
'__main__'
:
...
...
python/paddle/fluid/tests/book_memory_optimization/test_memopt_fit_a_line.py
已删除
100644 → 0
浏览文件 @
29697c2e
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from
__future__
import
print_function
import
math
import
sys
import
paddle
import
paddle.fluid
as
fluid
from
paddle.fluid.layers.device
import
get_places
from
paddle.fluid.layers.control_flow
import
ParallelDo
# need to fix random seed and training data to compare the loss
# value accurately calculated by the default and the memory optimization
# version.
fluid
.
default_startup_program
().
random_seed
=
111
x
=
fluid
.
layers
.
data
(
name
=
'x'
,
shape
=
[
13
],
dtype
=
'float32'
)
y
=
fluid
.
layers
.
data
(
name
=
'y'
,
shape
=
[
1
],
dtype
=
'float32'
)
device_type
=
'CPU'
use_nccl
=
False
place
=
fluid
.
CPUPlace
()
if
fluid
.
core
.
is_compiled_with_cuda
():
device_type
=
'CUDA'
use_nccl
=
False
place
=
fluid
.
CUDAPlace
(
0
)
places
=
get_places
(
device_count
=
0
,
device_type
=
device_type
)
pd
=
ParallelDo
(
places
,
use_nccl
=
use_nccl
)
with
pd
.
do
():
x_
=
pd
.
read_input
(
x
)
y_
=
pd
.
read_input
(
y
)
y_predict
=
fluid
.
layers
.
fc
(
input
=
x_
,
size
=
1
,
act
=
None
)
cost
=
fluid
.
layers
.
square_error_cost
(
input
=
y_predict
,
label
=
y_
)
avg_cost
=
fluid
.
layers
.
mean
(
x
=
cost
)
pd
.
write_output
(
avg_cost
)
cost
=
pd
()
avg_cost
=
fluid
.
layers
.
mean
(
x
=
cost
)
sgd_optimizer
=
fluid
.
optimizer
.
SGD
(
learning_rate
=
0.01
)
sgd_optimizer
.
minimize
(
avg_cost
)
fluid
.
memory_optimize
(
fluid
.
default_main_program
(),
print_log
=
True
)
# fluid.release_memory(fluid.default_main_program())
BATCH_SIZE
=
200
# fix the order of training data
train_reader
=
paddle
.
batch
(
paddle
.
dataset
.
uci_housing
.
train
(),
batch_size
=
BATCH_SIZE
,
drop_last
=
False
)
# train_reader = paddle.batch(
# paddle.reader.shuffle(
# paddle.dataset.uci_housing.train(), buf_size=500),
# batch_size=BATCH_SIZE)
feeder
=
fluid
.
DataFeeder
(
place
=
place
,
feed_list
=
[
x
,
y
])
exe
=
fluid
.
Executor
(
place
)
exe
.
run
(
fluid
.
default_startup_program
())
PASS_NUM
=
100
for
pass_id
in
range
(
PASS_NUM
):
for
data
in
train_reader
():
avg_loss_value
,
=
exe
.
run
(
fluid
.
default_main_program
(),
feed
=
feeder
.
feed
(
data
),
fetch_list
=
[
avg_cost
])
if
avg_loss_value
[
0
]
<
10.0
:
exit
(
0
)
# if avg cost less than 10.0, we think our code is good.
print
(
avg_loss_value
[
0
])
if
math
.
isnan
(
float
(
avg_loss_value
)):
sys
.
exit
(
"got NaN loss, training failed."
)
exit
(
1
)
python/paddle/fluid/tests/unittests/ngraph/test_activation_ngraph_op.py
0 → 100644
浏览文件 @
1c116462
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from
__future__
import
print_function
import
unittest
import
numpy
as
np
import
paddle.fluid.core
as
core
from
paddle.fluid.tests.unittests.op_test
import
OpTest
from
paddle.fluid.tests.unittests.test_activation_op
import
TestRelu
,
TestTanh
class
TestNGRAPHReluDim2
(
TestRelu
):
def
setUp
(
self
):
super
(
TestNGRAPHReluDim2
,
self
).
setUp
()
class
TestNGRAPHTanhDim2
(
TestTanh
):
def
setUp
(
self
):
super
(
TestNGRAPHTanhDim2
,
self
).
setUp
()
class
TestNGRAPHReluDim4
(
TestRelu
):
def
setUp
(
self
):
super
(
TestNGRAPHReluDim4
,
self
).
setUp
()
x
=
np
.
random
.
uniform
(
-
1
,
1
,
[
2
,
4
,
3
,
5
]).
astype
(
"float32"
)
# The same reason with TestAbs
x
[
np
.
abs
(
x
)
<
0.005
]
=
0.02
out
=
np
.
maximum
(
x
,
0
)
self
.
inputs
=
{
'X'
:
OpTest
.
np_dtype_to_fluid_dtype
(
x
)}
self
.
outputs
=
{
'Out'
:
out
}
class
TestNGRAPHTanhDim4
(
TestTanh
):
def
setUp
(
self
):
super
(
TestNGRAPHTanhDim4
,
self
).
setUp
()
self
.
inputs
=
{
'X'
:
np
.
random
.
uniform
(
0.1
,
1
,
[
2
,
4
,
3
,
5
]).
astype
(
"float32"
)
}
self
.
outputs
=
{
'Out'
:
np
.
tanh
(
self
.
inputs
[
'X'
])}
if
__name__
==
'__main__'
:
unittest
.
main
()
python/paddle/fluid/tests/unittests/ngraph/test_mul_ngraph_op.py
0 → 100644
浏览文件 @
1c116462
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from
__future__
import
print_function
import
unittest
from
paddle.fluid.tests.unittests.test_mul_op
import
TestMulOp
,
TestMulOp2
,
TestFP16MulOp1
,
TestFP16MulOp2
class
TestNGRAPHMulOp
(
TestMulOp
):
def
init_dtype_type
(
self
):
pass
class
TestNGRAPHMulOp2
(
TestMulOp2
):
def
init_dtype_type
(
self
):
pass
class
TestNGRAPHFP16MulOp1
(
TestFP16MulOp1
):
def
init_dtype_type
(
self
):
pass
class
TestNGRAPHFP16MulOp2
(
TestFP16MulOp2
):
def
init_dtype_type
(
self
):
pass
if
__name__
==
"__main__"
:
unittest
.
main
()
python/paddle/fluid/tests/unittests/test_adam_op.py
浏览文件 @
1c116462
...
@@ -194,7 +194,8 @@ def adam_step(inputs, attributes):
...
@@ -194,7 +194,8 @@ def adam_step(inputs, attributes):
return
param_out
,
moment1_out
,
moment2_out
return
param_out
,
moment1_out
,
moment2_out
def
adam_step_sparse
(
inputs
,
attributes
,
height
,
rows
,
row_numel
,
np_grad
):
def
adam_step_sparse
(
inputs
,
attributes
,
height
,
rows
,
row_numel
,
np_grad
,
lazy_mode
):
'''
'''
Simulate one step of the adam optimizer
Simulate one step of the adam optimizer
:param inputs: dict of inputs
:param inputs: dict of inputs
...
@@ -218,19 +219,30 @@ def adam_step_sparse(inputs, attributes, height, rows, row_numel, np_grad):
...
@@ -218,19 +219,30 @@ def adam_step_sparse(inputs, attributes, height, rows, row_numel, np_grad):
moment2_out
=
np
.
zeros
(
shape
=
[
height
,
row_numel
])
moment2_out
=
np
.
zeros
(
shape
=
[
height
,
row_numel
])
param_out
=
np
.
zeros
(
shape
=
[
height
,
row_numel
])
param_out
=
np
.
zeros
(
shape
=
[
height
,
row_numel
])
for
idx
,
row_id
in
enumerate
(
rows
):
def
update_row
(
row_id
,
update_value
):
moment1_out
[
row_id
]
=
beta1
*
moment1
[
row_id
]
+
(
1
-
beta1
moment1_out
[
row_id
]
=
beta1
*
moment1
[
row_id
]
+
(
1
-
beta1
)
*
np_grad
[
idx
]
)
*
update_value
moment2_out
[
row_id
]
=
beta2
*
moment2
[
row_id
]
+
(
moment2_out
[
row_id
]
=
beta2
*
moment2
[
row_id
]
+
(
1
-
beta2
)
*
np
.
square
(
np_grad
[
idx
]
)
1
-
beta2
)
*
np
.
square
(
update_value
)
lr_t
=
lr
*
np
.
sqrt
(
1
-
beta2_pow
)
/
(
1
-
beta1_pow
)
lr_t
=
lr
*
np
.
sqrt
(
1
-
beta2_pow
)
/
(
1
-
beta1_pow
)
param_out
[
row_id
]
=
param
[
row_id
]
-
lr_t
*
(
moment1_out
[
row_id
]
/
(
param_out
[
row_id
]
=
param
[
row_id
]
-
lr_t
*
(
moment1_out
[
row_id
]
/
(
np
.
sqrt
(
moment2_out
[
row_id
])
+
epsilon
))
np
.
sqrt
(
moment2_out
[
row_id
])
+
epsilon
))
if
lazy_mode
:
for
idx
,
row_id
in
enumerate
(
rows
):
update_row
(
row_id
,
np_grad
[
idx
])
else
:
for
row_id
in
range
(
param_out
.
shape
[
0
]):
update_value
=
np
.
zeros
(
np_grad
[
0
].
shape
).
astype
(
"float32"
)
if
row_id
in
rows
:
update_value
=
np_grad
[
rows
.
index
(
row_id
)]
update_row
(
row_id
,
update_value
)
return
param_out
,
moment1_out
,
moment2_out
return
param_out
,
moment1_out
,
moment2_out
class
TestSparseAdamOp
(
unittest
.
TestCase
):
class
TestSparseAdamOp
(
unittest
.
TestCase
):
def
setup
(
self
,
scope
,
place
):
def
setup
(
self
,
scope
,
place
,
lazy_mode
):
beta1
=
0.78
beta1
=
0.78
beta2
=
0.836
beta2
=
0.836
epsilon
=
1e-4
epsilon
=
1e-4
...
@@ -248,6 +260,7 @@ class TestSparseAdamOp(unittest.TestCase):
...
@@ -248,6 +260,7 @@ class TestSparseAdamOp(unittest.TestCase):
'Beta2Pow'
:
np
.
array
([
beta2
**
10
]).
astype
(
"float32"
),
'Beta2Pow'
:
np
.
array
([
beta2
**
10
]).
astype
(
"float32"
),
"LearningRate"
:
np
.
full
((
1
),
2.0
).
astype
(
"float32"
)
"LearningRate"
:
np
.
full
((
1
),
2.0
).
astype
(
"float32"
)
}
}
self
.
init_output
=
np
.
full
((
height
,
row_numel
),
0.0
).
astype
(
"float32"
)
self
.
attrs
=
{
'epsilon'
:
epsilon
,
'beta1'
:
beta1
,
'beta2'
:
beta2
}
self
.
attrs
=
{
'epsilon'
:
epsilon
,
'beta1'
:
beta1
,
'beta2'
:
beta2
}
grad_selected_rows
=
scope
.
var
(
'Grad'
).
get_selected_rows
()
grad_selected_rows
=
scope
.
var
(
'Grad'
).
get_selected_rows
()
...
@@ -262,19 +275,21 @@ class TestSparseAdamOp(unittest.TestCase):
...
@@ -262,19 +275,21 @@ class TestSparseAdamOp(unittest.TestCase):
self
.
sparse_inputs
=
[
"Grad"
]
self
.
sparse_inputs
=
[
"Grad"
]
param_out
,
mom1
,
mom2
=
adam_step_sparse
(
param_out
,
mom1
,
mom2
=
adam_step_sparse
(
self
.
dense_inputs
,
self
.
attrs
,
self
.
dense_inputs
,
self
.
attrs
,
height
,
rows
,
row_numel
,
np_array
)
height
,
rows
,
row_numel
,
np_array
,
lazy_mode
)
self
.
outputs
=
{
self
.
outputs
=
{
"ParamOut"
:
param_out
,
"ParamOut"
:
param_out
,
"Moment1Out"
:
mom1
,
"Moment1Out"
:
mom1
,
"Moment2Out"
:
mom2
"Moment2Out"
:
mom2
}
}
def
check_with_place
(
self
,
place
):
def
check_with_place
(
self
,
place
,
lazy_mode
):
scope
=
core
.
Scope
()
scope
=
core
.
Scope
()
self
.
setup
(
scope
,
place
)
self
.
setup
(
scope
,
place
,
lazy_mode
)
op_args
=
dict
()
op_args
=
dict
()
op_args
[
'lazy_mode'
]
=
lazy_mode
for
key
,
np_array
in
self
.
dense_inputs
.
items
():
for
key
,
np_array
in
self
.
dense_inputs
.
items
():
var
=
scope
.
var
(
key
).
get_tensor
()
var
=
scope
.
var
(
key
).
get_tensor
()
var
.
set
(
np_array
,
place
)
var
.
set
(
np_array
,
place
)
...
@@ -283,7 +298,7 @@ class TestSparseAdamOp(unittest.TestCase):
...
@@ -283,7 +298,7 @@ class TestSparseAdamOp(unittest.TestCase):
op_args
[
s
]
=
s
op_args
[
s
]
=
s
for
s
in
self
.
outputs
:
for
s
in
self
.
outputs
:
var
=
scope
.
var
(
s
).
get_tensor
()
var
=
scope
.
var
(
s
).
get_tensor
()
var
.
set
(
self
.
outputs
[
s
]
,
place
)
var
.
set
(
self
.
init_output
,
place
)
op_args
[
s
]
=
s
op_args
[
s
]
=
s
for
k
in
self
.
attrs
:
for
k
in
self
.
attrs
:
op_args
[
k
]
=
self
.
attrs
[
k
]
op_args
[
k
]
=
self
.
attrs
[
k
]
...
@@ -297,20 +312,17 @@ class TestSparseAdamOp(unittest.TestCase):
...
@@ -297,20 +312,17 @@ class TestSparseAdamOp(unittest.TestCase):
actual
=
np
.
array
(
out_var
)
actual
=
np
.
array
(
out_var
)
actual
=
actual
.
reshape
([
actual
.
size
])
actual
=
actual
.
reshape
([
actual
.
size
])
np_array
=
np_array
.
reshape
([
np_array
.
size
])
np_array
=
np_array
.
reshape
([
np_array
.
size
])
for
idx
,
row_id
in
enumerate
(
self
.
rows
):
j
=
0
for
i
in
range
(
np_array
.
size
):
while
j
<
self
.
row_numel
:
self
.
assertLess
((
actual
[
i
]
-
np_array
[
i
]),
0.00001
)
pos
=
row_id
*
self
.
row_numel
+
j
self
.
assertLess
((
actual
[
pos
]
-
np_array
[
pos
])
/
actual
[
pos
],
def
test_sparse_adam
(
self
):
0.00001
)
j
+=
1
def
test_sparse_sgd
(
self
):
places
=
[
core
.
CPUPlace
()]
places
=
[
core
.
CPUPlace
()]
if
core
.
is_compiled_with_cuda
():
if
core
.
is_compiled_with_cuda
():
places
.
append
(
core
.
CUDAPlace
(
0
))
places
.
append
(
core
.
CUDAPlace
(
0
))
for
place
in
places
:
for
place
in
places
:
self
.
check_with_place
(
place
)
for
lazy_mode
in
(
True
,
False
):
self
.
check_with_place
(
place
,
lazy_mode
)
if
__name__
==
"__main__"
:
if
__name__
==
"__main__"
:
...
...
python/paddle/fluid/tests/unittests/test_conv2d_mkldnn_op.py
浏览文件 @
1c116462
...
@@ -16,7 +16,7 @@ from __future__ import print_function
...
@@ -16,7 +16,7 @@ from __future__ import print_function
import
unittest
import
unittest
from
test_conv2d_op
import
TestConv2dOp
,
TestWithPad
,
TestWithStride
from
test_conv2d_op
import
TestConv2dOp
,
TestWithPad
,
TestWithStride
,
TestWithGroup
,
TestWith1x1
,
TestWithInput1x1Filter1x1
class
TestMKLDNN
(
TestConv2dOp
):
class
TestMKLDNN
(
TestConv2dOp
):
...
@@ -37,5 +37,23 @@ class TestMKLDNNWithStride(TestWithStride):
...
@@ -37,5 +37,23 @@ class TestMKLDNNWithStride(TestWithStride):
self
.
data_format
=
"NCHW"
self
.
data_format
=
"NCHW"
class
TestMKLDNNWithGroup
(
TestWithGroup
):
def
init_kernel_type
(
self
):
self
.
use_mkldnn
=
True
self
.
data_format
=
"NCHW"
class
TestMKLDNNWith1x1
(
TestWith1x1
):
def
init_kernel_type
(
self
):
self
.
use_mkldnn
=
True
self
.
data_format
=
"NCHW"
class
TestMKLDNNWithInput1x1Filter1x1
(
TestWithInput1x1Filter1x1
):
def
init_kernel_type
(
self
):
self
.
use_mkldnn
=
True
self
.
data_format
=
"NCHW"
if
__name__
==
'__main__'
:
if
__name__
==
'__main__'
:
unittest
.
main
()
unittest
.
main
()
python/paddle/fluid/tests/unittests/test_get_tensor_from_selected_rows_op.py
浏览文件 @
1c116462
...
@@ -29,7 +29,7 @@ class TestGetTensorFromSelectedRows(unittest.TestCase):
...
@@ -29,7 +29,7 @@ class TestGetTensorFromSelectedRows(unittest.TestCase):
def
check_with_place
(
self
,
place
):
def
check_with_place
(
self
,
place
):
scope
=
core
.
Scope
()
scope
=
core
.
Scope
()
x_rows
=
[
0
,
5
,
5
,
4
,
20
]
x_rows
=
[
0
,
5
,
5
,
4
,
19
]
height
=
20
height
=
20
row_numel
=
2
row_numel
=
2
...
...
python/paddle/fluid/tests/unittests/test_imperative.py
0 → 100644
浏览文件 @
1c116462
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import
contextlib
import
unittest
import
numpy
as
np
import
paddle.fluid
as
fluid
from
paddle.fluid
import
core
from
paddle.fluid.layers.nn
import
FC
@
contextlib
.
contextmanager
def
new_program_scope
():
prog
=
fluid
.
Program
()
startup_prog
=
fluid
.
Program
()
scope
=
fluid
.
core
.
Scope
()
with
fluid
.
scope_guard
(
scope
):
with
fluid
.
program_guard
(
prog
,
startup_prog
):
yield
class
MyLayer
(
fluid
.
imperative
.
PyLayer
):
def
__init__
(
self
):
super
(
MyLayer
,
self
).
__init__
()
def
forward
(
self
,
inputs
):
x
=
fluid
.
layers
.
relu
(
inputs
[
0
])
self
.
_x_for_debug
=
x
return
[
fluid
.
layers
.
elementwise_mul
(
x
,
x
)]
class
MLP
(
fluid
.
imperative
.
PyLayer
):
def
__init__
(
self
):
super
(
MLP
,
self
).
__init__
()
self
.
_fc1
=
FC
(
3
,
fluid
.
ParamAttr
(
initializer
=
fluid
.
initializer
.
Constant
(
value
=
0.1
)))
self
.
_fc2
=
FC
(
4
,
fluid
.
ParamAttr
(
initializer
=
fluid
.
initializer
.
Constant
(
value
=
0.1
)))
def
forward
(
self
,
inputs
):
x
=
self
.
_fc1
(
inputs
[
0
])
x
=
self
.
_fc2
(
x
)
x
=
fluid
.
layers
.
reduce_sum
(
x
)
return
x
class
TestImperative
(
unittest
.
TestCase
):
def
test_layer
(
self
):
with
fluid
.
imperative
.
guard
():
cl
=
core
.
Layer
()
cl
.
forward
([])
l
=
fluid
.
imperative
.
PyLayer
()
l
.
forward
([])
def
test_layer_in_out
(
self
):
np_inp
=
np
.
array
([
1.0
,
2.0
,
-
1.0
],
dtype
=
np
.
float32
)
with
fluid
.
imperative
.
guard
():
l
=
MyLayer
()
x
=
l
(
np_inp
)[
0
]
self
.
assertIsNotNone
(
x
)
dy_out
=
x
.
_numpy
()
x
.
_backward
()
dy_grad
=
l
.
_x_for_debug
.
_gradient
()
with
new_program_scope
():
inp
=
fluid
.
layers
.
data
(
name
=
"inp"
,
shape
=
[
3
],
append_batch_size
=
False
)
l
=
MyLayer
()
x
=
l
(
inp
)[
0
]
param_grads
=
fluid
.
backward
.
append_backward
(
x
,
parameter_list
=
[
l
.
_x_for_debug
.
name
])[
0
]
exe
=
fluid
.
Executor
(
fluid
.
CPUPlace
())
static_out
,
static_grad
=
exe
.
run
(
feed
=
{
inp
.
name
:
np_inp
},
fetch_list
=
[
x
.
name
,
param_grads
[
1
].
name
])
self
.
assertTrue
(
np
.
allclose
(
dy_out
,
static_out
))
self
.
assertTrue
(
np
.
allclose
(
dy_grad
,
static_grad
))
def
test_mlp
(
self
):
np_inp
=
np
.
array
([[
1.0
,
2.0
],
[
3.0
,
4.0
]],
dtype
=
np
.
float32
)
with
fluid
.
imperative
.
guard
():
mlp
=
MLP
()
out
=
mlp
(
np_inp
)
dy_out
=
out
.
_numpy
()
out
.
_backward
()
dy_grad
=
mlp
.
_fc1
.
_w
.
_gradient
()
with
new_program_scope
():
inp
=
fluid
.
layers
.
data
(
name
=
"inp"
,
shape
=
[
2
,
2
],
append_batch_size
=
False
)
mlp
=
MLP
()
out
=
mlp
(
inp
)
param_grads
=
fluid
.
backward
.
append_backward
(
out
,
parameter_list
=
[
mlp
.
_fc1
.
_w
.
name
])[
0
]
exe
=
fluid
.
Executor
(
fluid
.
CPUPlace
())
exe
.
run
(
fluid
.
default_startup_program
())
static_out
,
static_grad
=
exe
.
run
(
feed
=
{
inp
.
name
:
np_inp
},
fetch_list
=
[
out
.
name
,
param_grads
[
1
].
name
])
self
.
assertTrue
(
np
.
allclose
(
dy_out
,
static_out
))
self
.
assertTrue
(
np
.
allclose
(
dy_grad
,
static_grad
))
if
__name__
==
'__main__'
:
unittest
.
main
()
python/paddle/fluid/tests/unittests/test_merge_selectedrows_op.py
浏览文件 @
1c116462
...
@@ -29,8 +29,8 @@ class TestMergeSelectedRows(unittest.TestCase):
...
@@ -29,8 +29,8 @@ class TestMergeSelectedRows(unittest.TestCase):
def
check_with_place
(
self
,
place
):
def
check_with_place
(
self
,
place
):
scope
=
core
.
Scope
()
scope
=
core
.
Scope
()
x_rows
=
[
0
,
5
,
5
,
4
,
20
]
x_rows
=
[
0
,
5
,
5
,
4
,
19
]
out_rows
=
[
0
,
4
,
5
,
20
]
out_rows
=
[
0
,
4
,
5
,
19
]
height
=
20
height
=
20
row_numel
=
2
row_numel
=
2
...
...
python/paddle/fluid/tests/unittests/test_parallel_op.py
已删除
100644 → 0
浏览文件 @
29697c2e
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from
__future__
import
print_function
import
unittest
import
paddle.fluid
as
fluid
from
paddle.fluid.layers.device
import
get_places
from
paddle.fluid.layers.control_flow
import
ParallelDo
import
paddle.fluid.profiler
as
profiler
import
numpy
import
six
class
BaseParallelForTest
(
unittest
.
TestCase
):
def
run_test
(
self
,
callback
,
feed
,
fetch
):
"""
Run the unittest for parallel.for
Args:
callback(callable): A callable function returns a generator. There
are two yields in the generator function. The first yield
returns the data layers, and the second yield returns the loss.
The modified data variables will be sent back during the first
yield.
feed(dict): The executor feeding dictionary.
fetch(list|basestr): The fetch name lists.
Returns:
None
Raises:
AssertionError when the computation of cpu, parallel.for in cpu,
gpu, parallel.for in gpu are different.
"""
cpu
=
fluid
.
CPUPlace
()
result_cpu
=
self
.
_run_test_impl_
(
callback
=
callback
,
feed
=
feed
,
fetch
=
fetch
,
place
=
cpu
,
use_parallel
=
False
)
result_cpu_parallel
=
self
.
_run_test_impl_
(
callback
=
callback
,
feed
=
feed
,
fetch
=
fetch
,
place
=
cpu
,
use_parallel
=
True
)
if
fluid
.
core
.
is_compiled_with_cuda
():
gpu
=
fluid
.
CUDAPlace
(
0
)
result_gpu
=
self
.
_run_test_impl_
(
callback
=
callback
,
feed
=
feed
,
fetch
=
fetch
,
place
=
gpu
,
use_parallel
=
False
,
use_gpu
=
True
)
result_gpu_parallel
=
self
.
_run_test_impl_
(
callback
=
callback
,
feed
=
feed
,
fetch
=
fetch
,
place
=
gpu
,
use_parallel
=
True
,
use_gpu
=
True
)
result_gpu_nccl
=
self
.
_run_test_impl_
(
callback
=
callback
,
feed
=
feed
,
fetch
=
fetch
,
place
=
gpu
,
use_parallel
=
True
,
use_nccl
=
True
,
use_gpu
=
True
)
self
.
_assert_same_
(
fetch
,
result_cpu
,
result_cpu_parallel
,
result_gpu
,
result_gpu_parallel
,
result_gpu_nccl
)
else
:
self
.
_assert_same_
(
fetch
,
result_cpu
,
result_cpu_parallel
)
def
_run_test_impl_
(
self
,
callback
,
feed
,
fetch
,
place
,
use_parallel
=
False
,
use_nccl
=
False
,
use_gpu
=
False
):
"""
Run a single test, returns the fetch values
Args:
place(Place): the computation place.
use_parallel(bool): Whether use parallel.for or not.
Returns:
Fetched numpy arrays.
"""
if
isinstance
(
fetch
,
six
.
string_types
):
fetch
=
[
fetch
]
main
=
fluid
.
Program
()
startup
=
fluid
.
Program
()
# Fix seed
main
.
random_seed
=
10
startup
.
random_seed
=
10
with
fluid
.
program_guard
(
main
,
startup
):
generator
=
callback
()
# Automatically insert parallel do if use_parallel = True
if
use_parallel
:
thread_num
=
fluid
.
core
.
get_cuda_device_count
(
)
if
use_gpu
else
8
places
=
get_places
(
thread_num
)
pd
=
ParallelDo
(
places
,
use_nccl
=
use_nccl
)
data
=
next
(
generator
)
if
isinstance
(
data
,
fluid
.
framework
.
Variable
):
data
=
[
data
]
with
pd
.
do
():
ins
=
list
(
map
(
pd
.
read_input
,
data
))
if
len
(
ins
)
==
1
:
ins
=
ins
[
0
]
loss
=
generator
.
send
(
ins
)
# patch input
pd
.
write_output
(
loss
)
loss
=
pd
()
else
:
data
=
next
(
generator
)
loss
=
generator
.
send
(
data
)
self
.
assertIsNotNone
(
loss
)
avg_loss
=
fluid
.
layers
.
mean
(
loss
)
fluid
.
backward
.
append_backward
(
loss
=
avg_loss
)
exe
=
fluid
.
Executor
(
place
)
exe
.
run
(
startup
)
if
use_gpu
:
profile_type
=
'GPU'
else
:
profile_type
=
'CPU'
with
profiler
.
profiler
(
profile_type
,
'total'
,
'/tmp/profiler'
):
return
exe
.
run
(
main
,
feed
=
feed
,
fetch_list
=
fetch
)
def
_assert_same_
(
self
,
fetch
,
*
args
):
"""
Assert the return values of `run_test` are same.
Args:
fetch: Fetch list. Used for print error message
*args: The fetch result lists of each situations.
Returns:
None
Raises:
AssertionError
"""
def
_impl_
(
a
,
b
,
fetch_id
,
item_id
):
item_str
=
[
'CPU'
,
'ParallelCPU'
,
'GPU'
,
'ParallelGPU'
,
'ParallelGPUNCCL'
]
flag
=
numpy
.
allclose
(
a
,
b
,
rtol
=
0.1
,
atol
=
1e-3
)
self
.
assertTrue
(
flag
,
"The {0} are different in {1}, {2} vs {3}"
.
format
(
fetch
[
fetch_id
],
item_str
[
item_id
],
a
,
b
))
for
i
,
items
in
enumerate
(
zip
(
*
args
)):
self
.
assertGreater
(
len
(
items
),
0
)
for
j
in
range
(
1
,
len
(
items
)):
_impl_
(
items
[
0
],
items
[
j
],
fetch_id
=
i
,
item_id
=
j
)
class
ParallelOpTest
(
BaseParallelForTest
):
@
staticmethod
def
__network__
():
x
=
fluid
.
layers
.
data
(
shape
=
[
784
],
dtype
=
'float32'
,
name
=
'img'
)
x
=
yield
x
hidden
=
fluid
.
layers
.
fc
(
input
=
x
,
size
=
200
,
param_attr
=
'fc1.w'
)
hidden
=
fluid
.
layers
.
batch_norm
(
input
=
hidden
)
loss
=
fluid
.
layers
.
mean
(
hidden
)
yield
loss
def
test_simple_fc
(
self
):
self
.
run_test
(
callback
=
self
.
__network__
,
feed
=
{
'img'
:
numpy
.
random
.
random
(
size
=
(
51
,
784
)).
astype
(
'float32'
)
},
fetch
=
[
'fc1.w@GRAD'
])
def
test_fc_with_tiny_data
(
self
):
self
.
run_test
(
callback
=
self
.
__network__
,
feed
=
{
'img'
:
numpy
.
random
.
random
(
size
=
(
1
,
784
)).
astype
(
'float32'
)},
fetch
=
[
'fc1.w@GRAD'
])
class
ParallelOpTestMultipleInput
(
BaseParallelForTest
):
@
staticmethod
def
__network__
():
x
=
fluid
.
layers
.
data
(
shape
=
[
784
],
dtype
=
'float32'
,
name
=
'img1'
,
stop_gradient
=
False
)
y
=
fluid
.
layers
.
data
(
shape
=
[
784
],
dtype
=
'float32'
,
name
=
'img2'
,
stop_gradient
=
False
)
yield
[
x
,
y
]
x
=
x
+
y
hidden1
=
fluid
.
layers
.
fc
(
input
=
x
,
size
=
200
,
param_attr
=
'fc1.w'
)
hidden2
=
fluid
.
layers
.
fc
(
input
=
hidden1
,
size
=
200
,
param_attr
=
'fc2.w'
)
hidden3
=
fluid
.
layers
.
fc
(
input
=
hidden2
,
size
=
200
,
param_attr
=
'fc3.w'
)
loss
=
fluid
.
layers
.
mean
(
hidden3
)
yield
loss
def
test_simple_fc
(
self
):
self
.
run_test
(
callback
=
self
.
__network__
,
feed
=
{
'img1'
:
numpy
.
random
.
random
(
size
=
(
51
,
784
)).
astype
(
'float32'
),
'img2'
:
numpy
.
random
.
random
(
size
=
(
51
,
784
)).
astype
(
'float32'
)
},
fetch
=
[
'fc1.w@GRAD'
,
'fc2.w@GRAD'
,
'fc3.w@GRAD'
])
if
__name__
==
'__main__'
:
unittest
.
main
()
python/paddle/fluid/tests/unittests/test_transpose_mkldnn_op.py
0 → 100644
浏览文件 @
1c116462
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from
__future__
import
print_function
import
unittest
from
test_transpose_op
import
TestTransposeOp
class
TestTransposeMKLDNN
(
TestTransposeOp
):
def
init_op_type
(
self
):
self
.
op_type
=
"transpose2"
self
.
use_mkldnn
=
True
self
.
is_test
=
True
return
def
test_check_grad
(
self
):
return
def
test_check_grad_no_input
(
self
):
return
def
test_check_grad_no_filter
(
self
):
return
class
TestCase0MKLDNN
(
TestTransposeMKLDNN
):
def
initTestCase
(
self
):
self
.
shape
=
(
3
,
)
self
.
axis
=
(
0
,
)
class
TestCase1a
(
TestTransposeMKLDNN
):
def
initTestCase
(
self
):
self
.
shape
=
(
3
,
4
,
5
)
self
.
axis
=
(
0
,
2
,
1
)
class
TestCase1b
(
TestTransposeMKLDNN
):
def
initTestCase
(
self
):
self
.
shape
=
(
3
,
4
,
5
)
self
.
axis
=
(
2
,
1
,
0
)
class
TestCase2
(
TestTransposeMKLDNN
):
def
initTestCase
(
self
):
self
.
shape
=
(
2
,
3
,
4
,
5
)
self
.
axis
=
(
0
,
2
,
3
,
1
)
class
TestCase3
(
TestTransposeMKLDNN
):
def
initTestCase
(
self
):
self
.
shape
=
(
2
,
3
,
4
,
5
,
6
)
self
.
axis
=
(
4
,
2
,
3
,
1
,
0
)
class
TestCase4
(
TestTransposeMKLDNN
):
def
initTestCase
(
self
):
self
.
shape
=
(
2
,
3
,
4
,
5
,
6
,
1
)
self
.
axis
=
(
4
,
2
,
3
,
1
,
0
,
5
)
if
__name__
==
'__main__'
:
unittest
.
main
()
python/paddle/fluid/tests/unittests/test_transpose_op.py
浏览文件 @
1c116462
...
@@ -21,15 +21,24 @@ from op_test import OpTest
...
@@ -21,15 +21,24 @@ from op_test import OpTest
class
TestTransposeOp
(
OpTest
):
class
TestTransposeOp
(
OpTest
):
def
setUp
(
self
):
def
setUp
(
self
):
self
.
init_op_type
()
self
.
initTestCase
()
self
.
initTestCase
()
self
.
op_type
=
"transpose2"
self
.
inputs
=
{
'X'
:
np
.
random
.
random
(
self
.
shape
).
astype
(
"float32"
)}
self
.
inputs
=
{
'X'
:
np
.
random
.
random
(
self
.
shape
).
astype
(
"float32"
)}
self
.
attrs
=
{
'axis'
:
list
(
self
.
axis
)}
self
.
attrs
=
{
'axis'
:
list
(
self
.
axis
),
'use_mkldnn'
:
self
.
use_mkldnn
,
'is_test'
:
self
.
is_test
,
}
self
.
outputs
=
{
self
.
outputs
=
{
'XShape'
:
np
.
random
.
random
(
self
.
shape
).
astype
(
"float32"
),
'XShape'
:
np
.
random
.
random
(
self
.
shape
).
astype
(
"float32"
),
'Out'
:
self
.
inputs
[
'X'
].
transpose
(
self
.
axis
)
'Out'
:
self
.
inputs
[
'X'
].
transpose
(
self
.
axis
)
}
}
def
init_op_type
(
self
):
self
.
op_type
=
"transpose2"
self
.
use_mkldnn
=
False
self
.
is_test
=
False
def
test_check_output
(
self
):
def
test_check_output
(
self
):
self
.
check_output
(
no_check_set
=
[
'XShape'
])
self
.
check_output
(
no_check_set
=
[
'XShape'
])
...
...
python/paddle/fluid/transpiler/memory_optimization_transpiler.py
浏览文件 @
1c116462
...
@@ -35,11 +35,10 @@ dtype_to_size = {
...
@@ -35,11 +35,10 @@ dtype_to_size = {
}
}
SUB_BLOCK_OPS
=
[
SUB_BLOCK_OPS
=
[
"while"
,
"while_grad"
,
"parallel_do"
,
"parallel_do_grad"
,
"while"
,
"while_grad"
,
"conditional_block"
,
"conditional_block_grad"
"conditional_block"
,
"conditional_block_grad"
]
]
SUB_BLOCK_PAIR
=
[(
"while"
,
"while_grad"
),
(
"parallel_do"
,
"parallel_do_grad"
),
SUB_BLOCK_PAIR
=
[(
"while"
,
"while_grad"
),
(
"conditional_block"
,
"conditional_block_grad"
)]
(
"conditional_block"
,
"conditional_block_grad"
)]
PRINT_LOG
=
False
PRINT_LOG
=
False
...
...
python/setup.py.in
浏览文件 @
1c116462
...
@@ -107,9 +107,9 @@ packages=['paddle',
...
@@ -107,9 +107,9 @@ packages=['paddle',
'paddle.fluid.distributed',
'paddle.fluid.distributed',
'paddle.fluid.layers',
'paddle.fluid.layers',
'paddle.fluid.contrib',
'paddle.fluid.contrib',
'paddle.fluid.contrib.utils',
'paddle.fluid.contrib.decoder',
'paddle.fluid.contrib.decoder',
'paddle.fluid.contrib.quantize',
'paddle.fluid.contrib.quantize',
'paddle.fluid.contrib.utils',
'paddle.fluid.transpiler',
'paddle.fluid.transpiler',
'paddle.fluid.transpiler.details']
'paddle.fluid.transpiler.details']
...
@@ -160,10 +160,11 @@ if '${WITH_FLUID_ONLY}'== 'OFF':
...
@@ -160,10 +160,11 @@ if '${WITH_FLUID_ONLY}'== 'OFF':
# put all thirdparty libraries in paddle.libs
# put all thirdparty libraries in paddle.libs
libs_path='${PADDLE_BINARY_DIR}/python/paddle/libs'
libs_path='${PADDLE_BINARY_DIR}/python/paddle/libs'
if os.name != 'nt':
package_data['paddle.libs']= []
package_data['paddle.libs']= []
package_data['paddle.libs']=['libwarpctc' + ext_name]
package_data['paddle.libs']=[('libwarpctc' if os.name != 'nt' else 'warpctc') + ext_name]
shutil.copy('${WARPCTC_LIBRARIES}', libs_path)
shutil.copy('${WARPCTC_LIBRARIES}', libs_path)
if '${WITH_MKL}' == 'ON':
if '${WITH_MKL}' == 'ON':
shutil.copy('${MKLML_LIB}', libs_path)
shutil.copy('${MKLML_LIB}', libs_path)
shutil.copy('${MKLML_IOMP_LIB}', libs_path)
shutil.copy('${MKLML_IOMP_LIB}', libs_path)
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录