Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
Crayon鑫
Paddle
提交
e0a89503
P
Paddle
项目概览
Crayon鑫
/
Paddle
与 Fork 源项目一致
Fork自
PaddlePaddle / Paddle
通知
1
Star
1
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
1
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
1
Issue
1
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
e0a89503
编写于
10月 29, 2018
作者:
J
JiabinYang
浏览文件
操作
浏览文件
下载
差异文件
test=develop
上级
0e303868
79da263b
变更
84
隐藏空白更改
内联
并排
Showing
84 changed file
with
2559 addition
and
355 deletion
+2559
-355
CMakeLists.txt
CMakeLists.txt
+7
-0
benchmark/fluid/args.py
benchmark/fluid/args.py
+5
-0
benchmark/fluid/fluid_benchmark.py
benchmark/fluid/fluid_benchmark.py
+1
-1
cmake/external/xxhash.cmake
cmake/external/xxhash.cmake
+46
-0
cmake/inference_lib.cmake
cmake/inference_lib.cmake
+12
-2
paddle/fluid/API.spec
paddle/fluid/API.spec
+4
-0
paddle/fluid/framework/details/CMakeLists.txt
paddle/fluid/framework/details/CMakeLists.txt
+4
-2
paddle/fluid/framework/details/broadcast_op_handle.cc
paddle/fluid/framework/details/broadcast_op_handle.cc
+14
-7
paddle/fluid/framework/details/broadcast_op_handle.h
paddle/fluid/framework/details/broadcast_op_handle.h
+4
-1
paddle/fluid/framework/details/build_strategy.cc
paddle/fluid/framework/details/build_strategy.cc
+1
-0
paddle/fluid/framework/details/build_strategy.h
paddle/fluid/framework/details/build_strategy.h
+2
-0
paddle/fluid/framework/details/fused_broadcast_op_handle.cc
paddle/fluid/framework/details/fused_broadcast_op_handle.cc
+55
-0
paddle/fluid/framework/details/fused_broadcast_op_handle.h
paddle/fluid/framework/details/fused_broadcast_op_handle.h
+57
-0
paddle/fluid/framework/details/multi_devices_graph_pass.cc
paddle/fluid/framework/details/multi_devices_graph_pass.cc
+52
-10
paddle/fluid/framework/details/multi_devices_graph_pass.h
paddle/fluid/framework/details/multi_devices_graph_pass.h
+6
-1
paddle/fluid/framework/ir/CMakeLists.txt
paddle/fluid/framework/ir/CMakeLists.txt
+1
-0
paddle/fluid/framework/ir/graph.cc
paddle/fluid/framework/ir/graph.cc
+12
-63
paddle/fluid/framework/ir/graph.h
paddle/fluid/framework/ir/graph.h
+6
-0
paddle/fluid/framework/ir/graph_helper.cc
paddle/fluid/framework/ir/graph_helper.cc
+12
-5
paddle/fluid/framework/ir/multi_batch_merge_pass.cc
paddle/fluid/framework/ir/multi_batch_merge_pass.cc
+315
-0
paddle/fluid/framework/ir/multi_batch_merge_pass.h
paddle/fluid/framework/ir/multi_batch_merge_pass.h
+44
-0
paddle/fluid/framework/ir/node.h
paddle/fluid/framework/ir/node.h
+2
-0
paddle/fluid/framework/lod_tensor_array.h
paddle/fluid/framework/lod_tensor_array.h
+77
-1
paddle/fluid/framework/op_desc.h
paddle/fluid/framework/op_desc.h
+0
-4
paddle/fluid/framework/op_proto_maker.h
paddle/fluid/framework/op_proto_maker.h
+3
-3
paddle/fluid/framework/parallel_executor.cc
paddle/fluid/framework/parallel_executor.cc
+19
-11
paddle/fluid/framework/scope.h
paddle/fluid/framework/scope.h
+2
-0
paddle/fluid/inference/CMakeLists.txt
paddle/fluid/inference/CMakeLists.txt
+2
-2
paddle/fluid/inference/analysis/analyzer.cc
paddle/fluid/inference/analysis/analyzer.cc
+3
-0
paddle/fluid/inference/analysis/analyzer.h
paddle/fluid/inference/analysis/analyzer.h
+0
-1
paddle/fluid/inference/api/CMakeLists.txt
paddle/fluid/inference/api/CMakeLists.txt
+15
-6
paddle/fluid/inference/api/analysis_predictor.cc
paddle/fluid/inference/api/analysis_predictor.cc
+8
-0
paddle/fluid/inference/api/analysis_predictor.h
paddle/fluid/inference/api/analysis_predictor.h
+2
-0
paddle/fluid/inference/api/api_impl.cc
paddle/fluid/inference/api/api_impl.cc
+5
-0
paddle/fluid/inference/api/api_impl.h
paddle/fluid/inference/api/api_impl.h
+3
-2
paddle/fluid/inference/api/demo_ci/CMakeLists.txt
paddle/fluid/inference/api/demo_ci/CMakeLists.txt
+7
-5
paddle/fluid/inference/api/demo_ci/run.sh
paddle/fluid/inference/api/demo_ci/run.sh
+10
-7
paddle/fluid/inference/api/details/reset_tensor_array.cc
paddle/fluid/inference/api/details/reset_tensor_array.cc
+50
-0
paddle/fluid/inference/api/details/reset_tensor_array.h
paddle/fluid/inference/api/details/reset_tensor_array.h
+37
-0
paddle/fluid/inference/api/helper.h
paddle/fluid/inference/api/helper.h
+2
-1
paddle/fluid/inference/api/paddle_inference_api.h
paddle/fluid/inference/api/paddle_inference_api.h
+1
-7
paddle/fluid/inference/tests/api/analyzer_rnn1_tester.cc
paddle/fluid/inference/tests/api/analyzer_rnn1_tester.cc
+1
-0
paddle/fluid/inference/tests/api/tester_helper.h
paddle/fluid/inference/tests/api/tester_helper.h
+3
-0
paddle/fluid/operators/CMakeLists.txt
paddle/fluid/operators/CMakeLists.txt
+1
-0
paddle/fluid/operators/beam_search_decode_op.cc
paddle/fluid/operators/beam_search_decode_op.cc
+3
-0
paddle/fluid/operators/hash_op.cc
paddle/fluid/operators/hash_op.cc
+74
-0
paddle/fluid/operators/hash_op.h
paddle/fluid/operators/hash_op.h
+56
-0
paddle/fluid/operators/lars_momentum_op.cc
paddle/fluid/operators/lars_momentum_op.cc
+86
-0
paddle/fluid/operators/lars_momentum_op.cu
paddle/fluid/operators/lars_momentum_op.cu
+94
-0
paddle/fluid/operators/lars_momentum_op.h
paddle/fluid/operators/lars_momentum_op.h
+72
-0
paddle/fluid/operators/lookup_table_op.cc
paddle/fluid/operators/lookup_table_op.cc
+6
-0
paddle/fluid/operators/lookup_table_op.h
paddle/fluid/operators/lookup_table_op.h
+31
-19
paddle/fluid/operators/math/algorithm.h
paddle/fluid/operators/math/algorithm.h
+46
-0
paddle/fluid/operators/momentum_op.cc
paddle/fluid/operators/momentum_op.cc
+0
-48
paddle/fluid/operators/momentum_op.h
paddle/fluid/operators/momentum_op.h
+48
-0
paddle/fluid/operators/sequence_reverse_op.cc
paddle/fluid/operators/sequence_reverse_op.cc
+29
-0
paddle/fluid/operators/sequence_reverse_op.cu
paddle/fluid/operators/sequence_reverse_op.cu
+25
-0
paddle/fluid/operators/sequence_reverse_op.h
paddle/fluid/operators/sequence_reverse_op.h
+157
-0
paddle/fluid/platform/device_context.cc
paddle/fluid/platform/device_context.cc
+50
-15
paddle/fluid/platform/device_context.h
paddle/fluid/platform/device_context.h
+8
-2
paddle/fluid/pybind/pybind.cc
paddle/fluid/pybind/pybind.cc
+7
-3
paddle/fluid/train/demo/CMakeLists.txt
paddle/fluid/train/demo/CMakeLists.txt
+3
-1
paddle/scripts/paddle_build.sh
paddle/scripts/paddle_build.sh
+7
-7
python/paddle/dataset/wmt16.py
python/paddle/dataset/wmt16.py
+1
-1
python/paddle/fluid/evaluator.py
python/paddle/fluid/evaluator.py
+1
-1
python/paddle/fluid/layers/learning_rate_scheduler.py
python/paddle/fluid/layers/learning_rate_scheduler.py
+14
-12
python/paddle/fluid/layers/nn.py
python/paddle/fluid/layers/nn.py
+79
-22
python/paddle/fluid/metrics.py
python/paddle/fluid/metrics.py
+183
-64
python/paddle/fluid/optimizer.py
python/paddle/fluid/optimizer.py
+89
-2
python/paddle/fluid/tests/unittests/dist_mnist.py
python/paddle/fluid/tests/unittests/dist_mnist.py
+1
-1
python/paddle/fluid/tests/unittests/dist_mnist_batch_merge.py
...on/paddle/fluid/tests/unittests/dist_mnist_batch_merge.py
+80
-0
python/paddle/fluid/tests/unittests/dist_mnist_lars.py
python/paddle/fluid/tests/unittests/dist_mnist_lars.py
+73
-0
python/paddle/fluid/tests/unittests/dist_transformer.py
python/paddle/fluid/tests/unittests/dist_transformer.py
+1
-0
python/paddle/fluid/tests/unittests/test_dist_base.py
python/paddle/fluid/tests/unittests/test_dist_base.py
+22
-5
python/paddle/fluid/tests/unittests/test_dist_ctr.py
python/paddle/fluid/tests/unittests/test_dist_ctr.py
+2
-3
python/paddle/fluid/tests/unittests/test_dist_mnist.py
python/paddle/fluid/tests/unittests/test_dist_mnist.py
+10
-2
python/paddle/fluid/tests/unittests/test_dist_mnist_batch_merge.py
...ddle/fluid/tests/unittests/test_dist_mnist_batch_merge.py
+67
-0
python/paddle/fluid/tests/unittests/test_dist_se_resnext.py
python/paddle/fluid/tests/unittests/test_dist_se_resnext.py
+1
-2
python/paddle/fluid/tests/unittests/test_dist_simnet_bow.py
python/paddle/fluid/tests/unittests/test_dist_simnet_bow.py
+1
-2
python/paddle/fluid/tests/unittests/test_hash_op.py
python/paddle/fluid/tests/unittests/test_hash_op.py
+57
-0
python/paddle/fluid/tests/unittests/test_metrics.py
python/paddle/fluid/tests/unittests/test_metrics.py
+49
-0
python/paddle/fluid/tests/unittests/test_momentum_op.py
python/paddle/fluid/tests/unittests/test_momentum_op.py
+39
-0
python/paddle/fluid/tests/unittests/test_sequence_reverse.py
python/paddle/fluid/tests/unittests/test_sequence_reverse.py
+69
-0
python/paddle/fluid/transpiler/distribute_transpiler.py
python/paddle/fluid/transpiler/distribute_transpiler.py
+5
-1
未找到文件。
CMakeLists.txt
浏览文件 @
e0a89503
...
@@ -69,6 +69,7 @@ option(WITH_ANAKIN "Compile with Anakin library" OFF)
...
@@ -69,6 +69,7 @@ option(WITH_ANAKIN "Compile with Anakin library" OFF)
option
(
WITH_GRPC
"Use grpc as the default rpc framework"
${
WITH_DISTRIBUTE
}
)
option
(
WITH_GRPC
"Use grpc as the default rpc framework"
${
WITH_DISTRIBUTE
}
)
option
(
WITH_BRPC_RDMA
"Use brpc rdma as the rpc protocal"
OFF
)
option
(
WITH_BRPC_RDMA
"Use brpc rdma as the rpc protocal"
OFF
)
option
(
WITH_INFERENCE
"Compile fluid inference library"
ON
)
option
(
WITH_INFERENCE
"Compile fluid inference library"
ON
)
option
(
ON_INFER
"Turn on inference optimization."
OFF
)
option
(
WITH_INFERENCE_API_TEST
"Test fluid inference high-level api interface"
OFF
)
option
(
WITH_INFERENCE_API_TEST
"Test fluid inference high-level api interface"
OFF
)
option
(
WITH_SYSTEM_BLAS
"Use system blas library"
OFF
)
option
(
WITH_SYSTEM_BLAS
"Use system blas library"
OFF
)
option
(
PY_VERSION
"Compile PaddlePaddle with python3 support"
${
PY_VERSION
}
)
option
(
PY_VERSION
"Compile PaddlePaddle with python3 support"
${
PY_VERSION
}
)
...
@@ -179,6 +180,7 @@ include(external/eigen) # download eigen3
...
@@ -179,6 +180,7 @@ include(external/eigen) # download eigen3
include
(
external/pybind11
)
# download pybind11
include
(
external/pybind11
)
# download pybind11
include
(
external/cares
)
include
(
external/cares
)
include
(
external/cub
)
include
(
external/cub
)
include
(
external/xxhash
)
# download xxhash
if
(
NOT WIN32
)
if
(
NOT WIN32
)
# there is no official support of snappystream, warpctc, nccl, cupti in windows
# there is no official support of snappystream, warpctc, nccl, cupti in windows
...
@@ -301,3 +303,8 @@ if(WITH_DOC)
...
@@ -301,3 +303,8 @@ if(WITH_DOC)
find_python_module
(
recommonmark REQUIRED
)
find_python_module
(
recommonmark REQUIRED
)
add_subdirectory
(
doc
)
add_subdirectory
(
doc
)
endif
()
endif
()
if
(
ON_INFER
)
message
(
WARNING
"On inference mode, will take place some specific optimization."
)
add_definitions
(
-DPADDLE_ON_INFERENCE
)
endif
()
benchmark/fluid/args.py
浏览文件 @
e0a89503
...
@@ -142,5 +142,10 @@ def parse_args():
...
@@ -142,5 +142,10 @@ def parse_args():
choices
=
[
'reduce'
,
'all_reduce'
],
choices
=
[
'reduce'
,
'all_reduce'
],
default
=
'all_reduce'
,
default
=
'all_reduce'
,
help
=
'Specify the reduce strategy, can be reduce, all_reduce'
)
help
=
'Specify the reduce strategy, can be reduce, all_reduce'
)
parser
.
add_argument
(
'--fuse_broadcast_op'
,
action
=
'store_true'
,
help
=
'If set, would fuse multiple broadcast operators into one fused_broadcast operator.'
)
args
=
parser
.
parse_args
()
args
=
parser
.
parse_args
()
return
args
return
args
benchmark/fluid/fluid_benchmark.py
浏览文件 @
e0a89503
...
@@ -177,6 +177,7 @@ def train_parallel(train_args, test_args, args, train_prog, test_prog,
...
@@ -177,6 +177,7 @@ def train_parallel(train_args, test_args, args, train_prog, test_prog,
else
:
else
:
build_strategy
.
reduce_strategy
=
fluid
.
BuildStrategy
(
build_strategy
.
reduce_strategy
=
fluid
.
BuildStrategy
(
).
ReduceStrategy
.
AllReduce
).
ReduceStrategy
.
AllReduce
build_strategy
.
fuse_broadcast_op
=
args
.
fuse_broadcast_op
avg_loss
=
train_args
[
0
]
avg_loss
=
train_args
[
0
]
...
@@ -240,7 +241,6 @@ def train_parallel(train_args, test_args, args, train_prog, test_prog,
...
@@ -240,7 +241,6 @@ def train_parallel(train_args, test_args, args, train_prog, test_prog,
if
args
.
use_fake_data
or
args
.
use_reader_op
:
if
args
.
use_fake_data
or
args
.
use_reader_op
:
try
:
try
:
fetch_ret
=
exe
.
run
(
fetch_list
)
fetch_ret
=
exe
.
run
(
fetch_list
)
except
fluid
.
core
.
EOFException
as
eof
:
except
fluid
.
core
.
EOFException
as
eof
:
break
break
...
...
cmake/external/xxhash.cmake
0 → 100644
浏览文件 @
e0a89503
INCLUDE
(
ExternalProject
)
set
(
XXHASH_SOURCE_DIR
${
THIRD_PARTY_PATH
}
/xxhash
)
set
(
XXHASH_INSTALL_DIR
${
THIRD_PARTY_PATH
}
/install/xxhash
)
set
(
XXHASH_INCLUDE_DIR
"
${
XXHASH_INSTALL_DIR
}
/include"
)
IF
(
WITH_STATIC_LIB
)
SET
(
BUILD_CMD make lib
)
ELSE
()
SET
(
BUILD_CMD sed -i
"s/-Wstrict-prototypes -Wundef/-Wstrict-prototypes -Wundef -fPIC/g"
${
XXHASH_SOURCE_DIR
}
/src/extern_xxhash/Makefile && make lib
)
ENDIF
()
ExternalProject_Add
(
extern_xxhash
${
EXTERNAL_PROJECT_LOG_ARGS
}
GIT_REPOSITORY
"https://github.com/Cyan4973/xxHash"
GIT_TAG
"v0.6.5"
PREFIX
${
XXHASH_SOURCE_DIR
}
DOWNLOAD_NAME
"xxhash"
UPDATE_COMMAND
""
CONFIGURE_COMMAND
""
BUILD_IN_SOURCE 1
PATCH_COMMAND
BUILD_COMMAND
${
BUILD_CMD
}
INSTALL_COMMAND export PREFIX=
${
XXHASH_INSTALL_DIR
}
/ && make install
TEST_COMMAND
""
)
set
(
XXHASH_LIBRARIES
"
${
XXHASH_INSTALL_DIR
}
/lib/libxxhash.a"
)
INCLUDE_DIRECTORIES
(
${
XXHASH_INCLUDE_DIR
}
)
add_library
(
xxhash STATIC IMPORTED GLOBAL
)
set_property
(
TARGET xxhash PROPERTY IMPORTED_LOCATION
${
XXHASH_LIBRARIES
}
)
include_directories
(
${
XXHASH_INCLUDE_DIR
}
)
add_dependencies
(
xxhash extern_xxhash
)
LIST
(
APPEND external_project_dependencies xxhash
)
IF
(
WITH_C_API
)
INSTALL
(
DIRECTORY
${
XXHASH_INCLUDE_DIR
}
DESTINATION third_party/xxhash
)
IF
(
ANDROID
)
INSTALL
(
FILES
${
XXHASH_LIBRARIES
}
DESTINATION third_party/xxhash/lib/
${
ANDROID_ABI
}
)
ELSE
()
INSTALL
(
FILES
${
XXHASH_LIBRARIES
}
DESTINATION third_party/xxhash/lib
)
ENDIF
()
ENDIF
()
cmake/inference_lib.cmake
浏览文件 @
e0a89503
...
@@ -14,6 +14,9 @@
...
@@ -14,6 +14,9 @@
# make package for paddle fluid shared and static library
# make package for paddle fluid shared and static library
function
(
copy TARGET
)
function
(
copy TARGET
)
if
(
NOT ON_INFER
)
message
(
WARNING
"Turn on the ON_INFER flag when building inference_lib only."
)
endif
()
set
(
options
""
)
set
(
options
""
)
set
(
oneValueArgs
""
)
set
(
oneValueArgs
""
)
set
(
multiValueArgs SRCS DSTS DEPS
)
set
(
multiValueArgs SRCS DSTS DEPS
)
...
@@ -31,7 +34,7 @@ function(copy TARGET)
...
@@ -31,7 +34,7 @@ function(copy TARGET)
foreach
(
index RANGE
${
len
}
)
foreach
(
index RANGE
${
len
}
)
list
(
GET copy_lib_SRCS
${
index
}
src
)
list
(
GET copy_lib_SRCS
${
index
}
src
)
list
(
GET copy_lib_DSTS
${
index
}
dst
)
list
(
GET copy_lib_DSTS
${
index
}
dst
)
add_custom_command
(
TARGET
${
TARGET
}
PRE_BUILD
add_custom_command
(
TARGET
${
TARGET
}
PRE_BUILD
COMMAND mkdir -p
"
${
dst
}
"
COMMAND mkdir -p
"
${
dst
}
"
COMMAND cp -r
"
${
src
}
"
"
${
dst
}
"
COMMAND cp -r
"
${
src
}
"
"
${
dst
}
"
COMMENT
"copying
${
src
}
->
${
dst
}
"
)
COMMENT
"copying
${
src
}
->
${
dst
}
"
)
...
@@ -67,6 +70,13 @@ copy(boost_lib
...
@@ -67,6 +70,13 @@ copy(boost_lib
DEPS boost
DEPS boost
)
)
set
(
dst_dir
"
${
FLUID_INSTALL_DIR
}
/third_party/install/xxhash"
)
copy
(
xxhash_lib
SRCS
${
XXHASH_INCLUDE_DIR
}
${
XXHASH_LIBRARIES
}
DSTS
${
dst_dir
}
${
dst_dir
}
/lib
DEPS xxhash
)
if
(
NOT PROTOBUF_FOUND
)
if
(
NOT PROTOBUF_FOUND
)
set
(
dst_dir
"
${
FLUID_INSTALL_DIR
}
/third_party/install/protobuf"
)
set
(
dst_dir
"
${
FLUID_INSTALL_DIR
}
/third_party/install/protobuf"
)
copy
(
protobuf_lib
copy
(
protobuf_lib
...
@@ -186,7 +196,7 @@ copy(cmake_cache
...
@@ -186,7 +196,7 @@ copy(cmake_cache
DSTS
${
FLUID_INSTALL_DIR
}
)
DSTS
${
FLUID_INSTALL_DIR
}
)
# This command generates a complete fluid library for both train and inference
# This command generates a complete fluid library for both train and inference
add_custom_target
(
fluid_lib_dist DEPENDS
${
fluid_lib_dist_dep
}
)
add_custom_target
(
fluid_lib_dist DEPENDS
${
fluid_lib_dist_dep
}
)
# Following commands generate a inference-only fluid library
# Following commands generate a inference-only fluid library
# third_party, version.txt and CMakeCache.txt are the same position with ${FLUID_INSTALL_DIR}
# third_party, version.txt and CMakeCache.txt are the same position with ${FLUID_INSTALL_DIR}
...
...
paddle/fluid/API.spec
浏览文件 @
e0a89503
...
@@ -175,7 +175,9 @@ paddle.fluid.layers.mul ArgSpec(args=['x', 'y', 'x_num_col_dims', 'y_num_col_dim
...
@@ -175,7 +175,9 @@ paddle.fluid.layers.mul ArgSpec(args=['x', 'y', 'x_num_col_dims', 'y_num_col_dim
paddle.fluid.layers.sigmoid_cross_entropy_with_logits ArgSpec(args=['x', 'label', 'name'], varargs=None, keywords=None, defaults=(None,))
paddle.fluid.layers.sigmoid_cross_entropy_with_logits ArgSpec(args=['x', 'label', 'name'], varargs=None, keywords=None, defaults=(None,))
paddle.fluid.layers.maxout ArgSpec(args=['x', 'groups', 'name'], varargs=None, keywords=None, defaults=(None,))
paddle.fluid.layers.maxout ArgSpec(args=['x', 'groups', 'name'], varargs=None, keywords=None, defaults=(None,))
paddle.fluid.layers.space_to_depth ArgSpec(args=['x', 'blocksize', 'name'], varargs=None, keywords=None, defaults=(None,))
paddle.fluid.layers.space_to_depth ArgSpec(args=['x', 'blocksize', 'name'], varargs=None, keywords=None, defaults=(None,))
paddle.fluid.layers.sequence_reverse ArgSpec(args=['x', 'name'], varargs=None, keywords=None, defaults=(None,))
paddle.fluid.layers.affine_channel ArgSpec(args=['x', 'scale', 'bias', 'data_layout', 'name'], varargs=None, keywords=None, defaults=(None, None, 'NCHW', None))
paddle.fluid.layers.affine_channel ArgSpec(args=['x', 'scale', 'bias', 'data_layout', 'name'], varargs=None, keywords=None, defaults=(None, None, 'NCHW', None))
paddle.fluid.layers.hash ArgSpec(args=['input', 'hash_size', 'num_hash', 'name'], varargs=None, keywords=None, defaults=(1, None))
paddle.fluid.layers.data ArgSpec(args=['name', 'shape', 'append_batch_size', 'dtype', 'lod_level', 'type', 'stop_gradient'], varargs=None, keywords=None, defaults=(True, 'float32', 0, VarType.LOD_TENSOR, True))
paddle.fluid.layers.data ArgSpec(args=['name', 'shape', 'append_batch_size', 'dtype', 'lod_level', 'type', 'stop_gradient'], varargs=None, keywords=None, defaults=(True, 'float32', 0, VarType.LOD_TENSOR, True))
paddle.fluid.layers.open_files ArgSpec(args=['filenames', 'shapes', 'lod_levels', 'dtypes', 'thread_num', 'buffer_size', 'pass_num', 'is_test'], varargs=None, keywords=None, defaults=(None, None, 1, None))
paddle.fluid.layers.open_files ArgSpec(args=['filenames', 'shapes', 'lod_levels', 'dtypes', 'thread_num', 'buffer_size', 'pass_num', 'is_test'], varargs=None, keywords=None, defaults=(None, None, 1, None))
paddle.fluid.layers.read_file ArgSpec(args=['reader'], varargs=None, keywords=None, defaults=None)
paddle.fluid.layers.read_file ArgSpec(args=['reader'], varargs=None, keywords=None, defaults=None)
...
@@ -354,6 +356,8 @@ paddle.fluid.optimizer.ModelAverage.__init__ ArgSpec(args=['self', 'average_wind
...
@@ -354,6 +356,8 @@ paddle.fluid.optimizer.ModelAverage.__init__ ArgSpec(args=['self', 'average_wind
paddle.fluid.optimizer.ModelAverage.apply ArgSpec(args=[], varargs='args', keywords='kwds', defaults=None)
paddle.fluid.optimizer.ModelAverage.apply ArgSpec(args=[], varargs='args', keywords='kwds', defaults=None)
paddle.fluid.optimizer.ModelAverage.minimize ArgSpec(args=['self', 'loss', 'startup_program', 'parameter_list', 'no_grad_set'], varargs=None, keywords=None, defaults=(None, None, None))
paddle.fluid.optimizer.ModelAverage.minimize ArgSpec(args=['self', 'loss', 'startup_program', 'parameter_list', 'no_grad_set'], varargs=None, keywords=None, defaults=(None, None, None))
paddle.fluid.optimizer.ModelAverage.restore ArgSpec(args=['self', 'executor'], varargs=None, keywords=None, defaults=None)
paddle.fluid.optimizer.ModelAverage.restore ArgSpec(args=['self', 'executor'], varargs=None, keywords=None, defaults=None)
paddle.fluid.optimizer.LarsMomentumOptimizer.__init__ ArgSpec(args=['self', 'learning_rate', 'momentum', 'lars_coeff', 'lars_weight_decay', 'regularization', 'name'], varargs=None, keywords=None, defaults=(0.001, 0.0005, None, None))
paddle.fluid.optimizer.LarsMomentumOptimizer.minimize ArgSpec(args=['self', 'loss', 'startup_program', 'parameter_list', 'no_grad_set'], varargs=None, keywords=None, defaults=(None, None, None))
paddle.fluid.backward.append_backward ArgSpec(args=['loss', 'parameter_list', 'no_grad_set', 'callbacks'], varargs=None, keywords=None, defaults=(None, None, None))
paddle.fluid.backward.append_backward ArgSpec(args=['loss', 'parameter_list', 'no_grad_set', 'callbacks'], varargs=None, keywords=None, defaults=(None, None, None))
paddle.fluid.regularizer.L1DecayRegularizer.__init__ ArgSpec(args=['self', 'regularization_coeff'], varargs=None, keywords=None, defaults=(0.0,))
paddle.fluid.regularizer.L1DecayRegularizer.__init__ ArgSpec(args=['self', 'regularization_coeff'], varargs=None, keywords=None, defaults=(0.0,))
paddle.fluid.regularizer.L2DecayRegularizer.__init__ ArgSpec(args=['self', 'regularization_coeff'], varargs=None, keywords=None, defaults=(0.0,))
paddle.fluid.regularizer.L2DecayRegularizer.__init__ ArgSpec(args=['self', 'regularization_coeff'], varargs=None, keywords=None, defaults=(0.0,))
...
...
paddle/fluid/framework/details/CMakeLists.txt
浏览文件 @
e0a89503
...
@@ -16,12 +16,14 @@ if(WITH_GPU)
...
@@ -16,12 +16,14 @@ if(WITH_GPU)
dynload_cuda variable_visitor
)
dynload_cuda variable_visitor
)
nv_library
(
reduce_op_handle SRCS reduce_op_handle.cc DEPS op_handle_base variable_visitor scope ddim dynload_cuda
)
nv_library
(
reduce_op_handle SRCS reduce_op_handle.cc DEPS op_handle_base variable_visitor scope ddim dynload_cuda
)
nv_library
(
broadcast_op_handle SRCS broadcast_op_handle.cc DEPS op_handle_base scope ddim memory variable_visitor dynload_cuda
)
nv_library
(
broadcast_op_handle SRCS broadcast_op_handle.cc DEPS op_handle_base scope ddim memory variable_visitor dynload_cuda
)
nv_library
(
fused_broadcast_op_handle SRCS fused_broadcast_op_handle.cc DEPS broadcast_op_handle
)
else
()
else
()
cc_library
(
all_reduce_op_handle SRCS all_reduce_op_handle.cc DEPS op_handle_base scope lod_tensor ddim memory
cc_library
(
all_reduce_op_handle SRCS all_reduce_op_handle.cc DEPS op_handle_base scope lod_tensor ddim memory
variable_visitor
)
variable_visitor
)
cc_library
(
reduce_op_handle SRCS reduce_op_handle.cc DEPS op_handle_base variable_visitor scope ddim
)
cc_library
(
reduce_op_handle SRCS reduce_op_handle.cc DEPS op_handle_base variable_visitor scope ddim
)
cc_library
(
broadcast_op_handle SRCS broadcast_op_handle.cc DEPS op_handle_base scope ddim memory variable_visitor
)
cc_library
(
broadcast_op_handle SRCS broadcast_op_handle.cc DEPS op_handle_base scope ddim memory variable_visitor
)
cc_library
(
fused_broadcast_op_handle SRCS fused_broadcast_op_handle.cc DEPS broadcast_op_handle
)
endif
()
endif
()
cc_library
(
data_balance_op_handle SRCS data_balance_op_handle.cc DEPS op_handle_base scope lod_tensor
)
cc_library
(
data_balance_op_handle SRCS data_balance_op_handle.cc DEPS op_handle_base scope lod_tensor
)
...
@@ -34,7 +36,7 @@ if(WITH_GPU)
...
@@ -34,7 +36,7 @@ if(WITH_GPU)
endif
()
endif
()
cc_library
(
multi_devices_graph_pass SRCS multi_devices_graph_pass.cc DEPS multi_devices_helper computation_op_handle
cc_library
(
multi_devices_graph_pass SRCS multi_devices_graph_pass.cc DEPS multi_devices_helper computation_op_handle
scale_loss_grad_op_handle rpc_op_handle all_reduce_op_handle reduce_op_handle broadcast_op_handle data_balance_op_handle
)
scale_loss_grad_op_handle rpc_op_handle all_reduce_op_handle reduce_op_handle broadcast_op_handle data_balance_op_handle
fused_broadcast_op_handle
)
if
(
WITH_GPU
)
if
(
WITH_GPU
)
cc_library
(
ssa_graph_executor SRCS ssa_graph_executor.cc DEPS graph framework_proto reference_count_pass
)
cc_library
(
ssa_graph_executor SRCS ssa_graph_executor.cc DEPS graph framework_proto reference_count_pass
)
...
@@ -58,4 +60,4 @@ cc_library(fast_threaded_ssa_graph_executor SRCS fast_threaded_ssa_graph_executo
...
@@ -58,4 +60,4 @@ cc_library(fast_threaded_ssa_graph_executor SRCS fast_threaded_ssa_graph_executo
cc_library
(
build_strategy SRCS build_strategy.cc DEPS
cc_library
(
build_strategy SRCS build_strategy.cc DEPS
graph_viz_pass multi_devices_graph_pass
graph_viz_pass multi_devices_graph_pass
multi_devices_graph_print_pass multi_devices_graph_check_pass
multi_devices_graph_print_pass multi_devices_graph_check_pass
fuse_elewise_add_act_pass
)
fuse_elewise_add_act_pass
multi_batch_merge_pass
)
paddle/fluid/framework/details/broadcast_op_handle.cc
浏览文件 @
e0a89503
...
@@ -48,16 +48,23 @@ void BroadcastOpHandle::RunImpl() {
...
@@ -48,16 +48,23 @@ void BroadcastOpHandle::RunImpl() {
var_scopes
.
emplace_back
(
s
->
FindVar
(
kLocalExecScopeName
)
->
Get
<
Scope
*>
());
var_scopes
.
emplace_back
(
s
->
FindVar
(
kLocalExecScopeName
)
->
Get
<
Scope
*>
());
}
}
BroadcastOneVar
(
*
in_var_handle
,
out_var_handles
,
var_scopes
);
}
void
BroadcastOpHandle
::
BroadcastOneVar
(
const
VarHandle
&
in_var_handle
,
const
std
::
vector
<
VarHandle
*>
&
out_var_handles
,
const
std
::
vector
<
const
Scope
*>
&
var_scopes
)
{
auto
*
in_var
=
auto
*
in_var
=
var_scopes
.
at
(
in_var_handle
->
scope_idx_
)
->
FindVar
(
in_var_handle
->
name_
);
var_scopes
.
at
(
in_var_handle
.
scope_idx_
)
->
FindVar
(
in_var_handle
.
name_
);
PADDLE_ENFORCE_NOT_NULL
(
in_var
);
PADDLE_ENFORCE_NOT_NULL
(
in_var
);
Tensor
&
in_tensor
=
VariableVisitor
::
GetMutableTensor
(
in_var
);
Tensor
&
in_tensor
=
VariableVisitor
::
GetMutableTensor
(
in_var
);
InitOutputValue
(
*
in_var_handle
,
out_var_handles
);
InitOutputValue
(
in_var_handle
,
out_var_handles
);
if
(
platform
::
is_cpu_place
(
in_tensor
.
place
()))
{
if
(
platform
::
is_cpu_place
(
in_tensor
.
place
()))
{
for
(
auto
*
out_var_handle
:
out_var_handles
)
{
for
(
auto
*
out_var_handle
:
out_var_handles
)
{
if
(
out_var_handle
->
IsTheSameVar
(
*
in_var_handle
))
{
if
(
out_var_handle
->
IsTheSameVar
(
in_var_handle
))
{
continue
;
continue
;
}
}
auto
&
out_p
=
out_var_handle
->
place_
;
auto
&
out_p
=
out_var_handle
->
place_
;
...
@@ -114,12 +121,12 @@ void BroadcastOpHandle::RunImpl() {
...
@@ -114,12 +121,12 @@ void BroadcastOpHandle::RunImpl() {
}
}
}
}
if
(
!
out_handle
->
IsTheSameVar
(
*
in_var_handle
))
{
if
(
!
out_handle
->
IsTheSameVar
(
in_var_handle
))
{
auto
out_var
=
var_scopes
.
at
(
in_var_handle
->
scope_idx_
)
auto
out_var
=
var_scopes
.
at
(
in_var_handle
.
scope_idx_
)
->
FindVar
(
out_var_handles
[
0
]
->
name_
);
->
FindVar
(
out_var_handles
[
0
]
->
name_
);
paddle
::
framework
::
TensorCopy
(
paddle
::
framework
::
TensorCopy
(
in_tensor
,
in_var_handle
->
place_
,
in_tensor
,
in_var_handle
.
place_
,
*
(
dev_ctxes_
.
at
(
in_var_handle
->
place_
)),
*
(
dev_ctxes_
.
at
(
in_var_handle
.
place_
)),
&
VariableVisitor
::
GetMutableTensor
(
out_var
));
&
VariableVisitor
::
GetMutableTensor
(
out_var
));
}
}
});
});
...
...
paddle/fluid/framework/details/broadcast_op_handle.h
浏览文件 @
e0a89503
...
@@ -61,7 +61,10 @@ struct BroadcastOpHandle : public OpHandleBase {
...
@@ -61,7 +61,10 @@ struct BroadcastOpHandle : public OpHandleBase {
protected:
protected:
void
RunImpl
()
override
;
void
RunImpl
()
override
;
private:
void
BroadcastOneVar
(
const
VarHandle
&
in_var_handle
,
const
std
::
vector
<
VarHandle
*>
&
out_var_handles
,
const
std
::
vector
<
const
Scope
*>
&
var_scopes
);
std
::
vector
<
Scope
*>
local_scopes_
;
std
::
vector
<
Scope
*>
local_scopes_
;
std
::
vector
<
platform
::
Place
>
places_
;
std
::
vector
<
platform
::
Place
>
places_
;
#ifdef PADDLE_WITH_CUDA
#ifdef PADDLE_WITH_CUDA
...
...
paddle/fluid/framework/details/build_strategy.cc
浏览文件 @
e0a89503
...
@@ -121,6 +121,7 @@ std::unique_ptr<ir::Graph> BuildStrategy::Apply(
...
@@ -121,6 +121,7 @@ std::unique_ptr<ir::Graph> BuildStrategy::Apply(
USE_PASS
(
fuse_elewise_add_act_pass
);
USE_PASS
(
fuse_elewise_add_act_pass
);
USE_PASS
(
graph_viz_pass
);
USE_PASS
(
graph_viz_pass
);
USE_PASS
(
multi_batch_merge_pass
);
USE_PASS
(
multi_devices_pass
);
USE_PASS
(
multi_devices_pass
);
USE_PASS
(
multi_devices_check_pass
);
USE_PASS
(
multi_devices_check_pass
);
USE_PASS
(
multi_devices_print_pass
);
USE_PASS
(
multi_devices_print_pass
);
paddle/fluid/framework/details/build_strategy.h
浏览文件 @
e0a89503
...
@@ -69,6 +69,8 @@ struct BuildStrategy {
...
@@ -69,6 +69,8 @@ struct BuildStrategy {
bool
enable_data_balance_
{
false
};
bool
enable_data_balance_
{
false
};
bool
fuse_broadcast_op_
{
false
};
// User normally doesn't need to call this API.
// User normally doesn't need to call this API.
// The PassBuilder allows for more customized insert, remove of passes
// The PassBuilder allows for more customized insert, remove of passes
// from python side.
// from python side.
...
...
paddle/fluid/framework/details/fused_broadcast_op_handle.cc
0 → 100644
浏览文件 @
e0a89503
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/fluid/framework/details/fused_broadcast_op_handle.h"
#include "paddle/fluid/framework/details/container_cast.h"
#include "paddle/fluid/framework/details/variable_visitor.h"
#include "paddle/fluid/platform/profiler.h"
namespace
paddle
{
namespace
framework
{
namespace
details
{
void
FusedBroadcastOpHandle
::
RunImpl
()
{
platform
::
RecordEvent
record_event
(
Name
(),
dev_ctxes_
.
begin
()
->
second
);
if
(
places_
.
size
()
==
1UL
)
return
;
auto
in_var_handles
=
DynamicCast
<
VarHandle
>
(
inputs_
);
auto
out_var_handles
=
DynamicCast
<
VarHandle
>
(
outputs_
);
WaitInputVarGenerated
();
std
::
vector
<
const
Scope
*>
var_scopes
;
for
(
auto
*
s
:
local_scopes_
)
{
var_scopes
.
emplace_back
(
s
->
FindVar
(
kLocalExecScopeName
)
->
Get
<
Scope
*>
());
}
size_t
place_num
=
places_
.
size
();
PADDLE_ENFORCE_EQ
(
in_var_handles
.
size
()
*
place_num
,
out_var_handles
.
size
());
for
(
size_t
i
=
0
;
i
<
in_var_handles
.
size
();
++
i
)
{
BroadcastOneVar
(
*
in_var_handles
[
i
],
std
::
vector
<
VarHandle
*>
(
out_var_handles
.
begin
()
+
i
*
place_num
,
out_var_handles
.
begin
()
+
(
i
+
1
)
*
place_num
),
var_scopes
);
}
}
std
::
string
FusedBroadcastOpHandle
::
Name
()
const
{
return
"fused_broadcast"
;
}
}
// namespace details
}
// namespace framework
}
// namespace paddle
paddle/fluid/framework/details/fused_broadcast_op_handle.h
0 → 100644
浏览文件 @
e0a89503
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <map>
#include <string>
#include <vector>
#include "paddle/fluid/framework/details/broadcast_op_handle.h"
#include "paddle/fluid/framework/details/multi_devices_helper.h"
#include "paddle/fluid/framework/lod_tensor.h"
#include "paddle/fluid/framework/scope.h"
#include "paddle/fluid/framework/selected_rows.h"
#include "paddle/fluid/platform/device_context.h"
#ifdef PADDLE_WITH_CUDA
#include "paddle/fluid/platform/nccl_helper.h"
#endif
namespace
paddle
{
namespace
framework
{
namespace
details
{
struct
FusedBroadcastOpHandle
:
public
BroadcastOpHandle
{
public:
#ifdef PADDLE_WITH_CUDA
FusedBroadcastOpHandle
(
ir
::
Node
*
node
,
const
std
::
vector
<
Scope
*>
local_scopes
,
const
std
::
vector
<
platform
::
Place
>
&
places
,
const
platform
::
NCCLContextMap
*
nccl_ctx
)
:
BroadcastOpHandle
(
node
,
local_scopes
,
places
,
nccl_ctx
)
{}
#else
FusedBroadcastOpHandle
(
ir
::
Node
*
node
,
const
std
::
vector
<
Scope
*>
local_scopes
,
const
std
::
vector
<
platform
::
Place
>&
places
)
:
BroadcastOpHandle
(
node
,
local_scopes
,
places
)
{}
#endif
std
::
string
Name
()
const
override
;
protected:
void
RunImpl
()
override
;
};
}
// namespace details
}
// namespace framework
}
// namespace paddle
paddle/fluid/framework/details/multi_devices_graph_pass.cc
浏览文件 @
e0a89503
...
@@ -21,6 +21,7 @@
...
@@ -21,6 +21,7 @@
#include "paddle/fluid/framework/details/broadcast_op_handle.h"
#include "paddle/fluid/framework/details/broadcast_op_handle.h"
#include "paddle/fluid/framework/details/computation_op_handle.h"
#include "paddle/fluid/framework/details/computation_op_handle.h"
#include "paddle/fluid/framework/details/data_balance_op_handle.h"
#include "paddle/fluid/framework/details/data_balance_op_handle.h"
#include "paddle/fluid/framework/details/fused_broadcast_op_handle.h"
#include "paddle/fluid/framework/details/multi_devices_graph_pass.h"
#include "paddle/fluid/framework/details/multi_devices_graph_pass.h"
#include "paddle/fluid/framework/details/reduce_op_handle.h"
#include "paddle/fluid/framework/details/reduce_op_handle.h"
#include "paddle/fluid/framework/details/rpc_op_handle.h"
#include "paddle/fluid/framework/details/rpc_op_handle.h"
...
@@ -347,7 +348,7 @@ std::unique_ptr<ir::Graph> MultiDevSSAGraphBuilder::ApplyImpl(
...
@@ -347,7 +348,7 @@ std::unique_ptr<ir::Graph> MultiDevSSAGraphBuilder::ApplyImpl(
BuildStrategy
::
GradientScaleStrategy
::
kCustomized
)
{
BuildStrategy
::
GradientScaleStrategy
::
kCustomized
)
{
// TODO(paddle-dev): Why is there no input for this op_handle?
// TODO(paddle-dev): Why is there no input for this op_handle?
auto
loss_grad_name
=
node
->
Op
()
->
OutputArgumentNames
()[
0
];
auto
loss_grad_name
=
node
->
Op
()
->
OutputArgumentNames
()[
0
];
CreateScaleLossGradOp
(
&
result
,
loss_grad_name
);
CreateScaleLossGradOp
(
&
result
,
loss_grad_name
,
node
->
outputs
[
0
]
);
}
}
// This assumes the backward generating code will ensure IsScaleLossOp
// This assumes the backward generating code will ensure IsScaleLossOp
// is true only for the op that scale the final scalar loss.
// is true only for the op that scale the final scalar loss.
...
@@ -436,10 +437,14 @@ std::unique_ptr<ir::Graph> MultiDevSSAGraphBuilder::ApplyImpl(
...
@@ -436,10 +437,14 @@ std::unique_ptr<ir::Graph> MultiDevSSAGraphBuilder::ApplyImpl(
if
((
use_gpu
&&
if
((
use_gpu
&&
strategy_
.
reduce_
==
BuildStrategy
::
ReduceStrategy
::
kReduce
)
||
strategy_
.
reduce_
==
BuildStrategy
::
ReduceStrategy
::
kReduce
)
||
is_dist_train
)
{
is_dist_train
)
{
for
(
size_t
dev_id
=
0
;
dev_id
<
bcast_var_name_set
.
size
();
++
dev_id
)
{
if
(
strategy_
.
fuse_broadcast_op_
)
{
auto
&
to_bcast_set
=
bcast_var_name_set
[
dev_id
];
CreateFusedBroadcastOp
(
&
result
,
bcast_var_name_set
);
for
(
auto
&
bcast_name
:
to_bcast_set
)
{
}
else
{
CreateBroadcastOp
(
&
result
,
bcast_name
,
dev_id
);
for
(
size_t
dev_id
=
0
;
dev_id
<
bcast_var_name_set
.
size
();
++
dev_id
)
{
auto
&
to_bcast_set
=
bcast_var_name_set
[
dev_id
];
for
(
auto
&
bcast_name
:
to_bcast_set
)
{
CreateBroadcastOp
(
&
result
,
bcast_name
,
dev_id
);
}
}
}
}
}
}
}
...
@@ -508,6 +513,44 @@ void MultiDevSSAGraphBuilder::CreateBroadcastOp(ir::Graph *result,
...
@@ -508,6 +513,44 @@ void MultiDevSSAGraphBuilder::CreateBroadcastOp(ir::Graph *result,
}
}
}
}
void
MultiDevSSAGraphBuilder
::
CreateFusedBroadcastOp
(
ir
::
Graph
*
result
,
const
std
::
vector
<
std
::
unordered_set
<
std
::
string
>>
&
bcast_varnames
)
const
{
#ifdef PADDLE_WITH_CUDA
auto
*
op_handle
=
new
FusedBroadcastOpHandle
(
result
->
CreateEmptyNode
(
"fused_broadcast"
,
ir
::
Node
::
Type
::
kOperation
),
local_scopes_
,
places_
,
nccl_ctxs_
);
#else
auto
*
op_handle
=
new
FusedBroadcastOpHandle
(
result
->
CreateEmptyNode
(
"fused_broadcast"
,
ir
::
Node
::
Type
::
kOperation
),
local_scopes_
,
places_
);
#endif
result
->
Get
<
GraphOps
>
(
kGraphOps
).
emplace_back
(
op_handle
);
for
(
size_t
i
=
0
;
i
<
places_
.
size
();
++
i
)
{
auto
&
p
=
places_
[
i
];
SetCommunicationContext
(
op_handle
,
p
);
}
for
(
size_t
dev_id
=
0
;
dev_id
<
bcast_varnames
.
size
();
++
dev_id
)
{
for
(
auto
&
p_name
:
bcast_varnames
[
dev_id
])
{
auto
*
in
=
result
->
Get
<
GraphVars
>
(
kGraphVars
).
at
(
dev_id
).
at
(
p_name
).
back
().
get
();
op_handle
->
AddInput
(
in
);
for
(
size_t
out_dev_id
=
0
;
out_dev_id
<
places_
.
size
();
++
out_dev_id
)
{
auto
&
p
=
places_
[
out_dev_id
];
auto
&
vars
=
result
->
Get
<
GraphVars
>
(
kGraphVars
).
at
(
out_dev_id
).
at
(
p_name
);
auto
*
out_var
=
new
VarHandle
(
result
->
CreateEmptyNode
(
p_name
,
ir
::
Node
::
Type
::
kVariable
),
vars
.
size
(),
out_dev_id
,
p_name
,
p
);
vars
.
emplace_back
(
out_var
);
op_handle
->
AddOutput
(
out_var
);
}
}
}
}
void
MultiDevSSAGraphBuilder
::
CreateComputationalOp
(
ir
::
Graph
*
result
,
void
MultiDevSSAGraphBuilder
::
CreateComputationalOp
(
ir
::
Graph
*
result
,
ir
::
Node
*
node
,
ir
::
Node
*
node
,
int
dev_id
)
const
{
int
dev_id
)
const
{
...
@@ -602,7 +645,8 @@ int MultiDevSSAGraphBuilder::GetVarDeviceID(const ir::Graph &graph,
...
@@ -602,7 +645,8 @@ int MultiDevSSAGraphBuilder::GetVarDeviceID(const ir::Graph &graph,
}
}
void
MultiDevSSAGraphBuilder
::
CreateScaleLossGradOp
(
void
MultiDevSSAGraphBuilder
::
CreateScaleLossGradOp
(
ir
::
Graph
*
result
,
const
std
::
string
&
loss_grad_name
)
const
{
ir
::
Graph
*
result
,
const
std
::
string
&
loss_grad_name
,
ir
::
Node
*
out_var_node
)
const
{
for
(
size_t
i
=
0
;
i
<
places_
.
size
();
++
i
)
{
for
(
size_t
i
=
0
;
i
<
places_
.
size
();
++
i
)
{
// Insert ScaleCost OpHandle
// Insert ScaleCost OpHandle
auto
*
dev_ctx
=
platform
::
DeviceContextPool
::
Instance
().
Get
(
places_
[
i
]);
auto
*
dev_ctx
=
platform
::
DeviceContextPool
::
Instance
().
Get
(
places_
[
i
]);
...
@@ -617,10 +661,8 @@ void MultiDevSSAGraphBuilder::CreateScaleLossGradOp(
...
@@ -617,10 +661,8 @@ void MultiDevSSAGraphBuilder::CreateScaleLossGradOp(
// loss->pending_ops_.emplace_back(op_handle);
// loss->pending_ops_.emplace_back(op_handle);
// op_handle->inputs_.emplace_back(loss);
// op_handle->inputs_.emplace_back(loss);
CreateOpOutput
(
CreateOpOutput
(
result
,
op_handle
,
result
,
op_handle
,
result
->
CreateVarNode
(
out_var_node
->
Var
()),
places_
[
i
],
i
);
result
->
CreateEmptyNode
(
loss_grad_name
,
ir
::
Node
::
Type
::
kVariable
),
places_
[
i
],
i
);
}
}
}
}
...
...
paddle/fluid/framework/details/multi_devices_graph_pass.h
浏览文件 @
e0a89503
...
@@ -61,7 +61,8 @@ class MultiDevSSAGraphBuilder : public ir::Pass {
...
@@ -61,7 +61,8 @@ class MultiDevSSAGraphBuilder : public ir::Pass {
size_t
num_places
)
const
;
size_t
num_places
)
const
;
void
CreateScaleLossGradOp
(
ir
::
Graph
*
result
,
void
CreateScaleLossGradOp
(
ir
::
Graph
*
result
,
const
std
::
string
&
loss_grad_name
)
const
;
const
std
::
string
&
loss_grad_name
,
ir
::
Node
*
out_var_node
)
const
;
VarHandle
*
CreateReduceOp
(
ir
::
Graph
*
result
,
const
std
::
string
&
og
,
VarHandle
*
CreateReduceOp
(
ir
::
Graph
*
result
,
const
std
::
string
&
og
,
int
dst_dev_id
)
const
;
int
dst_dev_id
)
const
;
...
@@ -78,6 +79,10 @@ class MultiDevSSAGraphBuilder : public ir::Pass {
...
@@ -78,6 +79,10 @@ class MultiDevSSAGraphBuilder : public ir::Pass {
void
CreateBroadcastOp
(
ir
::
Graph
*
result
,
const
std
::
string
&
p_name
,
void
CreateBroadcastOp
(
ir
::
Graph
*
result
,
const
std
::
string
&
p_name
,
size_t
src_dev_id
)
const
;
size_t
src_dev_id
)
const
;
void
CreateFusedBroadcastOp
(
ir
::
Graph
*
result
,
const
std
::
vector
<
std
::
unordered_set
<
std
::
string
>>
&
bcast_varnames
)
const
;
bool
IsSparseGradient
(
const
std
::
string
&
og
)
const
;
bool
IsSparseGradient
(
const
std
::
string
&
og
)
const
;
size_t
GetAppropriateDeviceID
(
size_t
GetAppropriateDeviceID
(
...
...
paddle/fluid/framework/ir/CMakeLists.txt
浏览文件 @
e0a89503
...
@@ -36,6 +36,7 @@ pass_library(fc_lstm_fuse_pass inference)
...
@@ -36,6 +36,7 @@ pass_library(fc_lstm_fuse_pass inference)
pass_library
(
embedding_fc_lstm_fuse_pass inference
)
pass_library
(
embedding_fc_lstm_fuse_pass inference
)
pass_library
(
fc_gru_fuse_pass inference
)
pass_library
(
fc_gru_fuse_pass inference
)
pass_library
(
seq_concat_fc_fuse_pass inference
)
pass_library
(
seq_concat_fc_fuse_pass inference
)
pass_library
(
multi_batch_merge_pass base
)
pass_library
(
conv_bn_fuse_pass inference
)
pass_library
(
conv_bn_fuse_pass inference
)
pass_library
(
seqconv_eltadd_relu_fuse_pass inference
)
pass_library
(
seqconv_eltadd_relu_fuse_pass inference
)
if
(
WITH_MKLDNN
)
if
(
WITH_MKLDNN
)
...
...
paddle/fluid/framework/ir/graph.cc
浏览文件 @
e0a89503
...
@@ -24,79 +24,23 @@ namespace paddle {
...
@@ -24,79 +24,23 @@ namespace paddle {
namespace
framework
{
namespace
framework
{
namespace
ir
{
namespace
ir
{
std
::
vector
<
std
::
string
>
FindDistTrainSendVars
(
const
std
::
vector
<
ir
::
Node
*>
&
nodes
)
{
std
::
vector
<
std
::
string
>
send_vars
;
// since parameters are all in block 0,
// it's enough to only scan send ops in block 0
for
(
auto
&
node
:
nodes
)
{
auto
op_vars
=
node
->
Op
()
->
InputArgumentNames
();
send_vars
.
reserve
(
send_vars
.
size
()
+
std
::
distance
(
op_vars
.
begin
(),
op_vars
.
end
()));
send_vars
.
insert
(
send_vars
.
end
(),
op_vars
.
begin
(),
op_vars
.
end
());
}
return
send_vars
;
}
std
::
vector
<
std
::
string
>
FindDistTrainRecvVars
(
const
std
::
vector
<
ir
::
Node
*>
&
nodes
)
{
std
::
vector
<
std
::
string
>
recv_vars
;
for
(
auto
&
node
:
nodes
)
{
auto
op_vars
=
node
->
Op
()
->
OutputArgumentNames
();
recv_vars
.
reserve
(
recv_vars
.
size
()
+
std
::
distance
(
op_vars
.
begin
(),
op_vars
.
end
()));
recv_vars
.
insert
(
recv_vars
.
end
(),
op_vars
.
begin
(),
op_vars
.
end
());
}
return
recv_vars
;
}
bool
IsDistTrainOp
(
ir
::
Node
*
node
,
const
std
::
vector
<
std
::
string
>
&
send_vars
,
const
std
::
vector
<
std
::
string
>
&
recv_vars
)
{
if
(
send_vars
.
size
()
==
0
||
recv_vars
.
size
()
==
0
)
{
return
false
;
}
/**
* Check any of opvars contains `.block` and in sendvars
*/
auto
checker
=
[](
const
std
::
vector
<
std
::
string
>
&
opvars
,
const
std
::
vector
<
std
::
string
>
&
rpc_vars
)
->
bool
{
for
(
auto
&
var
:
opvars
)
{
// a variable name with the suffix `.block` means it's a splited
// variable by (DistributeTranspiler)
// [python/paddle/fluid/transpiler/distribute_transpiler.py]
if
(
var
.
find
(
".block"
)
!=
std
::
string
::
npos
&&
std
::
find
(
rpc_vars
.
begin
(),
rpc_vars
.
end
(),
var
)
!=
rpc_vars
.
end
())
{
return
true
;
}
}
return
false
;
};
std
::
vector
<
std
::
string
>
input_var_names
;
std
::
vector
<
std
::
string
>
output_var_names
;
for
(
ir
::
Node
*
input
:
node
->
inputs
)
{
input_var_names
.
push_back
(
input
->
Name
());
}
for
(
ir
::
Node
*
output
:
node
->
outputs
)
{
output_var_names
.
push_back
(
output
->
Name
());
}
return
checker
(
output_var_names
,
send_vars
)
||
checker
(
input_var_names
,
recv_vars
);
}
Graph
::
Graph
(
const
ProgramDesc
&
program
)
:
program_
(
program
)
{
Graph
::
Graph
(
const
ProgramDesc
&
program
)
:
program_
(
program
)
{
// Make the nodes id start from 0.
// Make the nodes id start from 0.
Node
::
ResetId
();
Node
::
ResetId
();
auto
var_nodes
=
InitFromProgram
(
program_
);
ResolveHazard
(
var_nodes
);
}
std
::
map
<
std
::
string
,
std
::
vector
<
ir
::
Node
*>>
Graph
::
InitFromProgram
(
const
ProgramDesc
&
program
)
{
VLOG
(
3
)
<<
"block in program:"
<<
program_
.
Size
();
VLOG
(
3
)
<<
"block in program:"
<<
program_
.
Size
();
std
::
unordered_map
<
std
::
string
,
VarDesc
*>
all_vars
;
std
::
unordered_map
<
std
::
string
,
VarDesc
*>
all_vars
;
// var nodes for each var name, will have multiple versions in SSA
std
::
map
<
std
::
string
,
std
::
vector
<
ir
::
Node
*>>
var_nodes
;
for
(
auto
*
var
:
program
.
Block
(
0
).
AllVars
())
{
for
(
auto
*
var
:
program
.
Block
(
0
).
AllVars
())
{
all_vars
.
emplace
(
var
->
Name
(),
var
);
all_vars
.
emplace
(
var
->
Name
(),
var
);
}
}
std
::
map
<
std
::
string
,
std
::
vector
<
ir
::
Node
*>>
var_nodes
;
for
(
auto
*
op
:
program
.
Block
(
0
).
AllOps
())
{
for
(
auto
*
op
:
program
.
Block
(
0
).
AllOps
())
{
ir
::
Node
*
node
=
CreateOpNode
(
op
);
ir
::
Node
*
node
=
CreateOpNode
(
op
);
// For input args, reuse the same var name if it was created before.
// For input args, reuse the same var name if it was created before.
...
@@ -134,7 +78,11 @@ Graph::Graph(const ProgramDesc &program) : program_(program) {
...
@@ -134,7 +78,11 @@ Graph::Graph(const ProgramDesc &program) : program_(program) {
var
->
inputs
.
push_back
(
node
);
var
->
inputs
.
push_back
(
node
);
}
}
}
}
return
std
::
move
(
var_nodes
);
}
void
Graph
::
ResolveHazard
(
const
std
::
map
<
std
::
string
,
std
::
vector
<
ir
::
Node
*>>
&
var_nodes
)
{
/**
/**
* We should handle write after read(WAR) and write after write(WAW) here.
* We should handle write after read(WAR) and write after write(WAW) here.
* Because some of the operators of the program can be executed parallelly.
* Because some of the operators of the program can be executed parallelly.
...
@@ -153,6 +101,7 @@ Graph::Graph(const ProgramDesc &program) : program_(program) {
...
@@ -153,6 +101,7 @@ Graph::Graph(const ProgramDesc &program) : program_(program) {
auto
it_old
=
versions
.
rbegin
();
auto
it_old
=
versions
.
rbegin
();
++
it_old
;
++
it_old
;
for
(;
it_old
!=
versions
.
rend
();
it_new
=
it_old
,
++
it_old
)
{
for
(;
it_old
!=
versions
.
rend
();
it_new
=
it_old
,
++
it_old
)
{
VLOG
(
3
)
<<
"deal with var: "
<<
(
*
it_new
)
->
Name
();
ir
::
Node
*
write_op
=
ir
::
Node
*
write_op
=
(
*
it_new
)
->
inputs
.
empty
()
?
nullptr
:
(
*
it_new
)
->
inputs
[
0
];
(
*
it_new
)
->
inputs
.
empty
()
?
nullptr
:
(
*
it_new
)
->
inputs
[
0
];
const
auto
&
read_ops
=
(
*
it_old
)
->
outputs
;
const
auto
&
read_ops
=
(
*
it_old
)
->
outputs
;
...
...
paddle/fluid/framework/ir/graph.h
浏览文件 @
e0a89503
...
@@ -160,6 +160,12 @@ class Graph {
...
@@ -160,6 +160,12 @@ class Graph {
return
nullptr
;
return
nullptr
;
}
}
std
::
map
<
std
::
string
,
std
::
vector
<
ir
::
Node
*>>
InitFromProgram
(
const
ProgramDesc
&
program
);
void
ResolveHazard
(
const
std
::
map
<
std
::
string
,
std
::
vector
<
ir
::
Node
*>>
&
var_nodes
);
private:
private:
// This method takes ownership of `node`.
// This method takes ownership of `node`.
ir
::
Node
*
AddNode
(
ir
::
Node
*
node
)
{
ir
::
Node
*
AddNode
(
ir
::
Node
*
node
)
{
...
...
paddle/fluid/framework/ir/graph_helper.cc
浏览文件 @
e0a89503
...
@@ -120,19 +120,25 @@ size_t GraphNum(const Graph &graph) {
...
@@ -120,19 +120,25 @@ size_t GraphNum(const Graph &graph) {
std
::
deque
<
ir
::
Node
*>
q_nodes
;
std
::
deque
<
ir
::
Node
*>
q_nodes
;
std
::
vector
<
std
::
unordered_set
<
ir
::
Node
*>>
graph_nodes
;
std
::
vector
<
std
::
unordered_set
<
ir
::
Node
*>>
graph_nodes
;
std
::
unordered_set
<
ir
::
Node
*>
g_nodes
;
std
::
unordered_set
<
ir
::
Node
*>
g_nodes
;
// q_set used to record records in the queue.
std
::
unordered_set
<
ir
::
Node
*>
q_set
;
size_t
graph_count
=
0
;
size_t
graph_count
=
0
;
auto
traverse_nodes
=
[
&
visited_nodes
,
auto
traverse_nodes
=
[
&
visited_nodes
,
&
q_nodes
,
&
q_nodes
](
const
std
::
vector
<
ir
::
Node
*>
&
nodes
)
{
&
q_set
](
const
std
::
vector
<
ir
::
Node
*>
&
nodes
)
{
std
::
copy_if
(
for
(
auto
n
:
nodes
)
{
nodes
.
begin
(),
nodes
.
end
(),
std
::
back_inserter
(
q_nodes
),
if
(
visited_nodes
.
count
(
n
)
==
0
&&
q_set
.
count
(
n
)
==
0
)
{
[
&
visited_nodes
](
Node
*
node
)
{
return
!
visited_nodes
.
count
(
node
);
});
q_nodes
.
push_back
(
n
);
q_set
.
insert
(
n
);
}
}
};
};
while
(
visited_nodes
.
size
()
!=
nodes
.
size
())
{
while
(
visited_nodes
.
size
()
!=
nodes
.
size
())
{
if
(
!
q_nodes
.
empty
())
{
if
(
!
q_nodes
.
empty
())
{
auto
cur_node
=
q_nodes
.
front
();
auto
cur_node
=
q_nodes
.
front
();
q_nodes
.
pop_front
();
q_nodes
.
pop_front
();
q_set
.
erase
(
cur_node
);
visited_nodes
.
insert
(
cur_node
);
visited_nodes
.
insert
(
cur_node
);
g_nodes
.
insert
(
cur_node
);
g_nodes
.
insert
(
cur_node
);
traverse_nodes
(
cur_node
->
inputs
);
traverse_nodes
(
cur_node
->
inputs
);
...
@@ -146,6 +152,7 @@ size_t GraphNum(const Graph &graph) {
...
@@ -146,6 +152,7 @@ size_t GraphNum(const Graph &graph) {
for
(
auto
&
n
:
nodes
)
{
for
(
auto
&
n
:
nodes
)
{
if
(
visited_nodes
.
count
(
n
)
==
0
)
{
if
(
visited_nodes
.
count
(
n
)
==
0
)
{
q_nodes
.
push_back
(
n
);
q_nodes
.
push_back
(
n
);
q_set
.
insert
(
n
);
break
;
break
;
}
}
}
}
...
...
paddle/fluid/framework/ir/multi_batch_merge_pass.cc
0 → 100644
浏览文件 @
e0a89503
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/fluid/framework/ir/multi_batch_merge_pass.h"
#include <map>
#include <string>
#include <vector>
#include "paddle/fluid/framework/ir/graph_helper.h"
#include "paddle/fluid/framework/op_proto_maker.h"
namespace
paddle
{
namespace
framework
{
namespace
ir
{
static
const
char
kNumRepeats
[]
=
"num_repeats"
;
typedef
std
::
unordered_map
<
std
::
string
,
std
::
vector
<
ir
::
Node
*>>
SSAVarList
;
ir
::
Node
*
SameNameVar
(
std
::
unordered_set
<
ir
::
Node
*>
all
,
ir
::
Node
*
target
)
{
for
(
auto
n
:
all
)
{
if
(
target
->
IsVar
()
&&
target
->
Name
()
==
n
->
Name
())
{
return
n
;
}
}
return
nullptr
;
}
VarDesc
CopyVarDesc
(
VarDesc
*
var_desc
)
{
VarDesc
repeated_var
(
var_desc
->
Name
());
// copy other variable attributes
if
(
var_desc
->
GetType
()
!=
proto
::
VarType
::
READER
)
{
repeated_var
.
SetType
(
var_desc
->
GetType
());
repeated_var
.
SetShape
(
var_desc
->
GetShape
());
repeated_var
.
SetDataType
(
var_desc
->
GetDataType
());
repeated_var
.
SetLoDLevel
(
var_desc
->
GetLoDLevel
());
repeated_var
.
SetPersistable
(
var_desc
->
Persistable
());
}
else
{
// TODO(typhoonzero): copy reader var
}
return
repeated_var
;
}
VarDesc
UpdateGradVarDesc
(
VarDesc
*
var_desc
,
int
repeat
,
const
std
::
unordered_set
<
std
::
string
>&
grad_names
,
const
std
::
unordered_set
<
std
::
string
>&
bn_vars_need_rename
)
{
if
(
grad_names
.
find
(
var_desc
->
Name
())
!=
grad_names
.
end
()
||
bn_vars_need_rename
.
find
(
var_desc
->
Name
())
!=
bn_vars_need_rename
.
end
())
{
std
::
string
new_gname
=
string
::
Sprintf
(
"%s.repeat.%d"
,
var_desc
->
Name
(),
repeat
);
VarDesc
repeated_var
=
CopyVarDesc
(
var_desc
);
repeated_var
.
SetName
(
new_gname
);
VLOG
(
3
)
<<
"update "
<<
var_desc
->
Name
()
<<
" to repeat "
<<
repeat
;
return
repeated_var
;
}
return
*
var_desc
;
}
std
::
unique_ptr
<
Graph
>
BatchMergePass
::
ApplyImpl
(
std
::
unique_ptr
<
Graph
>
graph
)
const
{
int
num_repeats
=
Get
<
const
int
>
(
kNumRepeats
);
std
::
vector
<
Node
*>
forward_backward_ops
;
std
::
vector
<
Node
*>
optimize_ops
;
std
::
vector
<
Node
*>
lr_ops
;
// ops other than forward/backward/optimize
std
::
unordered_set
<
std
::
string
>
grad_names
;
std
::
vector
<
ir
::
Node
*>
nodes
=
TopologySortOperations
(
*
graph
);
auto
origin_nodes
=
graph
->
ReleaseNodes
();
VLOG
(
3
)
<<
"origin nodes count: "
<<
origin_nodes
.
size
();
ir
::
Graph
&
result
=
*
graph
;
// 1. record op nodes of different roles
for
(
auto
node
:
nodes
)
{
if
(
node
->
IsVar
())
continue
;
int
op_role
=
boost
::
get
<
int
>
(
node
->
Op
()
->
GetAttr
(
framework
::
OpProtoAndCheckerMaker
::
OpRoleAttrName
()));
if
((
op_role
==
static_cast
<
int
>
(
framework
::
OpRole
::
kForward
))
||
(
op_role
&
static_cast
<
int
>
(
framework
::
OpRole
::
kBackward
))
||
(
op_role
&
static_cast
<
int
>
(
framework
::
OpRole
::
kLoss
)))
{
forward_backward_ops
.
push_back
(
node
);
}
else
if
((
op_role
&
static_cast
<
int
>
(
framework
::
OpRole
::
kOptimize
))
||
(
op_role
&
static_cast
<
int
>
(
framework
::
OpRole
::
kDist
))
||
(
op_role
&
static_cast
<
int
>
(
framework
::
OpRole
::
kRPC
)))
{
optimize_ops
.
push_back
(
node
);
auto
op_role_var
=
node
->
Op
()
->
GetNullableAttr
(
OpProtoAndCheckerMaker
::
OpRoleVarAttrName
());
auto
op_role_vars
=
boost
::
get
<
std
::
vector
<
std
::
string
>>
(
op_role_var
);
for
(
size_t
i
=
0
;
i
<
op_role_vars
.
size
();
i
+=
2
)
{
grad_names
.
insert
(
op_role_vars
[
i
+
1
]);
}
}
else
if
(
op_role
&
static_cast
<
int
>
(
framework
::
OpRole
::
kLRSched
))
{
lr_ops
.
push_back
(
node
);
}
else
{
// NOLINT
PADDLE_THROW
(
"Invalid op_role: %d"
,
static_cast
<
int
>
(
op_role
));
}
}
// 2. copy forward backward
ir
::
Node
*
prev_repeat_last_op_node
=
nullptr
;
// record origin_grad -> repeated grad list map.
std
::
map
<
ir
::
Node
*
,
std
::
vector
<
ir
::
Node
*>>
grad_repeated_map
;
std
::
map
<
std
::
string
,
std
::
vector
<
ir
::
Node
*>>
created
;
std
::
unordered_set
<
std
::
string
>
bn_vars_need_rename
;
for
(
int
i
=
0
;
i
<
num_repeats
;
++
i
)
{
std
::
unordered_set
<
ir
::
Node
*>
copied
;
for
(
size_t
node_idx
=
0
;
node_idx
<
forward_backward_ops
.
size
();
++
node_idx
)
{
auto
node
=
forward_backward_ops
[
node_idx
];
OpDesc
repeated_op
(
*
(
node
->
Op
()),
node
->
Op
()
->
Block
());
// 3. rename grad outputs to current repeat.
for
(
auto
outname
:
repeated_op
.
OutputArgumentNames
())
{
if
(
grad_names
.
find
(
outname
)
!=
grad_names
.
end
())
{
std
::
string
new_gname
=
string
::
Sprintf
(
"%s.repeat.%d"
,
outname
,
i
);
repeated_op
.
RenameOutput
(
outname
,
new_gname
);
}
}
// 3.5 let batch_norm ops use independent vars, note batch_norm_grad do
// not need this update
if
(
node
->
Name
()
==
"batch_norm"
)
{
// NOTE: assume bn op created by layers use save var as output mean and
// variance
std
::
string
new_mean_name
=
string
::
Sprintf
(
"%s.repeat.%d"
,
repeated_op
.
Input
(
"Mean"
)[
0
],
i
);
std
::
string
new_var_name
=
string
::
Sprintf
(
"%s.repeat.%d"
,
repeated_op
.
Input
(
"Variance"
)[
0
],
i
);
bn_vars_need_rename
.
insert
(
repeated_op
.
Input
(
"Mean"
)[
0
]);
bn_vars_need_rename
.
insert
(
repeated_op
.
Input
(
"Variance"
)[
0
]);
VLOG
(
3
)
<<
"renaming "
<<
repeated_op
.
Input
(
"Mean"
)[
0
]
<<
" to "
<<
new_mean_name
;
repeated_op
.
RenameInput
(
repeated_op
.
Input
(
"Mean"
)[
0
],
new_mean_name
);
repeated_op
.
RenameInput
(
repeated_op
.
Input
(
"Variance"
)[
0
],
new_var_name
);
repeated_op
.
RenameOutput
(
repeated_op
.
Output
(
"MeanOut"
)[
0
],
new_mean_name
);
repeated_op
.
RenameOutput
(
repeated_op
.
Output
(
"VarianceOut"
)[
0
],
new_var_name
);
}
// 3.9 do copy
auto
repeated_node
=
result
.
CreateOpNode
(
&
repeated_op
);
copied
.
insert
(
node
);
// 4. add deps between repeats
if
(
node_idx
==
forward_backward_ops
.
size
()
-
1
)
{
prev_repeat_last_op_node
=
repeated_node
;
}
if
(
node_idx
==
0
&&
prev_repeat_last_op_node
)
{
auto
*
depvar
=
result
.
CreateControlDepVar
();
prev_repeat_last_op_node
->
outputs
.
push_back
(
depvar
);
depvar
->
inputs
.
push_back
(
prev_repeat_last_op_node
);
repeated_node
->
inputs
.
push_back
(
depvar
);
depvar
->
outputs
.
push_back
(
repeated_node
);
}
for
(
auto
in_node
:
node
->
inputs
)
{
if
(
in_node
->
IsCtrlVar
())
{
continue
;
}
ir
::
Node
*
var
=
nullptr
;
auto
updated_var
=
UpdateGradVarDesc
(
in_node
->
Var
(),
i
,
grad_names
,
bn_vars_need_rename
);
// should be initialized by startup, how to initilize tensor in the
// scope?
if
(
node
->
Name
()
==
"batch_norm"
&&
bn_vars_need_rename
.
find
(
in_node
->
Name
())
!=
bn_vars_need_rename
.
end
())
{
// Create bn mean/variance for each repeat
var
=
result
.
CreateVarNode
(
&
updated_var
);
created
[
updated_var
.
Name
()].
push_back
(
var
);
copied
.
insert
(
in_node
);
repeated_node
->
inputs
.
push_back
(
var
);
var
->
outputs
.
push_back
(
repeated_node
);
continue
;
}
// for other ops
if
(
in_node
->
inputs
.
empty
()
&&
i
>
0
)
{
// do not copy head vars (inputs, params) in repeats > 0
var
=
created
.
at
(
in_node
->
Name
()).
back
();
}
else
{
if
(
copied
.
find
(
in_node
)
==
copied
.
end
())
{
var
=
result
.
CreateVarNode
(
&
updated_var
);
if
(
grad_names
.
find
(
in_node
->
Var
()
->
Name
())
!=
grad_names
.
end
())
{
grad_repeated_map
[
in_node
].
push_back
(
var
);
}
copied
.
insert
(
in_node
);
created
[
updated_var
.
Name
()].
push_back
(
var
);
}
else
{
var
=
created
.
at
(
updated_var
.
Name
()).
back
();
}
}
repeated_node
->
inputs
.
push_back
(
var
);
var
->
outputs
.
push_back
(
repeated_node
);
}
for
(
auto
out_node
:
node
->
outputs
)
{
if
(
out_node
->
IsCtrlVar
())
{
continue
;
}
ir
::
Node
*
var
=
nullptr
;
auto
updated_var
=
UpdateGradVarDesc
(
out_node
->
Var
(),
i
,
grad_names
,
bn_vars_need_rename
);
if
(
copied
.
find
(
out_node
)
==
copied
.
end
())
{
var
=
result
.
CreateVarNode
(
&
updated_var
);
if
(
grad_names
.
find
(
out_node
->
Var
()
->
Name
())
!=
grad_names
.
end
())
{
grad_repeated_map
[
out_node
].
push_back
(
var
);
}
copied
.
insert
(
out_node
);
created
[
updated_var
.
Name
()].
push_back
(
var
);
}
else
{
var
=
created
.
at
(
updated_var
.
Name
()).
back
();
}
repeated_node
->
outputs
.
push_back
(
var
);
var
->
inputs
.
push_back
(
repeated_node
);
}
}
}
// 5. create GRAD merge op node
for
(
auto
kv
:
grad_repeated_map
)
{
OpDesc
sum_op
;
sum_op
.
SetType
(
"sum"
);
std
::
vector
<
std
::
string
>
repeated_grad_names
;
for
(
auto
r
:
kv
.
second
)
{
repeated_grad_names
.
push_back
(
r
->
Var
()
->
Name
());
}
sum_op
.
SetInput
(
"X"
,
repeated_grad_names
);
sum_op
.
SetOutput
(
"Out"
,
{
kv
.
first
->
Var
()
->
Name
()});
sum_op
.
SetAttr
(
OpProtoAndCheckerMaker
::
OpRoleAttrName
(),
static_cast
<
int
>
(
OpRole
::
kBackward
));
auto
sum_op_node
=
result
.
CreateOpNode
(
&
sum_op
);
for
(
auto
r
:
kv
.
second
)
{
sum_op_node
->
inputs
.
push_back
(
r
);
r
->
outputs
.
push_back
(
sum_op_node
);
}
auto
sum_out_var_node
=
result
.
CreateVarNode
(
kv
.
first
->
Var
());
sum_op_node
->
outputs
.
push_back
(
sum_out_var_node
);
sum_out_var_node
->
inputs
.
push_back
(
sum_op_node
);
created
[
sum_out_var_node
->
Name
()].
push_back
(
sum_out_var_node
);
OpDesc
scale_op
;
scale_op
.
SetType
(
"scale"
);
scale_op
.
SetInput
(
"X"
,
{
sum_out_var_node
->
Var
()
->
Name
()});
// NOTE: inplace scale.
scale_op
.
SetOutput
(
"Out"
,
{
sum_out_var_node
->
Var
()
->
Name
()});
scale_op
.
SetAttr
(
"scale"
,
static_cast
<
float
>
(
1.0
f
/
num_repeats
));
scale_op
.
SetAttr
(
OpProtoAndCheckerMaker
::
OpRoleAttrName
(),
static_cast
<
int
>
(
OpRole
::
kBackward
));
auto
scale_op_node
=
result
.
CreateOpNode
(
&
scale_op
);
scale_op_node
->
inputs
.
push_back
(
sum_out_var_node
);
sum_out_var_node
->
outputs
.
push_back
(
scale_op_node
);
auto
scale_out_var_node
=
result
.
CreateVarNode
(
sum_out_var_node
->
Var
());
scale_op_node
->
outputs
.
push_back
(
scale_out_var_node
);
scale_out_var_node
->
inputs
.
push_back
(
scale_op_node
);
created
[
scale_out_var_node
->
Name
()].
push_back
(
scale_out_var_node
);
}
// 6. add optimize ops
{
auto
copy_node
=
[
&
result
,
&
created
](
ir
::
Node
*
node
)
{
auto
op_node
=
result
.
CreateOpNode
(
node
->
Op
());
// copy op ins/outs
// NOTE: for send/recv ops, the OpDesc uses ctrldepvar to describe
// dependencies, so create those depvars if OpDesc have in/outs.
for
(
auto
in_node
:
node
->
inputs
)
{
if
(
in_node
->
IsCtrlVar
()
&&
!
in_node
->
Var
())
{
continue
;
}
ir
::
Node
*
var
=
nullptr
;
if
(
created
.
find
(
in_node
->
Name
())
==
created
.
end
())
{
var
=
result
.
CreateVarNode
(
in_node
->
Var
());
created
[
in_node
->
Name
()].
push_back
(
var
);
}
else
{
var
=
created
.
at
(
in_node
->
Name
()).
back
();
}
op_node
->
inputs
.
push_back
(
var
);
var
->
outputs
.
push_back
(
op_node
);
}
for
(
auto
out_node
:
node
->
outputs
)
{
if
(
out_node
->
IsCtrlVar
()
&&
!
out_node
->
Var
())
{
continue
;
}
auto
var
=
result
.
CreateVarNode
(
out_node
->
Var
());
created
[
out_node
->
Name
()].
push_back
(
var
);
op_node
->
outputs
.
push_back
(
var
);
var
->
inputs
.
push_back
(
op_node
);
}
};
for
(
auto
node
:
lr_ops
)
{
copy_node
(
node
);
}
for
(
auto
node
:
optimize_ops
)
{
copy_node
(
node
);
}
}
result
.
ResolveHazard
(
created
);
return
graph
;
}
}
// namespace ir
}
// namespace framework
}
// namespace paddle
REGISTER_PASS
(
multi_batch_merge_pass
,
paddle
::
framework
::
ir
::
BatchMergePass
)
.
RequirePassAttr
(
paddle
::
framework
::
ir
::
kNumRepeats
);
paddle/fluid/framework/ir/multi_batch_merge_pass.h
0 → 100644
浏览文件 @
e0a89503
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include "paddle/fluid/framework/ir/fuse_pass_base.h"
#include "paddle/fluid/framework/ir/graph.h"
#include "paddle/fluid/framework/ir/pass.h"
namespace
paddle
{
namespace
framework
{
namespace
ir
{
// BatchMergePass is used to copy forward and backward ops for several
// times to run several batches to simulate large batch size training
// as if we have more than 1 GPUs.
// User can define how many batches to run, gradients will be merged
// through those repeats, and then do optimization using merged gradients.
// This pass is extremely useful when doing large batch-size distributed
// sync training, we can simulate even large batch size as if we have more
// GPUs.
class
BatchMergePass
:
public
Pass
{
public:
virtual
~
BatchMergePass
()
{}
protected:
std
::
unique_ptr
<
Graph
>
ApplyImpl
(
std
::
unique_ptr
<
Graph
>
graph
)
const
override
;
};
}
// namespace ir
}
// namespace framework
}
// namespace paddle
paddle/fluid/framework/ir/node.h
浏览文件 @
e0a89503
...
@@ -44,6 +44,7 @@ class Node {
...
@@ -44,6 +44,7 @@ class Node {
return
op_desc_
.
get
();
return
op_desc_
.
get
();
}
}
// Please don't use this API!
int
id
()
const
{
return
id_
;
}
int
id
()
const
{
return
id_
;
}
bool
IsOp
()
const
{
return
type_
==
Type
::
kOperation
;
}
bool
IsOp
()
const
{
return
type_
==
Type
::
kOperation
;
}
...
@@ -92,6 +93,7 @@ class Node {
...
@@ -92,6 +93,7 @@ class Node {
Node
()
=
delete
;
Node
()
=
delete
;
static
int
count_
;
static
int
count_
;
// Please don't use this API or make this public.
static
void
ResetId
()
{
count_
=
0
;
}
static
void
ResetId
()
{
count_
=
0
;
}
DISABLE_COPY_AND_ASSIGN
(
Node
);
DISABLE_COPY_AND_ASSIGN
(
Node
);
};
};
...
...
paddle/fluid/framework/lod_tensor_array.h
浏览文件 @
e0a89503
...
@@ -18,6 +18,82 @@ limitations under the License. */
...
@@ -18,6 +18,82 @@ limitations under the License. */
namespace
paddle
{
namespace
paddle
{
namespace
framework
{
namespace
framework
{
// NOTE The vector<LoDTensor> can't be replaced with the class LoDTensorArray
// directly, because there are many vector<LoDTensor> used accross the project,
// and some of them are treated as LoDTensorArray.
#if !defined(PADDLE_ON_INFERENCE)
using
LoDTensorArray
=
std
::
vector
<
LoDTensor
>
;
using
LoDTensorArray
=
std
::
vector
<
LoDTensor
>
;
}
#else // !PADDLE_ON_INFERENCE
#pragma message "LoDTensorArray is replaced with the inference one."
/*
* A LoDTensorArray which will not deallocate buffer when resized, fix the data
* diff in inference, and more performance friendly in the concurrency
* scenerios.
*/
class
LoDTensorArray
{
public:
LoDTensorArray
()
=
default
;
using
iterator
=
std
::
vector
<
LoDTensor
>::
iterator
;
using
const_iterator
=
std
::
vector
<
LoDTensor
>::
const_iterator
;
const_iterator
begin
()
const
{
return
array_
.
begin
();
}
const_iterator
end
()
const
{
return
array_
.
begin
()
+
size_
;
}
iterator
begin
()
{
return
array_
.
begin
();
}
iterator
end
()
{
return
array_
.
begin
()
+
size_
;
}
void
push_back
(
const
LoDTensor
&
x
)
{
if
(
size_
<
array_
.
size
())
{
array_
[
size_
++
]
=
x
;
}
else
{
array_
.
push_back
(
x
);
++
size_
;
}
}
void
resize
(
size_t
size
)
{
if
(
array_
.
size
()
<
size
)
{
array_
.
resize
(
size
);
}
size_
=
size
;
}
void
emplace_back
()
{
array_
.
emplace_back
();
}
void
emplace_back
(
LoDTensor
&&
x
)
{
array_
.
emplace_back
(
std
::
move
(
x
));
}
LoDTensor
&
back
()
{
return
array_
.
back
();
}
size_t
space
()
const
{
return
array_
.
size
();
}
void
reserve
(
size_t
size
)
{
// Naive warning to tell user this array might be to large. The memory and
// buffer used by this TensorArray will not be deleted during the training
// and inference phase, so attention not to make it expand too long.
if
(
size
>
800UL
)
{
LOG
(
WARNING
)
<<
"TensorArray has more than 800 items"
;
}
array_
.
reserve
(
size
);
}
bool
empty
()
const
{
return
size_
==
0UL
;
}
void
clear
()
{
size_
=
0UL
;
}
LoDTensor
&
operator
[](
size_t
id
)
{
return
array_
[
id
];
}
const
LoDTensor
&
operator
[](
size_t
id
)
const
{
return
array_
[
id
];
}
LoDTensor
&
at
(
size_t
id
)
{
return
array_
.
at
(
id
);
}
const
LoDTensor
&
at
(
size_t
id
)
const
{
return
array_
.
at
(
id
);
}
size_t
size
()
const
{
return
size_
;
}
private:
size_t
size_
{
0
};
std
::
vector
<
LoDTensor
>
array_
;
};
#endif // !PADDLE_ON_INFERENCE
}
// namespace framework
}
// namespace paddle
}
// namespace paddle
paddle/fluid/framework/op_desc.h
浏览文件 @
e0a89503
...
@@ -121,10 +121,6 @@ class OpDesc {
...
@@ -121,10 +121,6 @@ class OpDesc {
BlockDesc
*
Block
()
{
return
this
->
block_
;
}
BlockDesc
*
Block
()
{
return
this
->
block_
;
}
const
BlockDesc
&
BlockRef
()
const
{
return
*
this
->
block_
;
}
void
SetBlock
(
BlockDesc
*
block
)
{
this
->
block_
=
block
;
}
private:
private:
template
<
typename
MapType
>
template
<
typename
MapType
>
static
std
::
vector
<
typename
MapType
::
key_type
>
MapKeys
(
const
MapType
&
map
)
{
static
std
::
vector
<
typename
MapType
::
key_type
>
MapKeys
(
const
MapType
&
map
)
{
...
...
paddle/fluid/framework/op_proto_maker.h
浏览文件 @
e0a89503
...
@@ -28,12 +28,12 @@ enum class OpRole {
...
@@ -28,12 +28,12 @@ enum class OpRole {
kBackward
=
0x0001
,
kBackward
=
0x0001
,
kOptimize
=
0x0002
,
kOptimize
=
0x0002
,
// RPC role is for send/recv releated op
// RPC role is for send/recv releated op
kRPC
=
0x000
3
,
kRPC
=
0x000
4
,
// Dist role is for split_byref/split_selected_rows/concat
// Dist role is for split_byref/split_selected_rows/concat
// used for distributed training.
// used for distributed training.
kDist
=
0x000
4
,
kDist
=
0x000
8
,
// Tag all learning rate scheduler operators.
// Tag all learning rate scheduler operators.
kLRSched
=
0x00
05
,
kLRSched
=
0x00
16
,
kLoss
=
0x0100
,
kLoss
=
0x0100
,
// The default value of op's role. This should be only used for unittests and
// The default value of op's role. This should be only used for unittests and
...
...
paddle/fluid/framework/parallel_executor.cc
浏览文件 @
e0a89503
...
@@ -109,18 +109,9 @@ ParallelExecutor::ParallelExecutor(
...
@@ -109,18 +109,9 @@ ParallelExecutor::ParallelExecutor(
if
(
member_
->
local_scopes_
.
size
()
!=
1
&&
local_scopes
.
empty
())
{
if
(
member_
->
local_scopes_
.
size
()
!=
1
&&
local_scopes
.
empty
())
{
BCastParamsToDevices
(
bcast_vars
);
BCastParamsToDevices
(
bcast_vars
);
}
}
// Startup Program has been run. All local scopes has correct parameters.
// Startup Program has been run. All local scopes has correct parameters.
// Step 2. Create vars in each scope;
// Step 2. Convert main_program to SSA form and dependency graph. Also, insert
std
::
vector
<
details
::
VariableInfo
>
var_infos
;
for
(
auto
*
var
:
main_program
.
Block
(
0
).
AllVars
())
{
var_infos
.
emplace_back
();
var_infos
.
back
().
name_
=
var
->
Name
();
var_infos
.
back
().
type_
=
var
->
GetType
();
var_infos
.
back
().
persistable_
=
var
->
Persistable
();
}
// Step 3. Convert main_program to SSA form and dependency graph. Also, insert
// ncclOp
// ncclOp
#ifdef PADDLE_WITH_CUDA
#ifdef PADDLE_WITH_CUDA
std
::
unique_ptr
<
ir
::
Graph
>
graph
=
build_strategy
.
Apply
(
std
::
unique_ptr
<
ir
::
Graph
>
graph
=
build_strategy
.
Apply
(
...
@@ -156,6 +147,23 @@ ParallelExecutor::ParallelExecutor(
...
@@ -156,6 +147,23 @@ ParallelExecutor::ParallelExecutor(
params
,
member_
->
local_scopes_
,
member_
->
use_cuda_
);
params
,
member_
->
local_scopes_
,
member_
->
use_cuda_
);
#endif
#endif
// Step 3. Create vars in each scope. Passes may also create new vars.
// skip control vars and empty vars
std
::
vector
<
details
::
VariableInfo
>
var_infos
;
for
(
auto
&
node
:
graph
->
Nodes
())
{
if
(
node
->
IsVar
()
&&
!
node
->
IsCtrlVar
()
&&
node
->
Var
())
{
var_infos
.
emplace_back
();
var_infos
.
back
().
name_
=
node
->
Var
()
->
Name
();
var_infos
.
back
().
type_
=
node
->
Var
()
->
GetType
();
var_infos
.
back
().
persistable_
=
node
->
Var
()
->
Persistable
();
}
}
// If the loss_var_name is given, the number of graph should be only one.
if
(
loss_var_name
.
size
())
{
PADDLE_ENFORCE_EQ
(
ir
::
GraphNum
(
*
graph
),
1
,
"The number of graph should be only one"
);
}
if
(
exec_strategy
.
type_
==
ExecutionStrategy
::
kDefault
)
{
if
(
exec_strategy
.
type_
==
ExecutionStrategy
::
kDefault
)
{
member_
->
executor_
.
reset
(
new
details
::
ThreadedSSAGraphExecutor
(
member_
->
executor_
.
reset
(
new
details
::
ThreadedSSAGraphExecutor
(
exec_strategy
,
member_
->
local_scopes_
,
places
,
std
::
move
(
graph
)));
exec_strategy
,
member_
->
local_scopes_
,
places
,
std
::
move
(
graph
)));
...
...
paddle/fluid/framework/scope.h
浏览文件 @
e0a89503
...
@@ -78,6 +78,8 @@ class Scope {
...
@@ -78,6 +78,8 @@ class Scope {
/// Drop all kids scopes belonged to this scope.
/// Drop all kids scopes belonged to this scope.
void
DropKids
();
void
DropKids
();
std
::
list
<
Scope
*>&
kids
()
const
{
return
kids_
;
}
/// Find if a scope exists in the kid scopes
/// Find if a scope exists in the kid scopes
bool
HasKid
(
const
Scope
*
scope
)
const
;
bool
HasKid
(
const
Scope
*
scope
)
const
;
...
...
paddle/fluid/inference/CMakeLists.txt
浏览文件 @
e0a89503
...
@@ -30,7 +30,7 @@ if (WITH_GPU AND TENSORRT_FOUND)
...
@@ -30,7 +30,7 @@ if (WITH_GPU AND TENSORRT_FOUND)
endif
()
endif
()
# Create static library
# Create static library
cc_library
(
paddle_fluid DEPS
${
fluid_modules
}
${
STATIC_INFERENCE_APIS
}
zero_copy_tensor
)
cc_library
(
paddle_fluid DEPS
${
fluid_modules
}
${
STATIC_INFERENCE_APIS
}
zero_copy_tensor
reset_tensor_array
)
if
(
NOT APPLE
)
if
(
NOT APPLE
)
# TODO(liuyiqu: Temporarily disable the link flag because it is not support on Mac.
# TODO(liuyiqu: Temporarily disable the link flag because it is not support on Mac.
...
@@ -40,7 +40,7 @@ endif()
...
@@ -40,7 +40,7 @@ endif()
# Create shared library
# Create shared library
cc_library
(
paddle_fluid_shared SHARED SRCS
${
SHARED_INFERENCE_SRCS
}
cc_library
(
paddle_fluid_shared SHARED SRCS
${
SHARED_INFERENCE_SRCS
}
DEPS
${
fluid_modules
}
paddle_fluid_api
)
DEPS
${
fluid_modules
}
paddle_fluid_api
reset_tensor_array
)
set_target_properties
(
paddle_fluid_shared PROPERTIES OUTPUT_NAME paddle_fluid
)
set_target_properties
(
paddle_fluid_shared PROPERTIES OUTPUT_NAME paddle_fluid
)
if
(
NOT APPLE
)
if
(
NOT APPLE
)
...
...
paddle/fluid/inference/analysis/analyzer.cc
浏览文件 @
e0a89503
...
@@ -107,6 +107,9 @@ void Analyzer::Run(Argument* argument) {
...
@@ -107,6 +107,9 @@ void Analyzer::Run(Argument* argument) {
passes
.
push_back
(
"mkldnn_placement_pass"
);
passes
.
push_back
(
"mkldnn_placement_pass"
);
}
}
#endif
#endif
// infer_clean_graph_pass should be the first default pass
// after mkldnn_placement_pass.
passes
.
push_back
(
"infer_clean_graph_pass"
);
for
(
auto
&
pass
:
ir_passes_
)
{
for
(
auto
&
pass
:
ir_passes_
)
{
if
(
!
disabled_ir_passes_
.
count
(
pass
))
{
if
(
!
disabled_ir_passes_
.
count
(
pass
))
{
passes
.
push_back
(
pass
);
passes
.
push_back
(
pass
);
...
...
paddle/fluid/inference/analysis/analyzer.h
浏览文件 @
e0a89503
...
@@ -67,7 +67,6 @@ class Analyzer : public OrderedRegistry<PassManager> {
...
@@ -67,7 +67,6 @@ class Analyzer : public OrderedRegistry<PassManager> {
// larger fusion.
// larger fusion.
const
std
::
vector
<
std
::
string
>
all_ir_passes_
{{
const
std
::
vector
<
std
::
string
>
all_ir_passes_
{{
// Manual update the passes here.
// Manual update the passes here.
"infer_clean_graph_pass"
,
//
"attention_lstm_fuse_pass"
,
//
"attention_lstm_fuse_pass"
,
//
"seqconv_eltadd_relu_fuse_pass"
,
//
"seqconv_eltadd_relu_fuse_pass"
,
//
"embedding_fc_lstm_fuse_pass"
,
//
"embedding_fc_lstm_fuse_pass"
,
//
...
...
paddle/fluid/inference/api/CMakeLists.txt
浏览文件 @
e0a89503
...
@@ -18,7 +18,8 @@ if(APPLE)
...
@@ -18,7 +18,8 @@ if(APPLE)
endif
(
APPLE
)
endif
(
APPLE
)
set
(
inference_deps paddle_inference_api paddle_fluid_api analysis pass ir_pass_manager naive_executor
${
GLOB_PASS_LIB
}
)
set
(
inference_deps paddle_inference_api paddle_fluid_api analysis pass ir_pass_manager naive_executor
${
GLOB_PASS_LIB
}
)
if
(
WITH_GPU AND TENSORRT_FOUND
)
if
(
WITH_GPU AND TENSORRT_FOUND
)
set
(
inference_deps
${
inference_deps
}
paddle_inference_tensorrt_subgraph_engine analysis_predictor
)
set
(
inference_deps
${
inference_deps
}
paddle_inference_tensorrt_subgraph_engine analysis_predictor
)
...
@@ -31,10 +32,17 @@ function(inference_api_test TARGET_NAME)
...
@@ -31,10 +32,17 @@ function(inference_api_test TARGET_NAME)
set
(
multiValueArgs ARGS
)
set
(
multiValueArgs ARGS
)
cmake_parse_arguments
(
inference_test
"
${
options
}
"
"
${
oneValueArgs
}
"
"
${
multiValueArgs
}
"
${
ARGN
}
)
cmake_parse_arguments
(
inference_test
"
${
options
}
"
"
${
oneValueArgs
}
"
"
${
multiValueArgs
}
"
${
ARGN
}
)
cc_test
(
${
TARGET_NAME
}
if
(
WITH_GPU
)
SRCS
${
inference_test_SRC
}
cc_test
(
${
TARGET_NAME
}
DEPS
"
${
inference_deps
}
"
SRCS
${
inference_test_SRC
}
ARGS --dirname=
${
PYTHON_TESTS_DIR
}
/book/
)
DEPS
"
${
inference_deps
}
"
ARGS --dirname=
${
PYTHON_TESTS_DIR
}
/book/ --fraction_of_gpu_memory_to_use=0.15
)
else
()
cc_test
(
${
TARGET_NAME
}
SRCS
${
inference_test_SRC
}
DEPS
"
${
inference_deps
}
"
ARGS --dirname=
${
PYTHON_TESTS_DIR
}
/book/
)
endif
()
if
(
inference_test_ARGS
)
if
(
inference_test_ARGS
)
set_tests_properties
(
${
TARGET_NAME
}
set_tests_properties
(
${
TARGET_NAME
}
PROPERTIES DEPENDS
"
${
inference_test_ARGS
}
"
)
PROPERTIES DEPENDS
"
${
inference_test_ARGS
}
"
)
...
@@ -42,7 +50,8 @@ function(inference_api_test TARGET_NAME)
...
@@ -42,7 +50,8 @@ function(inference_api_test TARGET_NAME)
endif
(
WITH_TESTING
)
endif
(
WITH_TESTING
)
endfunction
(
inference_api_test
)
endfunction
(
inference_api_test
)
cc_library
(
paddle_inference_api SRCS api.cc api_impl.cc helper.cc DEPS lod_tensor scope
)
cc_library
(
reset_tensor_array SRCS details/reset_tensor_array.cc DEPS lod_tensor scope
)
cc_library
(
paddle_inference_api SRCS api.cc api_impl.cc helper.cc DEPS reset_tensor_array lod_tensor scope
)
cc_library
(
analysis_predictor SRCS analysis_predictor.cc DEPS paddle_inference_api analysis naive_executor zero_copy_tensor
)
cc_library
(
analysis_predictor SRCS analysis_predictor.cc DEPS paddle_inference_api analysis naive_executor zero_copy_tensor
)
cc_library
(
zero_copy_tensor SRCS details/zero_copy_tensor.cc DEPS paddle_inference_api
)
cc_library
(
zero_copy_tensor SRCS details/zero_copy_tensor.cc DEPS paddle_inference_api
)
cc_library
(
zero_copy_tensor_dummy SRCS details/zero_copy_tensor_dummy.cc DEPS paddle_inference_api
)
cc_library
(
zero_copy_tensor_dummy SRCS details/zero_copy_tensor_dummy.cc DEPS paddle_inference_api
)
...
...
paddle/fluid/inference/api/analysis_predictor.cc
浏览文件 @
e0a89503
...
@@ -82,6 +82,7 @@ bool AnalysisPredictor::Init(
...
@@ -82,6 +82,7 @@ bool AnalysisPredictor::Init(
// Get the feed_target_names and fetch_target_names
// Get the feed_target_names and fetch_target_names
PrepareFeedFetch
();
PrepareFeedFetch
();
return
true
;
return
true
;
}
}
...
@@ -109,6 +110,10 @@ bool AnalysisPredictor::Run(const std::vector<PaddleTensor> &inputs,
...
@@ -109,6 +110,10 @@ bool AnalysisPredictor::Run(const std::vector<PaddleTensor> &inputs,
return
false
;
return
false
;
}
}
VLOG
(
3
)
<<
"predict cost: "
<<
timer
.
toc
()
<<
"ms"
;
VLOG
(
3
)
<<
"predict cost: "
<<
timer
.
toc
()
<<
"ms"
;
// Fix TensorArray reuse not cleaned bug.
tensor_array_batch_cleaner_
.
CollectTensorArrays
(
scope_
.
get
());
tensor_array_batch_cleaner_
.
ResetTensorArray
();
return
true
;
return
true
;
}
}
...
@@ -322,6 +327,9 @@ std::unique_ptr<ZeroCopyTensor> AnalysisPredictor::GetOutputTensor(
...
@@ -322,6 +327,9 @@ std::unique_ptr<ZeroCopyTensor> AnalysisPredictor::GetOutputTensor(
bool
AnalysisPredictor
::
ZeroCopyRun
()
{
bool
AnalysisPredictor
::
ZeroCopyRun
()
{
executor_
->
Run
();
executor_
->
Run
();
// Fix TensorArray reuse not cleaned bug.
tensor_array_batch_cleaner_
.
CollectTensorArrays
(
scope_
.
get
());
tensor_array_batch_cleaner_
.
ResetTensorArray
();
return
true
;
return
true
;
}
}
...
...
paddle/fluid/inference/api/analysis_predictor.h
浏览文件 @
e0a89503
...
@@ -18,6 +18,7 @@
...
@@ -18,6 +18,7 @@
#include "paddle/fluid/framework/naive_executor.h"
#include "paddle/fluid/framework/naive_executor.h"
#include "paddle/fluid/inference/analysis/analyzer.h"
#include "paddle/fluid/inference/analysis/analyzer.h"
#include "paddle/fluid/inference/api/api_impl.h"
#include "paddle/fluid/inference/api/api_impl.h"
#include "paddle/fluid/inference/api/details/reset_tensor_array.h"
#include "paddle/fluid/inference/api/paddle_inference_api.h"
#include "paddle/fluid/inference/api/paddle_inference_api.h"
#include "paddle/fluid/string/printf.h"
#include "paddle/fluid/string/printf.h"
...
@@ -88,6 +89,7 @@ class AnalysisPredictor : public PaddlePredictor {
...
@@ -88,6 +89,7 @@ class AnalysisPredictor : public PaddlePredictor {
// Memory buffer for feed inputs. The temporary LoDTensor will cause serious
// Memory buffer for feed inputs. The temporary LoDTensor will cause serious
// concurrency problems, so cache them.
// concurrency problems, so cache them.
std
::
vector
<
framework
::
LoDTensor
>
feed_tensors_
;
std
::
vector
<
framework
::
LoDTensor
>
feed_tensors_
;
details
::
TensorArrayBatchCleaner
tensor_array_batch_cleaner_
;
};
};
}
// namespace paddle
}
// namespace paddle
paddle/fluid/inference/api/api_impl.cc
浏览文件 @
e0a89503
...
@@ -22,6 +22,7 @@ limitations under the License. */
...
@@ -22,6 +22,7 @@ limitations under the License. */
#include "paddle/fluid/framework/feed_fetch_method.h"
#include "paddle/fluid/framework/feed_fetch_method.h"
#include "paddle/fluid/inference/api/api_impl.h"
#include "paddle/fluid/inference/api/api_impl.h"
#include "paddle/fluid/inference/api/details/reset_tensor_array.h"
#include "paddle/fluid/inference/api/helper.h"
#include "paddle/fluid/inference/api/helper.h"
#include "paddle/fluid/platform/cpu_helper.h"
#include "paddle/fluid/platform/cpu_helper.h"
#include "paddle/fluid/platform/profiler.h"
#include "paddle/fluid/platform/profiler.h"
...
@@ -157,6 +158,10 @@ bool NativePaddlePredictor::Run(const std::vector<PaddleTensor> &inputs,
...
@@ -157,6 +158,10 @@ bool NativePaddlePredictor::Run(const std::vector<PaddleTensor> &inputs,
return
false
;
return
false
;
}
}
VLOG
(
3
)
<<
"predict cost: "
<<
timer
.
toc
()
<<
"ms"
;
VLOG
(
3
)
<<
"predict cost: "
<<
timer
.
toc
()
<<
"ms"
;
// Fix TensorArray reuse not cleaned bug.
tensor_array_batch_cleaner_
.
CollectTensorArrays
(
scope_
.
get
());
tensor_array_batch_cleaner_
.
ResetTensorArray
();
return
true
;
return
true
;
}
}
...
...
paddle/fluid/inference/api/api_impl.h
浏览文件 @
e0a89503
...
@@ -26,11 +26,11 @@ limitations under the License. */
...
@@ -26,11 +26,11 @@ limitations under the License. */
#include <string>
#include <string>
#include <vector>
#include <vector>
#include "paddle/fluid/inference/api/paddle_inference_api.h"
#include "paddle/fluid/framework/ddim.h"
#include "paddle/fluid/framework/ddim.h"
#include "paddle/fluid/framework/lod_tensor.h"
#include "paddle/fluid/framework/lod_tensor.h"
#include "paddle/fluid/framework/lod_tensor_array.h"
#include "paddle/fluid/framework/naive_executor.h"
#include "paddle/fluid/framework/naive_executor.h"
#include "paddle/fluid/inference/api/details/reset_tensor_array.h"
#include "paddle/fluid/inference/api/paddle_inference_api.h"
#include "paddle/fluid/inference/api/paddle_inference_api.h"
#include "paddle/fluid/inference/io.h"
#include "paddle/fluid/inference/io.h"
#include "paddle/fluid/platform/init.h"
#include "paddle/fluid/platform/init.h"
...
@@ -77,6 +77,7 @@ class NativePaddlePredictor : public PaddlePredictor {
...
@@ -77,6 +77,7 @@ class NativePaddlePredictor : public PaddlePredictor {
std
::
vector
<
framework
::
OpDesc
*>
fetchs_
;
std
::
vector
<
framework
::
OpDesc
*>
fetchs_
;
// Do not use unique_ptr, use parent scope to delete
// Do not use unique_ptr, use parent scope to delete
framework
::
Scope
*
sub_scope_
{
nullptr
};
framework
::
Scope
*
sub_scope_
{
nullptr
};
details
::
TensorArrayBatchCleaner
tensor_array_batch_cleaner_
;
};
};
}
// namespace paddle
}
// namespace paddle
paddle/fluid/inference/api/demo_ci/CMakeLists.txt
浏览文件 @
e0a89503
...
@@ -52,6 +52,7 @@ include_directories("${PADDLE_LIB}")
...
@@ -52,6 +52,7 @@ include_directories("${PADDLE_LIB}")
include_directories
(
"
${
PADDLE_LIB
}
/third_party/install/protobuf/include"
)
include_directories
(
"
${
PADDLE_LIB
}
/third_party/install/protobuf/include"
)
include_directories
(
"
${
PADDLE_LIB
}
/third_party/install/glog/include"
)
include_directories
(
"
${
PADDLE_LIB
}
/third_party/install/glog/include"
)
include_directories
(
"
${
PADDLE_LIB
}
/third_party/install/gflags/include"
)
include_directories
(
"
${
PADDLE_LIB
}
/third_party/install/gflags/include"
)
include_directories
(
"
${
PADDLE_LIB
}
/third_party/install/xxhash/include"
)
if
(
NOT WIN32
)
if
(
NOT WIN32
)
include_directories
(
"
${
PADDLE_LIB
}
/third_party/install/snappy/include"
)
include_directories
(
"
${
PADDLE_LIB
}
/third_party/install/snappy/include"
)
include_directories
(
"
${
PADDLE_LIB
}
/third_party/install/snappystream/include"
)
include_directories
(
"
${
PADDLE_LIB
}
/third_party/install/snappystream/include"
)
...
@@ -61,8 +62,8 @@ endif(NOT WIN32)
...
@@ -61,8 +62,8 @@ endif(NOT WIN32)
include_directories
(
"
${
PADDLE_LIB
}
/third_party/boost"
)
include_directories
(
"
${
PADDLE_LIB
}
/third_party/boost"
)
include_directories
(
"
${
PADDLE_LIB
}
/third_party/eigen3"
)
include_directories
(
"
${
PADDLE_LIB
}
/third_party/eigen3"
)
if
(
NOT WIN32
)
if
(
NOT WIN32
)
if
(
USE_TENSORRT AND WITH_GPU
)
if
(
USE_TENSORRT AND WITH_GPU
)
include_directories
(
"
${
TENSORRT_INCLUDE_DIR
}
"
)
include_directories
(
"
${
TENSORRT_INCLUDE_DIR
}
"
)
link_directories
(
"
${
TENSORRT_LIB_DIR
}
"
)
link_directories
(
"
${
TENSORRT_LIB_DIR
}
"
)
endif
()
endif
()
...
@@ -77,13 +78,14 @@ endif(NOT WIN32)
...
@@ -77,13 +78,14 @@ endif(NOT WIN32)
link_directories
(
"
${
PADDLE_LIB
}
/third_party/install/protobuf/lib"
)
link_directories
(
"
${
PADDLE_LIB
}
/third_party/install/protobuf/lib"
)
link_directories
(
"
${
PADDLE_LIB
}
/third_party/install/glog/lib"
)
link_directories
(
"
${
PADDLE_LIB
}
/third_party/install/glog/lib"
)
link_directories
(
"
${
PADDLE_LIB
}
/third_party/install/gflags/lib"
)
link_directories
(
"
${
PADDLE_LIB
}
/third_party/install/gflags/lib"
)
link_directories
(
"
${
PADDLE_LIB
}
/third_party/install/xxhash/lib"
)
link_directories
(
"
${
PADDLE_LIB
}
/paddle/lib"
)
link_directories
(
"
${
PADDLE_LIB
}
/paddle/lib"
)
add_executable
(
${
DEMO_NAME
}
${
DEMO_NAME
}
.cc
)
add_executable
(
${
DEMO_NAME
}
${
DEMO_NAME
}
.cc
)
if
(
WITH_MKL
)
if
(
WITH_MKL
)
include_directories
(
"
${
PADDLE_LIB
}
/third_party/install/mklml/include"
)
include_directories
(
"
${
PADDLE_LIB
}
/third_party/install/mklml/include"
)
set
(
MATH_LIB
${
PADDLE_LIB
}
/third_party/install/mklml/lib/libmklml_intel
${
CMAKE_SHARED_LIBRARY_SUFFIX
}
set
(
MATH_LIB
${
PADDLE_LIB
}
/third_party/install/mklml/lib/libmklml_intel
${
CMAKE_SHARED_LIBRARY_SUFFIX
}
${
PADDLE_LIB
}
/third_party/install/mklml/lib/libiomp5
${
CMAKE_SHARED_LIBRARY_SUFFIX
}
)
${
PADDLE_LIB
}
/third_party/install/mklml/lib/libiomp5
${
CMAKE_SHARED_LIBRARY_SUFFIX
}
)
set
(
MKLDNN_PATH
"
${
PADDLE_LIB
}
/third_party/install/mkldnn"
)
set
(
MKLDNN_PATH
"
${
PADDLE_LIB
}
/third_party/install/mkldnn"
)
if
(
EXISTS
${
MKLDNN_PATH
}
)
if
(
EXISTS
${
MKLDNN_PATH
}
)
...
@@ -107,7 +109,7 @@ if (NOT WIN32)
...
@@ -107,7 +109,7 @@ if (NOT WIN32)
set
(
EXTERNAL_LIB
"-lrt -ldl -lpthread"
)
set
(
EXTERNAL_LIB
"-lrt -ldl -lpthread"
)
set
(
DEPS
${
DEPS
}
set
(
DEPS
${
DEPS
}
${
MATH_LIB
}
${
MKLDNN_LIB
}
${
MATH_LIB
}
${
MKLDNN_LIB
}
glog gflags protobuf snappystream snappy z
glog gflags protobuf snappystream snappy z
xxhash
${
EXTERNAL_LIB
}
)
${
EXTERNAL_LIB
}
)
else
()
else
()
set
(
DEPS
${
DEPS
}
set
(
DEPS
${
DEPS
}
...
@@ -120,7 +122,7 @@ endif(NOT WIN32)
...
@@ -120,7 +122,7 @@ endif(NOT WIN32)
if
(
WITH_GPU
)
if
(
WITH_GPU
)
if
(
NOT WIN32
)
if
(
NOT WIN32
)
if
(
USE_TENSORRT
)
if
(
USE_TENSORRT
)
set
(
DEPS
${
DEPS
}
${
TENSORRT_LIB_DIR
}
/libnvinfer
${
CMAKE_STATIC_LIBRARY_SUFFIX
}
)
set
(
DEPS
${
DEPS
}
${
TENSORRT_LIB_DIR
}
/libnvinfer
${
CMAKE_STATIC_LIBRARY_SUFFIX
}
)
set
(
DEPS
${
DEPS
}
${
TENSORRT_LIB_DIR
}
/libnvinfer_plugin
${
CMAKE_STATIC_LIBRARY_SUFFIX
}
)
set
(
DEPS
${
DEPS
}
${
TENSORRT_LIB_DIR
}
/libnvinfer_plugin
${
CMAKE_STATIC_LIBRARY_SUFFIX
}
)
endif
()
endif
()
...
...
paddle/fluid/inference/api/demo_ci/run.sh
浏览文件 @
e0a89503
...
@@ -16,7 +16,7 @@ if [ $2 == ON ]; then
...
@@ -16,7 +16,7 @@ if [ $2 == ON ]; then
fi
fi
if
[
$3
==
ON
]
;
then
if
[
$3
==
ON
]
;
then
use_gpu_list
=
'true false'
use_gpu_list
=
'true false'
else
else
use_gpu_list
=
'false'
use_gpu_list
=
'false'
fi
fi
...
@@ -60,7 +60,8 @@ for WITH_STATIC_LIB in ON OFF; do
...
@@ -60,7 +60,8 @@ for WITH_STATIC_LIB in ON OFF; do
-DWITH_MKL
=
$TURN_ON_MKL
\
-DWITH_MKL
=
$TURN_ON_MKL
\
-DDEMO_NAME
=
simple_on_word2vec
\
-DDEMO_NAME
=
simple_on_word2vec
\
-DWITH_GPU
=
$TEST_GPU_CPU
\
-DWITH_GPU
=
$TEST_GPU_CPU
\
-DWITH_STATIC_LIB
=
$WITH_STATIC_LIB
-DWITH_STATIC_LIB
=
$WITH_STATIC_LIB
\
-DON_INFER
=
ON
make
-j
make
-j
word2vec_model
=
${
PADDLE_ROOT
}
'/build/python/paddle/fluid/tests/book/word2vec.inference.model'
word2vec_model
=
${
PADDLE_ROOT
}
'/build/python/paddle/fluid/tests/book/word2vec.inference.model'
if
[
-d
$word2vec_model
]
;
then
if
[
-d
$word2vec_model
]
;
then
...
@@ -80,10 +81,11 @@ for WITH_STATIC_LIB in ON OFF; do
...
@@ -80,10 +81,11 @@ for WITH_STATIC_LIB in ON OFF; do
-DWITH_MKL
=
$TURN_ON_MKL
\
-DWITH_MKL
=
$TURN_ON_MKL
\
-DDEMO_NAME
=
vis_demo
\
-DDEMO_NAME
=
vis_demo
\
-DWITH_GPU
=
$TEST_GPU_CPU
\
-DWITH_GPU
=
$TEST_GPU_CPU
\
-DWITH_STATIC_LIB
=
$WITH_STATIC_LIB
-DWITH_STATIC_LIB
=
$WITH_STATIC_LIB
\
-DON_INFER
=
ON
make
-j
make
-j
for
use_gpu
in
$use_gpu_list
;
do
for
use_gpu
in
$use_gpu_list
;
do
for
vis_demo_name
in
$vis_demo_list
;
do
for
vis_demo_name
in
$vis_demo_list
;
do
./vis_demo
\
./vis_demo
\
--modeldir
=
$DATA_DIR
/
$vis_demo_name
/model
\
--modeldir
=
$DATA_DIR
/
$vis_demo_name
/model
\
--data
=
$DATA_DIR
/
$vis_demo_name
/data.txt
\
--data
=
$DATA_DIR
/
$vis_demo_name
/data.txt
\
...
@@ -95,7 +97,7 @@ for WITH_STATIC_LIB in ON OFF; do
...
@@ -95,7 +97,7 @@ for WITH_STATIC_LIB in ON OFF; do
fi
fi
done
done
done
done
# --------tensorrt mobilenet------
# --------tensorrt mobilenet------
if
[
$USE_TENSORRT
==
ON
-a
$TEST_GPU_CPU
==
ON
]
;
then
if
[
$USE_TENSORRT
==
ON
-a
$TEST_GPU_CPU
==
ON
]
;
then
rm
-rf
*
rm
-rf
*
...
@@ -106,8 +108,9 @@ for WITH_STATIC_LIB in ON OFF; do
...
@@ -106,8 +108,9 @@ for WITH_STATIC_LIB in ON OFF; do
-DWITH_STATIC_LIB
=
$WITH_STATIC_LIB
\
-DWITH_STATIC_LIB
=
$WITH_STATIC_LIB
\
-DUSE_TENSORRT
=
$USE_TENSORRT
\
-DUSE_TENSORRT
=
$USE_TENSORRT
\
-DTENSORRT_INCLUDE_DIR
=
$TENSORRT_INCLUDE_DIR
\
-DTENSORRT_INCLUDE_DIR
=
$TENSORRT_INCLUDE_DIR
\
-DTENSORRT_LIB_DIR
=
$TENSORRT_LIB_DIR
-DTENSORRT_LIB_DIR
=
$TENSORRT_LIB_DIR
\
make
-j
-DON_INFER
=
ON
make
-j
./trt_mobilenet_demo
\
./trt_mobilenet_demo
\
--modeldir
=
$DATA_DIR
/mobilenet/model
\
--modeldir
=
$DATA_DIR
/mobilenet/model
\
--data
=
$DATA_DIR
/mobilenet/data.txt
\
--data
=
$DATA_DIR
/mobilenet/data.txt
\
...
...
paddle/fluid/inference/api/details/reset_tensor_array.cc
0 → 100644
浏览文件 @
e0a89503
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/fluid/inference/api/details/reset_tensor_array.h"
namespace
paddle
{
namespace
details
{
// Should be called after the parameters are loaded.
void
TensorArrayBatchCleaner
::
CollectTensorArrays
(
framework
::
Scope
*
scope
)
{
if
(
flag_
)
{
for
(
auto
&
var_name
:
scope
->
LocalVarNames
())
{
auto
*
var
=
scope
->
FindVar
(
var_name
);
// TODO(Superjomn) should avoid the case when a TensorArray is a
// parameter.
if
(
var_name
==
"feed"
||
var_name
==
"fetch"
)
continue
;
if
(
var
->
Type
()
==
typeid
(
framework
::
LoDTensorArray
))
{
VLOG
(
4
)
<<
"collect "
<<
var_name
;
arrays_
.
push_back
(
var
->
GetMutable
<
framework
::
LoDTensorArray
>
());
}
}
for
(
auto
*
kid
:
scope
->
kids
())
{
CollectTensorArrays
(
kid
);
}
VLOG
(
3
)
<<
"Collect "
<<
arrays_
.
size
()
<<
" arrays"
;
flag_
=
false
;
}
}
// Should be called when `Run` finished.
void
TensorArrayBatchCleaner
::
ResetTensorArray
()
{
for
(
auto
*
arr
:
arrays_
)
{
arr
->
clear
();
}
}
}
// namespace details
}
// namespace paddle
paddle/fluid/inference/api/details/reset_tensor_array.h
0 → 100644
浏览文件 @
e0a89503
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <vector>
#include "paddle/fluid/framework/lod_tensor_array.h"
#include "paddle/fluid/framework/scope.h"
namespace
paddle
{
namespace
details
{
// Clean the TensorArray each batch to make the behavior the same with the
// training phase.
struct
TensorArrayBatchCleaner
{
// Fix the tensor array not clear in the inference scenarios.
void
CollectTensorArrays
(
framework
::
Scope
*
scope
);
void
ResetTensorArray
();
private:
bool
flag_
{
true
};
std
::
vector
<
framework
::
LoDTensorArray
*>
arrays_
;
};
}
// namespace details
}
// namespace paddle
paddle/fluid/inference/api/helper.h
浏览文件 @
e0a89503
...
@@ -160,7 +160,8 @@ static void PrintTime(int batch_size, int repeat, int num_threads, int tid,
...
@@ -160,7 +160,8 @@ static void PrintTime(int batch_size, int repeat, int num_threads, int tid,
double
latency
,
int
epoch
=
1
)
{
double
latency
,
int
epoch
=
1
)
{
LOG
(
INFO
)
<<
"====== batch_size: "
<<
batch_size
<<
", repeat: "
<<
repeat
LOG
(
INFO
)
<<
"====== batch_size: "
<<
batch_size
<<
", repeat: "
<<
repeat
<<
", threads: "
<<
num_threads
<<
", thread id: "
<<
tid
<<
", threads: "
<<
num_threads
<<
", thread id: "
<<
tid
<<
", latency: "
<<
latency
<<
"ms ======"
;
<<
", latency: "
<<
latency
<<
"ms, fps: "
<<
1
/
(
latency
/
1000.
f
)
<<
" ======"
;
if
(
epoch
>
1
)
{
if
(
epoch
>
1
)
{
int
samples
=
batch_size
*
epoch
;
int
samples
=
batch_size
*
epoch
;
LOG
(
INFO
)
<<
"====== sample number: "
<<
samples
LOG
(
INFO
)
<<
"====== sample number: "
<<
samples
...
...
paddle/fluid/inference/api/paddle_inference_api.h
浏览文件 @
e0a89503
...
@@ -124,7 +124,7 @@ class ZeroCopyTensor {
...
@@ -124,7 +124,7 @@ class ZeroCopyTensor {
std
::
vector
<
std
::
vector
<
size_t
>>
lod
()
const
;
std
::
vector
<
std
::
vector
<
size_t
>>
lod
()
const
;
protected:
protected:
ZeroCopyTensor
(
void
*
scope
)
:
scope_
{
scope
}
{}
explicit
ZeroCopyTensor
(
void
*
scope
)
:
scope_
{
scope
}
{}
void
SetName
(
const
std
::
string
&
name
)
{
name_
=
name
;
}
void
SetName
(
const
std
::
string
&
name
)
{
name_
=
name
;
}
void
*
FindTensor
()
const
;
void
*
FindTensor
()
const
;
...
@@ -259,12 +259,6 @@ struct AnalysisConfig : public NativeConfig {
...
@@ -259,12 +259,6 @@ struct AnalysisConfig : public NativeConfig {
kExclude
// Specify the disabled passes in `ir_passes`.
kExclude
// Specify the disabled passes in `ir_passes`.
};
};
void
SetIncludeMode
()
{
ir_mode
=
IrPassMode
::
kInclude
;
// this pass has to be run at the beginning of all fuse passes
ir_passes
=
{
"infer_clean_graph_pass"
};
}
// Determine whether to perform graph optimization.
// Determine whether to perform graph optimization.
bool
enable_ir_optim
=
true
;
bool
enable_ir_optim
=
true
;
// Manually determine the IR passes to run.
// Manually determine the IR passes to run.
...
...
paddle/fluid/inference/tests/api/analyzer_rnn1_tester.cc
浏览文件 @
e0a89503
...
@@ -228,6 +228,7 @@ void SetInput(std::vector<std::vector<PaddleTensor>> *inputs) {
...
@@ -228,6 +228,7 @@ void SetInput(std::vector<std::vector<PaddleTensor>> *inputs) {
TEST
(
Analyzer_rnn1
,
profile
)
{
TEST
(
Analyzer_rnn1
,
profile
)
{
contrib
::
AnalysisConfig
cfg
;
contrib
::
AnalysisConfig
cfg
;
SetConfig
(
&
cfg
);
SetConfig
(
&
cfg
);
cfg
.
use_gpu
=
false
;
std
::
vector
<
PaddleTensor
>
outputs
;
std
::
vector
<
PaddleTensor
>
outputs
;
std
::
vector
<
std
::
vector
<
PaddleTensor
>>
input_slots_all
;
std
::
vector
<
std
::
vector
<
PaddleTensor
>>
input_slots_all
;
...
...
paddle/fluid/inference/tests/api/tester_helper.h
浏览文件 @
e0a89503
...
@@ -139,6 +139,9 @@ void TestMultiThreadPrediction(
...
@@ -139,6 +139,9 @@ void TestMultiThreadPrediction(
}
}
for
(
int
tid
=
0
;
tid
<
num_threads
;
++
tid
)
{
for
(
int
tid
=
0
;
tid
<
num_threads
;
++
tid
)
{
threads
.
emplace_back
([
&
,
tid
]()
{
threads
.
emplace_back
([
&
,
tid
]()
{
#ifdef PADDLE_WITH_MKLDNN
platform
::
set_cur_thread_id
(
static_cast
<
int
>
(
tid
)
+
1
);
#endif
// Each thread should have local inputs and outputs.
// Each thread should have local inputs and outputs.
// The inputs of each thread are all the same.
// The inputs of each thread are all the same.
std
::
vector
<
std
::
vector
<
PaddleTensor
>>
inputs_tid
=
inputs
;
std
::
vector
<
std
::
vector
<
PaddleTensor
>>
inputs_tid
=
inputs
;
...
...
paddle/fluid/operators/CMakeLists.txt
浏览文件 @
e0a89503
...
@@ -268,6 +268,7 @@ if (WITH_GPU AND TENSORRT_FOUND)
...
@@ -268,6 +268,7 @@ if (WITH_GPU AND TENSORRT_FOUND)
else
()
else
()
set
(
DEPS_OPS
${
DEPS_OPS
}
tensorrt_engine_op
)
set
(
DEPS_OPS
${
DEPS_OPS
}
tensorrt_engine_op
)
endif
()
endif
()
op_library
(
hash_op DEPS xxhash
)
op_library
(
clip_by_norm_op DEPS selected_rows_functor selected_rows
)
op_library
(
clip_by_norm_op DEPS selected_rows_functor selected_rows
)
op_library
(
sum_op DEPS selected_rows_functor
)
op_library
(
sum_op DEPS selected_rows_functor
)
op_library
(
sgd_op DEPS selected_rows_functor
)
op_library
(
sgd_op DEPS selected_rows_functor
)
...
...
paddle/fluid/operators/beam_search_decode_op.cc
浏览文件 @
e0a89503
...
@@ -79,6 +79,9 @@ struct BeamSearchDecodeFunctor {
...
@@ -79,6 +79,9 @@ struct BeamSearchDecodeFunctor {
bool
tensor_on_gpu_
;
bool
tensor_on_gpu_
;
size_t
beam_size_
;
size_t
beam_size_
;
int
end_id_
;
int
end_id_
;
// TODO(Superjomn) Here might result serious performance issue in the
// concurrency
// scenarios.
const
LoDTensorArray
&
step_ids_origin_
;
const
LoDTensorArray
&
step_ids_origin_
;
const
LoDTensorArray
&
step_scores_origin_
;
const
LoDTensorArray
&
step_scores_origin_
;
LoDTensorArray
step_ids_
=
LoDTensorArray
();
LoDTensorArray
step_ids_
=
LoDTensorArray
();
...
...
paddle/fluid/operators/hash_op.cc
0 → 100644
浏览文件 @
e0a89503
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/fluid/operators/hash_op.h"
#include <string>
#include <vector>
namespace
paddle
{
namespace
operators
{
class
HashOp
:
public
framework
::
OperatorWithKernel
{
public:
HashOp
(
const
std
::
string
&
type
,
const
framework
::
VariableNameMap
&
inputs
,
const
framework
::
VariableNameMap
&
outputs
,
const
framework
::
AttributeMap
&
attrs
)
:
OperatorWithKernel
(
type
,
inputs
,
outputs
,
attrs
)
{}
void
InferShape
(
framework
::
InferShapeContext
*
ctx
)
const
override
{
PADDLE_ENFORCE
(
ctx
->
HasInput
(
"X"
),
"Input(X) of HashOp should not be null."
);
PADDLE_ENFORCE
(
ctx
->
HasOutput
(
"Out"
),
"Output(Out) of HashOp should not be null."
);
auto
dims
=
ctx
->
GetInputDim
(
"X"
);
PADDLE_ENFORCE_EQ
(
dims
.
size
(),
2UL
,
"The input of hash_op's dimensions must be 2"
);
std
::
vector
<
int64_t
>
out_dims
;
out_dims
.
reserve
(
dims
.
size
()
+
1
);
// copy all dims except the last one
for
(
size_t
i
=
0u
;
i
!=
dims
.
size
()
-
1
;
++
i
)
{
out_dims
.
emplace_back
(
dims
[
i
]);
}
int
num_hash
=
ctx
->
Attrs
().
Get
<
int
>
(
"num_hash"
);
out_dims
.
emplace_back
(
num_hash
);
// keep the last dim to 1
out_dims
.
emplace_back
(
1
);
ctx
->
SetOutputDim
(
"Out"
,
framework
::
make_ddim
(
out_dims
));
ctx
->
ShareLoD
(
"X"
,
/*->*/
"Out"
);
}
};
class
HashOpMaker
:
public
framework
::
OpProtoAndCheckerMaker
{
public:
void
Make
()
override
{
AddInput
(
"X"
,
"(Tensor) Input tensor of scale operator."
);
AddOutput
(
"Out"
,
"(Tensor) Output tensor of scale operator."
);
AddComment
(
R"DOC(
**Hash Operator**
$$Out = scale * X$$
)DOC"
);
AddAttr
<
int
>
(
"num_hash"
,
""
).
SetDefault
(
1
);
AddAttr
<
int
>
(
"mod_by"
,
""
).
SetDefault
(
100000
);
}
};
}
// namespace operators
}
// namespace paddle
namespace
ops
=
paddle
::
operators
;
REGISTER_OP_WITHOUT_GRADIENT
(
hash
,
ops
::
HashOp
,
ops
::
HashOpMaker
);
REGISTER_OP_CPU_KERNEL
(
hash
,
ops
::
HashKerel
<
int
>
,
ops
::
HashKerel
<
int64_t
>
);
paddle/fluid/operators/hash_op.h
0 → 100644
浏览文件 @
e0a89503
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
extern
"C"
{
#include <xxhash.h>
}
#include "paddle/fluid/framework/eigen.h"
#include "paddle/fluid/framework/op_registry.h"
namespace
paddle
{
namespace
operators
{
// template <typename DeviceContext, typename T>
template
<
typename
T
>
class
HashKerel
:
public
framework
::
OpKernel
<
T
>
{
public:
virtual
void
Compute
(
const
framework
::
ExecutionContext
&
context
)
const
{
auto
*
out_t
=
context
.
Output
<
framework
::
LoDTensor
>
(
"Out"
);
auto
*
in_t
=
context
.
Input
<
framework
::
LoDTensor
>
(
"X"
);
int
mod_by
=
context
.
Attr
<
int
>
(
"mod_by"
);
int
num_hash
=
context
.
Attr
<
int
>
(
"num_hash"
);
auto
*
output
=
out_t
->
mutable_data
<
T
>
(
context
.
GetPlace
());
auto
in_dims
=
in_t
->
dims
();
auto
in_lod
=
in_t
->
lod
();
PADDLE_ENFORCE_EQ
(
static_cast
<
uint64_t
>
(
in_dims
[
0
]),
in_lod
[
0
].
back
(),
"The actual input data's size mismatched with LoD information."
);
auto
seq_length
=
in_dims
[
0
];
auto
last_dim
=
in_dims
[
in_dims
.
size
()
-
1
];
auto
*
input
=
in_t
->
data
<
T
>
();
for
(
int
idx
=
0
;
idx
<
seq_length
;
++
idx
)
{
for
(
int
ihash
=
0
;
ihash
!=
num_hash
;
++
ihash
)
{
output
[
idx
*
num_hash
+
ihash
]
=
XXH64
(
input
,
sizeof
(
int
)
*
last_dim
,
ihash
)
%
mod_by
;
}
input
+=
last_dim
;
}
}
};
}
// namespace operators
}
// namespace paddle
paddle/fluid/operators/lars_momentum_op.cc
0 → 100644
浏览文件 @
e0a89503
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/fluid/operators/lars_momentum_op.h"
#include "paddle/fluid/operators/momentum_op.h"
namespace
paddle
{
namespace
operators
{
class
LarsMomentumOpMaker
:
public
framework
::
OpProtoAndCheckerMaker
{
public:
void
Make
()
override
{
AddInput
(
"Param"
,
"(LoDTensor, default LoDTensor<float>) "
"Input parameter that has to be updated"
);
AddInput
(
"Grad"
,
"(LoDTensor, default LoDTensor<float>) "
"Input gradient of the parameter"
);
AddInput
(
"Velocity"
,
"(LoDTensor, default LoDTensor<float>) "
"Input velocity (corresponding to the parameter) "
"that has to be updated"
);
AddInput
(
"LearningRate"
,
"(LoDTensor, default LoDTensor<float>) "
"Input learning rate"
);
AddOutput
(
"ParamOut"
,
"(LoDTensor) This output is updated parameter. "
"It shared memory with Input(Param)."
);
AddOutput
(
"VelocityOut"
,
"(LoDTensor) This output is updated velocity. "
"It shared memory with Input(Velocity)."
);
AddAttr
<
float
>
(
"mu"
,
"(float) Momentum coefficient"
);
AddAttr
<
float
>
(
"lars_coeff"
,
"(float, default 0.001) LARS coefficient."
)
.
SetDefault
(
0.001
);
AddAttr
<
float
>
(
"lars_weight_decay"
,
"(float, default 0.0005) LARS weight decay"
)
.
SetDefault
(
0.0005
);
AddComment
(
R"DOC(
Lars Momentum Optimizer.
This optimizer use LARS (https://arxiv.org/abs/1708.03888) to optimize each
weight using a local learning rate:
$$
local\_lr = \eta *
\frac{\left \| param \right \|}{\left \| grad \right \| + \beta *\left \| param \right \|} \\
velocity = mu * velocity +
local\_lr * (grad + \beta * param) \\
param = param - velocity. \\
$$
Note that we use lars_weight_decay here to decay weights, you may need not to
use L2 regularizers in case of using LARS.
)DOC"
);
}
};
class
LarsMomentumOpVarTypeInference
:
public
framework
::
VarTypeInference
{
public:
void
operator
()(
const
framework
::
OpDesc
&
op_desc
,
framework
::
BlockDesc
*
block
)
const
override
{}
};
}
// namespace operators
}
// namespace paddle
namespace
ops
=
paddle
::
operators
;
REGISTER_OPERATOR
(
lars_momentum
,
ops
::
MomentumOp
,
ops
::
LarsMomentumOpMaker
,
paddle
::
framework
::
EmptyGradOpMaker
,
ops
::
LarsMomentumOpVarTypeInference
);
REGISTER_OP_CPU_KERNEL
(
lars_momentum
,
ops
::
LarsMomentumOpKernel
<
float
>
,
ops
::
LarsMomentumOpKernel
<
double
>
);
paddle/fluid/operators/lars_momentum_op.cu
0 → 100644
浏览文件 @
e0a89503
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/operators/lars_momentum_op.h"
namespace
paddle
{
namespace
operators
{
template
<
typename
T
>
__global__
void
MomentumLarsKernel
(
const
T
*
p
,
const
T
*
g
,
const
T
*
v
,
const
T
*
learning_rate
,
const
T
mu
,
const
int64_t
num
,
const
T
lars_coeff
,
const
T
lars_weight_decay
,
const
T
*
p_norm
,
const
T
*
g_norm
,
T
*
p_out
,
T
*
v_out
)
{
T
lr
=
learning_rate
[
0
];
T
local_lr
=
learning_rate
[
0
];
for
(
int
i
=
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
;
i
<
num
;
i
+=
blockDim
.
x
*
gridDim
.
x
)
{
if
(
p_norm
[
0
]
>
0
&&
g_norm
[
0
]
>
0
)
{
local_lr
=
lr
*
lars_coeff
*
p_norm
[
0
]
/
(
g_norm
[
0
]
+
lars_weight_decay
*
p_norm
[
0
]);
}
T
v_new
=
v
[
i
]
*
mu
+
local_lr
*
(
g
[
i
]
+
lars_weight_decay
*
p
[
i
]);
v_out
[
i
]
=
v_new
;
p_out
[
i
]
=
p
[
i
]
-
v_new
;
}
}
template
<
typename
DeviceContext
,
typename
T
>
class
LarsMomentumOpCUDAKernel
:
public
framework
::
OpKernel
<
T
>
{
public:
void
Compute
(
const
framework
::
ExecutionContext
&
ctx
)
const
override
{
auto
param_out
=
ctx
.
Output
<
framework
::
LoDTensor
>
(
"ParamOut"
);
auto
velocity_out
=
ctx
.
Output
<
framework
::
LoDTensor
>
(
"VelocityOut"
);
auto
param
=
ctx
.
Input
<
framework
::
LoDTensor
>
(
"Param"
);
auto
velocity
=
ctx
.
Input
<
framework
::
LoDTensor
>
(
"Velocity"
);
auto
grad
=
ctx
.
Input
<
framework
::
LoDTensor
>
(
"Grad"
);
auto
learning_rate
=
ctx
.
Input
<
framework
::
LoDTensor
>
(
"LearningRate"
);
T
*
p_out
=
param_out
->
mutable_data
<
T
>
(
ctx
.
GetPlace
());
T
*
v_out
=
velocity_out
->
mutable_data
<
T
>
(
ctx
.
GetPlace
());
T
mu
=
static_cast
<
T
>
(
ctx
.
Attr
<
float
>
(
"mu"
));
T
lars_coeff
=
ctx
.
Attr
<
float
>
(
"lars_coeff"
);
T
lars_weight_decay
=
ctx
.
Attr
<
float
>
(
"lars_weight_decay"
);
auto
*
p
=
param
->
data
<
T
>
();
auto
*
v
=
velocity
->
data
<
T
>
();
auto
*
g
=
grad
->
data
<
T
>
();
auto
*
lr
=
learning_rate
->
data
<
T
>
();
int
block
=
512
;
int
grid
=
(
param
->
numel
()
+
block
-
1
)
/
block
;
auto
eigen_p
=
framework
::
EigenVector
<
T
>::
Flatten
(
*
param
);
auto
eigen_g
=
framework
::
EigenVector
<
T
>::
Flatten
(
*
grad
);
// calculate norms using eigein and launch the kernel.
framework
::
Tensor
p_norm_t
,
g_norm_t
;
p_norm_t
.
Resize
({
1
});
g_norm_t
.
Resize
({
1
});
auto
*
p_norm_data
=
p_norm_t
.
mutable_data
<
T
>
(
ctx
.
GetPlace
());
auto
*
g_norm_data
=
g_norm_t
.
mutable_data
<
T
>
(
ctx
.
GetPlace
());
auto
ep_norm
=
framework
::
EigenScalar
<
T
>::
From
(
p_norm_t
);
auto
eg_norm
=
framework
::
EigenScalar
<
T
>::
From
(
g_norm_t
);
auto
*
place
=
ctx
.
template
device_context
<
DeviceContext
>().
eigen_device
();
ep_norm
.
device
(
*
place
)
=
eigen_p
.
square
().
sum
().
sqrt
();
eg_norm
.
device
(
*
place
)
=
eigen_g
.
square
().
sum
().
sqrt
();
MomentumLarsKernel
<<<
grid
,
block
,
0
,
ctx
.
cuda_device_context
().
stream
()
>>>
(
p
,
g
,
v
,
lr
,
mu
,
param
->
numel
(),
lars_coeff
,
lars_weight_decay
,
p_norm_data
,
g_norm_data
,
p_out
,
v_out
);
}
};
}
// namespace operators
}
// namespace paddle
namespace
ops
=
paddle
::
operators
;
REGISTER_OP_CUDA_KERNEL
(
lars_momentum
,
ops
::
LarsMomentumOpCUDAKernel
<
paddle
::
platform
::
CUDADeviceContext
,
float
>
,
ops
::
LarsMomentumOpCUDAKernel
<
paddle
::
platform
::
CUDADeviceContext
,
double
>
);
paddle/fluid/operators/lars_momentum_op.h
0 → 100644
浏览文件 @
e0a89503
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include "paddle/fluid/framework/eigen.h"
#include "paddle/fluid/framework/op_registry.h"
namespace
paddle
{
namespace
operators
{
template
<
typename
T
>
class
LarsMomentumOpKernel
:
public
framework
::
OpKernel
<
T
>
{
public:
void
Compute
(
const
framework
::
ExecutionContext
&
ctx
)
const
override
{
auto
param_out
=
ctx
.
Output
<
framework
::
LoDTensor
>
(
"ParamOut"
);
auto
velocity_out
=
ctx
.
Output
<
framework
::
LoDTensor
>
(
"VelocityOut"
);
auto
param
=
ctx
.
Input
<
framework
::
LoDTensor
>
(
"Param"
);
auto
velocity
=
ctx
.
Input
<
framework
::
LoDTensor
>
(
"Velocity"
);
auto
learning_rate
=
ctx
.
Input
<
framework
::
LoDTensor
>
(
"LearningRate"
);
auto
*
grad_var
=
ctx
.
InputVar
(
"Grad"
);
// only support dense for now.
PADDLE_ENFORCE
(
grad_var
->
IsType
<
framework
::
LoDTensor
>
());
auto
grad
=
ctx
.
Input
<
framework
::
LoDTensor
>
(
"Grad"
);
param_out
->
mutable_data
<
T
>
(
ctx
.
GetPlace
());
velocity_out
->
mutable_data
<
T
>
(
ctx
.
GetPlace
());
T
mu
=
static_cast
<
T
>
(
ctx
.
Attr
<
float
>
(
"mu"
));
T
lars_coeff
=
ctx
.
Attr
<
float
>
(
"lars_coeff"
);
T
lars_weight_decay
=
ctx
.
Attr
<
float
>
(
"lars_weight_decay"
);
auto
p_out
=
framework
::
EigenVector
<
T
>::
Flatten
(
*
param_out
);
auto
v_out
=
framework
::
EigenVector
<
T
>::
Flatten
(
*
velocity_out
);
auto
p
=
framework
::
EigenVector
<
T
>::
Flatten
(
*
param
);
auto
v
=
framework
::
EigenVector
<
T
>::
Flatten
(
*
velocity
);
auto
g
=
framework
::
EigenVector
<
T
>::
Flatten
(
*
grad
);
auto
*
lr
=
learning_rate
->
data
<
T
>
();
framework
::
Tensor
p_norm_t
,
g_norm_t
;
p_norm_t
.
Resize
({
1
});
g_norm_t
.
Resize
({
1
});
p_norm_t
.
mutable_data
<
T
>
(
ctx
.
GetPlace
());
g_norm_t
.
mutable_data
<
T
>
(
ctx
.
GetPlace
());
auto
ep_norm
=
framework
::
EigenScalar
<
T
>::
From
(
p_norm_t
);
auto
eg_norm
=
framework
::
EigenScalar
<
T
>::
From
(
g_norm_t
);
ep_norm
=
p
.
square
().
sum
().
sqrt
();
eg_norm
=
g
.
square
().
sum
().
sqrt
();
T
local_lr
=
lr
[
0
];
if
(
ep_norm
(
0
)
>
0
&&
eg_norm
(
0
)
>
0
)
{
local_lr
=
lr
[
0
]
*
lars_coeff
*
ep_norm
(
0
)
/
(
eg_norm
(
0
)
+
lars_weight_decay
*
ep_norm
(
0
));
}
v_out
=
v
*
mu
+
local_lr
*
(
g
+
lars_weight_decay
*
p
);
p_out
=
p
-
v_out
;
}
};
}
// namespace operators
}
// namespace paddle
paddle/fluid/operators/lookup_table_op.cc
浏览文件 @
e0a89503
...
@@ -81,6 +81,12 @@ class LookupTableOpMaker : public framework::OpProtoAndCheckerMaker {
...
@@ -81,6 +81,12 @@ class LookupTableOpMaker : public framework::OpProtoAndCheckerMaker {
"Otherwise the given value indicates padding the output "
"Otherwise the given value indicates padding the output "
"with zeros whenever lookup encounters it in Ids."
)
"with zeros whenever lookup encounters it in Ids."
)
.
SetDefault
(
kNoPadding
);
.
SetDefault
(
kNoPadding
);
// NOTE(minqiyang): grad_inplace is an temporal attribute,
// please do NOT set this attribute in python layer.
AddAttr
<
bool
>
(
"grad_inplace"
,
"(boolean, default false) "
"If the grad op reuse the input's variable."
)
.
SetDefault
(
false
);
AddComment
(
R"DOC(
AddComment
(
R"DOC(
Lookup Table Operator.
Lookup Table Operator.
...
...
paddle/fluid/operators/lookup_table_op.h
浏览文件 @
e0a89503
...
@@ -21,6 +21,7 @@ limitations under the License. */
...
@@ -21,6 +21,7 @@ limitations under the License. */
#include "paddle/fluid/framework/lod_tensor.h"
#include "paddle/fluid/framework/lod_tensor.h"
#include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/framework/selected_rows.h"
#include "paddle/fluid/framework/selected_rows.h"
#include "paddle/fluid/operators/math/blas.h"
namespace
paddle
{
namespace
paddle
{
namespace
operators
{
namespace
operators
{
...
@@ -68,6 +69,7 @@ class LookupTableKernel : public framework::OpKernel<T> {
...
@@ -68,6 +69,7 @@ class LookupTableKernel : public framework::OpKernel<T> {
const
auto
*
table
=
table_t
.
value
().
data
<
T
>
();
const
auto
*
table
=
table_t
.
value
().
data
<
T
>
();
auto
*
output
=
output_t
->
mutable_data
<
T
>
(
context
.
GetPlace
());
auto
*
output
=
output_t
->
mutable_data
<
T
>
(
context
.
GetPlace
());
auto
blas
=
math
::
GetBlas
<
platform
::
CPUDeviceContext
,
T
>
(
context
);
for
(
int64_t
i
=
0
;
i
<
ids_numel
;
++
i
)
{
for
(
int64_t
i
=
0
;
i
<
ids_numel
;
++
i
)
{
if
(
padding_idx
!=
kNoPadding
&&
ids
[
i
]
==
padding_idx
)
{
if
(
padding_idx
!=
kNoPadding
&&
ids
[
i
]
==
padding_idx
)
{
memset
(
output
+
i
*
row_width
,
0
,
row_width
*
sizeof
(
T
));
memset
(
output
+
i
*
row_width
,
0
,
row_width
*
sizeof
(
T
));
...
@@ -75,8 +77,8 @@ class LookupTableKernel : public framework::OpKernel<T> {
...
@@ -75,8 +77,8 @@ class LookupTableKernel : public framework::OpKernel<T> {
PADDLE_ENFORCE_GE
(
ids
[
i
],
0
);
PADDLE_ENFORCE_GE
(
ids
[
i
],
0
);
auto
id_index
=
table_t
.
Index
(
ids
[
i
]);
auto
id_index
=
table_t
.
Index
(
ids
[
i
]);
PADDLE_ENFORCE_GE
(
id_index
,
0
,
"the input key should be exists."
);
PADDLE_ENFORCE_GE
(
id_index
,
0
,
"the input key should be exists."
);
memcpy
(
output
+
i
*
row_width
,
table
+
id_index
*
row_width
,
blas
.
VCOPY
(
row_width
,
table
+
id_index
*
row_width
,
row_width
*
sizeof
(
T
)
);
output
+
i
*
row_width
);
}
}
}
}
}
}
...
@@ -111,27 +113,37 @@ class LookupTableGradKernel : public framework::OpKernel<T> {
...
@@ -111,27 +113,37 @@ class LookupTableGradKernel : public framework::OpKernel<T> {
auto
*
ids_data
=
ids
->
data
<
int64_t
>
();
auto
*
ids_data
=
ids
->
data
<
int64_t
>
();
int64_t
ids_num
=
ids
->
numel
();
int64_t
ids_num
=
ids
->
numel
();
framework
::
Vector
<
int64_t
>
new_rows
;
std
::
vector
<
int64_t
>
new_rows
;
new_rows
.
reserve
(
ids_num
);
new_rows
.
resize
(
ids_num
);
for
(
int64_t
i
=
0
;
i
<
ids_num
;
i
++
)
{
std
::
memcpy
(
&
new_rows
[
0
],
ids_data
,
ids_num
*
sizeof
(
int64_t
));
new_rows
.
push_back
(
ids_data
[
i
]);
}
d_table
->
set_rows
(
new_rows
);
d_table
->
set_rows
(
new_rows
);
auto
*
d_table_value
=
d_table
->
mutable_value
();
auto
*
d_table_value
=
d_table
->
mutable_value
();
d_table_value
->
Resize
({
ids_num
,
table_dim
[
1
]});
d_table_value
->
Resize
({
ids_num
,
table_dim
[
1
]});
d_table_value
->
mutable_data
<
T
>
(
context
.
GetPlace
());
// FIXME(minqiyang):
// memory optimization will NOT reuse Tensor with SelectedRows
d_table
->
set_height
(
table_dim
[
0
]);
// so we could just share the tensor here directly.
// However, the InferVarType method will infer the output SelectedRows
auto
*
d_output_data
=
d_output
->
data
<
T
>
();
// to Tensor sometimes, which is a bug, so we will add an attribute
auto
*
d_table_data
=
d_table_value
->
data
<
T
>
();
// here to indicate the inplace and remove this attribute after
// the InferVarType's bug was fixed
auto
d_output_dims
=
d_output
->
dims
();
bool
grad_inplace
=
context
.
Attr
<
bool
>
(
"grad_inplace"
);
PADDLE_ENFORCE_EQ
(
if
(
grad_inplace
)
{
d_table_value
->
dims
(),
d_table_value
->
ShareDataWith
(
*
d_output
);
framework
::
flatten_to_2d
(
d_output_dims
,
d_output_dims
.
size
()
-
1
));
}
else
{
memcpy
(
d_table_data
,
d_output_data
,
sizeof
(
T
)
*
d_output
->
numel
());
d_table_value
->
mutable_data
<
T
>
(
context
.
GetPlace
());
d_table
->
set_height
(
table_dim
[
0
]);
auto
*
d_output_data
=
d_output
->
data
<
T
>
();
auto
*
d_table_data
=
d_table_value
->
data
<
T
>
();
auto
d_output_dims
=
d_output
->
dims
();
PADDLE_ENFORCE_EQ
(
d_table_value
->
dims
(),
framework
::
flatten_to_2d
(
d_output_dims
,
d_output_dims
.
size
()
-
1
));
memcpy
(
d_table_data
,
d_output_data
,
sizeof
(
T
)
*
d_output
->
numel
());
}
}
else
{
}
else
{
auto
*
ids
=
context
.
Input
<
LoDTensor
>
(
"Ids"
);
auto
*
ids
=
context
.
Input
<
LoDTensor
>
(
"Ids"
);
auto
*
d_output
=
context
.
Input
<
LoDTensor
>
(
framework
::
GradVarName
(
"Out"
));
auto
*
d_output
=
context
.
Input
<
LoDTensor
>
(
framework
::
GradVarName
(
"Out"
));
...
...
paddle/fluid/operators/math/algorithm.h
浏览文件 @
e0a89503
...
@@ -39,6 +39,52 @@ HOSTDEVICE inline int64_t BinarySearch(const T *x, int64_t num, const T &val) {
...
@@ -39,6 +39,52 @@ HOSTDEVICE inline int64_t BinarySearch(const T *x, int64_t num, const T &val) {
return
-
1
;
return
-
1
;
}
}
template
<
typename
T
>
HOSTDEVICE
inline
size_t
LowerBound
(
const
T
*
x
,
size_t
num
,
const
T
&
val
)
{
#ifdef __CUDA_ARCH__
// The following code is from
// https://en.cppreference.com/w/cpp/algorithm/lower_bound
auto
*
first
=
x
;
int64_t
count
=
static_cast
<
int64_t
>
(
num
);
while
(
count
>
0
)
{
int64_t
step
=
(
count
>>
1
);
auto
*
it
=
first
+
step
;
if
(
*
it
<
val
)
{
first
=
++
it
;
count
-=
(
step
+
1
);
}
else
{
count
=
step
;
}
}
return
static_cast
<
size_t
>
(
first
-
x
);
#else
return
static_cast
<
size_t
>
(
std
::
lower_bound
(
x
,
x
+
num
,
val
)
-
x
);
#endif
}
template
<
typename
T
>
HOSTDEVICE
inline
size_t
UpperBound
(
const
T
*
x
,
size_t
num
,
const
T
&
val
)
{
#ifdef __CUDA_ARCH__
// The following code is from
// https://en.cppreference.com/w/cpp/algorithm/upper_bound
auto
*
first
=
x
;
int64_t
count
=
static_cast
<
int64_t
>
(
num
);
while
(
count
>
0
)
{
auto
step
=
(
count
>>
1
);
auto
*
it
=
first
+
step
;
if
(
val
<
*
it
)
{
count
=
step
;
}
else
{
first
=
++
it
;
count
-=
(
step
+
1
);
}
}
return
static_cast
<
size_t
>
(
first
-
x
);
#else
return
static_cast
<
size_t
>
(
std
::
upper_bound
(
x
,
x
+
num
,
val
)
-
x
);
#endif
}
}
// namespace math
}
// namespace math
}
// namespace operators
}
// namespace operators
}
// namespace paddle
}
// namespace paddle
paddle/fluid/operators/momentum_op.cc
浏览文件 @
e0a89503
...
@@ -19,54 +19,6 @@ namespace operators {
...
@@ -19,54 +19,6 @@ namespace operators {
using
Tensor
=
framework
::
Tensor
;
using
Tensor
=
framework
::
Tensor
;
class
MomentumOp
:
public
framework
::
OperatorWithKernel
{
public:
using
framework
::
OperatorWithKernel
::
OperatorWithKernel
;
protected:
void
InferShape
(
framework
::
InferShapeContext
*
ctx
)
const
override
{
PADDLE_ENFORCE
(
ctx
->
HasInput
(
"Param"
),
"Input(param) of Momentum should not be null."
);
PADDLE_ENFORCE
(
ctx
->
HasInput
(
"Grad"
),
"Input(grad) of Momentum should not be null."
);
PADDLE_ENFORCE
(
ctx
->
HasInput
(
"Velocity"
),
"Input(velocity) of Momentum should not be null."
);
PADDLE_ENFORCE
(
ctx
->
HasInput
(
"LearningRate"
),
"Input(LearningRate) of Momentum should not be null."
);
PADDLE_ENFORCE
(
ctx
->
GetInputsVarType
(
"Param"
).
front
()
==
framework
::
proto
::
VarType
::
LOD_TENSOR
,
"The input var's type should be LoDTensor, but the received is %s"
,
ctx
->
Inputs
(
"Param"
).
front
(),
ctx
->
GetInputsVarType
(
"Param"
).
front
());
PADDLE_ENFORCE
(
ctx
->
HasOutput
(
"ParamOut"
),
"Output(ParamOut) of Momentum should not be null."
);
PADDLE_ENFORCE
(
ctx
->
HasOutput
(
"VelocityOut"
),
"Output(VelocityOut) of Momentum should not be null."
);
auto
param_dim
=
ctx
->
GetInputDim
(
"Param"
);
if
(
ctx
->
GetInputsVarType
(
"Grad"
)[
0
]
==
framework
::
proto
::
VarType
::
LOD_TENSOR
)
{
PADDLE_ENFORCE_EQ
(
param_dim
,
ctx
->
GetInputDim
(
"Grad"
),
"Param and Grad input of MomentumOp should have the same dimension."
);
PADDLE_ENFORCE_EQ
(
param_dim
,
ctx
->
GetInputDim
(
"Velocity"
),
"Param and Velocity of MomentumOp should have the same dimension."
);
}
PADDLE_ENFORCE_EQ
(
framework
::
product
(
ctx
->
GetInputDim
(
"LearningRate"
)),
1
,
"Learning_rate should be a scalar"
);
ctx
->
SetOutputDim
(
"ParamOut"
,
param_dim
);
ctx
->
SetOutputDim
(
"VelocityOut"
,
param_dim
);
}
framework
::
OpKernelType
GetExpectedKernelType
(
const
framework
::
ExecutionContext
&
ctx
)
const
override
{
auto
input_data_type
=
framework
::
GetDataTypeOfVar
(
ctx
.
InputVar
(
"Param"
));
return
framework
::
OpKernelType
(
input_data_type
,
ctx
.
GetPlace
());
}
};
class
MomentumOpInferVarType
:
public
framework
::
VarTypeInference
{
class
MomentumOpInferVarType
:
public
framework
::
VarTypeInference
{
public:
public:
void
operator
()(
const
framework
::
OpDesc
&
op_desc
,
void
operator
()(
const
framework
::
OpDesc
&
op_desc
,
...
...
paddle/fluid/operators/momentum_op.h
浏览文件 @
e0a89503
...
@@ -28,6 +28,54 @@ using framework::SelectedRows;
...
@@ -28,6 +28,54 @@ using framework::SelectedRows;
struct
NoNesterov
;
struct
NoNesterov
;
struct
UseNesterov
;
struct
UseNesterov
;
class
MomentumOp
:
public
framework
::
OperatorWithKernel
{
public:
using
framework
::
OperatorWithKernel
::
OperatorWithKernel
;
protected:
void
InferShape
(
framework
::
InferShapeContext
*
ctx
)
const
override
{
PADDLE_ENFORCE
(
ctx
->
HasInput
(
"Param"
),
"Input(param) of Momentum should not be null."
);
PADDLE_ENFORCE
(
ctx
->
HasInput
(
"Grad"
),
"Input(grad) of Momentum should not be null."
);
PADDLE_ENFORCE
(
ctx
->
HasInput
(
"Velocity"
),
"Input(velocity) of Momentum should not be null."
);
PADDLE_ENFORCE
(
ctx
->
HasInput
(
"LearningRate"
),
"Input(LearningRate) of Momentum should not be null."
);
PADDLE_ENFORCE
(
ctx
->
GetInputsVarType
(
"Param"
).
front
()
==
framework
::
proto
::
VarType
::
LOD_TENSOR
,
"The input var's type should be LoDTensor, but the received is %s"
,
ctx
->
Inputs
(
"Param"
).
front
(),
ctx
->
GetInputsVarType
(
"Param"
).
front
());
PADDLE_ENFORCE
(
ctx
->
HasOutput
(
"ParamOut"
),
"Output(ParamOut) of Momentum should not be null."
);
PADDLE_ENFORCE
(
ctx
->
HasOutput
(
"VelocityOut"
),
"Output(VelocityOut) of Momentum should not be null."
);
auto
param_dim
=
ctx
->
GetInputDim
(
"Param"
);
if
(
ctx
->
GetInputsVarType
(
"Grad"
)[
0
]
==
framework
::
proto
::
VarType
::
LOD_TENSOR
)
{
PADDLE_ENFORCE_EQ
(
param_dim
,
ctx
->
GetInputDim
(
"Grad"
),
"Param and Grad input of MomentumOp should have the same dimension."
);
PADDLE_ENFORCE_EQ
(
param_dim
,
ctx
->
GetInputDim
(
"Velocity"
),
"Param and Velocity of MomentumOp should have the same dimension."
);
}
PADDLE_ENFORCE_EQ
(
framework
::
product
(
ctx
->
GetInputDim
(
"LearningRate"
)),
1
,
"Learning_rate should be a scalar"
);
ctx
->
SetOutputDim
(
"ParamOut"
,
param_dim
);
ctx
->
SetOutputDim
(
"VelocityOut"
,
param_dim
);
}
framework
::
OpKernelType
GetExpectedKernelType
(
const
framework
::
ExecutionContext
&
ctx
)
const
override
{
auto
input_data_type
=
framework
::
GetDataTypeOfVar
(
ctx
.
InputVar
(
"Param"
));
return
framework
::
OpKernelType
(
input_data_type
,
ctx
.
GetPlace
());
}
};
template
<
typename
T
>
template
<
typename
T
>
class
CPUDenseMomentumFunctor
{
class
CPUDenseMomentumFunctor
{
private:
private:
...
...
paddle/fluid/operators/sequence_reverse_op.cc
0 → 100644
浏览文件 @
e0a89503
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/fluid/operators/sequence_reverse_op.h"
namespace
ops
=
paddle
::
operators
;
REGISTER_OPERATOR
(
sequence_reverse
,
ops
::
SequenceReverseOp
,
ops
::
SequenceReverseOpMaker
,
ops
::
SequenceReverseGradOpDescMaker
);
REGISTER_OP_CPU_KERNEL
(
sequence_reverse
,
ops
::
SequenceReverseOpKernel
<
paddle
::
platform
::
CPUDeviceContext
,
uint8_t
>
,
ops
::
SequenceReverseOpKernel
<
paddle
::
platform
::
CPUDeviceContext
,
int
>
,
ops
::
SequenceReverseOpKernel
<
paddle
::
platform
::
CPUDeviceContext
,
int64_t
>
,
ops
::
SequenceReverseOpKernel
<
paddle
::
platform
::
CPUDeviceContext
,
float
>
,
ops
::
SequenceReverseOpKernel
<
paddle
::
platform
::
CPUDeviceContext
,
double
>
);
paddle/fluid/operators/sequence_reverse_op.cu
0 → 100644
浏览文件 @
e0a89503
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/fluid/operators/sequence_reverse_op.h"
namespace
ops
=
paddle
::
operators
;
REGISTER_OP_CUDA_KERNEL
(
sequence_reverse
,
ops
::
SequenceReverseOpKernel
<
paddle
::
platform
::
CUDADeviceContext
,
uint8_t
>
,
ops
::
SequenceReverseOpKernel
<
paddle
::
platform
::
CUDADeviceContext
,
int
>
,
ops
::
SequenceReverseOpKernel
<
paddle
::
platform
::
CUDADeviceContext
,
int64_t
>
,
ops
::
SequenceReverseOpKernel
<
paddle
::
platform
::
CUDADeviceContext
,
float
>
,
ops
::
SequenceReverseOpKernel
<
paddle
::
platform
::
CUDADeviceContext
,
double
>
);
paddle/fluid/operators/sequence_reverse_op.h
0 → 100644
浏览文件 @
e0a89503
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/operators/math/algorithm.h"
#include "paddle/fluid/platform/for_range.h"
namespace
paddle
{
namespace
operators
{
class
SequenceReverseOp
:
public
framework
::
OperatorWithKernel
{
public:
using
framework
::
OperatorWithKernel
::
OperatorWithKernel
;
void
InferShape
(
framework
::
InferShapeContext
*
ctx
)
const
override
{
PADDLE_ENFORCE
(
ctx
->
HasInput
(
"X"
),
"Input(X) must exist"
);
PADDLE_ENFORCE
(
ctx
->
HasOutput
(
"Y"
),
"Output(Y) must exist"
);
auto
x_dim
=
ctx
->
GetInputDim
(
"X"
);
PADDLE_ENFORCE_GE
(
x_dim
.
size
(),
2
,
"Rank of Input(X) must be not less than 2."
);
ctx
->
SetOutputDim
(
"Y"
,
x_dim
);
ctx
->
ShareLoD
(
"X"
,
"Y"
);
}
};
class
SequenceReverseOpMaker
:
public
framework
::
OpProtoAndCheckerMaker
{
public:
void
Make
()
override
{
AddInput
(
"X"
,
"The input LoDTensor of sequence_reverse op."
);
AddOutput
(
"Y"
,
"The output LoDTensor of sequence_reverse op."
);
AddComment
(
R"DOC(
SequenceReverse Operator.
Reverse each sequence in input X along dim 0.
Assuming X is a LoDTensor with dims [5, 4] and lod [[0, 2, 5]], where:
X.data() = [
[1, 2, 3, 4],
[5, 6, 7, 8], # the 0-th sequence with length 2
[9, 10, 11, 12],
[13, 14, 15, 16],
[17, 18, 19, 20] # the 1-st sequence with length 3
]
The output Y would be a LoDTensor sharing the same dims and lod with input X,
and:
Y.data() = [
[5, 6, 7, 8],
[1, 2, 3, 4], # the reversed 0-th sequence with length 2
[17, 18, 19, 20],
[13, 14, 15, 16],
[9, 10, 11, 12] # the reversed 1-st sequence with length 3
]
This Operator is useful to build a reverse dynamic RNN network.
This Operator only supports one-level lod currently.
)DOC"
);
}
};
template
<
typename
T
>
struct
SequenceReverseFunctor
{
SequenceReverseFunctor
(
const
T
*
x
,
T
*
y
,
const
size_t
*
lod
,
size_t
lod_count
,
size_t
row_numel
)
:
x_
(
x
),
y_
(
y
),
lod_
(
lod
),
lod_count_
(
lod_count
),
row_numel_
(
row_numel
)
{}
HOSTDEVICE
void
operator
()(
size_t
idx_x
)
const
{
auto
row_idx_x
=
idx_x
/
row_numel_
;
auto
lod_idx
=
math
::
UpperBound
(
lod_
,
lod_count_
,
row_idx_x
);
auto
row_idx_y
=
lod_
[
lod_idx
-
1
]
+
(
lod_
[
lod_idx
]
-
1
-
row_idx_x
);
auto
idx_y
=
row_idx_y
*
row_numel_
+
idx_x
%
row_numel_
;
y_
[
idx_y
]
=
x_
[
idx_x
];
}
const
T
*
x_
;
T
*
y_
;
const
size_t
*
lod_
;
size_t
lod_count_
;
size_t
row_numel_
;
};
template
<
typename
DeviceContext
,
typename
T
>
class
SequenceReverseOpKernel
:
public
framework
::
OpKernel
<
T
>
{
using
LoDTensor
=
framework
::
LoDTensor
;
public:
void
Compute
(
const
framework
::
ExecutionContext
&
ctx
)
const
override
{
auto
&
x
=
*
ctx
.
Input
<
LoDTensor
>
(
"X"
);
auto
*
y
=
ctx
.
Output
<
LoDTensor
>
(
"Y"
);
PADDLE_ENFORCE_EQ
(
x
.
lod
().
size
(),
1
,
"SequenceReverse Op only support one level lod."
);
auto
&
dev_ctx
=
ctx
.
template
device_context
<
DeviceContext
>();
const
size_t
*
lod
;
size_t
lod_count
=
x
.
lod
()[
0
].
size
();
#ifdef PADDLE_WITH_CUDA
if
(
platform
::
is_gpu_place
(
ctx
.
GetPlace
()))
{
lod
=
x
.
lod
()[
0
].
CUDAData
(
ctx
.
GetPlace
());
}
else
{
#endif
lod
=
x
.
lod
()[
0
].
data
();
#ifdef PADDLE_WITH_CUDA
}
#endif
size_t
limit
=
static_cast
<
size_t
>
(
x
.
numel
());
size_t
row_numel
=
static_cast
<
size_t
>
(
limit
/
x
.
dims
()[
0
]);
auto
*
x_data
=
x
.
data
<
T
>
();
auto
*
y_data
=
y
->
mutable_data
<
T
>
(
ctx
.
GetPlace
());
PADDLE_ENFORCE_NE
(
x_data
,
y_data
,
"SequenceReverse Op does not support in-place operation"
);
SequenceReverseFunctor
<
T
>
functor
(
x_data
,
y_data
,
lod
,
lod_count
,
row_numel
);
platform
::
ForRange
<
DeviceContext
>
for_range
(
dev_ctx
,
limit
);
for_range
(
functor
);
}
};
class
SequenceReverseGradOpDescMaker
:
public
framework
::
SingleGradOpDescMaker
{
public:
using
framework
::
SingleGradOpDescMaker
::
SingleGradOpDescMaker
;
protected:
std
::
unique_ptr
<
framework
::
OpDesc
>
Apply
()
const
override
{
std
::
unique_ptr
<
framework
::
OpDesc
>
op
(
new
framework
::
OpDesc
());
op
->
SetType
(
"sequence_reverse"
);
op
->
SetInput
(
"X"
,
OutputGrad
(
"Y"
));
op
->
SetOutput
(
"Y"
,
InputGrad
(
"X"
));
op
->
SetAttrMap
(
Attrs
());
return
op
;
}
};
}
// namespace operators
}
// namespace paddle
paddle/fluid/platform/device_context.cc
浏览文件 @
e0a89503
...
@@ -296,38 +296,73 @@ Place CUDAPinnedDeviceContext::GetPlace() const { return place_; }
...
@@ -296,38 +296,73 @@ Place CUDAPinnedDeviceContext::GetPlace() const { return place_; }
#ifdef PADDLE_WITH_MKLDNN
#ifdef PADDLE_WITH_MKLDNN
MKLDNNDeviceContext
::
MKLDNNDeviceContext
(
CPUPlace
place
)
MKLDNNDeviceContext
::
MKLDNNDeviceContext
(
CPUPlace
place
)
:
CPUDeviceContext
(
place
),
engine_
(
mkldnn
::
engine
::
cpu
,
0
),
p_blobs_
()
{
:
CPUDeviceContext
(
place
),
engine_
(
mkldnn
::
engine
::
cpu
,
0
),
p_blobmap_
()
{
p_blobs_
.
reset
(
new
std
::
unordered_map
<
std
::
string
,
std
::
shared_ptr
<
void
>>
());
p_blobmap_
.
reset
(
new
BlobMap
());
p_mutex_
.
reset
(
new
std
::
mutex
());
}
}
namespace
{
// Current thread's id.
thread_local
int
cur_thread_id
=
0
;
}
void
set_cur_thread_id
(
int
tid
)
{
cur_thread_id
=
tid
;
}
int
get_cur_thread_id
(
void
)
{
return
cur_thread_id
;
}
void
MKLDNNDeviceContext
::
SetBlob
(
const
std
::
string
&
name
,
void
MKLDNNDeviceContext
::
SetBlob
(
const
std
::
string
&
name
,
std
::
shared_ptr
<
void
>
data
)
const
{
std
::
shared_ptr
<
void
>
data
)
const
{
std
::
unordered_map
<
std
::
string
,
std
::
shared_ptr
<
void
>>*
p
;
BlobMap
*
pMap
=
p_blobmap_
.
get
();
p
=
p_blobs_
.
get
();
std
::
shared_ptr
<
KeyBlob
>
pBlob
=
nullptr
;
int
tid
=
platform
::
get_cur_thread_id
();
auto
it
=
p
->
find
(
name
);
std
::
lock_guard
<
std
::
mutex
>
lock
(
*
p_mutex_
.
get
()
);
if
(
it
==
p
->
end
())
{
// Find KeyBlob for current thread
(
*
p
)[
name
]
=
data
;
// create new blob
auto
map_it
=
pMap
->
find
(
tid
);
if
(
map_it
==
pMap
->
end
())
{
// 1st time to set blob in current thread
pBlob
=
std
::
shared_ptr
<
KeyBlob
>
(
new
KeyBlob
());
(
*
pMap
)[
tid
]
=
pBlob
;
}
else
{
}
else
{
it
->
second
=
data
;
// set data to existing blob
pBlob
=
map_it
->
second
;
}
}
// Find Key in found (or newly created) KeyBlob
auto
key_it
=
pBlob
->
find
(
name
);
if
(
key_it
==
pBlob
->
end
())
{
(
*
pBlob
)[
name
]
=
data
;
// create new blob
}
else
{
key_it
->
second
=
data
;
// set data to existing blob
}
// lock will be automatically released when out of scope
return
;
return
;
}
}
std
::
shared_ptr
<
void
>
MKLDNNDeviceContext
::
GetBlob
(
std
::
shared_ptr
<
void
>
MKLDNNDeviceContext
::
GetBlob
(
const
std
::
string
&
name
)
const
{
const
std
::
string
&
name
)
const
{
std
::
unordered_map
<
std
::
string
,
std
::
shared_ptr
<
void
>>*
p
;
BlobMap
*
pMap
=
p_blobmap_
.
get
()
;
p
=
p_blobs_
.
get
()
;
std
::
shared_ptr
<
KeyBlob
>
pBlob
=
nullptr
;
auto
it
=
p
->
find
(
name
);
int
tid
=
platform
::
get_cur_thread_id
(
);
if
(
it
!=
p
->
end
())
{
std
::
lock_guard
<
std
::
mutex
>
lock
(
*
p_mutex_
.
get
());
return
it
->
second
;
}
// Find KeyBlob for current thread firstly
auto
map_it
=
pMap
->
find
(
tid
);
if
(
map_it
==
pMap
->
end
())
return
nullptr
;
pBlob
=
map_it
->
second
;
// Find Blob via name
auto
key_it
=
pBlob
->
find
(
name
);
if
(
key_it
==
pBlob
->
end
())
return
nullptr
;
return
nullptr
;
// lock will be automatically released when out of scope
return
key_it
->
second
;
}
}
#endif
#endif
...
...
paddle/fluid/platform/device_context.h
浏览文件 @
e0a89503
...
@@ -176,6 +176,12 @@ struct DefaultDeviceContextType<platform::CUDAPinnedPlace> {
...
@@ -176,6 +176,12 @@ struct DefaultDeviceContextType<platform::CUDAPinnedPlace> {
#endif
#endif
#ifdef PADDLE_WITH_MKLDNN
#ifdef PADDLE_WITH_MKLDNN
using
KeyBlob
=
std
::
unordered_map
<
std
::
string
,
std
::
shared_ptr
<
void
>>
;
using
BlobMap
=
std
::
unordered_map
<
int
,
std
::
shared_ptr
<
KeyBlob
>>
;
void
set_cur_thread_id
(
int
);
int
get_cur_thread_id
(
void
);
class
MKLDNNDeviceContext
:
public
CPUDeviceContext
{
class
MKLDNNDeviceContext
:
public
CPUDeviceContext
{
public:
public:
explicit
MKLDNNDeviceContext
(
CPUPlace
place
);
explicit
MKLDNNDeviceContext
(
CPUPlace
place
);
...
@@ -191,8 +197,8 @@ class MKLDNNDeviceContext : public CPUDeviceContext {
...
@@ -191,8 +197,8 @@ class MKLDNNDeviceContext : public CPUDeviceContext {
private:
private:
mkldnn
::
engine
engine_
;
mkldnn
::
engine
engine_
;
std
::
shared_ptr
<
std
::
unordered_map
<
std
::
string
,
std
::
shared_ptr
<
void
>>>
std
::
shared_ptr
<
BlobMap
>
p_blobmap_
;
p_blobs
_
;
std
::
shared_ptr
<
std
::
mutex
>
p_mutex
_
;
};
};
#endif
#endif
...
...
paddle/fluid/pybind/pybind.cc
浏览文件 @
e0a89503
...
@@ -645,9 +645,13 @@ All parameter, weight, gradient are variables in Paddle.
...
@@ -645,9 +645,13 @@ All parameter, weight, gradient are variables in Paddle.
py
::
class_
<
ir
::
Pass
,
std
::
shared_ptr
<
ir
::
Pass
>>
pass
(
m
,
"Pass"
);
py
::
class_
<
ir
::
Pass
,
std
::
shared_ptr
<
ir
::
Pass
>>
pass
(
m
,
"Pass"
);
pass
.
def
(
py
::
init
())
pass
.
def
(
py
::
init
())
.
def
(
"set_str"
,
[](
ir
::
Pass
&
self
,
const
std
::
string
&
name
,
.
def
(
const
std
::
string
&
attr
)
{
"set_str"
,
self
.
Set
<
std
::
string
>
(
name
,
new
std
::
string
(
attr
));
[](
ir
::
Pass
&
self
,
const
std
::
string
&
name
,
const
std
::
string
&
attr
)
{
self
.
Set
<
std
::
string
>
(
name
,
new
std
::
string
(
attr
));
})
.
def
(
"set_int"
,
[](
ir
::
Pass
&
self
,
const
std
::
string
&
name
,
int
val
)
{
self
.
Set
<
const
int
>
(
name
,
new
int
(
val
));
});
});
py
::
class_
<
ir
::
PassBuilder
,
std
::
shared_ptr
<
ir
::
PassBuilder
>>
pb
(
py
::
class_
<
ir
::
PassBuilder
,
std
::
shared_ptr
<
ir
::
PassBuilder
>>
pb
(
...
...
paddle/fluid/train/demo/CMakeLists.txt
浏览文件 @
e0a89503
...
@@ -15,6 +15,7 @@ include_directories("${PADDLE_LIB}")
...
@@ -15,6 +15,7 @@ include_directories("${PADDLE_LIB}")
include_directories
(
"
${
PADDLE_LIB
}
/third_party/install/protobuf/include"
)
include_directories
(
"
${
PADDLE_LIB
}
/third_party/install/protobuf/include"
)
include_directories
(
"
${
PADDLE_LIB
}
/third_party/install/glog/include"
)
include_directories
(
"
${
PADDLE_LIB
}
/third_party/install/glog/include"
)
include_directories
(
"
${
PADDLE_LIB
}
/third_party/install/gflags/include"
)
include_directories
(
"
${
PADDLE_LIB
}
/third_party/install/gflags/include"
)
include_directories
(
"
${
PADDLE_LIB
}
/third_party/install/xxhash/include"
)
include_directories
(
"
${
PADDLE_LIB
}
/third_party/install/snappy/include"
)
include_directories
(
"
${
PADDLE_LIB
}
/third_party/install/snappy/include"
)
include_directories
(
"
${
PADDLE_LIB
}
/third_party/install/snappystream/include"
)
include_directories
(
"
${
PADDLE_LIB
}
/third_party/install/snappystream/include"
)
include_directories
(
"
${
PADDLE_LIB
}
/third_party/install/zlib/include"
)
include_directories
(
"
${
PADDLE_LIB
}
/third_party/install/zlib/include"
)
...
@@ -27,6 +28,7 @@ link_directories("${PADDLE_LIB}/third_party/install/snappystream/lib")
...
@@ -27,6 +28,7 @@ link_directories("${PADDLE_LIB}/third_party/install/snappystream/lib")
link_directories
(
"
${
PADDLE_LIB
}
/third_party/install/protobuf/lib"
)
link_directories
(
"
${
PADDLE_LIB
}
/third_party/install/protobuf/lib"
)
link_directories
(
"
${
PADDLE_LIB
}
/third_party/install/glog/lib"
)
link_directories
(
"
${
PADDLE_LIB
}
/third_party/install/glog/lib"
)
link_directories
(
"
${
PADDLE_LIB
}
/third_party/install/gflags/lib"
)
link_directories
(
"
${
PADDLE_LIB
}
/third_party/install/gflags/lib"
)
link_directories
(
"
${
PADDLE_LIB
}
/third_party/install/xxhash/lib"
)
link_directories
(
"
${
PADDLE_LIB
}
/third_party/install/zlib/lib"
)
link_directories
(
"
${
PADDLE_LIB
}
/third_party/install/zlib/lib"
)
add_executable
(
demo_trainer demo_trainer.cc
)
add_executable
(
demo_trainer demo_trainer.cc
)
...
@@ -62,5 +64,5 @@ target_link_libraries(demo_trainer
...
@@ -62,5 +64,5 @@ target_link_libraries(demo_trainer
${
ARCHIVE_END
}
${
ARCHIVE_END
}
${
MATH_LIB
}
${
MATH_LIB
}
${
MKLDNN_LIB
}
${
MKLDNN_LIB
}
glog gflags protobuf snappystream snappy z
glog gflags protobuf snappystream snappy z
xxhash
${
EXTERNAL_LIB
}
)
${
EXTERNAL_LIB
}
)
paddle/scripts/paddle_build.sh
浏览文件 @
e0a89503
...
@@ -95,9 +95,9 @@ function cmake_gen() {
...
@@ -95,9 +95,9 @@ function cmake_gen() {
exit
1
exit
1
fi
fi
fi
fi
else
else
if
[
"
$1
"
!=
""
]
;
then
if
[
"
$1
"
!=
""
]
;
then
echo
"using python abi:
$1
"
echo
"using python abi:
$1
"
if
[
"
$1
"
==
"cp27-cp27m"
]
;
then
if
[
"
$1
"
==
"cp27-cp27m"
]
;
then
export
LD_LIBRARY_PATH
=
/opt/_internal/cpython-2.7.11-ucs2/lib:
${
LD_LIBRARY_PATH
#/opt/_internal/cpython-2.7.11-ucs4/lib
:
}
export
LD_LIBRARY_PATH
=
/opt/_internal/cpython-2.7.11-ucs2/lib:
${
LD_LIBRARY_PATH
#/opt/_internal/cpython-2.7.11-ucs4/lib
:
}
export
PATH
=
/opt/python/cp27-cp27m/bin/:
${
PATH
}
export
PATH
=
/opt/python/cp27-cp27m/bin/:
${
PATH
}
...
@@ -119,7 +119,7 @@ function cmake_gen() {
...
@@ -119,7 +119,7 @@ function cmake_gen() {
fi
fi
fi
fi
fi
fi
if
[
"
$SYSTEM
"
==
"Darwin"
]
;
then
if
[
"
$SYSTEM
"
==
"Darwin"
]
;
then
WITH_DISTRIBUTE
=
${
WITH_DISTRIBUTE
:-
ON
}
WITH_DISTRIBUTE
=
${
WITH_DISTRIBUTE
:-
ON
}
WITH_AVX
=
${
WITH_AVX
:-
ON
}
WITH_AVX
=
${
WITH_AVX
:-
ON
}
...
@@ -127,7 +127,7 @@ function cmake_gen() {
...
@@ -127,7 +127,7 @@ function cmake_gen() {
else
else
INFERENCE_DEMO_INSTALL_DIR
=
${
INFERENCE_DEMO_INSTALL_DIR
:-
/root/.cache/inference_demo
}
INFERENCE_DEMO_INSTALL_DIR
=
${
INFERENCE_DEMO_INSTALL_DIR
:-
/root/.cache/inference_demo
}
fi
fi
cat
<<
EOF
cat
<<
EOF
========================================
========================================
Configuring cmake in /paddle/build ...
Configuring cmake in /paddle/build ...
...
@@ -394,8 +394,8 @@ EOF
...
@@ -394,8 +394,8 @@ EOF
export
http_proxy
=
export
http_proxy
=
export
https_proxy
=
export
https_proxy
=
# TODO: jiabin need to refine this part when these tests fixed on mac
# TODO: jiabin need to refine this part when these tests fixed on mac
ctest
--output-on-failure
-j
$1
ctest
--output-on-failure
-j
$1
# make install should also be test when unittest
# make install should also be test when unittest
make
install
-j
8
make
install
-j
8
pip
install
--user
${
INSTALL_PREFIX
:-
/paddle/build
}
/opt/paddle/share/wheels/
*
.whl
pip
install
--user
${
INSTALL_PREFIX
:-
/paddle/build
}
/opt/paddle/share/wheels/
*
.whl
if
[[
${
WITH_FLUID_ONLY
:-
OFF
}
==
"OFF"
]]
;
then
if
[[
${
WITH_FLUID_ONLY
:-
OFF
}
==
"OFF"
]]
;
then
...
@@ -659,7 +659,7 @@ function gen_fluid_lib() {
...
@@ -659,7 +659,7 @@ function gen_fluid_lib() {
Generating fluid library for train and inference ...
Generating fluid library for train and inference ...
========================================
========================================
EOF
EOF
cmake ..
-DWITH_DISTRIBUTE
=
OFF
cmake ..
-DWITH_DISTRIBUTE
=
OFF
-DON_INFER
=
ON
make
-j
`
nproc
`
fluid_lib_dist
make
-j
`
nproc
`
fluid_lib_dist
make
-j
`
nproc
`
inference_lib_dist
make
-j
`
nproc
`
inference_lib_dist
fi
fi
...
...
python/paddle/dataset/wmt16.py
浏览文件 @
e0a89503
...
@@ -78,7 +78,7 @@ def __build_dict(tar_file, dict_size, save_path, lang):
...
@@ -78,7 +78,7 @@ def __build_dict(tar_file, dict_size, save_path, lang):
six
.
iteritems
(
word_dict
),
key
=
lambda
x
:
x
[
1
],
six
.
iteritems
(
word_dict
),
key
=
lambda
x
:
x
[
1
],
reverse
=
True
)):
reverse
=
True
)):
if
idx
+
3
==
dict_size
:
break
if
idx
+
3
==
dict_size
:
break
fout
.
write
(
"%s
\n
"
%
(
word
[
0
]
))
fout
.
write
(
"%s
\n
"
%
(
cpt
.
to_bytes
(
word
[
0
])
))
def
__load_dict
(
tar_file
,
dict_size
,
lang
,
reverse
=
False
):
def
__load_dict
(
tar_file
,
dict_size
,
lang
,
reverse
=
False
):
...
...
python/paddle/fluid/evaluator.py
浏览文件 @
e0a89503
...
@@ -316,7 +316,7 @@ class DetectionMAP(Evaluator):
...
@@ -316,7 +316,7 @@ class DetectionMAP(Evaluator):
gt_label (Variable): The ground truth label index, which is a LoDTensor
gt_label (Variable): The ground truth label index, which is a LoDTensor
with shape [N, 1].
with shape [N, 1].
gt_box (Variable): The ground truth bounding box (bbox), which is a
gt_box (Variable): The ground truth bounding box (bbox), which is a
LoDTensor with shape [N,
6
]. The layout is [xmin, ymin, xmax, ymax].
LoDTensor with shape [N,
4
]. The layout is [xmin, ymin, xmax, ymax].
gt_difficult (Variable|None): Whether this ground truth is a difficult
gt_difficult (Variable|None): Whether this ground truth is a difficult
bounding bbox, which can be a LoDTensor [N, 1] or not set. If None,
bounding bbox, which can be a LoDTensor [N, 1] or not set. If None,
it means all the ground truth labels are not difficult bbox.
it means all the ground truth labels are not difficult bbox.
...
...
python/paddle/fluid/layers/learning_rate_scheduler.py
浏览文件 @
e0a89503
...
@@ -27,7 +27,7 @@ from . import nn
...
@@ -27,7 +27,7 @@ from . import nn
from
.
import
ops
from
.
import
ops
from
.
import
tensor
from
.
import
tensor
from
..initializer
import
init_on_cpu
from
..initializer
import
init_on_cpu
from
..framework
import
default_main_program
,
Parameter
,
unique_name
from
..framework
import
default_main_program
,
Parameter
,
unique_name
,
name_scope
__all__
=
[
__all__
=
[
'exponential_decay'
,
'natural_exp_decay'
,
'inverse_time_decay'
,
'exponential_decay'
,
'natural_exp_decay'
,
'inverse_time_decay'
,
...
@@ -332,14 +332,16 @@ def append_LARS(params_grads, learning_rate, weight_decay):
...
@@ -332,14 +332,16 @@ def append_LARS(params_grads, learning_rate, weight_decay):
return
grad_norm
+
weight_decay
*
param_norm
return
grad_norm
+
weight_decay
*
param_norm
for
param
,
grad
in
params_grads
:
for
param
,
grad
in
params_grads
:
param_lr
=
param
.
optimize_attr
[
'learning_rate'
]
with
param
.
block
.
program
.
optimized_guard
(
param_norm
=
ops
.
sqrt
(
nn
.
reduce_sum
(
input
=
ops
.
square
(
param
)))
[
param
,
grad
]),
name_scope
(
"optimizer"
):
grad_norm
=
ops
.
sqrt
(
nn
.
reduce_sum
(
input
=
ops
.
square
(
grad
)))
param_lr
=
param
.
optimize_attr
[
'learning_rate'
]
if
type
(
param_lr
)
==
float
and
param_lr
==
1.0
:
param_norm
=
ops
.
sqrt
(
nn
.
reduce_sum
(
input
=
ops
.
square
(
param
)))
decayed_lr
=
learning_rate
*
param_norm
\
grad_norm
=
ops
.
sqrt
(
nn
.
reduce_sum
(
input
=
ops
.
square
(
grad
)))
/
_balanced_weight
(
param_norm
,
grad_norm
)
if
type
(
param_lr
)
==
float
and
param_lr
==
1.0
:
else
:
decayed_lr
=
learning_rate
*
param_norm
\
decayed_lr
=
learning_rate
*
param_lr
*
param_norm
\
/
_balanced_weight
(
param_norm
,
grad_norm
)
/
_balanced_weight
(
param_norm
,
grad_norm
)
else
:
# set back param local learning rate
decayed_lr
=
learning_rate
*
param_lr
*
param_norm
\
param
.
optimize_attr
[
'learning_rate'
]
=
decayed_lr
/
_balanced_weight
(
param_norm
,
grad_norm
)
# set back param local learning rate
param
.
optimize_attr
[
'learning_rate'
]
=
decayed_lr
python/paddle/fluid/layers/nn.py
浏览文件 @
e0a89503
...
@@ -155,7 +155,9 @@ __all__ = [
...
@@ -155,7 +155,9 @@ __all__ = [
'sigmoid_cross_entropy_with_logits'
,
'sigmoid_cross_entropy_with_logits'
,
'maxout'
,
'maxout'
,
'space_to_depth'
,
'space_to_depth'
,
'sequence_reverse'
,
'affine_channel'
,
'affine_channel'
,
'hash'
,
]
]
...
@@ -1991,17 +1993,17 @@ def sequence_slice(input, offset, length, name=None):
...
@@ -1991,17 +1993,17 @@ def sequence_slice(input, offset, length, name=None):
"""
"""
**Sequence Slice Layer**
**Sequence Slice Layer**
The layer crops a subsequence from given sequence with given start
The layer crops a subsequence from given sequence with given start
offset and subsequence length.
offset and subsequence length.
It only supports sequence data (LoDTensor with lod_level equal to 1).
It only supports sequence data (LoDTensor with lod_level equal to 1).
.. code-block:: text
.. code-block:: text
- Case:
- Case:
Given the input Variable **input**:
Given the input Variable **input**:
input.data = [[a1, a2], [b1, b2], [c1, c2], [d1, d2], [e1, e2]],
input.data = [[a1, a2], [b1, b2], [c1, c2], [d1, d2], [e1, e2]],
input.lod = [[3, 2]],
input.lod = [[3, 2]],
input.dims = (5, 2),
input.dims = (5, 2),
...
@@ -2009,16 +2011,16 @@ def sequence_slice(input, offset, length, name=None):
...
@@ -2009,16 +2011,16 @@ def sequence_slice(input, offset, length, name=None):
with offset.data = [[0], [1]] and length.data = [[2], [1]],
with offset.data = [[0], [1]] and length.data = [[2], [1]],
the output Variable will be
the output Variable will be
out.data = [[a1, a2], [b1, b2], [e1, e2]],
out.data = [[a1, a2], [b1, b2], [e1, e2]],
out.lod = [[2, 1]],
out.lod = [[2, 1]],
out.dims = (3, 2).
out.dims = (3, 2).
NOTE: The first dimension size of **input**, **offset** and **length**
NOTE: The first dimension size of **input**, **offset** and **length**
should be equal. The **offset** should start from 0.
should be equal. The **offset** should start from 0.
Args:
Args:
input(Variable): The input Variable which consists of the complete
input(Variable): The input Variable which consists of the complete
sequences.
sequences.
offset(Variable): The offset to slice each sequence.
offset(Variable): The offset to slice each sequence.
length(Variable): The length of each subsequence.
length(Variable): The length of each subsequence.
...
@@ -2037,7 +2039,7 @@ def sequence_slice(input, offset, length, name=None):
...
@@ -2037,7 +2039,7 @@ def sequence_slice(input, offset, length, name=None):
dtype='float32', lod_level=1)
dtype='float32', lod_level=1)
offset = fluid.layers.assign(input=np.array([[0, 1]]).astype("int32"))
offset = fluid.layers.assign(input=np.array([[0, 1]]).astype("int32"))
length = fluid.layers.assign(input=np.array([[2, 1]]).astype("int32"))
length = fluid.layers.assign(input=np.array([[2, 1]]).astype("int32"))
subseqs = fluid.layers.sequence_slice(input=seqs, offset=offset,
subseqs = fluid.layers.sequence_slice(input=seqs, offset=offset,
length=length)
length=length)
"""
"""
helper
=
LayerHelper
(
"sequence_slice"
,
**
locals
())
helper
=
LayerHelper
(
"sequence_slice"
,
**
locals
())
...
@@ -2420,12 +2422,12 @@ def layer_norm(input,
...
@@ -2420,12 +2422,12 @@ def layer_norm(input,
param_attr(ParamAttr|None): The parameter attribute for the learnable
param_attr(ParamAttr|None): The parameter attribute for the learnable
gain :math:`g`. If :attr:`scale` is False, :attr:`param_attr` is
gain :math:`g`. If :attr:`scale` is False, :attr:`param_attr` is
omitted. If :attr:`scale` is True and :attr:`param_attr` is None,
omitted. If :attr:`scale` is True and :attr:`param_attr` is None,
a default :code:`ParamAttr` would be added as scale. The
a default :code:`ParamAttr` would be added as scale. The
:attr:`param_attr` is initialized as 1 if it is added. Default None.
:attr:`param_attr` is initialized as 1 if it is added. Default None.
bias_attr(ParamAttr|None): The parameter attribute for the learnable
bias_attr(ParamAttr|None): The parameter attribute for the learnable
bias :math:`b`. If :attr:`shift` is False, :attr:`bias_attr` is
bias :math:`b`. If :attr:`shift` is False, :attr:`bias_attr` is
omitted. If :attr:`shift` is True and :attr:`param_attr` is None,
omitted. If :attr:`shift` is True and :attr:`param_attr` is None,
a default :code:`ParamAttr` would be added as bias. The
a default :code:`ParamAttr` would be added as bias. The
:attr:`bias_attr` is initialized as 0 if it is added. Default None.
:attr:`bias_attr` is initialized as 0 if it is added. Default None.
act(str): Activation to be applied to the output of layer normalizaiton.
act(str): Activation to be applied to the output of layer normalizaiton.
Default None.
Default None.
...
@@ -3043,8 +3045,8 @@ def sequence_unpad(x, length, name=None):
...
@@ -3043,8 +3045,8 @@ def sequence_unpad(x, length, name=None):
"""
"""
**Sequence Unpad Layer**
**Sequence Unpad Layer**
This layer removes the padding data in the input sequences and convert
This layer removes the padding data in the input sequences and convert
them into sequences with actual length as output, identitied by lod
them into sequences with actual length as output, identitied by lod
information.
information.
.. code-block:: text
.. code-block:: text
...
@@ -3054,9 +3056,9 @@ def sequence_unpad(x, length, name=None):
...
@@ -3054,9 +3056,9 @@ def sequence_unpad(x, length, name=None):
Given input Variable **x**:
Given input Variable **x**:
x.data = [[ 1.0, 2.0, 3.0, 4.0, 5.0],
x.data = [[ 1.0, 2.0, 3.0, 4.0, 5.0],
[ 6.0, 7.0, 8.0, 9.0, 10.0],
[ 6.0, 7.0, 8.0, 9.0, 10.0],
[11.0, 12.0, 13.0, 14.0, 15.0]],
[11.0, 12.0, 13.0, 14.0, 15.0]],
in which there are 3 sequences padded to length 5, and the acutal length
in which there are 3 sequences padded to length 5, and the acutal length
specified by input Variable **length**:
specified by input Variable **length**:
length.data = [[2], [3], [4]],
length.data = [[2], [3], [4]],
...
@@ -3064,7 +3066,7 @@ def sequence_unpad(x, length, name=None):
...
@@ -3064,7 +3066,7 @@ def sequence_unpad(x, length, name=None):
after unpadding, the output Variable will be:
after unpadding, the output Variable will be:
out.data = [[1.0, 2.0, 6.0, 7.0, 8.0, 11.0, 12.0, 13.0, 14.0]]
out.data = [[1.0, 2.0, 6.0, 7.0, 8.0, 11.0, 12.0, 13.0, 14.0]]
out.lod = [[2, 3, 4]]
out.lod = [[2, 3, 4]]
Args:
Args:
x(Variable): Input Variable which contains the padded sequences with
x(Variable): Input Variable which contains the padded sequences with
...
@@ -5499,9 +5501,9 @@ def roi_align(input,
...
@@ -5499,9 +5501,9 @@ def roi_align(input,
Examples:
Examples:
.. code-block:: python
.. code-block:: python
align_out = fluid.layers.roi_align(input=x,
align_out = fluid.layers.roi_align(input=x,
rois=rois,
rois=rois,
pooled_height=7,
pooled_height=7,
pooled_width=7,
pooled_width=7,
spatial_scale=0.5,
spatial_scale=0.5,
sampling_ratio=-1)
sampling_ratio=-1)
...
@@ -7538,13 +7540,40 @@ def space_to_depth(x, blocksize, name=None):
...
@@ -7538,13 +7540,40 @@ def space_to_depth(x, blocksize, name=None):
return
out
return
out
@
templatedoc
()
def
sequence_reverse
(
x
,
name
=
None
):
"""
${comment}
Args:
x(${x_type}): ${x_comment}
name(basestring|None): Name of the output.
Returns:
out(${y_type}): ${y_comment}
"""
helper
=
LayerHelper
(
"sequence_reverse"
,
**
locals
())
if
name
is
None
:
out
=
helper
.
create_variable_for_type_inference
(
dtype
=
x
.
dtype
)
else
:
out
=
helper
.
create_variable
(
name
=
name
,
dtype
=
x
.
dtype
,
persistable
=
False
)
helper
.
append_op
(
type
=
"sequence_reverse"
,
inputs
=
{
"X"
:
x
},
outputs
=
{
"Y"
:
out
},
attrs
=
dict
())
return
out
def
affine_channel
(
x
,
scale
=
None
,
bias
=
None
,
data_layout
=
'NCHW'
,
name
=
None
):
def
affine_channel
(
x
,
scale
=
None
,
bias
=
None
,
data_layout
=
'NCHW'
,
name
=
None
):
"""
"""
Applies a separate affine transformation to each channel of the input.
Applies a separate affine transformation to each channel of the input.
Useful for replacing spatial batch norm with its equivalent fixed
Useful for replacing spatial batch norm with its equivalent fixed
transformation. The input also can be 2D tensor and applies a affine
transformation. The input also can be 2D tensor and applies a affine
transformation in second dimension.
transformation in second dimension.
Args:
Args:
x (Variable): Feature map input can be a 4D tensor with order NCHW
x (Variable): Feature map input can be a 4D tensor with order NCHW
or NHWC. It also can be a 2D tensor and the affine transformation
or NHWC. It also can be a 2D tensor and the affine transformation
...
@@ -7577,3 +7606,31 @@ def affine_channel(x, scale=None, bias=None, data_layout='NCHW', name=None):
...
@@ -7577,3 +7606,31 @@ def affine_channel(x, scale=None, bias=None, data_layout='NCHW', name=None):
attrs
=
{
"data_layout"
:
data_layout
},
attrs
=
{
"data_layout"
:
data_layout
},
outputs
=
{
"Out"
:
out
})
outputs
=
{
"Out"
:
out
})
return
out
return
out
def
hash
(
input
,
hash_size
,
num_hash
=
1
,
name
=
None
):
"""
hash the input
Args:
input (Variable): The input variable which is a one-hot word.
hash_size (int): The space size for hash algorithm.
num_hash (int): The times of hash, default 1.
name (str, default None): The name of this layer.
Returns:
Variable: The hash result variable which is a LoDTensor.
Examples:
.. code-block:: python
word_dict = paddle.dataset.imdb.word_dict()
x = fluid.layers.data(shape[1], dtype='int32', lod_level=1)
out = fluid.layers.hash(input=x, len(word_dict))
"""
helper
=
LayerHelper
(
'hash'
,
**
locals
())
out
=
helper
.
create_variable_for_type_inference
(
helper
.
input_dtype
(),
stop_gradient
=
True
)
helper
.
append_op
(
type
=
'hash'
,
inputs
=
{
'X'
:
input
},
outputs
=
{
'Out'
:
out
},
attrs
=
{
'num_hash'
:
num_hash
,
'mod_by'
:
hash_size
})
return
out
python/paddle/fluid/metrics.py
浏览文件 @
e0a89503
...
@@ -13,8 +13,6 @@
...
@@ -13,8 +13,6 @@
# limitations under the License.
# limitations under the License.
"""
"""
Fluid Metrics
Fluid Metrics
The metrics are accomplished via Python natively.
"""
"""
from
__future__
import
print_function
from
__future__
import
print_function
...
@@ -24,6 +22,12 @@ import copy
...
@@ -24,6 +22,12 @@ import copy
import
warnings
import
warnings
import
six
import
six
from
.layer_helper
import
LayerHelper
from
.initializer
import
Constant
from
.
import
unique_name
from
.framework
import
Program
,
Variable
,
program_guard
from
.
import
layers
__all__
=
[
__all__
=
[
'MetricBase'
,
'MetricBase'
,
'CompositeMetric'
,
'CompositeMetric'
,
...
@@ -474,71 +478,10 @@ class EditDistance(MetricBase):
...
@@ -474,71 +478,10 @@ class EditDistance(MetricBase):
"There is no data in EditDistance Metric. Please check layers.edit_distance output has been added to EditDistance."
"There is no data in EditDistance Metric. Please check layers.edit_distance output has been added to EditDistance."
)
)
avg_distance
=
self
.
total_distance
/
self
.
seq_num
avg_distance
=
self
.
total_distance
/
self
.
seq_num
avg_instance_error
=
self
.
instance_error
/
self
.
seq_num
avg_instance_error
=
self
.
instance_error
/
float
(
self
.
seq_num
)
return
avg_distance
,
avg_instance_error
return
avg_distance
,
avg_instance_error
class
DetectionMAP
(
MetricBase
):
"""
Calculate the detection mean average precision (mAP).
mAP is the metric to measure the accuracy of object detectors
like Faster R-CNN, SSD, etc.
It is the average of the maximum precisions at different recall values.
Please get more information from the following articles:
https://sanchom.wordpress.com/tag/average-precision/
https://arxiv.org/abs/1512.02325
The general steps are as follows:
1. calculate the true positive and false positive according to the input
of detection and labels.
2. calculate mAP value, support two versions: '11 point' and 'integral'.
Examples:
.. code-block:: python
pred = fluid.layers.fc(input=data, size=1000, act="tanh")
batch_map = layers.detection_map(
input,
label,
class_num,
background_label,
overlap_threshold=overlap_threshold,
evaluate_difficult=evaluate_difficult,
ap_version=ap_version)
metric = fluid.metrics.DetectionMAP()
for data in train_reader():
loss, preds, labels = exe.run(fetch_list=[cost, batch_map])
batch_size = data[0]
metric.update(value=batch_map, weight=batch_size)
numpy_map = metric.eval()
"""
def
__init__
(
self
,
name
=
None
):
super
(
DetectionMAP
,
self
).
__init__
(
name
)
# the current map value
self
.
value
=
.
0
self
.
weight
=
.
0
def
update
(
self
,
value
,
weight
):
if
not
_is_number_or_matrix_
(
value
):
raise
ValueError
(
"The 'value' must be a number(int, float) or a numpy ndarray."
)
if
not
_is_number_
(
weight
):
raise
ValueError
(
"The 'weight' must be a number(int, float)."
)
self
.
value
+=
value
self
.
weight
+=
weight
def
eval
(
self
):
if
self
.
weight
==
0
:
raise
ValueError
(
"There is no data in DetectionMAP Metrics. "
"Please check layers.detection_map output has added to DetectionMAP."
)
return
self
.
value
/
self
.
weight
class
Auc
(
MetricBase
):
class
Auc
(
MetricBase
):
"""
"""
Auc metric adapts to the binary classification.
Auc metric adapts to the binary classification.
...
@@ -616,3 +559,179 @@ class Auc(MetricBase):
...
@@ -616,3 +559,179 @@ class Auc(MetricBase):
idx
-=
1
idx
-=
1
return
auc
/
tot_pos
/
tot_neg
if
tot_pos
>
0.0
and
tot_neg
>
0.0
else
0.0
return
auc
/
tot_pos
/
tot_neg
if
tot_pos
>
0.0
and
tot_neg
>
0.0
else
0.0
class
DetectionMAP
(
object
):
"""
Calculate the detection mean average precision (mAP).
The general steps are as follows:
1. calculate the true positive and false positive according to the input
of detection and labels.
2. calculate mAP value, support two versions: '11 point' and 'integral'.
Please get more information from the following articles:
https://sanchom.wordpress.com/tag/average-precision/
https://arxiv.org/abs/1512.02325
Args:
input (Variable): The detection results, which is a LoDTensor with shape
[M, 6]. The layout is [label, confidence, xmin, ymin, xmax, ymax].
gt_label (Variable): The ground truth label index, which is a LoDTensor
with shape [N, 1].
gt_box (Variable): The ground truth bounding box (bbox), which is a
LoDTensor with shape [N, 4]. The layout is [xmin, ymin, xmax, ymax].
gt_difficult (Variable|None): Whether this ground truth is a difficult
bounding bbox, which can be a LoDTensor [N, 1] or not set. If None,
it means all the ground truth labels are not difficult bbox.
class_num (int): The class number.
background_label (int): The index of background label, the background
label will be ignored. If set to -1, then all categories will be
considered, 0 by defalut.
overlap_threshold (float): The threshold for deciding true/false
positive, 0.5 by defalut.
evaluate_difficult (bool): Whether to consider difficult ground truth
for evaluation, True by defalut. This argument does not work when
gt_difficult is None.
ap_version (string): The average precision calculation ways, it must be
'integral' or '11point'. Please check
https://sanchom.wordpress.com/tag/average-precision/ for details.
- 11point: the 11-point interpolated average precision.
- integral: the natural integral of the precision-recall curve.
Examples:
.. code-block:: python
exe = fluid.Executor(place)
map_evaluator = fluid.Evaluator.DetectionMAP(input,
gt_label, gt_box, gt_difficult)
cur_map, accum_map = map_evaluator.get_map_var()
fetch = [cost, cur_map, accum_map]
for epoch in PASS_NUM:
map_evaluator.reset(exe)
for data in batches:
loss, cur_map_v, accum_map_v = exe.run(fetch_list=fetch)
In the above example:
'cur_map_v' is the mAP of current mini-batch.
'accum_map_v' is the accumulative mAP of one pass.
"""
def
__init__
(
self
,
input
,
gt_label
,
gt_box
,
gt_difficult
=
None
,
class_num
=
None
,
background_label
=
0
,
overlap_threshold
=
0.5
,
evaluate_difficult
=
True
,
ap_version
=
'integral'
):
self
.
helper
=
LayerHelper
(
'map_eval'
)
gt_label
=
layers
.
cast
(
x
=
gt_label
,
dtype
=
gt_box
.
dtype
)
if
gt_difficult
:
gt_difficult
=
layers
.
cast
(
x
=
gt_difficult
,
dtype
=
gt_box
.
dtype
)
label
=
layers
.
concat
([
gt_label
,
gt_difficult
,
gt_box
],
axis
=
1
)
else
:
label
=
layers
.
concat
([
gt_label
,
gt_box
],
axis
=
1
)
# calculate mean average precision (mAP) of current mini-batch
map
=
layers
.
detection_map
(
input
,
label
,
class_num
,
background_label
,
overlap_threshold
=
overlap_threshold
,
evaluate_difficult
=
evaluate_difficult
,
ap_version
=
ap_version
)
states
=
[]
states
.
append
(
self
.
_create_state
(
dtype
=
'int32'
,
shape
=
None
,
suffix
=
'accum_pos_count'
))
states
.
append
(
self
.
_create_state
(
dtype
=
'float32'
,
shape
=
None
,
suffix
=
'accum_true_pos'
))
states
.
append
(
self
.
_create_state
(
dtype
=
'float32'
,
shape
=
None
,
suffix
=
'accum_false_pos'
))
var
=
self
.
_create_state
(
dtype
=
'int32'
,
shape
=
[
1
],
suffix
=
'has_state'
)
self
.
helper
.
set_variable_initializer
(
var
,
initializer
=
Constant
(
value
=
int
(
0
)))
self
.
has_state
=
var
# calculate accumulative mAP
accum_map
=
layers
.
detection_map
(
input
,
label
,
class_num
,
background_label
,
overlap_threshold
=
overlap_threshold
,
evaluate_difficult
=
evaluate_difficult
,
has_state
=
self
.
has_state
,
input_states
=
states
,
out_states
=
states
,
ap_version
=
ap_version
)
layers
.
fill_constant
(
shape
=
self
.
has_state
.
shape
,
value
=
1
,
dtype
=
self
.
has_state
.
dtype
,
out
=
self
.
has_state
)
self
.
cur_map
=
map
self
.
accum_map
=
accum_map
def
_create_state
(
self
,
suffix
,
dtype
,
shape
):
"""
Create state variable.
Args:
suffix(str): the state suffix.
dtype(str|core.VarDesc.VarType): the state data type
shape(tuple|list): the shape of state
Returns: State variable
"""
state
=
self
.
helper
.
create_variable
(
name
=
"_"
.
join
([
unique_name
.
generate
(
self
.
helper
.
name
),
suffix
]),
persistable
=
True
,
dtype
=
dtype
,
shape
=
shape
)
return
state
def
get_map_var
(
self
):
"""
Returns: mAP variable of current mini-batch and
accumulative mAP variable cross mini-batches.
"""
return
self
.
cur_map
,
self
.
accum_map
def
reset
(
self
,
executor
,
reset_program
=
None
):
"""
Reset metric states at the begin of each pass/user specified batch.
Args:
executor(Executor): a executor for executing
the reset_program.
reset_program(Program|None): a single Program for reset process.
If None, will create a Program.
"""
def
_clone_var_
(
block
,
var
):
assert
isinstance
(
var
,
Variable
)
return
block
.
create_var
(
name
=
var
.
name
,
shape
=
var
.
shape
,
dtype
=
var
.
dtype
,
type
=
var
.
type
,
lod_level
=
var
.
lod_level
,
persistable
=
var
.
persistable
)
if
reset_program
is
None
:
reset_program
=
Program
()
with
program_guard
(
main_program
=
reset_program
):
var
=
_clone_var_
(
reset_program
.
current_block
(),
self
.
has_state
)
layers
.
fill_constant
(
shape
=
var
.
shape
,
value
=
0
,
dtype
=
var
.
dtype
,
out
=
var
)
executor
.
run
(
reset_program
)
python/paddle/fluid/optimizer.py
浏览文件 @
e0a89503
...
@@ -14,6 +14,7 @@
...
@@ -14,6 +14,7 @@
from
__future__
import
print_function
from
__future__
import
print_function
import
re
import
re
import
sys
from
collections
import
defaultdict
from
collections
import
defaultdict
from
paddle.fluid.framework
import
Program
,
Variable
,
name_scope
,
default_main_program
from
paddle.fluid.framework
import
Program
,
Variable
,
name_scope
,
default_main_program
from
.
import
framework
from
.
import
framework
...
@@ -32,7 +33,8 @@ __all__ = [
...
@@ -32,7 +33,8 @@ __all__ = [
'SGD'
,
'Momentum'
,
'Adagrad'
,
'Adam'
,
'Adamax'
,
'DecayedAdagrad'
,
'Ftrl'
,
'SGD'
,
'Momentum'
,
'Adagrad'
,
'Adam'
,
'Adamax'
,
'DecayedAdagrad'
,
'Ftrl'
,
'SGDOptimizer'
,
'MomentumOptimizer'
,
'AdagradOptimizer'
,
'AdamOptimizer'
,
'SGDOptimizer'
,
'MomentumOptimizer'
,
'AdagradOptimizer'
,
'AdamOptimizer'
,
'AdamaxOptimizer'
,
'DecayedAdagradOptimizer'
,
'RMSPropOptimizer'
,
'AdamaxOptimizer'
,
'DecayedAdagradOptimizer'
,
'RMSPropOptimizer'
,
'FtrlOptimizer'
,
'Adadelta'
,
'ModelAverage'
,
'RMSPropOptimizer'
'FtrlOptimizer'
,
'Adadelta'
,
'ModelAverage'
,
'LarsMomentum'
,
'LarsMomentumOptimizer'
]
]
...
@@ -105,7 +107,6 @@ class Optimizer(object):
...
@@ -105,7 +107,6 @@ class Optimizer(object):
param
=
param_and_grad
[
0
]
param
=
param_and_grad
[
0
]
param_lr
=
param
.
optimize_attr
[
'learning_rate'
]
param_lr
=
param
.
optimize_attr
[
'learning_rate'
]
if
type
(
param_lr
)
==
Variable
:
if
type
(
param_lr
)
==
Variable
:
print
(
"returns updated param lr "
,
param_lr
)
return
param_lr
return
param_lr
else
:
else
:
if
param_lr
==
1.0
:
if
param_lr
==
1.0
:
...
@@ -400,6 +401,91 @@ class MomentumOptimizer(Optimizer):
...
@@ -400,6 +401,91 @@ class MomentumOptimizer(Optimizer):
return
momentum_op
return
momentum_op
class
LarsMomentumOptimizer
(
Optimizer
):
"""
Momentum optimizer with LARS support
The update equations are as follows:
.. math::
& local\_learning\_rate = learning\_rate * lars\_coeff *
\\
\\
frac{||param||}{||gradient|| + lars\_weight\_decay * ||param||}
& velocity = mu * velocity + local\_learning\_rate * (gradient + lars\_weight\_decay * param)
& param = param - velocity
Args:
learning_rate (float|Variable): the learning rate used to update parameters.
\
Can be a float value or a Variable with one float value as data element.
momentum (float): momentum factor
lars_coeff (float): defines how much we trust the layer to change its weights.
lars_weight_decay (float): weight decay coefficient for decaying using LARS.
regularization: A Regularizer, such as
fluid.regularizer.L2DecayRegularizer.
name: A optional name prefix.
Examples:
.. code-block:: python
optimizer = fluid.optimizer.LarsMomentum(learning_rate=0.2, momentum=0.1, lars_weight_decay=0.001)
optimizer.minimize(cost)
"""
_velocity_acc_str
=
"velocity"
def
__init__
(
self
,
learning_rate
,
momentum
,
lars_coeff
=
0.001
,
lars_weight_decay
=
0.0005
,
regularization
=
None
,
name
=
None
):
assert
learning_rate
is
not
None
assert
momentum
is
not
None
super
(
LarsMomentumOptimizer
,
self
).
__init__
(
learning_rate
=
learning_rate
,
regularization
=
regularization
,
name
=
name
)
self
.
type
=
"lars_momentum"
self
.
_momentum
=
momentum
self
.
_lars_coeff
=
float
(
lars_coeff
)
self
.
_lars_weight_decay
=
float
(
lars_weight_decay
)
def
_create_accumulators
(
self
,
block
,
parameters
):
assert
isinstance
(
block
,
framework
.
Block
)
for
p
in
parameters
:
self
.
_add_accumulator
(
self
.
_velocity_acc_str
,
p
)
def
_append_optimize_op
(
self
,
block
,
param_and_grad
):
assert
isinstance
(
block
,
framework
.
Block
)
velocity_acc
=
self
.
_get_accumulator
(
self
.
_velocity_acc_str
,
param_and_grad
[
0
])
# create the momentum optimize op
momentum_op
=
block
.
append_op
(
type
=
self
.
type
,
inputs
=
{
"Param"
:
param_and_grad
[
0
],
"Grad"
:
param_and_grad
[
1
],
"Velocity"
:
velocity_acc
,
"LearningRate"
:
self
.
_create_param_lr
(
param_and_grad
)
},
outputs
=
{
"ParamOut"
:
param_and_grad
[
0
],
"VelocityOut"
:
velocity_acc
},
attrs
=
{
"mu"
:
self
.
_momentum
,
"lars_coeff"
:
self
.
_lars_coeff
,
"lars_weight_decay"
:
self
.
_lars_weight_decay
})
return
momentum_op
class
AdagradOptimizer
(
Optimizer
):
class
AdagradOptimizer
(
Optimizer
):
"""
"""
**Adaptive Gradient Algorithm (Adagrad)**
**Adaptive Gradient Algorithm (Adagrad)**
...
@@ -1221,6 +1307,7 @@ DecayedAdagrad = DecayedAdagradOptimizer
...
@@ -1221,6 +1307,7 @@ DecayedAdagrad = DecayedAdagradOptimizer
Adadelta
=
AdadeltaOptimizer
Adadelta
=
AdadeltaOptimizer
RMSProp
=
RMSPropOptimizer
RMSProp
=
RMSPropOptimizer
Ftrl
=
FtrlOptimizer
Ftrl
=
FtrlOptimizer
LarsMomentum
=
LarsMomentumOptimizer
class
ModelAverage
(
Optimizer
):
class
ModelAverage
(
Optimizer
):
...
...
python/paddle/fluid/tests/unittests/dist_mnist.py
浏览文件 @
e0a89503
...
@@ -95,7 +95,7 @@ class TestDistMnist2x2(TestDistRunnerBase):
...
@@ -95,7 +95,7 @@ class TestDistMnist2x2(TestDistRunnerBase):
# Reader
# Reader
train_reader
=
paddle
.
batch
(
train_reader
=
paddle
.
batch
(
paddle
.
dataset
.
mnist
.
t
rain
(),
batch_size
=
batch_size
)
paddle
.
dataset
.
mnist
.
t
est
(),
batch_size
=
batch_size
)
test_reader
=
paddle
.
batch
(
test_reader
=
paddle
.
batch
(
paddle
.
dataset
.
mnist
.
test
(),
batch_size
=
batch_size
)
paddle
.
dataset
.
mnist
.
test
(),
batch_size
=
batch_size
)
opt
.
minimize
(
avg_cost
)
opt
.
minimize
(
avg_cost
)
...
...
python/paddle/fluid/tests/unittests/dist_mnist_batch_merge.py
0 → 100644
浏览文件 @
e0a89503
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from
__future__
import
print_function
import
numpy
as
np
import
argparse
import
time
import
math
import
paddle
import
paddle.fluid
as
fluid
import
paddle.fluid.profiler
as
profiler
from
paddle.fluid
import
core
import
unittest
from
multiprocessing
import
Process
import
os
import
signal
from
functools
import
reduce
from
test_dist_base
import
TestDistRunnerBase
,
runtime_main
from
dist_mnist
import
cnn_model
DTYPE
=
"float32"
def
test_merge_reader
(
repeat_batch_size
=
8
):
orig_reader
=
paddle
.
dataset
.
mnist
.
test
()
record_batch
=
[]
b
=
0
for
d
in
orig_reader
():
if
b
>=
repeat_batch_size
:
break
record_batch
.
append
(
d
)
b
+=
1
while
True
:
for
d
in
record_batch
:
yield
d
class
TestDistMnist2x2
(
TestDistRunnerBase
):
def
get_model
(
self
,
batch_size
=
2
):
# Input data
images
=
fluid
.
layers
.
data
(
name
=
'pixel'
,
shape
=
[
1
,
28
,
28
],
dtype
=
DTYPE
)
label
=
fluid
.
layers
.
data
(
name
=
'label'
,
shape
=
[
1
],
dtype
=
'int64'
)
# Train program
predict
=
cnn_model
(
images
)
cost
=
fluid
.
layers
.
cross_entropy
(
input
=
predict
,
label
=
label
)
avg_cost
=
fluid
.
layers
.
mean
(
x
=
cost
)
# Evaluator
batch_size_tensor
=
fluid
.
layers
.
create_tensor
(
dtype
=
'int64'
)
batch_acc
=
fluid
.
layers
.
accuracy
(
input
=
predict
,
label
=
label
,
total
=
batch_size_tensor
)
inference_program
=
fluid
.
default_main_program
().
clone
()
# Optimization
opt
=
fluid
.
optimizer
.
Momentum
(
learning_rate
=
0.001
,
momentum
=
0.9
)
# Reader
train_reader
=
paddle
.
batch
(
test_merge_reader
,
batch_size
=
batch_size
)
test_reader
=
paddle
.
batch
(
paddle
.
dataset
.
mnist
.
test
(),
batch_size
=
batch_size
)
opt
.
minimize
(
avg_cost
)
return
inference_program
,
avg_cost
,
train_reader
,
test_reader
,
batch_acc
,
predict
if
__name__
==
"__main__"
:
runtime_main
(
TestDistMnist2x2
)
python/paddle/fluid/tests/unittests/dist_mnist_lars.py
0 → 100644
浏览文件 @
e0a89503
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from
__future__
import
print_function
import
numpy
as
np
import
argparse
import
time
import
math
import
paddle
import
paddle.fluid
as
fluid
import
paddle.fluid.profiler
as
profiler
from
paddle.fluid
import
core
import
unittest
from
multiprocessing
import
Process
import
os
import
signal
from
functools
import
reduce
from
test_dist_base
import
TestDistRunnerBase
,
runtime_main
from
dist_mnist
import
cnn_model
DTYPE
=
"float32"
paddle
.
dataset
.
mnist
.
fetch
()
# Fix seed for test
fluid
.
default_startup_program
().
random_seed
=
1
fluid
.
default_main_program
().
random_seed
=
1
class
TestDistMnist2x2
(
TestDistRunnerBase
):
def
get_model
(
self
,
batch_size
=
2
):
# Input data
images
=
fluid
.
layers
.
data
(
name
=
'pixel'
,
shape
=
[
1
,
28
,
28
],
dtype
=
DTYPE
)
label
=
fluid
.
layers
.
data
(
name
=
'label'
,
shape
=
[
1
],
dtype
=
'int64'
)
# Train program
predict
=
cnn_model
(
images
)
cost
=
fluid
.
layers
.
cross_entropy
(
input
=
predict
,
label
=
label
)
avg_cost
=
fluid
.
layers
.
mean
(
x
=
cost
)
# Evaluator
batch_size_tensor
=
fluid
.
layers
.
create_tensor
(
dtype
=
'int64'
)
batch_acc
=
fluid
.
layers
.
accuracy
(
input
=
predict
,
label
=
label
,
total
=
batch_size_tensor
)
inference_program
=
fluid
.
default_main_program
().
clone
()
# Optimization
opt
=
fluid
.
optimizer
.
LarsMomentumOptimizer
(
learning_rate
=
0.001
,
momentum
=
0.9
)
# Reader
train_reader
=
paddle
.
batch
(
paddle
.
dataset
.
mnist
.
test
(),
batch_size
=
batch_size
)
test_reader
=
paddle
.
batch
(
paddle
.
dataset
.
mnist
.
test
(),
batch_size
=
batch_size
)
opt
.
minimize
(
avg_cost
)
return
inference_program
,
avg_cost
,
train_reader
,
test_reader
,
batch_acc
,
predict
if
__name__
==
"__main__"
:
runtime_main
(
TestDistMnist2x2
)
python/paddle/fluid/tests/unittests/dist_transformer.py
浏览文件 @
e0a89503
...
@@ -1159,6 +1159,7 @@ def prepare_encoder(src_word,
...
@@ -1159,6 +1159,7 @@ def prepare_encoder(src_word,
name
=
pos_enc_param_name
,
name
=
pos_enc_param_name
,
trainable
=
False
,
trainable
=
False
,
initializer
=
fluid
.
initializer
.
ConstantInitializer
(
0.001
)))
initializer
=
fluid
.
initializer
.
ConstantInitializer
(
0.001
)))
src_pos_enc
.
stop_gradient
=
True
enc_input
=
src_word_emb
+
src_pos_enc
enc_input
=
src_word_emb
+
src_pos_enc
return
layers
.
dropout
(
return
layers
.
dropout
(
enc_input
,
enc_input
,
...
...
python/paddle/fluid/tests/unittests/test_dist_base.py
浏览文件 @
e0a89503
...
@@ -26,10 +26,11 @@ import argparse
...
@@ -26,10 +26,11 @@ import argparse
import
paddle.fluid
as
fluid
import
paddle.fluid
as
fluid
RUN_STEP
=
10
RUN_STEP
=
10
DEFAULT_BATCH_SIZE
=
2
class
TestDistRunnerBase
(
object
):
class
TestDistRunnerBase
(
object
):
def
get_model
(
self
,
batch_size
=
2
):
def
get_model
(
self
,
batch_size
=
DEFAULT_BATCH_SIZE
):
raise
NotImplementedError
(
raise
NotImplementedError
(
"get_model should be implemented by child classes."
)
"get_model should be implemented by child classes."
)
...
@@ -48,8 +49,7 @@ class TestDistRunnerBase(object):
...
@@ -48,8 +49,7 @@ class TestDistRunnerBase(object):
return
t
return
t
def
run_pserver
(
self
,
args
):
def
run_pserver
(
self
,
args
):
self
.
get_model
(
batch_size
=
args
.
batch_size
)
self
.
get_model
(
batch_size
=
2
)
# NOTE: pserver should not call memory optimize
# NOTE: pserver should not call memory optimize
t
=
self
.
get_transpiler
(
args
.
trainer_id
,
t
=
self
.
get_transpiler
(
args
.
trainer_id
,
fluid
.
default_main_program
(),
args
.
endpoints
,
fluid
.
default_main_program
(),
args
.
endpoints
,
...
@@ -65,7 +65,7 @@ class TestDistRunnerBase(object):
...
@@ -65,7 +65,7 @@ class TestDistRunnerBase(object):
def
run_trainer
(
self
,
args
):
def
run_trainer
(
self
,
args
):
test_program
,
avg_cost
,
train_reader
,
test_reader
,
batch_acc
,
predict
=
\
test_program
,
avg_cost
,
train_reader
,
test_reader
,
batch_acc
,
predict
=
\
self
.
get_model
(
batch_size
=
2
)
self
.
get_model
(
batch_size
=
args
.
batch_size
)
if
args
.
mem_opt
:
if
args
.
mem_opt
:
fluid
.
memory_optimize
(
fluid
.
default_main_program
(),
skip_grads
=
True
)
fluid
.
memory_optimize
(
fluid
.
default_main_program
(),
skip_grads
=
True
)
...
@@ -92,6 +92,11 @@ class TestDistRunnerBase(object):
...
@@ -92,6 +92,11 @@ class TestDistRunnerBase(object):
strategy
.
allow_op_delay
=
False
strategy
.
allow_op_delay
=
False
build_stra
=
fluid
.
BuildStrategy
()
build_stra
=
fluid
.
BuildStrategy
()
if
args
.
batch_merge_repeat
>
1
:
pass_builder
=
build_stra
.
_create_passes_from_strategy
()
mypass
=
pass_builder
.
insert_pass
(
len
(
pass_builder
.
all_passes
())
-
2
,
"multi_batch_merge_pass"
)
mypass
.
set_int
(
"num_repeats"
,
args
.
batch_merge_repeat
)
if
args
.
use_reduce
:
if
args
.
use_reduce
:
build_stra
.
reduce_strategy
=
fluid
.
BuildStrategy
.
ReduceStrategy
.
Reduce
build_stra
.
reduce_strategy
=
fluid
.
BuildStrategy
.
ReduceStrategy
.
Reduce
...
@@ -145,6 +150,9 @@ def runtime_main(test_class):
...
@@ -145,6 +150,9 @@ def runtime_main(test_class):
parser
.
add_argument
(
'--use_reduce'
,
action
=
'store_true'
)
parser
.
add_argument
(
'--use_reduce'
,
action
=
'store_true'
)
parser
.
add_argument
(
parser
.
add_argument
(
'--use_reader_alloc'
,
action
=
'store_true'
,
required
=
False
,
default
=
True
)
'--use_reader_alloc'
,
action
=
'store_true'
,
required
=
False
,
default
=
True
)
parser
.
add_argument
(
'--batch_size'
,
required
=
False
,
type
=
int
,
default
=
2
)
parser
.
add_argument
(
'--batch_merge_repeat'
,
required
=
False
,
type
=
int
,
default
=
1
)
args
=
parser
.
parse_args
()
args
=
parser
.
parse_args
()
...
@@ -244,9 +252,18 @@ class TestDistBase(unittest.TestCase):
...
@@ -244,9 +252,18 @@ class TestDistBase(unittest.TestCase):
(
e
,
retry_times
))
(
e
,
retry_times
))
retry_times
-=
1
retry_times
-=
1
def
_run_local
(
self
,
model
,
envs
,
check_error_log
):
def
_run_local
(
self
,
model
,
envs
,
check_error_log
=
False
,
batch_size
=
DEFAULT_BATCH_SIZE
,
batch_merge_repeat
=
1
):
cmd
=
"%s %s --role trainer"
%
(
self
.
_python_interp
,
model
)
cmd
=
"%s %s --role trainer"
%
(
self
.
_python_interp
,
model
)
if
batch_size
!=
DEFAULT_BATCH_SIZE
:
cmd
+=
" --batch_size %d"
%
batch_size
if
batch_merge_repeat
>
1
:
cmd
+=
" --batch_merge_repeat %d"
%
batch_merge_repeat
if
self
.
__use_cuda
:
if
self
.
__use_cuda
:
cmd
+=
" --use_cuda"
cmd
+=
" --use_cuda"
...
...
python/paddle/fluid/tests/unittests/test_dist_ctr.py
浏览文件 @
e0a89503
...
@@ -23,9 +23,8 @@ class TestDistCTR2x2(TestDistBase):
...
@@ -23,9 +23,8 @@ class TestDistCTR2x2(TestDistBase):
self
.
_sync_mode
=
True
self
.
_sync_mode
=
True
self
.
_enforce_place
=
"CPU"
self
.
_enforce_place
=
"CPU"
def
test_dist_ctr
(
self
):
def
test_dist_ctr
(
self
):
self
.
check_with_place
(
"dist_ctr.py"
,
delta
=
1e-7
,
check_error_log
=
False
)
self
.
check_with_place
(
"dist_ctr.py"
,
delta
=
1e-7
,
check_error_log
=
False
)
if
__name__
==
"__main__"
:
if
__name__
==
"__main__"
:
...
...
python/paddle/fluid/tests/unittests/test_dist_mnist.py
浏览文件 @
e0a89503
...
@@ -26,6 +26,15 @@ class TestDistMnist2x2(TestDistBase):
...
@@ -26,6 +26,15 @@ class TestDistMnist2x2(TestDistBase):
self
.
check_with_place
(
"dist_mnist.py"
,
delta
=
1e-5
)
self
.
check_with_place
(
"dist_mnist.py"
,
delta
=
1e-5
)
class
TestDistMnist2x2Lars
(
TestDistBase
):
def
_setup_config
(
self
):
self
.
_sync_mode
=
True
self
.
_use_reduce
=
False
def
test_se_resnext
(
self
):
self
.
check_with_place
(
"dist_mnist_lars.py"
,
delta
=
1e-5
)
class
TestDistMnist2x2WithMemopt
(
TestDistBase
):
class
TestDistMnist2x2WithMemopt
(
TestDistBase
):
def
_setup_config
(
self
):
def
_setup_config
(
self
):
self
.
_sync_mode
=
True
self
.
_sync_mode
=
True
...
@@ -40,8 +49,7 @@ class TestDistMnistAsync(TestDistBase):
...
@@ -40,8 +49,7 @@ class TestDistMnistAsync(TestDistBase):
self
.
_sync_mode
=
False
self
.
_sync_mode
=
False
self
.
_use_reduce
=
False
self
.
_use_reduce
=
False
# FIXME(typhoonzero): fix async mode test later
def
test_dist_train
(
self
):
def
no_test_dist_train
(
self
):
self
.
check_with_place
(
"dist_mnist.py"
,
delta
=
200
)
self
.
check_with_place
(
"dist_mnist.py"
,
delta
=
200
)
...
...
python/paddle/fluid/tests/unittests/test_dist_mnist_batch_merge.py
0 → 100644
浏览文件 @
e0a89503
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from
__future__
import
print_function
import
unittest
from
test_dist_base
import
TestDistBase
import
os
class
TestDistMnist2x2
(
TestDistBase
):
def
_setup_config
(
self
):
self
.
_sync_mode
=
True
self
.
_use_reduce
=
False
def
test_dist_train
(
self
):
self
.
check_with_place
(
"dist_mnist_batch_merge.py"
,
delta
=
1e-5
)
def
check_with_place
(
self
,
model_file
,
delta
=
1e-3
,
check_error_log
=
False
,
need_envs
=
{}):
# TODO(typhoonzero): should auto adapt GPU count on the machine.
required_envs
=
{
"PATH"
:
os
.
getenv
(
"PATH"
,
""
),
"PYTHONPATH"
:
os
.
getenv
(
"PYTHONPATH"
,
""
),
"LD_LIBRARY_PATH"
:
os
.
getenv
(
"LD_LIBRARY_PATH"
,
""
),
"FLAGS_fraction_of_gpu_memory_to_use"
:
"0.15"
,
"FLAGS_cudnn_deterministic"
:
"1"
,
}
required_envs
.
update
(
need_envs
)
if
check_error_log
:
required_envs
[
"GLOG_v"
]
=
"7"
required_envs
[
"GLOG_logtostderr"
]
=
"1"
no_merge_losses
=
self
.
_run_local
(
model_file
,
required_envs
,
check_error_log
=
check_error_log
,
batch_size
=
4
)
batch_merge_losses
=
self
.
_run_local
(
model_file
,
required_envs
,
check_error_log
=
check_error_log
,
batch_size
=
2
,
batch_merge_repeat
=
2
)
# Ensure both result have values.
self
.
assertGreater
(
len
(
no_merge_losses
),
1
)
self
.
assertEqual
(
len
(
no_merge_losses
),
len
(
batch_merge_losses
))
if
__name__
==
"__main__"
:
unittest
.
main
()
python/paddle/fluid/tests/unittests/test_dist_se_resnext.py
浏览文件 @
e0a89503
...
@@ -40,8 +40,7 @@ class TestDistSeResneXt2x2Async(TestDistBase):
...
@@ -40,8 +40,7 @@ class TestDistSeResneXt2x2Async(TestDistBase):
self
.
_sync_mode
=
False
self
.
_sync_mode
=
False
self
.
_use_reader_alloc
=
False
self
.
_use_reader_alloc
=
False
#FIXME(typhoonzero): fix async mode later
def
test_dist_train
(
self
):
def
no_test_dist_train
(
self
):
self
.
check_with_place
(
"dist_se_resnext.py"
,
delta
=
100
)
self
.
check_with_place
(
"dist_se_resnext.py"
,
delta
=
100
)
...
...
python/paddle/fluid/tests/unittests/test_dist_simnet_bow.py
浏览文件 @
e0a89503
...
@@ -79,8 +79,7 @@ class TestDistSimnetBow2x2SparseAsync(TestDistBase):
...
@@ -79,8 +79,7 @@ class TestDistSimnetBow2x2SparseAsync(TestDistBase):
self
.
_sync_mode
=
False
self
.
_sync_mode
=
False
self
.
_enforce_place
=
"CPU"
self
.
_enforce_place
=
"CPU"
#FIXME(typhoonzero): fix async tests later
def
test_simnet_bow
(
self
):
def
no_test_simnet_bow
(
self
):
need_envs
=
{
need_envs
=
{
"IS_DISTRIBUTED"
:
'0'
,
"IS_DISTRIBUTED"
:
'0'
,
"IS_SPARSE"
:
'1'
,
"IS_SPARSE"
:
'1'
,
...
...
python/paddle/fluid/tests/unittests/test_hash_op.py
0 → 100644
浏览文件 @
e0a89503
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import
unittest
import
numpy
as
np
from
op_test
import
OpTest
class
TestScaleOp
(
OpTest
):
def
setUp
(
self
):
self
.
op_type
=
"hash"
self
.
init_test_case
()
self
.
inputs
=
{
'X'
:
(
self
.
in_seq
,
self
.
lod
)}
self
.
attrs
=
{
'num_hash'
:
4
,
'mod_by'
:
10000
}
self
.
outputs
=
{
'Out'
:
(
self
.
out_seq
,
self
.
lod
)}
def
init_test_case
(
self
):
np
.
random
.
seed
=
1
self
.
in_seq
=
np
.
random
.
randint
(
0
,
10
,
(
30
,
1
)).
astype
(
"int32"
)
self
.
lod
=
[[
9
,
4
,
11
,
6
]]
# self.out_seq = np.ones([30, 4, 1], dtype=np.int32)
self
.
out_seq
=
[
[[
9662
],
[
9217
],
[
1129
],
[
8487
]],
[[
9662
],
[
9217
],
[
1129
],
[
8487
]],
[[
8310
],
[
1327
],
[
1654
],
[
4567
]],
[[
6897
],
[
3218
],
[
2013
],
[
1241
]],
[[
9407
],
[
6715
],
[
6949
],
[
8094
]],
[[
8473
],
[
694
],
[
5142
],
[
2479
]],
[[
8310
],
[
1327
],
[
1654
],
[
4567
]],
[[
6897
],
[
3218
],
[
2013
],
[
1241
]],
[[
4372
],
[
9456
],
[
8204
],
[
6695
]],
[[
6897
],
[
3218
],
[
2013
],
[
1241
]],
[[
8473
],
[
694
],
[
5142
],
[
2479
]],
[[
4372
],
[
9456
],
[
8204
],
[
6695
]],
[[
4372
],
[
9456
],
[
8204
],
[
6695
]],
[[
8473
],
[
694
],
[
5142
],
[
2479
]],
[[
9407
],
[
6715
],
[
6949
],
[
8094
]],
[[
9369
],
[
4525
],
[
8935
],
[
9210
]],
[[
4372
],
[
9456
],
[
8204
],
[
6695
]],
[[
4372
],
[
9456
],
[
8204
],
[
6695
]],
[[
9369
],
[
4525
],
[
8935
],
[
9210
]],
[[
6897
],
[
3218
],
[
2013
],
[
1241
]],
[[
9038
],
[
7951
],
[
5953
],
[
8657
]],
[[
9407
],
[
6715
],
[
6949
],
[
8094
]],
[[
9662
],
[
9217
],
[
1129
],
[
8487
]],
[[
9369
],
[
4525
],
[
8935
],
[
9210
]],
[[
9038
],
[
7951
],
[
5953
],
[
8657
]],
[[
9662
],
[
9217
],
[
1129
],
[
8487
]],
[[
9369
],
[
4525
],
[
8935
],
[
9210
]],
[[
1719
],
[
5986
],
[
9919
],
[
3421
]],
[[
4372
],
[
9456
],
[
8204
],
[
6695
]],
[[
9038
],
[
7951
],
[
5953
],
[
8657
]]
]
self
.
out_seq
=
np
.
array
(
self
.
out_seq
)
def
test_check_output
(
self
):
self
.
check_output
()
if
__name__
==
"__main__"
:
unittest
.
main
()
python/paddle/fluid/tests/unittests/test_metrics.py
0 → 100644
浏览文件 @
e0a89503
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import
unittest
import
paddle.fluid
as
fluid
from
paddle.fluid.framework
import
Program
,
program_guard
class
TestMetricsDetectionMap
(
unittest
.
TestCase
):
def
test_detection_map
(
self
):
program
=
fluid
.
Program
()
with
program_guard
(
program
):
detect_res
=
fluid
.
layers
.
data
(
name
=
'detect_res'
,
shape
=
[
10
,
6
],
append_batch_size
=
False
,
dtype
=
'float32'
)
label
=
fluid
.
layers
.
data
(
name
=
'label'
,
shape
=
[
10
,
1
],
append_batch_size
=
False
,
dtype
=
'float32'
)
box
=
fluid
.
layers
.
data
(
name
=
'bbox'
,
shape
=
[
10
,
4
],
append_batch_size
=
False
,
dtype
=
'float32'
)
map_eval
=
fluid
.
metrics
.
DetectionMAP
(
detect_res
,
label
,
box
,
class_num
=
21
)
cur_map
,
accm_map
=
map_eval
.
get_map_var
()
self
.
assertIsNotNone
(
cur_map
)
self
.
assertIsNotNone
(
accm_map
)
print
(
str
(
program
))
if
__name__
==
'__main__'
:
unittest
.
main
()
python/paddle/fluid/tests/unittests/test_momentum_op.py
浏览文件 @
e0a89503
...
@@ -90,6 +90,45 @@ class TestMomentumOp2(OpTest):
...
@@ -90,6 +90,45 @@ class TestMomentumOp2(OpTest):
self
.
check_output
()
self
.
check_output
()
class
TestLarsMomentumOp
(
OpTest
):
def
setUp
(
self
):
self
.
op_type
=
"lars_momentum"
param
=
np
.
random
.
random
((
123
,
321
)).
astype
(
"float32"
)
grad
=
np
.
random
.
random
((
123
,
321
)).
astype
(
"float32"
)
velocity
=
np
.
zeros
((
123
,
321
)).
astype
(
"float32"
)
learning_rate
=
np
.
array
([
0.001
]).
astype
(
"float32"
)
mu
=
0.0001
lars_coeff
=
0.001
lars_weight_decay
=
0.0005
self
.
inputs
=
{
'Param'
:
param
,
'Grad'
:
grad
,
'Velocity'
:
velocity
,
'LearningRate'
:
learning_rate
}
self
.
attrs
=
{
'mu'
:
mu
,
'lars_coeff'
:
lars_coeff
,
'lars_weight_decay'
:
lars_weight_decay
}
pnorm
=
np
.
sqrt
(
np
.
square
(
param
).
sum
())
gnorm
=
np
.
sqrt
(
np
.
square
(
grad
).
sum
())
local_lr
=
learning_rate
*
lars_coeff
*
pnorm
/
(
gnorm
+
lars_weight_decay
*
param
)
velocity_out
=
mu
*
velocity
+
local_lr
*
(
grad
+
lars_weight_decay
*
param
)
param_out
=
param
-
velocity_out
self
.
outputs
=
{
'ParamOut'
:
param_out
,
'VelocityOut'
:
velocity_out
}
def
test_check_output
(
self
):
self
.
check_output
()
class
TestSparseMomentumOp
(
unittest
.
TestCase
):
class
TestSparseMomentumOp
(
unittest
.
TestCase
):
def
setUp
(
self
):
def
setUp
(
self
):
self
.
use_nesterov
=
False
self
.
use_nesterov
=
False
...
...
python/paddle/fluid/tests/unittests/test_sequence_reverse.py
0 → 100644
浏览文件 @
e0a89503
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import
unittest
import
paddle.fluid
as
fluid
import
paddle.fluid.core
as
core
from
op_test
import
OpTest
import
numpy
as
np
class
TestSequenceReverseBase
(
OpTest
):
def
initParameters
(
self
):
pass
def
setUp
(
self
):
self
.
size
=
(
10
,
3
,
4
)
self
.
lod
=
[
2
,
3
,
5
]
self
.
dtype
=
'float32'
self
.
initParameters
()
self
.
op_type
=
'sequence_reverse'
self
.
x
=
np
.
random
.
random
(
self
.
size
).
astype
(
self
.
dtype
)
self
.
y
=
self
.
get_output
()
self
.
inputs
=
{
'X'
:
(
self
.
x
,
[
self
.
lod
,
]),
}
self
.
outputs
=
{
'Y'
:
(
self
.
y
,
[
self
.
lod
,
]),
}
def
get_output
(
self
):
tmp_x
=
np
.
reshape
(
self
.
x
,
newshape
=
[
self
.
x
.
shape
[
0
],
-
1
])
tmp_y
=
np
.
ndarray
(
tmp_x
.
shape
).
astype
(
self
.
dtype
)
prev_idx
=
0
for
cur_len
in
self
.
lod
:
idx_range
=
range
(
prev_idx
,
prev_idx
+
cur_len
)
tmp_y
[
idx_range
,
:]
=
np
.
flip
(
tmp_x
[
idx_range
,
:],
0
)
prev_idx
+=
cur_len
return
np
.
reshape
(
tmp_y
,
newshape
=
self
.
x
.
shape
).
astype
(
self
.
dtype
)
def
test_output
(
self
):
self
.
check_output
(
0
)
def
test_grad
(
self
):
self
.
check_grad
([
'X'
],
'Y'
)
class
TestSequenceReserve1
(
TestSequenceReverseBase
):
def
initParameters
(
self
):
self
.
size
=
(
12
,
10
)
self
.
lod
=
[
4
,
5
,
3
]
class
TestSequenceReverse2
(
TestSequenceReverseBase
):
def
initParameters
(
self
):
self
.
size
=
(
12
,
10
)
self
.
lod
=
[
12
]
if
__name__
==
'__main__'
:
unittest
.
main
()
python/paddle/fluid/transpiler/distribute_transpiler.py
浏览文件 @
e0a89503
...
@@ -1431,7 +1431,7 @@ to transpile() call.")
...
@@ -1431,7 +1431,7 @@ to transpile() call.")
elif
op_type
==
"adamax"
:
elif
op_type
==
"adamax"
:
if
varkey
in
[
"Moment"
,
"InfNorm"
]:
if
varkey
in
[
"Moment"
,
"InfNorm"
]:
return
param_shape
return
param_shape
elif
op_type
==
"momentum"
:
elif
op_type
in
[
"momentum"
,
"lars_momentum"
]
:
if
varkey
==
"Velocity"
:
if
varkey
==
"Velocity"
:
return
param_shape
return
param_shape
elif
op_type
==
"rmsprop"
:
elif
op_type
==
"rmsprop"
:
...
@@ -1442,6 +1442,10 @@ to transpile() call.")
...
@@ -1442,6 +1442,10 @@ to transpile() call.")
return
param_shape
return
param_shape
elif
op_type
==
"sgd"
:
elif
op_type
==
"sgd"
:
pass
pass
else
:
raise
ValueError
(
"Not supported optimizer for distributed training: %s"
%
op_type
)
return
orig_shape
return
orig_shape
def
_get_varname_parts
(
self
,
varname
):
def
_get_varname_parts
(
self
,
varname
):
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录